# pinecone/inference/inference_asyncio.py

from __future__ import annotations

from typing import Any, TYPE_CHECKING

from pinecone.core.openapi.inference.api.inference_api import AsyncioInferenceApi
from .models import EmbeddingsList, RerankResult, ModelInfoList, ModelInfo
from pinecone.utils import require_kwargs, parse_non_empty_args

from .inference_request_builder import (
    InferenceRequestBuilder,
    EmbedModel as EmbedModelEnum,
    RerankModel as RerankModelEnum,
)

if TYPE_CHECKING:
    from .resources.asyncio.model import ModelAsyncio as ModelAsyncioResource


class AsyncioInference:
    """
    The ``AsyncioInference`` class configures and uses the Pinecone Inference API to generate embeddings and
    rank documents.

    This class is generally not instantiated directly, but rather accessed through a parent ``PineconeAsyncio`` client
    object that is responsible for managing shared configurations.

    .. code-block:: python

        from pinecone import PineconeAsyncio

        async with PineconeAsyncio() as pc:
            embeddings = await pc.inference.embed(
                model="text-embedding-3-small",
                inputs=["Hello, world!"],
                parameters={"input_type": "passage", "truncate": "END"}
            )

    :param api_client: The configured OpenAPI ``ApiClient`` object, built and managed by the parent ``PineconeAsyncio`` class.
    :type api_client: ``ApiClient``, required
    """

    EmbedModel = EmbedModelEnum
    RerankModel = RerankModelEnum
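
    # These aliases let callers reference the model enums through the client, e.g.
    # ``pc.inference.EmbedModel.Multilingual_E5_Large`` (see the enum definitions in
    # ``inference_request_builder`` for the exact member names), without a separate import.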

    def __init__(self, api_client, **kwargs) -> None:
        self.api_client = api_client
        """ :meta private: """

        self._model: "ModelAsyncioResource | None" = None
        """ :meta private: """

        self.__inference_api = AsyncioInferenceApi(api_client)
        """ :meta private: """

    async def embed(
        self,
        model: str,
        inputs: str | list[dict[str, Any]] | list[str],
        parameters: dict[str, Any] | None = None,
    ) -> EmbeddingsList:
        """
        Generates embeddings for the provided inputs using the specified model and (optional) parameters.

        :param model: The model to use for generating embeddings.
        :type model: str, required

        :param inputs: A list of items to generate embeddings for.
        :type inputs: list, required

        :param parameters: A dictionary of parameters to use when generating embeddings.
        :type parameters: dict, optional

        :return: An ``EmbeddingsList`` object with keys ``data``, ``model``, and ``usage``. The
            ``data`` key contains a list of ``n`` embeddings, where ``n`` = ``len(inputs)``. The
            precision of the returned embeddings is either float16 or float32, with float32 being
            the default. The ``model`` key is the model used to generate the embeddings. The
            ``usage`` key contains the total number of tokens used at request-time.
        :rtype: EmbeddingsList

        .. code-block:: python

            import asyncio
            from pinecone import PineconeAsyncio

            async def main():
                async with PineconeAsyncio() as pc:
                    inputs = ["Who created the first computer?"]
                    outputs = await pc.inference.embed(
                        model="multilingual-e5-large",
                        inputs=inputs,
                        parameters={"input_type": "passage", "truncate": "END"}
                    )
                    print(outputs)
                    # EmbeddingsList(
                    #     model='multilingual-e5-large',
                    #     data=[
                    #         {'values': [0.1, ..., 0.2]},
                    #     ],
                    #     usage={'total_tokens': 6}
                    # )

            asyncio.run(main())

        You can also pass a single string as input:

        .. code-block:: python

            import asyncio
            from pinecone import PineconeAsyncio

            async def main():
                async with PineconeAsyncio() as pc:
                    output = await pc.inference.embed(
                        model="multilingual-e5-large",
                        inputs="Hello, world!"
                    )

            asyncio.run(main())

        Or use the ``EmbedModel`` enum:

        .. code-block:: python

            import asyncio
            from pinecone import PineconeAsyncio
            from pinecone.inference import EmbedModel

            async def main():
                async with PineconeAsyncio() as pc:
                    outputs = await pc.inference.embed(
                        model=EmbedModel.Multilingual_E5_Large,
                        inputs=["Document 1", "Document 2"]
                    )

            asyncio.run(main())
        """
        request_body = InferenceRequestBuilder.embed_request(
            model=model, inputs=inputs, parameters=parameters
        )
        resp = await self.__inference_api.embed(embed_request=request_body)
        return EmbeddingsList(resp)
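
    # A minimal sketch of consuming the returned ``EmbeddingsList`` (assuming, as the
    # repr in the docstring above suggests, that each entry in ``.data`` exposes its
    # vector under ``values``):
    #
    #     outputs = await pc.inference.embed(
    #         model="multilingual-e5-large", inputs=["Hello, world!"]
    #     )
    #     vectors = [e["values"] for e in outputs.data]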

    @property
    def model(self) -> "ModelAsyncioResource":
        """
        ``model`` is a resource that describes the models available through the Pinecone
        Inference API.

        Currently you can get or list models.

        .. code-block:: python

            import asyncio
            from pinecone import PineconeAsyncio

            async def main():
                async with PineconeAsyncio() as pc:
                    # List all models
                    models = await pc.inference.model.list()

                    # List models, with model type filtering
                    models = await pc.inference.model.list(type="embed")
                    models = await pc.inference.model.list(type="rerank")

                    # List models, with vector type filtering
                    models = await pc.inference.model.list(vector_type="dense")
                    models = await pc.inference.model.list(vector_type="sparse")

                    # List models, with both type and vector type filtering
                    models = await pc.inference.model.list(type="rerank", vector_type="dense")

                    # Get details on a specific model
                    model = await pc.inference.model.get("multilingual-e5-large")

            asyncio.run(main())
        """
        if self._model is None:
            from .resources.asyncio.model import ModelAsyncio as ModelAsyncioResource

            self._model = ModelAsyncioResource(inference_api=self.__inference_api)
        return self._model

    async def rerank(
        self,
        model: str,
        query: str,
        documents: list[str] | list[dict[str, Any]],
        rank_fields: list[str] = ["text"],
        return_documents: bool = True,
        top_n: int | None = None,
        parameters: dict[str, Any] | None = None,
    ) -> RerankResult:
        """
        Rerank documents with associated relevance scores that represent the relevance of each
        document to the provided query using the specified model.

        :param model: The model to use for reranking.
        :type model: str, required

        :param query: The query to compare with documents.
        :type query: str, required

        :param documents: A list of documents or strings to rank.
        :type documents: list, required

        :param rank_fields: A list of document fields to use for ranking. Defaults to ``["text"]``.
        :type rank_fields: list, optional

        :param return_documents: Whether to include the documents in the response. Defaults to ``True``.
        :type return_documents: bool, optional

        :param top_n: How many documents to return. Defaults to ``len(documents)``.
        :type top_n: int, optional

        :param parameters: A dictionary of parameters to use when ranking documents.
        :type parameters: dict, optional

        :return: A ``RerankResult`` object with keys ``data`` and ``usage``. The ``data`` key
            contains a list of ``n`` documents, where ``n`` = ``top_n``. The documents are sorted
            in order of relevance, with the first being the most relevant. The ``index`` field can
            be used to locate the document relative to the list of documents specified in the
            request. Each document contains a ``score`` key representing how closely the document
            relates to the query.
        :rtype: RerankResult

        .. code-block:: python

            import asyncio
            from pinecone import PineconeAsyncio

            async def main():
                async with PineconeAsyncio() as pc:
                    result = await pc.inference.rerank(
                        model="bge-reranker-v2-m3",
                        query="Tell me about tech companies",
                        documents=[
                            "Apple is a popular fruit known for its sweetness and crisp texture.",
                            "Software is still eating the world.",
                            "Many people enjoy eating apples as a healthy snack.",
                            "Acme Inc. has revolutionized the tech industry with its sleek designs and user-friendly interfaces.",
                            "An apple a day keeps the doctor away, as the saying goes.",
                        ],
                        top_n=2,
                        return_documents=True,
                    )
                    print(result)
                    # RerankResult(
                    #     model='bge-reranker-v2-m3',
                    #     data=[
                    #         { index=3, score=0.020980744,
                    #           document={text="Acme Inc. has rev..."} },
                    #         { index=1, score=0.00034015716,
                    #           document={text="Software is still..."} }
                    #     ],
                    #     usage={'rerank_units': 1}
                    # )

            asyncio.run(main())

        You can also pass documents as dictionaries with custom fields:

        .. code-block:: python

            import asyncio
            from pinecone import PineconeAsyncio

            async def main():
                async with PineconeAsyncio() as pc:
                    result = await pc.inference.rerank(
                        model="pinecone-rerank-v0",
                        query="What is machine learning?",
                        documents=[
                            {"text": "Machine learning is a subset of AI.", "category": "tech"},
                            {"text": "Cooking recipes for pasta.", "category": "food"},
                        ],
                        rank_fields=["text"],
                        top_n=1
                    )

            asyncio.run(main())

        Or use the ``RerankModel`` enum:

        .. code-block:: python

            import asyncio
            from pinecone import PineconeAsyncio
            from pinecone.inference import RerankModel

            async def main():
                async with PineconeAsyncio() as pc:
                    result = await pc.inference.rerank(
                        model=RerankModel.Pinecone_Rerank_V0,
                        query="Your query here",
                        documents=["doc1", "doc2", "doc3"]
                    )

            asyncio.run(main())
        """
        rerank_request = InferenceRequestBuilder.rerank(
            model=model,
            query=query,
            documents=documents,
            rank_fields=rank_fields,
            return_documents=return_documents,
            top_n=top_n,
            parameters=parameters,
        )
        resp = await self.__inference_api.rerank(rerank_request=rerank_request)
        return RerankResult(resp)
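
    # A minimal sketch of mapping reranked results back to the original inputs
    # (assuming, per the repr in the docstring above, that each entry in ``.data``
    # carries ``index`` and ``score`` fields):
    #
    #     result = await pc.inference.rerank(
    #         model="bge-reranker-v2-m3", query=query, documents=documents
    #     )
    #     for row in result.data:
    #         print(row.index, row.score, documents[row.index])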

    @require_kwargs
    async def list_models(
        self, *, type: str | None = None, vector_type: str | None = None
    ) -> ModelInfoList:
        """
        List all available models.

        :param type: The type of model to list. Either "embed" or "rerank".
        :type type: str, optional

        :param vector_type: The type of vector to list. Either "dense" or "sparse".
        :type vector_type: str, optional

        :return: A list of models.
        :rtype: ModelInfoList

        .. code-block:: python

            import asyncio
            from pinecone import PineconeAsyncio

            async def main():
                async with PineconeAsyncio() as pc:
                    # List all models
                    models = await pc.inference.list_models()

                    # List models, with model type filtering
                    models = await pc.inference.list_models(type="embed")
                    models = await pc.inference.list_models(type="rerank")

                    # List models, with vector type filtering
                    models = await pc.inference.list_models(vector_type="dense")
                    models = await pc.inference.list_models(vector_type="sparse")

                    # List models, with both type and vector type filtering
                    models = await pc.inference.list_models(type="rerank", vector_type="dense")

            asyncio.run(main())
        """
        args = parse_non_empty_args([("type", type), ("vector_type", vector_type)])
        resp = await self.__inference_api.list_models(**args)
        return ModelInfoList(resp)

    @require_kwargs
    async def get_model(self, model_name: str) -> ModelInfo:
        """
        Get details on a specific model.

        :param model_name: The name of the model to get details on.
        :type model_name: str, required

        :return: A ``ModelInfo`` object.
        :rtype: ModelInfo

        .. code-block:: python

            import asyncio
            from pinecone import PineconeAsyncio

            async def main():
                async with PineconeAsyncio() as pc:
                    model_info = await pc.inference.get_model(model_name="multilingual-e5-large")
                    print(model_info)
                    # {
                    #     "model": "multilingual-e5-large",
                    #     "short_description": "...",
                    #     "type": "embed",
                    #     ...
                    # }

            asyncio.run(main())
        """
        resp = await self.__inference_api.get_model(model_name=model_name)
        return ModelInfo(resp)
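
    # Note: the ``@require_kwargs`` decorator on ``list_models`` and ``get_model``
    # enforces keyword-argument calls, so a positional call such as
    # ``pc.inference.get_model("multilingual-e5-large")`` would (presumably) be
    # rejected; call ``pc.inference.get_model(model_name="multilingual-e5-large")``.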