pinecone.data.features.inference.inference_asyncio

from typing import Optional, Dict, List, Union, Any

from pinecone.core.openapi.inference.api.inference_api import AsyncioInferenceApi
from .models import EmbeddingsList, RerankResult

from .inference_request_builder import (
    InferenceRequestBuilder,
    EmbedModel as EmbedModelEnum,
    RerankModel as RerankModelEnum,
)


class AsyncioInference:
    """
    The `AsyncioInference` class configures and uses the Pinecone Inference API to generate embeddings and
    rank documents.

    This class is generally not instantiated directly, but rather accessed through a parent `PineconeAsyncio`
    client object that is responsible for managing shared configurations.

    ```python
    from pinecone import PineconeAsyncio

    pc = PineconeAsyncio()
    embeddings = await pc.inference.embed(
        model="multilingual-e5-large",
        inputs=["Hello, world!"],
        parameters={"input_type": "passage", "truncate": "END"}
    )
    ```

    :param api_client: An OpenAPI client object, configured and built by the parent `PineconeAsyncio` class.
    :type api_client: required
    """

    EmbedModel = EmbedModelEnum
    RerankModel = RerankModelEnum

    def __init__(self, api_client, **kwargs) -> None:
        self.api_client = api_client
        """ @private """

        self.__inference_api = AsyncioInferenceApi(api_client)
        """ @private """

    async def embed(
        self,
        model: str,
        inputs: Union[str, List[Dict], List[str]],
        parameters: Optional[Dict[str, Any]] = None,
    ) -> EmbeddingsList:
        """
        Generates embeddings for the provided inputs using the specified model and (optional) parameters.

        :param model: The model to use for generating embeddings.
        :type model: str, required

        :param inputs: A list of items to generate embeddings for.
        :type inputs: list, required

        :param parameters: A dictionary of parameters to use when generating embeddings.
        :type parameters: dict, optional

        :return: An `EmbeddingsList` object with keys `data`, `model`, and `usage`. The `data` key contains one
        `Embedding` per input, so the list has `len(inputs)` entries. The precision of the returned embeddings is
        either float16 or float32, with float32 being the default. The `model` key is the model used to generate
        the embeddings, and the `usage` key contains the total number of tokens used at request time.

        Example:
        >>> inputs = ["Who created the first computer?"]
        >>> outputs = await pc.inference.embed(model="multilingual-e5-large", inputs=inputs, parameters={"input_type": "passage", "truncate": "END"})
        >>> print(outputs)
        EmbeddingsList(
            model='multilingual-e5-large',
            data=[
                {'values': [0.1, ..., 0.2]},
            ],
            usage={'total_tokens': 6}
        )
        """
        request_body = InferenceRequestBuilder.embed_request(
            model=model, inputs=inputs, parameters=parameters
        )
        resp = await self.__inference_api.embed(embed_request=request_body)
        return EmbeddingsList(resp)

    async def rerank(
        self,
        model: str,
        query: str,
        documents: Union[List[str], List[Dict[str, Any]]],
        rank_fields: List[str] = ["text"],
        return_documents: bool = True,
        top_n: Optional[int] = None,
        parameters: Optional[Dict[str, Any]] = None,
    ) -> RerankResult:
        """
        Ranks documents by their relevance to the provided query using the specified model, returning a
        relevance score for each document.

        :param model: The model to use for reranking.
        :type model: str, required

        :param query: The query to compare with documents.
        :type query: str, required

        :param documents: A list of documents or strings to rank.
        :type documents: list, required

        :param rank_fields: A list of document fields to use for ranking. Defaults to ["text"].
        :type rank_fields: list, optional

        :param return_documents: Whether to include the documents in the response. Defaults to True.
        :type return_documents: bool, optional

        :param top_n: How many documents to return. Defaults to len(documents).
        :type top_n: int, optional

        :param parameters: A dictionary of parameters to use when ranking documents.
        :type parameters: dict, optional

        :return: A `RerankResult` object with keys `data` and `usage`. The `data` key contains a list of up to
        `top_n` ranked documents, sorted by relevance with the most relevant first. Each entry's `index` field
        locates the document in the original `documents` list, and its `score` field indicates how closely the
        document relates to the query.

        Example:
        >>> result = await pc.inference.rerank(
                model="bge-reranker-v2-m3",
                query="Tell me about tech companies",
                documents=[
                    "Apple is a popular fruit known for its sweetness and crisp texture.",
                    "Software is still eating the world.",
                    "Many people enjoy eating apples as a healthy snack.",
                    "Acme Inc. has revolutionized the tech industry with its sleek designs and user-friendly interfaces.",
                    "An apple a day keeps the doctor away, as the saying goes.",
                ],
                top_n=2,
                return_documents=True,
            )
        >>> print(result)
        RerankResult(
          model='bge-reranker-v2-m3',
          data=[
            { index=3, score=0.020980744,
              document={text="Acme Inc. has rev..."} },
            { index=1, score=0.00034015716,
              document={text="Software is still..."} }
          ],
          usage={'rerank_units': 1}
        )
        """
        rerank_request = InferenceRequestBuilder.rerank(
            model=model,
            query=query,
            documents=documents,
            rank_fields=rank_fields,
            return_documents=return_documents,
            top_n=top_n,
            parameters=parameters,
        )
        resp = await self.__inference_api.rerank(rerank_request=rerank_request)
        return RerankResult(resp)
class AsyncioInference:

The AsyncioInference class configures and uses the Pinecone Inference API to generate embeddings and rank documents.

This class is generally not instantiated directly, but rather accessed through a parent PineconeAsyncio client object that is responsible for managing shared configurations.

from pinecone import PineconeAsyncio

pc = PineconeAsyncio()
embeddings = await pc.inference.embed(
    model="text-embedding-3-small",
    inputs=["Hello, world!"],
    parameters={"input_type": "passage", "truncate": "END"}
)
Parameters
  • api_client: An OpenAPI client object, configured and built by the parent PineconeAsyncio class.
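
A minimal end-to-end sketch of typical usage. It assumes the PINECONE_API_KEY environment variable is set and that PineconeAsyncio can be used as an async context manager, which closes the underlying HTTP session on exit:

import asyncio

from pinecone import PineconeAsyncio


async def main():
    # The client reads PINECONE_API_KEY from the environment when no api_key
    # argument is supplied (an assumption for this sketch).
    async with PineconeAsyncio() as pc:
        embeddings = await pc.inference.embed(
            model="multilingual-e5-large",
            inputs=["Hello, world!"],
            parameters={"input_type": "passage", "truncate": "END"},
        )
        print(len(embeddings.data))  # one embedding per input


asyncio.run(main())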
AsyncioInference(api_client, **kwargs)
async def embed(
    self,
    model: str,
    inputs: Union[str, List[Dict], List[str]],
    parameters: Optional[Dict[str, Any]] = None
) -> pinecone.data.features.inference.models.embedding_list.EmbeddingsList:

Generates embeddings for the provided inputs using the specified model and (optional) parameters.

Parameters
  • model: The model to use for generating embeddings.

  • inputs: A list of items to generate embeddings for.

  • parameters: A dictionary of parameters to use when generating embeddings.

Returns

An EmbeddingsList object with keys data, model, and usage. The data key contains one Embedding per input, so the list has len(inputs) entries. The precision of the returned embeddings is either float16 or float32, with float32 being the default. The model key is the model used to generate the embeddings, and the usage key contains the total number of tokens used at request time.

Example:

>>> inputs = ["Who created the first computer?"]
>>> outputs = await pc.inference.embed(model="multilingual-e5-large", inputs=inputs, parameters={"input_type": "passage", "truncate": "END"})
>>> print(outputs)
EmbeddingsList(
    model='multilingual-e5-large',
    data=[
        {'values': [0.1, ..., 0.2]},
    ],
    usage={'total_tokens': 6}
)
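
A short sketch of consuming the result. Based on the repr above, it assumes data is a list of dict-like Embedding objects whose dense vectors live under 'values':

outputs = await pc.inference.embed(
    model="multilingual-e5-large",
    inputs=["Who created the first computer?", "Who invented the telephone?"],
    parameters={"input_type": "passage", "truncate": "END"},
)
for i, embedding in enumerate(outputs.data):
    # multilingual-e5-large produces 1024-dimensional dense vectors
    print(i, len(embedding["values"]))
print(outputs.usage)  # total token count for the request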
async def rerank(
    self,
    model: str,
    query: str,
    documents: Union[List[str], List[Dict[str, Any]]],
    rank_fields: List[str] = ['text'],
    return_documents: bool = True,
    top_n: Optional[int] = None,
    parameters: Optional[Dict[str, Any]] = None
) -> pinecone.data.features.inference.models.rerank_result.RerankResult:

Ranks documents by their relevance to the provided query using the specified model, returning a relevance score for each document.

Parameters
  • model: The model to use for reranking.

  • query: The query to compare with documents.

  • documents: A list of documents or strings to rank.

  • rank_fields: A list of document fields to use for ranking. Defaults to ["text"].

  • return_documents: Whether to include the documents in the response. Defaults to True.

  • top_n: How many documents to return. Defaults to len(documents).

  • parameters: A dictionary of parameters to use when ranking documents.

Returns

A RerankResult object with keys data and usage. The data key contains a list of up to top_n ranked documents, sorted by relevance with the most relevant first. Each entry's index field locates the document in the original documents list passed in the request, and its score field indicates how closely the document relates to the query.

Example:

>>> result = await pc.inference.rerank(
        model="bge-reranker-v2-m3",
        query="Tell me about tech companies",
        documents=[
            "Apple is a popular fruit known for its sweetness and crisp texture.",
            "Software is still eating the world.",
            "Many people enjoy eating apples as a healthy snack.",
            "Acme Inc. has revolutionized the tech industry with its sleek designs and user-friendly interfaces.",
            "An apple a day keeps the doctor away, as the saying goes.",
        ],
        top_n=2,
        return_documents=True,
    )
>>> print(result)
RerankResult(
  model='bge-reranker-v2-m3',
  data=[
    { index=3, score=0.020980744,
      document={text="Acme Inc. has rev..."} },
    { index=1, score=0.00034015716,
      document={text="Software is still..."} }
  ],
  usage={'rerank_units': 1}
)
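
Because documents may also be dictionaries, rank_fields selects which field the model ranks on. A sketch under that assumption; the field name my_text and the ids are hypothetical:

result = await pc.inference.rerank(
    model="bge-reranker-v2-m3",
    query="Tell me about tech companies",
    documents=[
        {"id": "doc1", "my_text": "Acme Inc. has revolutionized the tech industry."},
        {"id": "doc2", "my_text": "Many people enjoy eating apples as a healthy snack."},
    ],
    rank_fields=["my_text"],  # rank on a custom field instead of the default "text"
    top_n=1,
)
for row in result.data:
    print(row["index"], row["score"])  # position in the input list, relevance score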
class AsyncioInference.EmbedModel(enum.Enum):
class EmbedModel(Enum):
    Multilingual_E5_Large = "multilingual-e5-large"
    Pinecone_Sparse_English_V0 = "pinecone-sparse-english-v0"

Create a collection of name/value pairs.

Example enumeration:

>>> class Color(Enum):
...     RED = 1
...     BLUE = 2
...     GREEN = 3

Access them by:

  • attribute access:
>>> Color.RED
<Color.RED: 1>
  • value lookup:
>>> Color(1)
<Color.RED: 1>
  • name lookup:
>>> Color['RED']
<Color.RED: 1>

Enumerations can be iterated over, and know how many members they have:

>>> len(Color)
3
>>> list(Color)
[<Color.RED: 1>, <Color.BLUE: 2>, <Color.GREEN: 3>]

Methods can be added to enumerations, and members can have their own attributes -- see the documentation for details.
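
The enum members can stand in for raw model-name strings. A small sketch passing the enum's string value to embed:

model_name = pc.inference.EmbedModel.Multilingual_E5_Large.value  # "multilingual-e5-large"
embeddings = await pc.inference.embed(
    model=model_name,
    inputs=["Hello, world!"],
    parameters={"input_type": "passage", "truncate": "END"},
)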

class AsyncioInference.RerankModel(enum.Enum):
class RerankModel(Enum):
    Bge_Reranker_V2_M3 = "bge-reranker-v2-m3"
    Cohere_Rerank_3_5 = "cohere-rerank-3.5"
    Pinecone_Rerank_V0 = "pinecone-rerank-v0"

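As with EmbedModel, the member values match the model names the API expects. A sketch passing the enum's value to rerank:

result = await pc.inference.rerank(
    model=pc.inference.RerankModel.Bge_Reranker_V2_M3.value,  # "bge-reranker-v2-m3"
    query="Tell me about tech companies",
    documents=["Software is still eating the world."],
)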