# pinecone.data.features.inference.inference_asyncio
````python
from typing import Optional, Dict, List, Union, Any

from pinecone.core.openapi.inference.api.inference_api import AsyncioInferenceApi
from .models import EmbeddingsList, RerankResult

from .inference_request_builder import (
    InferenceRequestBuilder,
    EmbedModel as EmbedModelEnum,
    RerankModel as RerankModelEnum,
)


class AsyncioInference:
    """
    The `AsyncioInference` class configures and uses the Pinecone Inference API to generate embeddings and
    rank documents.

    This class is generally not instantiated directly, but rather accessed through a parent `Pinecone` client
    object that is responsible for managing shared configurations.

    ```python
    from pinecone import PineconeAsyncio

    pc = PineconeAsyncio()
    embeddings = await pc.inference.embed(
        model="multilingual-e5-large",
        inputs=["Hello, world!"],
        parameters={"input_type": "passage", "truncate": "END"}
    )
    ```

    :param config: A `pinecone.config.Config` object, configured and built in the Pinecone class.
    :type config: `pinecone.config.Config`, required
    """

    EmbedModel = EmbedModelEnum
    RerankModel = RerankModelEnum

    def __init__(self, api_client, **kwargs) -> None:
        self.api_client = api_client
        """ @private """

        self.__inference_api = AsyncioInferenceApi(api_client)
        """ @private """

    async def embed(
        self,
        model: str,
        inputs: Union[str, List[Dict], List[str]],
        parameters: Optional[Dict[str, Any]] = None,
    ) -> EmbeddingsList:
        """
        Generates embeddings for the provided inputs using the specified model and (optional) parameters.

        :param model: The model to use for generating embeddings.
        :type model: str, required

        :param inputs: A list of items to generate embeddings for.
        :type inputs: list, required

        :param parameters: A dictionary of parameters to use when generating embeddings.
        :type parameters: dict, optional

        :return: An EmbeddingsList object with keys `data`, `model`, and `usage`. The `data` key contains a
            list of `n` Embedding objects, where `n` = len(inputs). Precision of returned embeddings is
            either float16 or float32, with float32 being the default. The `model` key is the model used to
            generate the embeddings. The `usage` key contains the total number of tokens used at request time.

        Example:
        >>> inputs = ["Who created the first computer?"]
        >>> outputs = await pc.inference.embed(model="multilingual-e5-large", inputs=inputs, parameters={"input_type": "passage", "truncate": "END"})
        >>> print(outputs)
        EmbeddingsList(
            model='multilingual-e5-large',
            data=[
                {'values': [0.1, ..., 0.2]},
            ],
            usage={'total_tokens': 6}
        )
        """
        request_body = InferenceRequestBuilder.embed_request(
            model=model, inputs=inputs, parameters=parameters
        )
        resp = await self.__inference_api.embed(embed_request=request_body)
        return EmbeddingsList(resp)

    async def rerank(
        self,
        model: str,
        query: str,
        documents: Union[List[str], List[Dict[str, Any]]],
        rank_fields: List[str] = ["text"],
        return_documents: bool = True,
        top_n: Optional[int] = None,
        parameters: Optional[Dict[str, Any]] = None,
    ) -> RerankResult:
        """
        Rerank documents with associated relevance scores that represent the relevance of each document
        to the provided query, using the specified model.

        :param model: The model to use for reranking.
        :type model: str, required

        :param query: The query to compare with documents.
        :type query: str, required

        :param documents: A list of documents or strings to rank.
        :type documents: list, required

        :param rank_fields: A list of document fields to use for ranking. Defaults to ["text"].
        :type rank_fields: list, optional

        :param return_documents: Whether to include the documents in the response. Defaults to True.
        :type return_documents: bool, optional

        :param top_n: How many documents to return. Defaults to len(documents).
        :type top_n: int, optional

        :param parameters: A dictionary of parameters to use when ranking documents.
        :type parameters: dict, optional

        :return: A RerankResult object with keys `data` and `usage`. The `data` key contains a list of
            `n` Document objects, where `n` = top_n. The documents are sorted in order of relevance, with
            the first being the most relevant. The `index` field can be used to locate each document
            relative to the list of documents specified in the request. Each document carries a `score`
            representing how closely it relates to the query.

        Example:
        >>> result = await pc.inference.rerank(
                model="bge-reranker-v2-m3",
                query="Tell me about tech companies",
                documents=[
                    "Apple is a popular fruit known for its sweetness and crisp texture.",
                    "Software is still eating the world.",
                    "Many people enjoy eating apples as a healthy snack.",
                    "Acme Inc. has revolutionized the tech industry with its sleek designs and user-friendly interfaces.",
                    "An apple a day keeps the doctor away, as the saying goes.",
                ],
                top_n=2,
                return_documents=True,
            )
        >>> print(result)
        RerankResult(
            model='bge-reranker-v2-m3',
            data=[
                { index=3, score=0.020980744,
                  document={text="Acme Inc. has rev..."} },
                { index=1, score=0.00034015716,
                  document={text="Software is still..."} }
            ],
            usage={'rerank_units': 1}
        )
        """
        rerank_request = InferenceRequestBuilder.rerank(
            model=model,
            query=query,
            documents=documents,
            rank_fields=rank_fields,
            return_documents=return_documents,
            top_n=top_n,
            parameters=parameters,
        )
        resp = await self.__inference_api.rerank(rerank_request=rerank_request)
        return RerankResult(resp)
````
## class AsyncioInference

The `AsyncioInference` class configures and uses the Pinecone Inference API to generate embeddings and rank documents.

This class is generally not instantiated directly, but rather accessed through a parent `Pinecone` client object that is responsible for managing shared configurations.

```python
from pinecone import PineconeAsyncio

pc = PineconeAsyncio()
embeddings = await pc.inference.embed(
    model="multilingual-e5-large",
    inputs=["Hello, world!"],
    parameters={"input_type": "passage", "truncate": "END"}
)
```

Parameters

- config: A `pinecone.config.Config` object, configured and built in the `Pinecone` class.
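The example above assumes it is already running inside a coroutine on an active event loop. As a minimal, self-contained sketch (assuming the `PINECONE_API_KEY` environment variable is set so `PineconeAsyncio()` can pick up credentials), the same call can be driven with `asyncio.run`:

```python
import asyncio

from pinecone import PineconeAsyncio

async def main():
    # Mirrors the example above; assumes PINECONE_API_KEY is set in the
    # environment (or pass api_key=... to PineconeAsyncio explicitly).
    pc = PineconeAsyncio()
    embeddings = await pc.inference.embed(
        model="multilingual-e5-large",
        inputs=["Hello, world!"],
        parameters={"input_type": "passage", "truncate": "END"},
    )
    print(embeddings)

asyncio.run(main())
```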
### embed

```python
async def embed(
    self,
    model: str,
    inputs: Union[str, List[Dict], List[str]],
    parameters: Optional[Dict[str, Any]] = None,
) -> EmbeddingsList:
```
Generates embeddings for the provided inputs using the specified model and (optional) parameters.

Parameters

- model: The model to use for generating embeddings.
- inputs: A list of items to generate embeddings for.
- parameters: A dictionary of parameters to use when generating embeddings.

Returns

An `EmbeddingsList` object with keys `data`, `model`, and `usage`. The `data` key contains a list of `n` `Embedding` objects, where `n = len(inputs)`. The precision of returned embeddings is either float16 or float32, with float32 being the default. The `model` key is the model used to generate the embeddings, and the `usage` key contains the total number of tokens used at request time.

Example:

```python
>>> inputs = ["Who created the first computer?"]
>>> outputs = await pc.inference.embed(
...     model="multilingual-e5-large",
...     inputs=inputs,
...     parameters={"input_type": "passage", "truncate": "END"},
... )
>>> print(outputs)
EmbeddingsList(
    model='multilingual-e5-large',
    data=[
        {'values': [0.1, ..., 0.2]},
    ],
    usage={'total_tokens': 6}
)
```
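Because `embed` is a coroutine, several batches of inputs can be embedded concurrently with `asyncio.gather`. A minimal sketch; the `embed_in_batches` helper name and the 96-item batch size are illustrative assumptions, not part of the client API:

```python
import asyncio

async def embed_in_batches(pc, texts, batch_size=96):
    # Illustrative helper: split inputs into fixed-size batches and embed
    # them concurrently. The batch size is an assumption, not a documented
    # API limit.
    batches = [texts[i:i + batch_size] for i in range(0, len(texts), batch_size)]
    results = await asyncio.gather(*(
        pc.inference.embed(
            model="multilingual-e5-large",
            inputs=batch,
            parameters={"input_type": "passage", "truncate": "END"},
        )
        for batch in batches
    ))
    # Flatten the per-batch EmbeddingsList objects into one list of embeddings.
    return [embedding for result in results for embedding in result.data]
```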
### rerank

```python
async def rerank(
    self,
    model: str,
    query: str,
    documents: Union[List[str], List[Dict[str, Any]]],
    rank_fields: List[str] = ["text"],
    return_documents: bool = True,
    top_n: Optional[int] = None,
    parameters: Optional[Dict[str, Any]] = None,
) -> RerankResult:
```
Rerank documents with associated relevance scores that represent the relevance of each document to the provided query, using the specified model.

Parameters

- model: The model to use for reranking.
- query: The query to compare with documents.
- documents: A list of documents or strings to rank.
- rank_fields: A list of document fields to use for ranking. Defaults to `["text"]`.
- return_documents: Whether to include the documents in the response. Defaults to `True`.
- top_n: How many documents to return. Defaults to `len(documents)`.
- parameters: A dictionary of parameters to use when ranking documents.

Returns

A `RerankResult` object with keys `data` and `usage`. The `data` key contains a list of `n` `Document` objects, where `n = top_n`. The documents are sorted in order of relevance, with the first being the most relevant. The `index` field can be used to locate each document relative to the list of documents specified in the request. Each document carries a `score` representing how closely it relates to the query.

Example:

```python
>>> result = await pc.inference.rerank(
...     model="bge-reranker-v2-m3",
...     query="Tell me about tech companies",
...     documents=[
...         "Apple is a popular fruit known for its sweetness and crisp texture.",
...         "Software is still eating the world.",
...         "Many people enjoy eating apples as a healthy snack.",
...         "Acme Inc. has revolutionized the tech industry with its sleek designs and user-friendly interfaces.",
...         "An apple a day keeps the doctor away, as the saying goes.",
...     ],
...     top_n=2,
...     return_documents=True,
... )
>>> print(result)
RerankResult(
    model='bge-reranker-v2-m3',
    data=[
        { index=3, score=0.020980744,
          document={text="Acme Inc. has rev..."} },
        { index=1, score=0.00034015716,
          document={text="Software is still..."} }
    ],
    usage={'rerank_units': 1}
)
```
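When `documents` are dictionaries rather than strings, `rank_fields` selects which field the model scores. A sketch under that assumption; the `id` and `summary` field names are hypothetical, and note that some rerank models accept only a single rank field:

```python
docs = [
    {"id": "a", "summary": "Acme Inc. ships developer-focused hardware."},
    {"id": "b", "summary": "Apples are a popular autumn fruit."},
]
result = await pc.inference.rerank(
    model="bge-reranker-v2-m3",
    query="Which company makes hardware?",
    documents=docs,
    rank_fields=["summary"],  # score the 'summary' field instead of the default 'text'
    top_n=1,
)
for row in result.data:
    # row.index points back into the original docs list
    print(row.index, row.score)
```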
## class EmbedModel

```python
class EmbedModel(Enum):
    Multilingual_E5_Large = "multilingual-e5-large"
    Pinecone_Sparse_English_V0 = "pinecone-sparse-english-v0"
```
`EmbedModel` is a standard `enum.Enum` listing the embedding models available through the Inference API; members can be accessed by attribute (`EmbedModel.Multilingual_E5_Large`), looked up by value (`EmbedModel("multilingual-e5-large")`), or iterated over with `list(EmbedModel)`.
## class RerankModel

```python
class RerankModel(Enum):
    Bge_Reranker_V2_M3 = "bge-reranker-v2-m3"
    Cohere_Rerank_3_5 = "cohere-rerank-3.5"
    Pinecone_Rerank_V0 = "pinecone-rerank-v0"
```
`RerankModel` is likewise a standard `enum.Enum`, listing the reranking models available through the Inference API, and supports the same attribute access, value lookup, and iteration.
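Both enums are also exposed as attributes of the inference object (see `EmbedModel = EmbedModelEnum` in the class body above), so model names can be referenced without raw strings. A sketch; whether `embed` accepts an enum member directly is not shown in the source, so `.value` (a plain string) is used to be safe:

```python
# Runs inside a coroutine, with `pc` a PineconeAsyncio instance.
print(list(pc.inference.RerankModel))       # enumerate known rerank models

embed_model = pc.inference.EmbedModel.Multilingual_E5_Large
embeddings = await pc.inference.embed(
    model=embed_model.value,                # "multilingual-e5-large"
    inputs=["Hello, world!"],
    parameters={"input_type": "passage"},
)
```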