pinecone.data.features.inference.inference
import logging
from typing import Optional, Dict, List, Union, Any

from pinecone.openapi_support import ApiClient
from pinecone.core.openapi.inference.apis import InferenceApi
from .models import EmbeddingsList, RerankResult
from pinecone.core.openapi.inference import API_VERSION
from pinecone.utils import setup_openapi_client, PluginAware


from .inference_request_builder import (
    InferenceRequestBuilder,
    EmbedModel as EmbedModelEnum,
    RerankModel as RerankModelEnum,
)

logger = logging.getLogger(__name__)
""" @private """


class Inference(PluginAware):
    """
    The `Inference` class configures and uses the Pinecone Inference API to generate embeddings and
    rank documents.

    It is generally not instantiated directly, but rather accessed through a parent `Pinecone` client
    object that is responsible for managing shared configurations.

    ```python
    from pinecone import Pinecone

    pc = Pinecone()
    embeddings = pc.inference.embed(
        model="text-embedding-3-small",
        inputs=["Hello, world!"],
        parameters={"input_type": "passage", "truncate": "END"}
    )
    ```

    :param config: A `pinecone.config.Config` object, configured and built in the Pinecone class.
    :type config: `pinecone.config.Config`, required
    """

    EmbedModel = EmbedModelEnum
    RerankModel = RerankModelEnum

    def __init__(self, config, openapi_config, **kwargs) -> None:
        self.config = config
        """ @private """

        self.openapi_config = openapi_config
        """ @private """

        self.pool_threads = kwargs.get("pool_threads", 1)
        """ @private """

        # Low-level generated API client that performs the HTTP calls for
        # embed() and rerank(); pinned to this package's API_VERSION.
        self.__inference_api = setup_openapi_client(
            api_client_klass=ApiClient,
            api_klass=InferenceApi,
            config=config,
            openapi_config=openapi_config,
            # Reuse the value already read into self.pool_threads rather than
            # reading kwargs a second time (keeps the two in lock-step).
            pool_threads=self.pool_threads,
            api_version=API_VERSION,
        )

        self.load_plugins(
            config=self.config, openapi_config=self.openapi_config, pool_threads=self.pool_threads
        )

    def embed(
        self,
        model: Union[EmbedModelEnum, str],
        inputs: Union[str, List[Dict], List[str]],
        parameters: Optional[Dict[str, Any]] = None,
    ) -> EmbeddingsList:
        """
        Generates embeddings for the provided inputs using the specified model and (optional) parameters.

        :param model: The model to use for generating embeddings.
        :type model: str, required

        :param inputs: A list of items to generate embeddings for.
        :type inputs: list, required

        :param parameters: A dictionary of parameters to use when generating embeddings.
        :type parameters: dict, optional

        :return: EmbeddingsList object with keys `data`, `model`, and `usage`. The `data` key contains a list of
        `n` embeddings, where `n` = len(inputs) and type(n) = Embedding. Precision of returned embeddings is either
        float16 or float32, with float32 being the default. `model` key is the model used to generate the embeddings.
        `usage` key contains the total number of tokens used at request-time.

        Example:
        >>> inputs = ["Who created the first computer?"]
        >>> outputs = pc.inference.embed(model="multilingual-e5-large", inputs=inputs, parameters={"input_type": "passage", "truncate": "END"})
        >>> print(outputs)
        EmbeddingsList(
            model='multilingual-e5-large',
            data=[
                {'values': [0.1, ...., 0.2]},
            ],
            usage={'total_tokens': 6}
        )
        """
        request_body = InferenceRequestBuilder.embed_request(
            model=model, inputs=inputs, parameters=parameters
        )
        resp = self.__inference_api.embed(embed_request=request_body)
        return EmbeddingsList(resp)

    def rerank(
        self,
        model: Union[RerankModelEnum, str],
        query: str,
        documents: Union[List[str], List[Dict[str, Any]]],
        rank_fields: Optional[List[str]] = None,
        return_documents: bool = True,
        top_n: Optional[int] = None,
        parameters: Optional[Dict[str, Any]] = None,
    ) -> RerankResult:
        """
        Rerank documents with associated relevance scores that represent the relevance of each document
        to the provided query using the specified model.

        :param model: The model to use for reranking.
        :type model: str, required

        :param query: The query to compare with documents.
        :type query: str, required

        :param documents: A list of documents or strings to rank.
        :type documents: list, required

        :param rank_fields: A list of document fields to use for ranking. Defaults to ["text"].
        :type rank_fields: list, optional

        :param return_documents: Whether to include the documents in the response. Defaults to True.
        :type return_documents: bool, optional

        :param top_n: How many documents to return. Defaults to len(documents).
        :type top_n: int, optional

        :param parameters: A dictionary of parameters to use when ranking documents.
        :type parameters: dict, optional

        :return: RerankResult object with keys `data` and `usage`. The `data` key contains a list of
        `n` documents, where `n` = `top_n` and type(n) = Document. The documents are sorted in order of
        relevance, with the first being the most relevant. The `index` field can be used to locate the document
        relative to the list of documents specified in the request. Each document contains a `score` key
        representing how close the document relates to the query.

        Example:
        >>> result = pc.inference.rerank(
                model="bge-reranker-v2-m3",
                query="Tell me about tech companies",
                documents=[
                    "Apple is a popular fruit known for its sweetness and crisp texture.",
                    "Software is still eating the world.",
                    "Many people enjoy eating apples as a healthy snack.",
                    "Acme Inc. has revolutionized the tech industry with its sleek designs and user-friendly interfaces.",
                    "An apple a day keeps the doctor away, as the saying goes.",
                ],
                top_n=2,
                return_documents=True,
            )
        >>> print(result)
        RerankResult(
          model='bge-reranker-v2-m3',
          data=[
            { index=3, score=0.020980744,
              document={text="Acme Inc. has rev..."} },
            { index=1, score=0.00034015716,
              document={text="Software is still..."} }
          ],
          usage={'rerank_units': 1}
        )
        """
        # Avoid a mutable default argument (shared list across calls); fall
        # back to the documented default here instead. Behavior for callers
        # who omit rank_fields is unchanged.
        if rank_fields is None:
            rank_fields = ["text"]
        rerank_request = InferenceRequestBuilder.rerank(
            model=model,
            query=query,
            documents=documents,
            rank_fields=rank_fields,
            return_documents=return_documents,
            top_n=top_n,
            parameters=parameters,
        )
        resp = self.__inference_api.rerank(rerank_request=rerank_request)
        return RerankResult(resp)
class Inference(PluginAware):
    """
    Client-side entry point for the Pinecone Inference API.

    Provides ``embed`` for generating vector embeddings and ``rerank`` for
    scoring documents against a query. Normally reached through a parent
    ``Pinecone`` client object, which supplies the shared configuration.

    ```python
    from pinecone import Pinecone

    pc = Pinecone()
    embeddings = pc.inference.embed(
        model="text-embedding-3-small",
        inputs=["Hello, world!"],
        parameters={"input_type": "passage", "truncate": "END"}
    )
    ```

    :param config: A `pinecone.config.Config` object, configured and built in the Pinecone class.
    :type config: `pinecone.config.Config`, required
    """

    EmbedModel = EmbedModelEnum
    RerankModel = RerankModelEnum

    def __init__(self, config, openapi_config, **kwargs) -> None:
        self.config = config
        """ @private """

        self.openapi_config = openapi_config
        """ @private """

        self.pool_threads = kwargs.get("pool_threads", 1)
        """ @private """

        # Generated OpenAPI client that performs the actual HTTP requests.
        self.__inference_api = setup_openapi_client(
            api_client_klass=ApiClient,
            api_klass=InferenceApi,
            config=config,
            openapi_config=openapi_config,
            pool_threads=kwargs.get("pool_threads", 1),
            api_version=API_VERSION,
        )

        # PluginAware hook: expose the same shared settings to plugins.
        self.load_plugins(
            config=self.config, openapi_config=self.openapi_config, pool_threads=self.pool_threads
        )

    def embed(
        self,
        model: Union[EmbedModelEnum, str],
        inputs: Union[str, List[Dict], List[str]],
        parameters: Optional[Dict[str, Any]] = None,
    ) -> EmbeddingsList:
        """
        Generate embeddings for ``inputs`` with the specified model.

        :param model: Embedding model name (or ``EmbedModel`` enum member).
        :param inputs: A string, list of strings, or list of dicts to embed.
        :param parameters: Optional model-specific parameters
            (e.g. ``input_type``, ``truncate``).
        :return: An ``EmbeddingsList`` whose ``data`` holds one embedding per
            input, plus ``model`` and ``usage`` (token count) keys.

        Example:
        >>> outputs = pc.inference.embed(
        ...     model="multilingual-e5-large",
        ...     inputs=["Who created the first computer?"],
        ...     parameters={"input_type": "passage", "truncate": "END"},
        ... )
        """
        body = InferenceRequestBuilder.embed_request(
            model=model, inputs=inputs, parameters=parameters
        )
        response = self.__inference_api.embed(embed_request=body)
        return EmbeddingsList(response)

    def rerank(
        self,
        model: Union[RerankModelEnum, str],
        query: str,
        documents: Union[List[str], List[Dict[str, Any]]],
        rank_fields: List[str] = ["text"],
        return_documents: bool = True,
        top_n: Optional[int] = None,
        parameters: Optional[Dict[str, Any]] = None,
    ) -> RerankResult:
        """
        Score ``documents`` by relevance to ``query`` with the specified model.

        :param model: Reranking model name (or ``RerankModel`` enum member).
        :param query: The query to compare documents against.
        :param documents: Strings or dicts to rank.
        :param rank_fields: Document fields used for ranking; defaults to ["text"].
        :param return_documents: Include document bodies in the response (default True).
        :param top_n: Number of results to return; defaults to len(documents).
        :param parameters: Optional model-specific ranking parameters.
        :return: A ``RerankResult`` whose ``data`` lists up to ``top_n``
            documents sorted most-relevant first, each carrying ``index``
            (position in the request) and ``score``; ``usage`` reports
            rerank units consumed.

        Example:
        >>> result = pc.inference.rerank(
        ...     model="bge-reranker-v2-m3",
        ...     query="Tell me about tech companies",
        ...     documents=["Apple is a popular fruit...", "Software is still eating the world."],
        ...     top_n=1,
        ...     return_documents=True,
        ... )
        """
        request = InferenceRequestBuilder.rerank(
            model=model,
            query=query,
            documents=documents,
            rank_fields=rank_fields,
            return_documents=return_documents,
            top_n=top_n,
            parameters=parameters,
        )
        response = self.__inference_api.rerank(rerank_request=request)
        return RerankResult(response)
The `Inference` class configures and uses the Pinecone Inference API to generate embeddings and
rank documents.
rank documents.
It is generally not instantiated directly, but rather accessed through a parent `Pinecone` client
object that is responsible for managing shared configurations.
from pinecone import Pinecone
pc = Pinecone()
embeddings = pc.inference.embed(
model="text-embedding-3-small",
inputs=["Hello, world!"],
parameters={"input_type": "passage", "truncate": "END"}
)
Parameters
- config: A `pinecone.config.Config` object, configured and built in the Pinecone class.
def __init__(self, config, openapi_config, **kwargs) -> None:
    """
    Build the Inference helper from shared client configuration.

    :param config: A `pinecone.config.Config` built by the parent Pinecone client.
    :param openapi_config: Configuration for the generated OpenAPI client.
    :param kwargs: Optional settings; `pool_threads` (default 1) sizes the
        underlying API client's thread pool.
    """
    self.config = config
    """ @private """

    self.openapi_config = openapi_config
    """ @private """

    self.pool_threads = kwargs.get("pool_threads", 1)
    """ @private """

    # Low-level generated API client that performs the actual HTTP calls
    # for embed() and rerank(); pinned to this package's API_VERSION.
    self.__inference_api = setup_openapi_client(
        api_client_klass=ApiClient,
        api_klass=InferenceApi,
        config=config,
        openapi_config=openapi_config,
        pool_threads=kwargs.get("pool_threads", 1),
        api_version=API_VERSION,
    )

    # PluginAware hook: gives installed plugins access to the same
    # config/openapi_config/pool_threads this client uses.
    self.load_plugins(
        config=self.config, openapi_config=self.openapi_config, pool_threads=self.pool_threads
    )
def embed(
    self,
    model: Union[EmbedModelEnum, str],
    inputs: Union[str, List[Dict], List[str]],
    parameters: Optional[Dict[str, Any]] = None,
) -> EmbeddingsList:
    """
    Generate embeddings for ``inputs`` using the specified model.

    :param model: Embedding model name (or ``EmbedModel`` enum member).
    :param inputs: A string, list of strings, or list of dicts to embed.
    :param parameters: Optional model-specific parameters
        (e.g. ``input_type``, ``truncate``).
    :return: An ``EmbeddingsList`` whose ``data`` holds one embedding per
        input, plus ``model`` and ``usage`` (token count) keys.

    Example:
    >>> outputs = pc.inference.embed(
    ...     model="multilingual-e5-large",
    ...     inputs=["Who created the first computer?"],
    ...     parameters={"input_type": "passage", "truncate": "END"},
    ... )
    >>> print(outputs)
    EmbeddingsList(
        model='multilingual-e5-large',
        data=[
            {'values': [0.1, ...., 0.2]},
        ],
        usage={'total_tokens': 6}
    )
    """
    body = InferenceRequestBuilder.embed_request(
        model=model, inputs=inputs, parameters=parameters
    )
    response = self.__inference_api.embed(embed_request=body)
    return EmbeddingsList(response)
Generates embeddings for the provided inputs using the specified model and (optional) parameters.
Parameters
model: The model to use for generating embeddings.
inputs: A list of items to generate embeddings for.
parameters: A dictionary of parameters to use when generating embeddings.
Returns
EmbeddingsList object with keys `data`, `model`, and `usage`. The `data` key contains a list of
`n` embeddings, where `n` = len(inputs) and type(n) = Embedding. Precision of returned embeddings
is either float16 or float32, with float32 being the default. The `model` key is the model used to
generate the embeddings. The `usage` key contains the total number of tokens used at request-time.
Example:
>>> inputs = ["Who created the first computer?"]
>>> outputs = pc.inference.embed(model="multilingual-e5-large", inputs=inputs, parameters={"input_type": "passage", "truncate": "END"})
>>> print(outputs)
EmbeddingsList(
model='multilingual-e5-large',
data=[
{'values': [0.1, ...., 0.2]},
],
usage={'total_tokens': 6}
)
def rerank(
    self,
    model: Union[RerankModelEnum, str],
    query: str,
    documents: Union[List[str], List[Dict[str, Any]]],
    rank_fields: List[str] = ["text"],
    return_documents: bool = True,
    top_n: Optional[int] = None,
    parameters: Optional[Dict[str, Any]] = None,
) -> RerankResult:
    """
    Score ``documents`` by relevance to ``query`` with the specified model.

    :param model: Reranking model name (or ``RerankModel`` enum member).
    :param query: The query to compare documents against.
    :param documents: Strings or dicts to rank.
    :param rank_fields: Document fields used for ranking; defaults to ["text"].
    :param return_documents: Include document bodies in the response (default True).
    :param top_n: Number of results to return; defaults to len(documents).
    :param parameters: Optional model-specific ranking parameters.
    :return: A ``RerankResult`` whose ``data`` lists up to ``top_n`` documents
        sorted most-relevant first, each carrying ``index`` (position in the
        request) and ``score``; ``usage`` reports rerank units consumed.

    Example:
    >>> result = pc.inference.rerank(
    ...     model="bge-reranker-v2-m3",
    ...     query="Tell me about tech companies",
    ...     documents=[
    ...         "Apple is a popular fruit known for its sweetness and crisp texture.",
    ...         "Software is still eating the world.",
    ...     ],
    ...     top_n=1,
    ...     return_documents=True,
    ... )
    """
    request = InferenceRequestBuilder.rerank(
        model=model,
        query=query,
        documents=documents,
        rank_fields=rank_fields,
        return_documents=return_documents,
        top_n=top_n,
        parameters=parameters,
    )
    response = self.__inference_api.rerank(rerank_request=request)
    return RerankResult(response)
Rerank documents with associated relevance scores that represent the relevance of each document to the provided query using the specified model.
Parameters
model: The model to use for reranking.
query: The query to compare with documents.
documents: A list of documents or strings to rank.
rank_fields: A list of document fields to use for ranking. Defaults to ["text"].
return_documents: Whether to include the documents in the response. Defaults to True.
top_n: How many documents to return. Defaults to len(documents).
parameters: A dictionary of parameters to use when ranking documents.
Returns
RerankResult object with keys `data` and `usage`. The `data` key contains a list of `n` documents,
where `n` = `top_n` and type(n) = Document. The documents are sorted in order of relevance, with
the first being the most relevant. The `index` field can be used to locate the document relative
to the list of documents specified in the request. Each document contains a `score` key
representing how close the document relates to the query.
Example:
>>> result = pc.inference.rerank(
model="bge-reranker-v2-m3",
query="Tell me about tech companies",
documents=[
"Apple is a popular fruit known for its sweetness and crisp texture.",
"Software is still eating the world.",
"Many people enjoy eating apples as a healthy snack.",
"Acme Inc. has revolutionized the tech industry with its sleek designs and user-friendly interfaces.",
"An apple a day keeps the doctor away, as the saying goes.",
],
top_n=2,
return_documents=True,
)
>>> print(result)
RerankResult(
model='bge-reranker-v2-m3',
data=[
{ index=3, score=0.020980744,
document={text="Acme Inc. has rev..."} },
{ index=1, score=0.00034015716,
document={text="Software is still..."} }
],
usage={'rerank_units': 1}
)
class EmbedModel(Enum):
    """Embedding models offered through the Pinecone Inference API.

    Each member's value is the model identifier string accepted by
    ``pc.inference.embed(model=...)``.
    """

    Multilingual_E5_Large = "multilingual-e5-large"
    Pinecone_Sparse_English_V0 = "pinecone-sparse-english-v0"
Create a collection of name/value pairs.
Example enumeration:
>>> class Color(Enum):
... RED = 1
... BLUE = 2
... GREEN = 3
Access them by:
- attribute access::
>>> Color.RED
<Color.RED: 1>
- value lookup:
>>> Color(1)
<Color.RED: 1>
- name lookup:
>>> Color['RED']
<Color.RED: 1>
Enumerations can be iterated over, and know how many members they have:
>>> len(Color)
3
>>> list(Color)
[<Color.RED: 1>, <Color.BLUE: 2>, <Color.GREEN: 3>]
Methods can be added to enumerations, and members can have their own attributes -- see the documentation for details.
Inherited Members
class RerankModel(Enum):
    """Reranking models offered through the Pinecone Inference API.

    Each member's value is the model identifier string accepted by
    ``pc.inference.rerank(model=...)``.
    """

    Bge_Reranker_V2_M3 = "bge-reranker-v2-m3"
    Cohere_Rerank_3_5 = "cohere-rerank-3.5"
    Pinecone_Rerank_V0 = "pinecone-rerank-v0"
Create a collection of name/value pairs.
Example enumeration:
>>> class Color(Enum):
... RED = 1
... BLUE = 2
... GREEN = 3
Access them by:
- attribute access::
>>> Color.RED
<Color.RED: 1>
- value lookup:
>>> Color(1)
<Color.RED: 1>
- name lookup:
>>> Color['RED']
<Color.RED: 1>
Enumerations can be iterated over, and know how many members they have:
>>> len(Color)
3
>>> list(Color)
[<Color.RED: 1>, <Color.BLUE: 2>, <Color.GREEN: 3>]
Methods can be added to enumerations, and members can have their own attributes -- see the documentation for details.