pinecone.data.features.inference.inference

  1import logging
  2from typing import Optional, Dict, List, Union, Any
  3
  4from pinecone.openapi_support import ApiClient
  5from pinecone.core.openapi.inference.apis import InferenceApi
  6from .models import EmbeddingsList, RerankResult
  7from pinecone.core.openapi.inference import API_VERSION
  8from pinecone.utils import setup_openapi_client, PluginAware
  9
 10
 11from .inference_request_builder import (
 12    InferenceRequestBuilder,
 13    EmbedModel as EmbedModelEnum,
 14    RerankModel as RerankModelEnum,
 15)
 16
 17logger = logging.getLogger(__name__)
 18""" @private """
 19
 20
 21class Inference(PluginAware):
 22    """
 23    The `Inference` class configures and uses the Pinecone Inference API to generate embeddings and
 24    rank documents.
 25
 26    It is generally not instantiated directly, but rather accessed through a parent `Pinecone` client
 27    object that is responsible for managing shared configurations.
 28
 29    ```python
 30    from pinecone import Pinecone
 31
 32    pc = Pinecone()
 33    embeddings = pc.inference.embed(
 34        model="text-embedding-3-small",
 35        inputs=["Hello, world!"],
 36        parameters={"input_type": "passage", "truncate": "END"}
 37    )
 38    ```
 39
 40    :param config: A `pinecone.config.Config` object, configured and built in the Pinecone class.
 41    :type config: `pinecone.config.Config`, required
 42    """
 43
 44    EmbedModel = EmbedModelEnum
 45    RerankModel = RerankModelEnum
 46
 47    def __init__(self, config, openapi_config, **kwargs) -> None:
 48        self.config = config
 49        """ @private """
 50
 51        self.openapi_config = openapi_config
 52        """ @private """
 53
 54        self.pool_threads = kwargs.get("pool_threads", 1)
 55        """ @private """
 56
 57        self.__inference_api = setup_openapi_client(
 58            api_client_klass=ApiClient,
 59            api_klass=InferenceApi,
 60            config=config,
 61            openapi_config=openapi_config,
 62            pool_threads=kwargs.get("pool_threads", 1),
 63            api_version=API_VERSION,
 64        )
 65
 66        self.load_plugins(
 67            config=self.config, openapi_config=self.openapi_config, pool_threads=self.pool_threads
 68        )
 69
 70    def embed(
 71        self,
 72        model: Union[EmbedModelEnum, str],
 73        inputs: Union[str, List[Dict], List[str]],
 74        parameters: Optional[Dict[str, Any]] = None,
 75    ) -> EmbeddingsList:
 76        """
 77        Generates embeddings for the provided inputs using the specified model and (optional) parameters.
 78
 79        :param model: The model to use for generating embeddings.
 80        :type model: str, required
 81
 82        :param inputs: A list of items to generate embeddings for.
 83        :type inputs: list, required
 84
 85        :param parameters: A dictionary of parameters to use when generating embeddings.
 86        :type parameters: dict, optional
 87
 88        :return: EmbeddingsList object with keys `data`, `model`, and `usage`. The `data` key contains a list of
 89        `n` embeddings, where `n` = len(inputs) and type(n) = Embedding. Precision of returned embeddings is either
 90        float16 or float32, with float32 being the default. `model` key is the model used to generate the embeddings.
 91        `usage` key contains the total number of tokens used at request-time.
 92
 93        Example:
 94        >>> inputs = ["Who created the first computer?"]
 95        >>> outputs = pc.inference.embed(model="multilingual-e5-large", inputs=inputs, parameters={"input_type": "passage", "truncate": "END"})
 96        >>> print(outputs)
 97        EmbeddingsList(
 98            model='multilingual-e5-large',
 99            data=[
100                {'values': [0.1, ...., 0.2]},
101              ],
102            usage={'total_tokens': 6}
103        )
104        """
105        request_body = InferenceRequestBuilder.embed_request(
106            model=model, inputs=inputs, parameters=parameters
107        )
108        resp = self.__inference_api.embed(embed_request=request_body)
109        return EmbeddingsList(resp)
110
111    def rerank(
112        self,
113        model: Union[RerankModelEnum, str],
114        query: str,
115        documents: Union[List[str], List[Dict[str, Any]]],
116        rank_fields: List[str] = ["text"],
117        return_documents: bool = True,
118        top_n: Optional[int] = None,
119        parameters: Optional[Dict[str, Any]] = None,
120    ) -> RerankResult:
121        """
122        Rerank documents with associated relevance scores that represent the relevance of each document
123        to the provided query using the specified model.
124
125        :param model: The model to use for reranking.
126        :type model: str, required
127
128        :param query: The query to compare with documents.
129        :type query: str, required
130
131        :param documents: A list of documents or strings to rank.
132        :type documents: list, required
133
134        :param rank_fields: A list of document fields to use for ranking. Defaults to ["text"].
135        :type rank_fields: list, optional
136
137        :param return_documents: Whether to include the documents in the response. Defaults to True.
138        :type return_documents: bool, optional
139
140        :param top_n: How many documents to return. Defaults to len(documents).
141        :type top_n: int, optional
142
143        :param parameters: A dictionary of parameters to use when ranking documents.
144        :type parameters: dict, optional
145
146        :return: RerankResult object with keys `data` and `usage`. The `data` key contains a list of
147        `n` documents, where `n` = `top_n` and type(n) = Document. The documents are sorted in order of
148        relevance, with the first being the most relevant. The `index` field can be used to locate the document
149        relative to the list of documents specified in the request. Each document contains a `score` key
150        representing how close the document relates to the query.
151
152        Example:
153        >>> result = pc.inference.rerank(
154                model="bge-reranker-v2-m3",
155                query="Tell me about tech companies",
156                documents=[
157                    "Apple is a popular fruit known for its sweetness and crisp texture.",
158                    "Software is still eating the world.",
159                    "Many people enjoy eating apples as a healthy snack.",
160                    "Acme Inc. has revolutionized the tech industry with its sleek designs and user-friendly interfaces.",
161                    "An apple a day keeps the doctor away, as the saying goes.",
162                ],
163                top_n=2,
164                return_documents=True,
165            )
166        >>> print(result)
167        RerankResult(
168          model='bge-reranker-v2-m3',
169          data=[
170            { index=3, score=0.020980744,
171              document={text="Acme Inc. has rev..."} },
172            { index=1, score=0.00034015716,
173              document={text="Software is still..."} }
174          ],
175          usage={'rerank_units': 1}
176        )
177        """
178        rerank_request = InferenceRequestBuilder.rerank(
179            model=model,
180            query=query,
181            documents=documents,
182            rank_fields=rank_fields,
183            return_documents=return_documents,
184            top_n=top_n,
185            parameters=parameters,
186        )
187        resp = self.__inference_api.rerank(rerank_request=rerank_request)
188        return RerankResult(resp)
class Inference(pinecone.utils.plugin_aware.PluginAware):
 22class Inference(PluginAware):
 23    """
 24    The `Inference` class configures and uses the Pinecone Inference API to generate embeddings and
 25    rank documents.
 26
 27    It is generally not instantiated directly, but rather accessed through a parent `Pinecone` client
 28    object that is responsible for managing shared configurations.
 29
 30    ```python
 31    from pinecone import Pinecone
 32
 33    pc = Pinecone()
 34    embeddings = pc.inference.embed(
 35        model="text-embedding-3-small",
 36        inputs=["Hello, world!"],
 37        parameters={"input_type": "passage", "truncate": "END"}
 38    )
 39    ```
 40
 41    :param config: A `pinecone.config.Config` object, configured and built in the Pinecone class.
 42    :type config: `pinecone.config.Config`, required
 43    """
 44
 45    EmbedModel = EmbedModelEnum
 46    RerankModel = RerankModelEnum
 47
 48    def __init__(self, config, openapi_config, **kwargs) -> None:
 49        self.config = config
 50        """ @private """
 51
 52        self.openapi_config = openapi_config
 53        """ @private """
 54
 55        self.pool_threads = kwargs.get("pool_threads", 1)
 56        """ @private """
 57
 58        self.__inference_api = setup_openapi_client(
 59            api_client_klass=ApiClient,
 60            api_klass=InferenceApi,
 61            config=config,
 62            openapi_config=openapi_config,
 63            pool_threads=kwargs.get("pool_threads", 1),
 64            api_version=API_VERSION,
 65        )
 66
 67        self.load_plugins(
 68            config=self.config, openapi_config=self.openapi_config, pool_threads=self.pool_threads
 69        )
 70
 71    def embed(
 72        self,
 73        model: Union[EmbedModelEnum, str],
 74        inputs: Union[str, List[Dict], List[str]],
 75        parameters: Optional[Dict[str, Any]] = None,
 76    ) -> EmbeddingsList:
 77        """
 78        Generates embeddings for the provided inputs using the specified model and (optional) parameters.
 79
 80        :param model: The model to use for generating embeddings.
 81        :type model: str, required
 82
 83        :param inputs: A list of items to generate embeddings for.
 84        :type inputs: list, required
 85
 86        :param parameters: A dictionary of parameters to use when generating embeddings.
 87        :type parameters: dict, optional
 88
 89        :return: EmbeddingsList object with keys `data`, `model`, and `usage`. The `data` key contains a list of
 90        `n` embeddings, where `n` = len(inputs) and type(n) = Embedding. Precision of returned embeddings is either
 91        float16 or float32, with float32 being the default. `model` key is the model used to generate the embeddings.
 92        `usage` key contains the total number of tokens used at request-time.
 93
 94        Example:
 95        >>> inputs = ["Who created the first computer?"]
 96        >>> outputs = pc.inference.embed(model="multilingual-e5-large", inputs=inputs, parameters={"input_type": "passage", "truncate": "END"})
 97        >>> print(outputs)
 98        EmbeddingsList(
 99            model='multilingual-e5-large',
100            data=[
101                {'values': [0.1, ...., 0.2]},
102              ],
103            usage={'total_tokens': 6}
104        )
105        """
106        request_body = InferenceRequestBuilder.embed_request(
107            model=model, inputs=inputs, parameters=parameters
108        )
109        resp = self.__inference_api.embed(embed_request=request_body)
110        return EmbeddingsList(resp)
111
112    def rerank(
113        self,
114        model: Union[RerankModelEnum, str],
115        query: str,
116        documents: Union[List[str], List[Dict[str, Any]]],
117        rank_fields: List[str] = ["text"],
118        return_documents: bool = True,
119        top_n: Optional[int] = None,
120        parameters: Optional[Dict[str, Any]] = None,
121    ) -> RerankResult:
122        """
123        Rerank documents with associated relevance scores that represent the relevance of each document
124        to the provided query using the specified model.
125
126        :param model: The model to use for reranking.
127        :type model: str, required
128
129        :param query: The query to compare with documents.
130        :type query: str, required
131
132        :param documents: A list of documents or strings to rank.
133        :type documents: list, required
134
135        :param rank_fields: A list of document fields to use for ranking. Defaults to ["text"].
136        :type rank_fields: list, optional
137
138        :param return_documents: Whether to include the documents in the response. Defaults to True.
139        :type return_documents: bool, optional
140
141        :param top_n: How many documents to return. Defaults to len(documents).
142        :type top_n: int, optional
143
144        :param parameters: A dictionary of parameters to use when ranking documents.
145        :type parameters: dict, optional
146
147        :return: RerankResult object with keys `data` and `usage`. The `data` key contains a list of
148        `n` documents, where `n` = `top_n` and type(n) = Document. The documents are sorted in order of
149        relevance, with the first being the most relevant. The `index` field can be used to locate the document
150        relative to the list of documents specified in the request. Each document contains a `score` key
151        representing how close the document relates to the query.
152
153        Example:
154        >>> result = pc.inference.rerank(
155                model="bge-reranker-v2-m3",
156                query="Tell me about tech companies",
157                documents=[
158                    "Apple is a popular fruit known for its sweetness and crisp texture.",
159                    "Software is still eating the world.",
160                    "Many people enjoy eating apples as a healthy snack.",
161                    "Acme Inc. has revolutionized the tech industry with its sleek designs and user-friendly interfaces.",
162                    "An apple a day keeps the doctor away, as the saying goes.",
163                ],
164                top_n=2,
165                return_documents=True,
166            )
167        >>> print(result)
168        RerankResult(
169          model='bge-reranker-v2-m3',
170          data=[
171            { index=3, score=0.020980744,
172              document={text="Acme Inc. has rev..."} },
173            { index=1, score=0.00034015716,
174              document={text="Software is still..."} }
175          ],
176          usage={'rerank_units': 1}
177        )
178        """
179        rerank_request = InferenceRequestBuilder.rerank(
180            model=model,
181            query=query,
182            documents=documents,
183            rank_fields=rank_fields,
184            return_documents=return_documents,
185            top_n=top_n,
186            parameters=parameters,
187        )
188        resp = self.__inference_api.rerank(rerank_request=rerank_request)
189        return RerankResult(resp)

The Inference class configures and uses the Pinecone Inference API to generate embeddings and rank documents.

It is generally not instantiated directly, but rather accessed through a parent Pinecone client object that is responsible for managing shared configurations.

from pinecone import Pinecone

pc = Pinecone()
embeddings = pc.inference.embed(
    model="text-embedding-3-small",
    inputs=["Hello, world!"],
    parameters={"input_type": "passage", "truncate": "END"}
)
Parameters
  • config: A pinecone.config.Config object, configured and built in the Pinecone class.
Inference(config, openapi_config, **kwargs)
48    def __init__(self, config, openapi_config, **kwargs) -> None:
49        self.config = config
50        """ @private """
51
52        self.openapi_config = openapi_config
53        """ @private """
54
55        self.pool_threads = kwargs.get("pool_threads", 1)
56        """ @private """
57
58        self.__inference_api = setup_openapi_client(
59            api_client_klass=ApiClient,
60            api_klass=InferenceApi,
61            config=config,
62            openapi_config=openapi_config,
63            pool_threads=kwargs.get("pool_threads", 1),
64            api_version=API_VERSION,
65        )
66
67        self.load_plugins(
68            config=self.config, openapi_config=self.openapi_config, pool_threads=self.pool_threads
69        )
def embed( self, model: Union[pinecone.data.features.inference.inference_request_builder.EmbedModel, str], inputs: Union[str, List[Dict], List[str]], parameters: Optional[Dict[str, Any]] = None) -> pinecone.data.features.inference.models.embedding_list.EmbeddingsList:
 71    def embed(
 72        self,
 73        model: Union[EmbedModelEnum, str],
 74        inputs: Union[str, List[Dict], List[str]],
 75        parameters: Optional[Dict[str, Any]] = None,
 76    ) -> EmbeddingsList:
 77        """
 78        Generates embeddings for the provided inputs using the specified model and (optional) parameters.
 79
 80        :param model: The model to use for generating embeddings.
 81        :type model: str, required
 82
 83        :param inputs: A list of items to generate embeddings for.
 84        :type inputs: list, required
 85
 86        :param parameters: A dictionary of parameters to use when generating embeddings.
 87        :type parameters: dict, optional
 88
 89        :return: EmbeddingsList object with keys `data`, `model`, and `usage`. The `data` key contains a list of
 90        `n` embeddings, where `n` = len(inputs) and type(n) = Embedding. Precision of returned embeddings is either
 91        float16 or float32, with float32 being the default. `model` key is the model used to generate the embeddings.
 92        `usage` key contains the total number of tokens used at request-time.
 93
 94        Example:
 95        >>> inputs = ["Who created the first computer?"]
 96        >>> outputs = pc.inference.embed(model="multilingual-e5-large", inputs=inputs, parameters={"input_type": "passage", "truncate": "END"})
 97        >>> print(outputs)
 98        EmbeddingsList(
 99            model='multilingual-e5-large',
100            data=[
101                {'values': [0.1, ...., 0.2]},
102              ],
103            usage={'total_tokens': 6}
104        )
105        """
106        request_body = InferenceRequestBuilder.embed_request(
107            model=model, inputs=inputs, parameters=parameters
108        )
109        resp = self.__inference_api.embed(embed_request=request_body)
110        return EmbeddingsList(resp)

Generates embeddings for the provided inputs using the specified model and (optional) parameters.

Parameters
  • model: The model to use for generating embeddings.

  • inputs: A list of items to generate embeddings for.

  • parameters: A dictionary of parameters to use when generating embeddings.

Returns

EmbeddingsList object with keys data, model, and usage. The data key contains a list of n embeddings, where n = len(inputs) and type(n) = Embedding. Precision of returned embeddings is either float16 or float32, with float32 being the default. model key is the model used to generate the embeddings. usage key contains the total number of tokens used at request-time.

Example:

>>> inputs = ["Who created the first computer?"]
>>> outputs = pc.inference.embed(model="multilingual-e5-large", inputs=inputs, parameters={"input_type": "passage", "truncate": "END"})
>>> print(outputs)
EmbeddingsList(
    model='multilingual-e5-large',
    data=[
        {'values': [0.1, ...., 0.2]},
      ],
    usage={'total_tokens': 6}
)
def rerank( self, model: Union[pinecone.data.features.inference.inference_request_builder.RerankModel, str], query: str, documents: Union[List[str], List[Dict[str, Any]]], rank_fields: List[str] = ['text'], return_documents: bool = True, top_n: Optional[int] = None, parameters: Optional[Dict[str, Any]] = None) -> pinecone.data.features.inference.models.rerank_result.RerankResult:
112    def rerank(
113        self,
114        model: Union[RerankModelEnum, str],
115        query: str,
116        documents: Union[List[str], List[Dict[str, Any]]],
117        rank_fields: List[str] = ["text"],
118        return_documents: bool = True,
119        top_n: Optional[int] = None,
120        parameters: Optional[Dict[str, Any]] = None,
121    ) -> RerankResult:
122        """
123        Rerank documents with associated relevance scores that represent the relevance of each document
124        to the provided query using the specified model.
125
126        :param model: The model to use for reranking.
127        :type model: str, required
128
129        :param query: The query to compare with documents.
130        :type query: str, required
131
132        :param documents: A list of documents or strings to rank.
133        :type documents: list, required
134
135        :param rank_fields: A list of document fields to use for ranking. Defaults to ["text"].
136        :type rank_fields: list, optional
137
138        :param return_documents: Whether to include the documents in the response. Defaults to True.
139        :type return_documents: bool, optional
140
141        :param top_n: How many documents to return. Defaults to len(documents).
142        :type top_n: int, optional
143
144        :param parameters: A dictionary of parameters to use when ranking documents.
145        :type parameters: dict, optional
146
147        :return: RerankResult object with keys `data` and `usage`. The `data` key contains a list of
148        `n` documents, where `n` = `top_n` and type(n) = Document. The documents are sorted in order of
149        relevance, with the first being the most relevant. The `index` field can be used to locate the document
150        relative to the list of documents specified in the request. Each document contains a `score` key
151        representing how close the document relates to the query.
152
153        Example:
154        >>> result = pc.inference.rerank(
155                model="bge-reranker-v2-m3",
156                query="Tell me about tech companies",
157                documents=[
158                    "Apple is a popular fruit known for its sweetness and crisp texture.",
159                    "Software is still eating the world.",
160                    "Many people enjoy eating apples as a healthy snack.",
161                    "Acme Inc. has revolutionized the tech industry with its sleek designs and user-friendly interfaces.",
162                    "An apple a day keeps the doctor away, as the saying goes.",
163                ],
164                top_n=2,
165                return_documents=True,
166            )
167        >>> print(result)
168        RerankResult(
169          model='bge-reranker-v2-m3',
170          data=[
171            { index=3, score=0.020980744,
172              document={text="Acme Inc. has rev..."} },
173            { index=1, score=0.00034015716,
174              document={text="Software is still..."} }
175          ],
176          usage={'rerank_units': 1}
177        )
178        """
179        rerank_request = InferenceRequestBuilder.rerank(
180            model=model,
181            query=query,
182            documents=documents,
183            rank_fields=rank_fields,
184            return_documents=return_documents,
185            top_n=top_n,
186            parameters=parameters,
187        )
188        resp = self.__inference_api.rerank(rerank_request=rerank_request)
189        return RerankResult(resp)

Rerank documents with associated relevance scores that represent the relevance of each document to the provided query using the specified model.

Parameters
  • model: The model to use for reranking.

  • query: The query to compare with documents.

  • documents: A list of documents or strings to rank.

  • rank_fields: A list of document fields to use for ranking. Defaults to ["text"].

  • return_documents: Whether to include the documents in the response. Defaults to True.

  • top_n: How many documents to return. Defaults to len(documents).

  • parameters: A dictionary of parameters to use when ranking documents.

Returns

RerankResult object with keys data and usage. The data key contains a list of n documents, where n = top_n and type(n) = Document. The documents are sorted in order of relevance, with the first being the most relevant. The index field can be used to locate the document relative to the list of documents specified in the request. Each document contains a score key representing how close the document relates to the query.

Example:

>>> result = pc.inference.rerank(
        model="bge-reranker-v2-m3",
        query="Tell me about tech companies",
        documents=[
            "Apple is a popular fruit known for its sweetness and crisp texture.",
            "Software is still eating the world.",
            "Many people enjoy eating apples as a healthy snack.",
            "Acme Inc. has revolutionized the tech industry with its sleek designs and user-friendly interfaces.",
            "An apple a day keeps the doctor away, as the saying goes.",
        ],
        top_n=2,
        return_documents=True,
    )
>>> print(result)
RerankResult(
  model='bge-reranker-v2-m3',
  data=[
    { index=3, score=0.020980744,
      document={text="Acme Inc. has rev..."} },
    { index=1, score=0.00034015716,
      document={text="Software is still..."} }
  ],
  usage={'rerank_units': 1}
)
class Inference.EmbedModel(enum.Enum):
class EmbedModel(Enum):
    """Model identifiers accepted by the Pinecone Inference embed endpoint.

    Each member's value is the model-name string sent over the wire, so
    callers may pass either the enum member or the raw string.
    """

    Multilingual_E5_Large = "multilingual-e5-large"
    Pinecone_Sparse_English_V0 = "pinecone-sparse-english-v0"

Create a collection of name/value pairs.

Example enumeration:

>>> class Color(Enum):
...     RED = 1
...     BLUE = 2
...     GREEN = 3

Access them by:

  • attribute access:
>>> Color.RED
<Color.RED: 1>
  • value lookup:
>>> Color(1)
<Color.RED: 1>
  • name lookup:
>>> Color['RED']
<Color.RED: 1>

Enumerations can be iterated over, and know how many members they have:

>>> len(Color)
3
>>> list(Color)
[<Color.RED: 1>, <Color.BLUE: 2>, <Color.GREEN: 3>]

Methods can be added to enumerations, and members can have their own attributes -- see the documentation for details.

class Inference.RerankModel(enum.Enum):
class RerankModel(Enum):
    """Model identifiers accepted by the Pinecone Inference rerank endpoint.

    Each member's value is the model-name string sent over the wire, so
    callers may pass either the enum member or the raw string.
    """

    Bge_Reranker_V2_M3 = "bge-reranker-v2-m3"
    Cohere_Rerank_3_5 = "cohere-rerank-3.5"
    Pinecone_Rerank_V0 = "pinecone-rerank-v0"

Create a collection of name/value pairs.

Example enumeration:

>>> class Color(Enum):
...     RED = 1
...     BLUE = 2
...     GREEN = 3

Access them by:

  • attribute access:
>>> Color.RED
<Color.RED: 1>
  • value lookup:
>>> Color(1)
<Color.RED: 1>
  • name lookup:
>>> Color['RED']
<Color.RED: 1>

Enumerations can be iterated over, and know how many members they have:

>>> len(Color)
3
>>> list(Color)
[<Color.RED: 1>, <Color.BLUE: 2>, <Color.GREEN: 3>]

Methods can be added to enumerations, and members can have their own attributes -- see the documentation for details.