"""Async Inference namespace — embed, rerank, and model operations."""
from __future__ import annotations
import logging
from functools import cached_property
from typing import TYPE_CHECKING, Any
from pinecone._internal.adapters.inference_adapter import (
InferenceAdapter,
normalize_embed_inputs,
normalize_rerank_documents,
)
from pinecone._internal.constants import INFERENCE_API_VERSION
from pinecone._internal.validation import require_non_empty, require_one_of
from pinecone.errors.exceptions import ValidationError
from pinecone.models import enums as _enums
if TYPE_CHECKING:
from pinecone._internal.config import PineconeConfig
from pinecone.models.inference.embed import EmbeddingsList
from pinecone.models.inference.model_list import ModelInfoList
from pinecone.models.inference.models import ModelInfo
from pinecone.models.inference.rerank import RerankResult
logger = logging.getLogger(__name__)
class AsyncModelResource:
"""Lazily-initialized resource for listing and getting inference model info.
Accessed via ``pc.inference.model``.
Args:
inference (AsyncInference): The parent async inference namespace that
handles HTTP requests on behalf of this resource.
Examples:
.. code-block:: python
# List all available models
from pinecone import AsyncPinecone
async with AsyncPinecone(api_key="your-api-key") as pc:
models = await pc.inference.model.list()
models.names()
.. code-block:: python
# Get details about a specific model
async with AsyncPinecone(api_key="your-api-key") as pc:
info = await pc.inference.model.get("multilingual-e5-large")
print(info.type)
"""
def __init__(self, inference: AsyncInference) -> None:
self._inference = inference
async def list(
self,
*,
type: str | None = None,
vector_type: str | None = None,
) -> ModelInfoList:
"""List available inference models.
Delegates to :meth:`~AsyncInference.list_models`.
Args:
type (str | None): Filter by model type (``"embed"`` or ``"rerank"``).
vector_type (str | None): Filter by vector type
(``"dense"`` or ``"sparse"``). Only relevant when ``type="embed"``.
Returns:
A :class:`ModelInfoList` supporting iteration, ``len()``, and ``.names()``.
Raises:
:exc:`ApiError`: If the API returns an error response.
Examples:
.. code-block:: python
from pinecone import AsyncPinecone
async with AsyncPinecone(api_key="your-api-key") as pc:
models = await pc.inference.model.list()
embed_models = await pc.inference.model.list(type="embed")
"""
return await self._inference.list_models(type=type, vector_type=vector_type)
async def get(self, model: str | None = None, **kwargs: str) -> ModelInfo:
"""Get detailed information about a specific model.
Delegates to :meth:`~AsyncInference.get_model`.
Args:
model (str): The model identifier to look up. The ``model_name``
keyword is also accepted as an alias; pass only one of the two.
Returns:
A :class:`ModelInfo` with full model details.
Raises:
:exc:`ValidationError`: If both ``model`` and ``model_name`` are provided.
:exc:`NotFoundError`: If the model does not exist.
:exc:`ApiError`: If the API returns another error response.
Examples:
.. code-block:: python
from pinecone import AsyncPinecone
async with AsyncPinecone(api_key="your-api-key") as pc:
info = await pc.inference.model.get("multilingual-e5-large")
print(info.type)
"""
model_name: str | None = kwargs.pop("model_name", None)
if kwargs:
raise TypeError(f"get() got unexpected keyword arguments: {sorted(kwargs)!r}")
if model is not None and model_name is not None:
raise ValidationError("Provide either model= or model_name=, not both")
effective: str = model or model_name or ""
return await self._inference.get_model(model=effective)
class AsyncInference:
"""Asynchronous operations for Pinecone inference (embed & rerank).
Provides async methods to generate embeddings and rerank documents
using Pinecone's hosted models.
Args:
config (PineconeConfig): SDK configuration used to construct an
async HTTP client targeting the inference API version.
Examples:
.. code-block:: python
from pinecone import AsyncPinecone
async with AsyncPinecone(api_key="your-api-key") as pc:
embeddings = await pc.inference.embed(
model="multilingual-e5-large",
inputs=["Hello, world!"],
)
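Reranking is available from the same namespace; the model name shown is illustrative.
.. code-block:: python
async with AsyncPinecone(api_key="your-api-key") as pc:
result = await pc.inference.rerank(
model="bge-reranker-v2-m3",
query="Tell me about tech companies",
documents=["Apple is a fruit.", "Acme Inc. revolutionized tech."],
)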
"""
EmbedModel = _enums.EmbedModel
RerankModel = _enums.RerankModel
def __init__(self, config: PineconeConfig) -> None:
from pinecone._internal.http_client import AsyncHTTPClient
self._http = AsyncHTTPClient(config, INFERENCE_API_VERSION)
self._adapter = InferenceAdapter()
async def close(self) -> None:
"""Close the underlying HTTP client."""
await self._http.close()
def __repr__(self) -> str:
"""Return developer-friendly representation."""
return "AsyncInference()"
@cached_property
def model(self) -> AsyncModelResource:
"""Lazily-initialized resource for listing and getting model info.
Returns:
A :class:`AsyncModelResource` that exposes ``.list()`` and ``.get()`` methods.
Examples:
.. code-block:: python
from pinecone import AsyncPinecone
async with AsyncPinecone(api_key="your-api-key") as pc:
models = await pc.inference.model.list()
info = await pc.inference.model.get("multilingual-e5-large")
"""
return AsyncModelResource(self)
async def embed(
self,
model: _enums.EmbedModel | str,
inputs: str | list[str] | list[dict[str, Any]],
parameters: dict[str, Any] | None = None,
) -> EmbeddingsList:
"""Generate embeddings for the provided inputs.
Args:
model (EmbedModel | str): Embedding model name.
inputs (str | list[str] | list[dict[str, Any]]): Text inputs.
A single string is automatically wrapped in a list.
parameters (dict[str, Any] | None): Model-specific parameters
(e.g., ``{"input_type": "passage", "truncate": "END"}``).
Returns:
An :class:`EmbeddingsList` with ``.data``, ``.model``, and ``.usage``.
Raises:
:exc:`PineconeValueError`: If *model* is empty or *inputs* is empty.
:exc:`PineconeTypeError`: If *inputs* has an invalid type.
:exc:`ApiError`: If the API returns an error response.
:exc:`PineconeConnectionError`: If a network-level connection
fails (DNS, refused, transport error).
:exc:`PineconeTimeoutError`: If the request exceeds the configured timeout.
Examples:
.. code-block:: python
from pinecone import AsyncPinecone
async with AsyncPinecone(api_key="your-api-key") as pc:
embeddings = await pc.inference.embed(
model="multilingual-e5-large",
inputs=["Hello, world!"],
parameters={"input_type": "passage"},
)
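Inputs may also be supplied as dictionaries; this sketch assumes each item carries a ``text`` field, mirroring the rerank document format.
.. code-block:: python
async with AsyncPinecone(api_key="your-api-key") as pc:
embeddings = await pc.inference.embed(
model="multilingual-e5-large",
inputs=[{"text": "First passage."}, {"text": "Second passage."}],
parameters={"input_type": "passage"},
)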
"""
require_non_empty("model", str(model))
normalized_inputs = normalize_embed_inputs(inputs)
body: dict[str, Any] = {
"model": str(model),
"inputs": normalized_inputs,
}
if parameters is not None:
body["parameters"] = parameters
logger.info("Generating embeddings with model %r", str(model))
response = await self._http.post("/embed", json=body)
result = self._adapter.to_embeddings_list(response.content)
logger.debug("Generated %d embeddings", len(result.data))
return result
async def rerank(
self,
model: _enums.RerankModel | str,
query: str,
documents: list[str] | list[dict[str, Any]],
rank_fields: list[str] | None = None,
return_documents: bool = True,
top_n: int | None = None,
parameters: dict[str, Any] | None = None,
) -> RerankResult:
"""Rerank documents by relevance to a query.
Args:
model (RerankModel | str): Reranking model name.
query (str): Query text to rank against.
documents (list[str] | list[dict[str, Any]]): Documents to rank.
Strings are auto-wrapped as ``{"text": ...}``.
rank_fields (list[str] | None): Document fields to rank on.
Defaults to ``["text"]``.
return_documents (bool): Include document text in response.
Defaults to ``True``.
top_n (int | None): Number of top documents to return.
``None`` returns all.
parameters (dict[str, Any] | None): Model-specific parameters.
Returns:
A :class:`RerankResult` with ``.data`` and ``.usage``.
Raises:
:exc:`PineconeValueError`: If *model*, *query*, or *documents* is empty.
:exc:`PineconeTypeError`: If *documents* has an invalid type.
:exc:`ApiError`: If the API returns an error response.
:exc:`PineconeConnectionError`: If a network-level connection
fails (DNS, refused, transport error).
:exc:`PineconeTimeoutError`: If the request exceeds the configured timeout.
Examples:
.. code-block:: python
from pinecone import AsyncPinecone
async with AsyncPinecone(api_key="your-api-key") as pc:
result = await pc.inference.rerank(
model="bge-reranker-v2-m3",
query="Tell me about tech companies",
documents=["Apple is a fruit.", "Acme Inc. revolutionized tech."],
top_n=1,
)
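Documents can also be dictionaries ranked on a custom field; the ``summary`` field name below is illustrative.
.. code-block:: python
async with AsyncPinecone(api_key="your-api-key") as pc:
result = await pc.inference.rerank(
model="bge-reranker-v2-m3",
query="Tell me about tech companies",
documents=[
{"summary": "Apple is a fruit."},
{"summary": "Acme Inc. revolutionized tech."},
],
rank_fields=["summary"],
return_documents=False,
)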
"""
require_non_empty("model", str(model))
require_non_empty("query", query)
normalized_docs = normalize_rerank_documents(documents)
rank_fields = rank_fields if rank_fields is not None else ["text"]
body: dict[str, Any] = {
"model": str(model),
"query": query,
"documents": normalized_docs,
"rank_fields": rank_fields,
"return_documents": return_documents,
}
if top_n is not None:
body["top_n"] = top_n
if parameters is not None:
body["parameters"] = parameters
logger.info("Reranking %d documents with model %r", len(normalized_docs), str(model))
response = await self._http.post("/rerank", json=body)
result = self._adapter.to_rerank_result(response.content)
logger.debug("Reranked documents, got %d results", len(result.data))
return result
async def list_models(
self,
*,
type: str | None = None,
vector_type: str | None = None,
) -> ModelInfoList:
"""List available inference models.
Args:
type (str | None): Filter by model type (``"embed"`` or ``"rerank"``).
vector_type (str | None): Filter by vector type
(``"dense"`` or ``"sparse"``). Only relevant when ``type="embed"``.
Returns:
A :class:`ModelInfoList` supporting iteration, ``len()``, and ``.names()``.
Raises:
:exc:`PineconeValueError`: If *type* or *vector_type* is not a valid value.
:exc:`ApiError`: If the API returns an error response.
:exc:`PineconeConnectionError`: If a network-level connection
fails (DNS, refused, transport error).
:exc:`PineconeTimeoutError`: If the request exceeds the configured timeout.
Examples:
.. code-block:: python
from pinecone import AsyncPinecone
async with AsyncPinecone(api_key="your-api-key") as pc:
models = await pc.inference.list_models()
models.names()
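The filters accept the values documented above and can be combined.
.. code-block:: python
async with AsyncPinecone(api_key="your-api-key") as pc:
rerank_models = await pc.inference.list_models(type="rerank")
sparse_embed_models = await pc.inference.list_models(
type="embed",
vector_type="sparse",
)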
"""
if type is not None:
require_one_of("type", type, ("embed", "rerank"))
if vector_type is not None:
require_one_of("vector_type", vector_type, ("dense", "sparse"))
params: dict[str, Any] = {}
if type is not None:
params["type"] = type
if vector_type is not None:
params["vector_type"] = vector_type
logger.info("Listing models")
response = await self._http.get("/models", params=params)
result = self._adapter.to_model_info_list(response.content)
logger.debug("Listed %d models", len(result))
return result
async def get_model(
self,
*,
model: str | None = None,
**kwargs: str,
) -> ModelInfo:
"""Get detailed information about a specific model.
Args:
model (str): The model identifier to look up. The ``model_name``
keyword is also accepted as an alias; pass only one of the two.
Returns:
A :class:`ModelInfo` with full model details.
Raises:
:exc:`PineconeValueError`: If the resolved model identifier is empty.
:exc:`ValidationError`: If both ``model`` and ``model_name`` are provided.
:exc:`NotFoundError`: If the model does not exist.
:exc:`ApiError`: If the API returns another error response.
:exc:`PineconeConnectionError`: If a network-level connection
fails (DNS, refused, transport error).
:exc:`PineconeTimeoutError`: If the request exceeds the configured timeout.
Examples:
.. code-block:: python
from pinecone import AsyncPinecone
async with AsyncPinecone(api_key="your-api-key") as pc:
model_info = await pc.inference.get_model(
model="multilingual-e5-large",
)
model_info.type
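The ``model_name`` keyword is accepted as an alias for ``model``; pass only one of the two.
.. code-block:: python
async with AsyncPinecone(api_key="your-api-key") as pc:
model_info = await pc.inference.get_model(
model_name="multilingual-e5-large",
)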
"""
model_name: str | None = kwargs.pop("model_name", None)
if kwargs:
raise TypeError(f"get_model() got unexpected keyword arguments: {sorted(kwargs)!r}")
if model is not None and model_name is not None:
raise ValidationError("Provide either model= or model_name=, not both")
effective: str = model or model_name or ""
require_non_empty("model", effective)
logger.info("Describing model %r", effective)
response = await self._http.get(f"/models/{effective}")
result = self._adapter.to_model_info(response.content)
logger.debug("Described model %r", effective)
return result