# Source code for pinecone.models.inference.embed

"""Embedding response models for the Inference API."""

from __future__ import annotations

from collections.abc import Iterator
from typing import Any, cast, overload

import msgspec
from msgspec import Struct

from pinecone.models._mixin import DictLikeStruct, StructDictMixin


class EmbedUsage(StructDictMixin, Struct, kw_only=True):
    """Token usage information for an embedding request.

    Attributes:
        total_tokens: Total number of tokens processed.
    """

    # Total token count across all inputs in the request (per the docstring above).
    total_tokens: int


class DenseEmbedding(DictLikeStruct, Struct, kw_only=True):
    """A dense embedding vector.

    Attributes:
        values: The embedding values as a list of floats.
        vector_type: The type of embedding, always ``"dense"``.
    """

    values: list[float]
    vector_type: str = "dense"

    def __repr__(self) -> str:
        """Return a compact repr, truncating long value lists.

        Vectors often have hundreds of dimensions; showing only the first
        three plus a count keeps logs and REPL output readable.
        """
        if len(self.values) > 5:
            preview = ", ".join(repr(v) for v in self.values[:3])
            values_str = f"[{preview}, ...{len(self.values) - 3} more]"
        else:
            values_str = repr(self.values)
        return f"DenseEmbedding(values={values_str}, vector_type={self.vector_type!r})"
class SparseEmbedding(StructDictMixin, Struct, kw_only=True):
    """A sparse embedding vector.

    Attributes:
        sparse_values: The non-zero values of the sparse embedding.
        sparse_indices: The indices of the non-zero values.
        sparse_tokens: Optional token strings corresponding to each index.
        vector_type: The type of embedding, always ``"sparse"``.
    """

    sparse_values: list[float]
    sparse_indices: list[int]
    sparse_tokens: list[str] | None = None
    vector_type: str = "sparse"

    def __repr__(self) -> str:
        """Return a compact repr, truncating long index/value lists.

        Sparse vectors can carry many non-zero entries; show the first
        three of each list plus a remainder count to keep output readable.
        """
        if len(self.sparse_indices) > 5:
            idx_preview = ", ".join(repr(v) for v in self.sparse_indices[:3])
            indices_str = f"[{idx_preview}, ...{len(self.sparse_indices) - 3} more]"
        else:
            indices_str = repr(self.sparse_indices)
        if len(self.sparse_values) > 5:
            val_preview = ", ".join(repr(v) for v in self.sparse_values[:3])
            values_str = f"[{val_preview}, ...{len(self.sparse_values) - 3} more]"
        else:
            values_str = repr(self.sparse_values)
        parts = [
            f"sparse_indices={indices_str}",
            f"sparse_values={values_str}",
            f"vector_type={self.vector_type!r}",
        ]
        # Tokens are optional; when present they are inserted before
        # vector_type to mirror field declaration order.
        if self.sparse_tokens is not None:
            parts.insert(2, f"sparse_tokens={self.sparse_tokens!r}")
        return f"SparseEmbedding({', '.join(parts)})"
# Union alias covering both embedding variants returned by the Inference API.
Embedding = DenseEmbedding | SparseEmbedding
class EmbeddingsList(Struct, kw_only=True):
    """Response from the embed endpoint.

    Supports integer indexing, iteration, and ``len()`` over the embedded
    data items, as well as bracket access for field names.

    Attributes:
        model: The model used to generate embeddings.
        vector_type: The type of embeddings returned (``"dense"`` or
            ``"sparse"``).
        data: The list of embedding objects.
        usage: Token usage information.
    """

    model: str
    vector_type: str
    data: list[DenseEmbedding] | list[SparseEmbedding]
    usage: EmbedUsage

    @overload
    def __getitem__(self, key: int) -> DenseEmbedding | SparseEmbedding: ...

    @overload
    def __getitem__(self, key: str) -> Any: ...

    def __getitem__(self, key: int | str) -> Any:
        """Support integer indexing into data and string bracket access.

        Args:
            key: An integer index into ``data``, or a string field name.

        Returns:
            The embedding at the given index, or the field value.

        Raises:
            KeyError: If ``key`` is a string that is not a struct field.
            IndexError: If ``key`` is an integer out of range of ``data``.
        """
        if isinstance(key, int):
            return self.data[key]
        if key not in self.__struct_fields__:
            raise KeyError(key)
        return getattr(self, key)

    def __contains__(self, key: object) -> bool:
        """Support ``in`` for field names (str) and embedding membership."""
        if isinstance(key, str):
            return key in self.__struct_fields__
        return key in self.data

    def __len__(self) -> int:
        """Return the number of embeddings in ``data``."""
        return len(self.data)

    def __iter__(self) -> Iterator[DenseEmbedding | SparseEmbedding]:
        """Iterate over the embedding objects in ``data``."""
        return iter(self.data)

    def to_dict(self) -> dict[str, Any]:
        """Return a plain dict representation of this object."""
        return cast(dict[str, Any], msgspec.to_builtins(self))

    def __getattr__(self, name: str) -> Any:
        """Raise AttributeError for unknown attributes (backward compat hook)."""
        raise AttributeError(f"'{type(self).__name__}' object has no attribute '{name}'")

    def __repr__(self) -> str:
        # Summarize data by count rather than dumping every embedding.
        return (
            f"EmbeddingsList("
            f"model={self.model!r}, "
            f"vector_type={self.vector_type!r}, "
            f"count={len(self.data)}, "
            f"usage={self.usage!r})"
        )