pinecone.data.vector_factory

  1import numbers
  2
  3from collections.abc import Iterable, Mapping
  4from typing import Union, Tuple
  5
  6from ..utils import fix_tuple_length, convert_to_list, parse_non_empty_args
  7from ..utils.constants import REQUIRED_VECTOR_FIELDS, OPTIONAL_VECTOR_FIELDS
  8
  9from .sparse_values_factory import SparseValuesFactory
 10
 11from pinecone.core.openapi.db_data.models import (
 12    Vector as OpenApiVector,
 13    SparseValues as OpenApiSparseValues,
 14)
 15from .dataclasses import Vector
 16
 17from .errors import (
 18    VectorDictionaryMissingKeysError,
 19    VectorDictionaryExcessKeysError,
 20    VectorTupleLengthError,
 21    MetadataDictionaryExpectedError,
 22)
 23
 24from .types import VectorTuple, VectorTypedDict
 25
 26
 27class VectorFactory:
 28    """VectorFactory is used to convert various types of input into vector objects used in generated request code."""
 29
 30    @staticmethod
 31    def build(
 32        item: Union[OpenApiVector, VectorTuple, VectorTypedDict], check_type: bool = True
 33    ) -> OpenApiVector:
 34        if isinstance(item, OpenApiVector):
 35            return item
 36        elif isinstance(item, Vector):
 37            args = parse_non_empty_args(
 38                [
 39                    ("id", item.id),
 40                    ("values", item.values),
 41                    ("metadata", item.metadata),
 42                    ("sparse_values", SparseValuesFactory.build(item.sparse_values)),
 43                ]
 44            )
 45
 46            return OpenApiVector(**args)
 47        elif isinstance(item, tuple):
 48            return VectorFactory._tuple_to_vector(item, check_type)
 49        elif isinstance(item, Mapping):
 50            return VectorFactory._dict_to_vector(item, check_type)
 51        else:
 52            raise ValueError(f"Invalid vector value passed: cannot interpret type {type(item)}")
 53
 54    @staticmethod
 55    def _tuple_to_vector(item: Tuple, check_type: bool) -> OpenApiVector:
 56        if len(item) < 2 or len(item) > 3:
 57            raise VectorTupleLengthError(item)
 58        id, values, metadata = fix_tuple_length(item, 3)
 59        if isinstance(values, OpenApiSparseValues):
 60            raise ValueError(
 61                "Sparse values are not supported in tuples. Please use either dicts or OpenApiVector objects as inputs."
 62            )
 63        else:
 64            return OpenApiVector(
 65                id=id,
 66                values=convert_to_list(values),
 67                metadata=metadata or {},
 68                _check_type=check_type,
 69            )
 70
 71    @staticmethod
 72    def _dict_to_vector(item, check_type: bool) -> OpenApiVector:
 73        item_keys = set(item.keys())
 74        if not item_keys.issuperset(REQUIRED_VECTOR_FIELDS):
 75            raise VectorDictionaryMissingKeysError(item)
 76
 77        if "sparse_values" not in item_keys and "values" not in item_keys:
 78            raise ValueError(
 79                "At least one of 'values' or 'sparse_values' must be provided in the vector dictionary."
 80            )
 81
 82        excessive_keys = item_keys - (REQUIRED_VECTOR_FIELDS | OPTIONAL_VECTOR_FIELDS)
 83        if len(excessive_keys) > 0:
 84            raise VectorDictionaryExcessKeysError(item)
 85
 86        values = item.get("values")
 87        if "values" in item:
 88            item["values"] = convert_to_list(values)
 89        else:
 90            item["values"] = []
 91
 92        sparse_values = item.get("sparse_values")
 93        if sparse_values is None:
 94            item.pop("sparse_values", None)
 95        else:
 96            item["sparse_values"] = SparseValuesFactory.build(sparse_values)
 97
 98        metadata = item.get("metadata")
 99        if metadata and not isinstance(metadata, Mapping):
100            raise MetadataDictionaryExpectedError(item)
101
102        try:
103            return OpenApiVector(**item, _check_type=check_type)
104        except TypeError as e:
105            if not isinstance(item["values"], Iterable) or not isinstance(
106                item["values"].__iter__().__next__(), numbers.Real
107            ):
108                raise TypeError("Column `values` is expected to be a list of floats")
109            raise e
class VectorFactory:
    """Convert supported vector inputs into the OpenAPI vector objects used by generated request code.

    ``build`` is the public entry point; it dispatches on the type of the
    input to the tuple and mapping helpers defined below.
    """

    @staticmethod
    def build(
        item: Union[OpenApiVector, VectorTuple, VectorTypedDict], check_type: bool = True
    ) -> OpenApiVector:
        """Return an ``OpenApiVector`` built from ``item``.

        Args:
            item: An ``OpenApiVector`` (returned unchanged), a ``Vector``
                dataclass, a tuple of ``(id, values)`` or
                ``(id, values, metadata)``, or a mapping of vector fields.
            check_type: Forwarded to ``OpenApiVector`` as ``_check_type``.

        Returns:
            The ``OpenApiVector`` corresponding to ``item``.

        Raises:
            ValueError: If ``item`` is none of the supported input types.
        """
        if isinstance(item, OpenApiVector):
            return item
        if isinstance(item, Vector):
            args = parse_non_empty_args(
                [
                    ("id", item.id),
                    ("values", item.values),
                    ("metadata", item.metadata),
                    ("sparse_values", SparseValuesFactory.build(item.sparse_values)),
                ]
            )
            # Forward check_type here as well so every input kind honors the
            # caller's choice, matching the tuple and mapping paths.
            return OpenApiVector(**args, _check_type=check_type)
        if isinstance(item, tuple):
            return VectorFactory._tuple_to_vector(item, check_type)
        if isinstance(item, Mapping):
            return VectorFactory._dict_to_vector(item, check_type)
        raise ValueError(f"Invalid vector value passed: cannot interpret type {type(item)}")

    @staticmethod
    def _tuple_to_vector(item: Tuple, check_type: bool) -> OpenApiVector:
        """Build an ``OpenApiVector`` from an ``(id, values[, metadata])`` tuple.

        Raises:
            VectorTupleLengthError: If the tuple does not have 2 or 3 elements.
            ValueError: If the ``values`` slot holds an ``OpenApiSparseValues``.
        """
        if len(item) < 2 or len(item) > 3:
            raise VectorTupleLengthError(item)
        # fix_tuple_length normalizes the tuple to exactly three slots so it can
        # be unpacked uniformly; a missing metadata slot becomes {} below.
        vector_id, values, metadata = fix_tuple_length(item, 3)
        if isinstance(values, OpenApiSparseValues):
            raise ValueError(
                "Sparse values are not supported in tuples. Please use either dicts or OpenApiVector objects as inputs."
            )
        return OpenApiVector(
            id=vector_id,
            values=convert_to_list(values),
            metadata=metadata or {},
            _check_type=check_type,
        )

    @staticmethod
    def _dict_to_vector(item, check_type: bool) -> OpenApiVector:
        """Build an ``OpenApiVector`` from a mapping of vector fields.

        The caller's mapping is never modified; normalization happens on a
        private copy.

        Raises:
            VectorDictionaryMissingKeysError: If a required field is absent.
            ValueError: If neither ``values`` nor ``sparse_values`` is present.
            VectorDictionaryExcessKeysError: If unknown keys are present.
            MetadataDictionaryExpectedError: If ``metadata`` is truthy but not
                a mapping.
            TypeError: If ``OpenApiVector`` rejects the fields; the message is
                replaced with a friendlier one when ``values`` is the culprit.
        """
        item_keys = set(item.keys())
        if not item_keys.issuperset(REQUIRED_VECTOR_FIELDS):
            raise VectorDictionaryMissingKeysError(item)

        if "sparse_values" not in item_keys and "values" not in item_keys:
            raise ValueError(
                "At least one of 'values' or 'sparse_values' must be provided in the vector dictionary."
            )

        excessive_keys = item_keys - (REQUIRED_VECTOR_FIELDS | OPTIONAL_VECTOR_FIELDS)
        if excessive_keys:
            raise VectorDictionaryExcessKeysError(item)

        # Work on a shallow copy: the original code rewrote the caller's dict in
        # place (and could not handle read-only mappings at all).
        fields = dict(item)

        if "values" in fields:
            fields["values"] = convert_to_list(fields["values"])
        else:
            fields["values"] = []

        sparse_values = fields.get("sparse_values")
        if sparse_values is None:
            # Drop an explicit None so it is not passed on to the model.
            fields.pop("sparse_values", None)
        else:
            fields["sparse_values"] = SparseValuesFactory.build(sparse_values)

        metadata = fields.get("metadata")
        if metadata and not isinstance(metadata, Mapping):
            raise MetadataDictionaryExpectedError(item)

        try:
            return OpenApiVector(**fields, _check_type=check_type)
        except TypeError as e:
            values = fields["values"]
            values_invalid = not isinstance(values, Iterable)
            if not values_invalid:
                # Inspect only the first element, and tolerate an empty
                # sequence instead of letting StopIteration mask the error.
                for first in values:
                    values_invalid = not isinstance(first, numbers.Real)
                    break
            if values_invalid:
                raise TypeError("Column `values` is expected to be a list of floats") from e
            raise

VectorFactory is used to convert various types of input into vector objects used in generated request code.

@staticmethod
def build( item: Union[pinecone.core.openapi.db_data.model.vector.Vector, Tuple[str, List[float]], pinecone.data.types.vector_typed_dict.VectorTypedDict], check_type: bool = True) -> pinecone.core.openapi.db_data.model.vector.Vector:
31    @staticmethod
32    def build(
33        item: Union[OpenApiVector, VectorTuple, VectorTypedDict], check_type: bool = True
34    ) -> OpenApiVector:
35        if isinstance(item, OpenApiVector):
36            return item
37        elif isinstance(item, Vector):
38            args = parse_non_empty_args(
39                [
40                    ("id", item.id),
41                    ("values", item.values),
42                    ("metadata", item.metadata),
43                    ("sparse_values", SparseValuesFactory.build(item.sparse_values)),
44                ]
45            )
46
47            return OpenApiVector(**args)
48        elif isinstance(item, tuple):
49            return VectorFactory._tuple_to_vector(item, check_type)
50        elif isinstance(item, Mapping):
51            return VectorFactory._dict_to_vector(item, check_type)
52        else:
53            raise ValueError(f"Invalid vector value passed: cannot interpret type {type(item)}")