pinecone.grpc.vector_factory_grpc

  1import numbers
  2
  3from collections.abc import Iterable, Mapping
  4from typing import Union, Tuple, Dict
  5
  6from google.protobuf.struct_pb2 import Struct
  7
  8from .utils import dict_to_proto_struct
  9from ..utils import fix_tuple_length, convert_to_list
 10from ..utils.constants import REQUIRED_VECTOR_FIELDS, OPTIONAL_VECTOR_FIELDS
 11from ..data import (
 12    VectorDictionaryMissingKeysError,
 13    VectorDictionaryExcessKeysError,
 14    VectorTupleLengthError,
 15    MetadataDictionaryExpectedError,
 16)
 17from .sparse_values_factory import SparseValuesFactory
 18
 19from pinecone.core.grpc.protos.vector_service_pb2 import (
 20    Vector as GRPCVector,
 21    SparseValues as GRPCSparseValues,
 22)
 23from pinecone import (
 24    Vector as NonGRPCVector,
 25    SparseValues as NonGRPCSparseValues,
 26)
 27
 28
 29class VectorFactoryGRPC:
 30    @staticmethod
 31    def build(item: Union[GRPCVector, NonGRPCVector, Tuple, Dict]) -> GRPCVector:
 32        if isinstance(item, GRPCVector):
 33            return item
 34        elif isinstance(item, NonGRPCVector):
 35            if item.sparse_values:
 36                sv = GRPCSparseValues(
 37                    indices=item.sparse_values.indices,
 38                    values=item.sparse_values.values,
 39                )
 40                return GRPCVector(
 41                    id=item.id,
 42                    values=item.values,
 43                    metadata=dict_to_proto_struct(item.metadata or {}),
 44                    sparse_values=sv,
 45                )
 46            else:
 47                return GRPCVector(
 48                    id=item.id,
 49                    values=item.values,
 50                    metadata=dict_to_proto_struct(item.metadata or {}),
 51                )
 52        elif isinstance(item, tuple):
 53            return VectorFactoryGRPC._tuple_to_vector(item)
 54        elif isinstance(item, Mapping):
 55            return VectorFactoryGRPC._dict_to_vector(item)
 56        else:
 57            raise ValueError(f"Invalid vector value passed: cannot interpret type {type(item)}")
 58
 59    @staticmethod
 60    def _tuple_to_vector(item) -> GRPCVector:
 61        if len(item) < 2 or len(item) > 3:
 62            raise VectorTupleLengthError(item)
 63        id, values, metadata = fix_tuple_length(item, 3)
 64        if isinstance(values, GRPCSparseValues) or isinstance(values, NonGRPCSparseValues):
 65            raise ValueError(
 66                "Sparse values are not supported in tuples. Please use either dicts or Vector objects as inputs."
 67            )
 68        else:
 69            return GRPCVector(
 70                id=id,
 71                values=convert_to_list(values),
 72                metadata=dict_to_proto_struct(metadata or {}),
 73            )
 74
 75    @staticmethod
 76    def _dict_to_vector(item) -> GRPCVector:
 77        item_keys = set(item.keys())
 78        if not item_keys.issuperset(REQUIRED_VECTOR_FIELDS):
 79            raise VectorDictionaryMissingKeysError(item)
 80
 81        excessive_keys = item_keys - (REQUIRED_VECTOR_FIELDS | OPTIONAL_VECTOR_FIELDS)
 82        if len(excessive_keys) > 0:
 83            raise VectorDictionaryExcessKeysError(item)
 84
 85        values = item.get("values")
 86        if "values" in item:
 87            try:
 88                item["values"] = convert_to_list(values)
 89            except TypeError as e:
 90                raise TypeError(f"Column `values` is expected to be a list of floats") from e
 91
 92        sparse_values = item.get("sparse_values")
 93        if sparse_values != None and not isinstance(sparse_values, GRPCSparseValues):
 94            item["sparse_values"] = SparseValuesFactory.build(sparse_values)
 95
 96        metadata = item.get("metadata")
 97        if metadata:
 98            if isinstance(metadata, dict):
 99                item["metadata"] = dict_to_proto_struct(metadata)
100            elif not isinstance(metadata, Struct):
101                raise MetadataDictionaryExpectedError(item)
102        else:
103            item["metadata"] = dict_to_proto_struct({})
104
105        try:
106            return GRPCVector(**item)
107        except TypeError as e:
108            # Where possible raise a more specific error to the user.
109            vid = item.get("id")
110            if not isinstance(vid, bytes) and not isinstance(vid, str):
111                raise TypeError(
112                    f"Cannot set Vector.id to {vid}: {vid} has type {type(vid)}, "
113                    "but expected one of: (<class 'bytes'>, <class 'str'>) for field Vector.id"
114                )
115            if not isinstance(item["values"], Iterable) or not isinstance(
116                item["values"].__iter__().__next__(), numbers.Real
117            ):
118                raise TypeError(f"Column `values` is expected to be a list of floats")
119            raise e
class VectorFactoryGRPC:
 30class VectorFactoryGRPC:
 31    @staticmethod
 32    def build(item: Union[GRPCVector, NonGRPCVector, Tuple, Dict]) -> GRPCVector:
 33        if isinstance(item, GRPCVector):
 34            return item
 35        elif isinstance(item, NonGRPCVector):
 36            if item.sparse_values:
 37                sv = GRPCSparseValues(
 38                    indices=item.sparse_values.indices,
 39                    values=item.sparse_values.values,
 40                )
 41                return GRPCVector(
 42                    id=item.id,
 43                    values=item.values,
 44                    metadata=dict_to_proto_struct(item.metadata or {}),
 45                    sparse_values=sv,
 46                )
 47            else:
 48                return GRPCVector(
 49                    id=item.id,
 50                    values=item.values,
 51                    metadata=dict_to_proto_struct(item.metadata or {}),
 52                )
 53        elif isinstance(item, tuple):
 54            return VectorFactoryGRPC._tuple_to_vector(item)
 55        elif isinstance(item, Mapping):
 56            return VectorFactoryGRPC._dict_to_vector(item)
 57        else:
 58            raise ValueError(f"Invalid vector value passed: cannot interpret type {type(item)}")
 59
 60    @staticmethod
 61    def _tuple_to_vector(item) -> GRPCVector:
 62        if len(item) < 2 or len(item) > 3:
 63            raise VectorTupleLengthError(item)
 64        id, values, metadata = fix_tuple_length(item, 3)
 65        if isinstance(values, GRPCSparseValues) or isinstance(values, NonGRPCSparseValues):
 66            raise ValueError(
 67                "Sparse values are not supported in tuples. Please use either dicts or Vector objects as inputs."
 68            )
 69        else:
 70            return GRPCVector(
 71                id=id,
 72                values=convert_to_list(values),
 73                metadata=dict_to_proto_struct(metadata or {}),
 74            )
 75
 76    @staticmethod
 77    def _dict_to_vector(item) -> GRPCVector:
 78        item_keys = set(item.keys())
 79        if not item_keys.issuperset(REQUIRED_VECTOR_FIELDS):
 80            raise VectorDictionaryMissingKeysError(item)
 81
 82        excessive_keys = item_keys - (REQUIRED_VECTOR_FIELDS | OPTIONAL_VECTOR_FIELDS)
 83        if len(excessive_keys) > 0:
 84            raise VectorDictionaryExcessKeysError(item)
 85
 86        values = item.get("values")
 87        if "values" in item:
 88            try:
 89                item["values"] = convert_to_list(values)
 90            except TypeError as e:
 91                raise TypeError(f"Column `values` is expected to be a list of floats") from e
 92
 93        sparse_values = item.get("sparse_values")
 94        if sparse_values != None and not isinstance(sparse_values, GRPCSparseValues):
 95            item["sparse_values"] = SparseValuesFactory.build(sparse_values)
 96
 97        metadata = item.get("metadata")
 98        if metadata:
 99            if isinstance(metadata, dict):
100                item["metadata"] = dict_to_proto_struct(metadata)
101            elif not isinstance(metadata, Struct):
102                raise MetadataDictionaryExpectedError(item)
103        else:
104            item["metadata"] = dict_to_proto_struct({})
105
106        try:
107            return GRPCVector(**item)
108        except TypeError as e:
109            # Where possible raise a more specific error to the user.
110            vid = item.get("id")
111            if not isinstance(vid, bytes) and not isinstance(vid, str):
112                raise TypeError(
113                    f"Cannot set Vector.id to {vid}: {vid} has type {type(vid)}, "
114                    "but expected one of: (<class 'bytes'>, <class 'str'>) for field Vector.id"
115                )
116            if not isinstance(item["values"], Iterable) or not isinstance(
117                item["values"].__iter__().__next__(), numbers.Real
118            ):
119                raise TypeError(f"Column `values` is expected to be a list of floats")
120            raise e
@staticmethod
def build( item: Union[vector_service_pb2.Vector, pinecone.core.openapi.data.model.vector.Vector, Tuple, Dict]) -> vector_service_pb2.Vector:
31    @staticmethod
32    def build(item: Union[GRPCVector, NonGRPCVector, Tuple, Dict]) -> GRPCVector:
33        if isinstance(item, GRPCVector):
34            return item
35        elif isinstance(item, NonGRPCVector):
36            if item.sparse_values:
37                sv = GRPCSparseValues(
38                    indices=item.sparse_values.indices,
39                    values=item.sparse_values.values,
40                )
41                return GRPCVector(
42                    id=item.id,
43                    values=item.values,
44                    metadata=dict_to_proto_struct(item.metadata or {}),
45                    sparse_values=sv,
46                )
47            else:
48                return GRPCVector(
49                    id=item.id,
50                    values=item.values,
51                    metadata=dict_to_proto_struct(item.metadata or {}),
52                )
53        elif isinstance(item, tuple):
54            return VectorFactoryGRPC._tuple_to_vector(item)
55        elif isinstance(item, Mapping):
56            return VectorFactoryGRPC._dict_to_vector(item)
57        else:
58            raise ValueError(f"Invalid vector value passed: cannot interpret type {type(item)}")