pinecone.grpc.index_grpc
import logging
from typing import Optional, Dict, Union, List, Tuple, Any, TypedDict, cast

from google.protobuf import json_format

from tqdm.autonotebook import tqdm

from .utils import (
    dict_to_proto_struct,
    parse_fetch_response,
    parse_query_response,
    parse_stats_response,
)
from .vector_factory_grpc import VectorFactoryGRPC

from pinecone.core.openapi.data.models import (
    FetchResponse,
    QueryResponse,
    DescribeIndexStatsResponse,
)
from pinecone.models.list_response import (
    ListResponse as SimpleListResponse,
    Pagination,
)
from pinecone.core.grpc.protos.vector_service_pb2 import (
    Vector as GRPCVector,
    QueryVector as GRPCQueryVector,
    UpsertRequest,
    UpsertResponse,
    DeleteRequest,
    QueryRequest,
    FetchRequest,
    UpdateRequest,
    ListRequest,
    ListResponse,
    DescribeIndexStatsRequest,
    DeleteResponse,
    UpdateResponse,
    SparseValues as GRPCSparseValues,
)
from pinecone import Vector as NonGRPCVector
from pinecone.core.grpc.protos.vector_service_pb2_grpc import VectorServiceStub
from .base import GRPCIndexBase
from .future import PineconeGrpcFuture


__all__ = ["GRPCIndex", "GRPCVector", "GRPCQueryVector", "GRPCSparseValues"]

_logger = logging.getLogger(__name__)


class SparseVectorTypedDict(TypedDict):
    # Parallel lists: indices[i] is the dimension of sparse value values[i];
    # both lists must have the same length.
    indices: List[int]
    values: List[float]


class GRPCIndex(GRPCIndexBase):
    """A client for interacting with a Pinecone index via GRPC API."""

    @property
    def stub_class(self):
        """The gRPC stub class used by the base class to open the channel."""
        return VectorServiceStub

    def upsert(
        self,
        vectors: Union[List[GRPCVector], List[NonGRPCVector], List[tuple], List[dict]],
        async_req: bool = False,
        namespace: Optional[str] = None,
        batch_size: Optional[int] = None,
        show_progress: bool = True,
        **kwargs,
    ) -> Union[UpsertResponse, PineconeGrpcFuture]:
        """
        The upsert operation writes vectors into a namespace.
        If a new value is upserted for an existing vector id, it will overwrite the previous value.

        Examples:
            >>> index.upsert([('id1', [1.0, 2.0, 3.0], {'key': 'value'}),
                              ('id2', [1.0, 2.0, 3.0])
                              ],
                              namespace='ns1', async_req=True)
            >>> index.upsert([{'id': 'id1', 'values': [1.0, 2.0, 3.0], 'metadata': {'key': 'value'}},
                              {'id': 'id2',
                               'values': [1.0, 2.0, 3.0],
                               'sparse_values': {'indices': [1, 8], 'values': [0.2, 0.4]}},
                              ])
            >>> index.upsert([GRPCVector(id='id1', values=[1.0, 2.0, 3.0], metadata={'key': 'value'}),
                              GRPCVector(id='id2', values=[1.0, 2.0, 3.0]),
                              GRPCVector(id='id3',
                                         values=[1.0, 2.0, 3.0],
                                         sparse_values=GRPCSparseValues(indices=[1, 2], values=[0.2, 0.4]))])

        Args:
            vectors (Union[List[Vector], List[Tuple]]): A list of vectors to upsert.

                     A vector can be represented by a 1) GRPCVector object, a 2) tuple or 3) a dictionary
                     1) if a tuple is used, it must be of the form (id, values, metadata) or (id, values).
                        where id is a string, vector is a list of floats, and metadata is a dict.
                        Examples: ('id1', [1.0, 2.0, 3.0], {'key': 'value'}), ('id2', [1.0, 2.0, 3.0])

                     2) if a GRPCVector object is used, a GRPCVector object must be of the form
                        GRPCVector(id, values, metadata), where metadata is an optional argument of type
                        Dict[str, Union[str, float, int, bool, List[int], List[float], List[str]]]
                        Examples: GRPCVector(id='id1', values=[1.0, 2.0, 3.0], metadata={'key': 'value'}),
                                  GRPCVector(id='id2', values=[1.0, 2.0, 3.0]),
                                  GRPCVector(id='id3',
                                             values=[1.0, 2.0, 3.0],
                                             sparse_values=GRPCSparseValues(indices=[1, 2], values=[0.2, 0.4]))

                     3) if a dictionary is used, it must be in the form
                        {'id': str, 'values': List[float], 'sparse_values': {'indices': List[int], 'values': List[float]},
                         'metadata': dict}

                    Note: the dimension of each vector must match the dimension of the index.
            async_req (bool): If True, the upsert operation will be performed asynchronously.
                              Cannot be used with batch_size.
                              Defaults to False. See: https://docs.pinecone.io/docs/performance-tuning [optional]
            namespace (str): The namespace to write to. If not specified, the default namespace is used. [optional]
            batch_size (int): The number of vectors to upsert in each batch.
                              Cannot be used with async_req=True.
                              If not specified, all vectors will be upserted in a single batch. [optional]
            show_progress (bool): Whether to show a progress bar using tqdm.
                                  Applied only if batch_size is provided. Default is True.

        Returns: UpsertResponse, contains the number of vectors upserted
        """
        if async_req and batch_size is not None:
            raise ValueError(
                "async_req is not supported when batch_size is provided."
                "To upsert in parallel, please follow: "
                "https://docs.pinecone.io/docs/performance-tuning"
            )

        timeout = kwargs.pop("timeout", None)

        vectors = list(map(VectorFactoryGRPC.build, vectors))
        if async_req:
            args_dict = self._parse_non_empty_args([("namespace", namespace)])
            request = UpsertRequest(vectors=vectors, **args_dict, **kwargs)
            future = self._wrap_grpc_call(self.stub.Upsert.future, request, timeout=timeout)
            return PineconeGrpcFuture(future)

        if batch_size is None:
            return self._upsert_batch(vectors, namespace, timeout=timeout, **kwargs)

        if not isinstance(batch_size, int) or batch_size <= 0:
            raise ValueError("batch_size must be a positive integer")

        pbar = tqdm(
            total=len(vectors),
            disable=not show_progress,
            desc="Upserted vectors",
        )
        total_upserted = 0
        for i in range(0, len(vectors), batch_size):
            batch_result = self._upsert_batch(
                vectors[i : i + batch_size],
                namespace,
                timeout=timeout,
                **kwargs,
            )
            pbar.update(batch_result.upserted_count)
            # we can't use here pbar.n for the case show_progress=False
            total_upserted += batch_result.upserted_count

        return UpsertResponse(upserted_count=total_upserted)

    def _upsert_batch(
        self,
        vectors: List[GRPCVector],
        namespace: Optional[str],
        timeout: Optional[float],
        **kwargs,
    ) -> UpsertResponse:
        # Issue a single blocking Upsert RPC for one batch of already-built GRPCVectors.
        args_dict = self._parse_non_empty_args([("namespace", namespace)])
        request = UpsertRequest(vectors=vectors, **args_dict)
        return self._wrap_grpc_call(self.stub.Upsert, request, timeout=timeout, **kwargs)

    def upsert_from_dataframe(
        self,
        df,
        namespace: str = "",
        batch_size: int = 500,
        use_async_requests: bool = True,
        show_progress: bool = True,
    ) -> UpsertResponse:
        """Upserts a dataframe into the index.

        Args:
            df: A pandas dataframe with the following columns: id, values, sparse_values, and metadata.
            namespace: The namespace to upsert into.
            batch_size: The number of rows to upsert in a single batch.
            use_async_requests: Whether to upsert multiple requests at the same time using asynchronous request mechanism.
                                Set to `False`
            show_progress: Whether to show a progress bar.
        """
        try:
            import pandas as pd
        except ImportError:
            raise RuntimeError(
                "The `pandas` package is not installed. Please install pandas to use `upsert_from_dataframe()`"
            )

        if not isinstance(df, pd.DataFrame):
            raise ValueError(f"Only pandas dataframes are supported. Found: {type(df)}")

        pbar = tqdm(
            total=len(df),
            disable=not show_progress,
            desc="sending upsert requests",
        )
        results = []
        for chunk in self._iter_dataframe(df, batch_size=batch_size):
            res = self.upsert(
                vectors=chunk,
                namespace=namespace,
                async_req=use_async_requests,
            )
            pbar.update(len(chunk))
            results.append(res)

        if use_async_requests:
            # With async_req=True each result is a future; resolve them all here.
            cast_results = cast(List[PineconeGrpcFuture], results)
            results = [
                async_result.result()
                for async_result in tqdm(
                    cast_results,
                    disable=not show_progress,
                    desc="collecting async responses",
                )
            ]

        upserted_count = 0
        for res in results:
            if hasattr(res, "upserted_count") and isinstance(res.upserted_count, int):
                upserted_count += res.upserted_count

        return UpsertResponse(upserted_count=upserted_count)

    @staticmethod
    def _iter_dataframe(df, batch_size):
        # Yield successive slices of the dataframe as lists of row dicts.
        for i in range(0, len(df), batch_size):
            batch = df.iloc[i : i + batch_size].to_dict(orient="records")
            yield batch

    def delete(
        self,
        ids: Optional[List[str]] = None,
        delete_all: Optional[bool] = None,
        namespace: Optional[str] = None,
        filter: Optional[Dict[str, Union[str, float, int, bool, List, dict]]] = None,
        async_req: bool = False,
        **kwargs,
    ) -> Union[DeleteResponse, PineconeGrpcFuture]:
        """
        The Delete operation deletes vectors from the index, from a single namespace.
        No error raised if the vector id does not exist.
        Note: for any delete call, if namespace is not specified, the default namespace is used.

        Delete can occur in the following mutual exclusive ways:
        1. Delete by ids from a single namespace
        2. Delete all vectors from a single namespace by setting delete_all to True
        3. Delete all vectors from a single namespace by specifying a metadata filter
           (note that for this option delete all must be set to False)

        Examples:
            >>> index.delete(ids=['id1', 'id2'], namespace='my_namespace')
            >>> index.delete(delete_all=True, namespace='my_namespace')
            >>> index.delete(filter={'key': 'value'}, namespace='my_namespace', async_req=True)

        Args:
            ids (List[str]): Vector ids to delete [optional]
            delete_all (bool): This indicates that all vectors in the index namespace should be deleted.. [optional]
                               Default is False.
            namespace (str): The namespace to delete vectors from [optional]
                             If not specified, the default namespace is used.
            filter (Dict[str, Union[str, float, int, bool, List, dict]]):
                    If specified, the metadata filter here will be used to select the vectors to delete.
                    This is mutually exclusive with specifying ids to delete in the ids param or using delete_all=True.
                    See https://www.pinecone.io/docs/metadata-filtering/.. [optional]
            async_req (bool): If True, the delete operation will be performed asynchronously.
                              Defaults to False. [optional]

        Returns: DeleteResponse (contains no data) or a PineconeGrpcFuture object if async_req is True.
        """

        if filter is not None:
            filter_struct = dict_to_proto_struct(filter)
        else:
            filter_struct = None

        args_dict = self._parse_non_empty_args(
            [
                ("ids", ids),
                ("delete_all", delete_all),
                ("namespace", namespace),
                ("filter", filter_struct),
            ]
        )
        timeout = kwargs.pop("timeout", None)

        request = DeleteRequest(**args_dict, **kwargs)
        if async_req:
            future = self._wrap_grpc_call(self.stub.Delete.future, request, timeout=timeout)
            return PineconeGrpcFuture(future)
        else:
            return self._wrap_grpc_call(self.stub.Delete, request, timeout=timeout)

    def fetch(
        self,
        ids: Optional[List[str]],
        namespace: Optional[str] = None,
        **kwargs,
    ) -> FetchResponse:
        """
        The fetch operation looks up and returns vectors, by ID, from a single namespace.
        The returned vectors include the vector data and/or metadata.

        Examples:
            >>> index.fetch(ids=['id1', 'id2'], namespace='my_namespace')
            >>> index.fetch(ids=['id1', 'id2'])

        Args:
            ids (List[str]): The vector IDs to fetch.
            namespace (str): The namespace to fetch vectors from.
                             If not specified, the default namespace is used. [optional]

        Returns: FetchResponse object which contains the list of Vector objects, and namespace name.
        """
        timeout = kwargs.pop("timeout", None)

        args_dict = self._parse_non_empty_args([("namespace", namespace)])

        request = FetchRequest(ids=ids, **args_dict, **kwargs)
        response = self._wrap_grpc_call(self.stub.Fetch, request, timeout=timeout)
        json_response = json_format.MessageToDict(response)
        return parse_fetch_response(json_response)

    def query(
        self,
        vector: Optional[List[float]] = None,
        id: Optional[str] = None,
        namespace: Optional[str] = None,
        top_k: Optional[int] = None,
        filter: Optional[Dict[str, Union[str, float, int, bool, List, dict]]] = None,
        include_values: Optional[bool] = None,
        include_metadata: Optional[bool] = None,
        sparse_vector: Optional[Union[GRPCSparseValues, SparseVectorTypedDict]] = None,
        **kwargs,
    ) -> QueryResponse:
        """
        The Query operation searches a namespace, using a query vector.
        It retrieves the ids of the most similar items in a namespace, along with their similarity scores.

        Examples:
            >>> index.query(vector=[1, 2, 3], top_k=10, namespace='my_namespace')
            >>> index.query(id='id1', top_k=10, namespace='my_namespace')
            >>> index.query(vector=[1, 2, 3], top_k=10, namespace='my_namespace', filter={'key': 'value'})
            >>> index.query(id='id1', top_k=10, namespace='my_namespace', include_metadata=True, include_values=True)
            >>> index.query(vector=[1, 2, 3], sparse_vector={'indices': [1, 2], 'values': [0.2, 0.4]},
            >>>             top_k=10, namespace='my_namespace')
            >>> index.query(vector=[1, 2, 3], sparse_vector=GRPCSparseValues([1, 2], [0.2, 0.4]),
            >>>             top_k=10, namespace='my_namespace')

        Args:
            vector (List[float]): The query vector. This should be the same length as the dimension of the index
                                  being queried. Each `query()` request can contain only one of the parameters
                                  `id` or `vector`.. [optional]
            id (str): The unique ID of the vector to be used as a query vector.
                      Each `query()` request can contain only one of the parameters
                      `vector` or `id`.. [optional]
            top_k (int): The number of results to return for each query. Must be an integer greater than 1.
            namespace (str): The namespace to fetch vectors from.
                             If not specified, the default namespace is used. [optional]
            filter (Dict[str, Union[str, float, int, bool, List, dict]]):
                    The filter to apply. You can use vector metadata to limit your search.
                    See https://www.pinecone.io/docs/metadata-filtering/.. [optional]
            include_values (bool): Indicates whether vector values are included in the response.
                                   If omitted the server will use the default value of False [optional]
            include_metadata (bool): Indicates whether metadata is included in the response as well as the ids.
                                     If omitted the server will use the default value of False [optional]
            sparse_vector: (Union[SparseValues, Dict[str, Union[List[float], List[int]]]]): sparse values of the query vector.
                           Expected to be either a GRPCSparseValues object or a dict of the form:
                           {'indices': List[int], 'values': List[float]}, where the lists each have the same length.

        Returns: QueryResponse object which contains the list of the closest vectors as ScoredVector objects,
                 and namespace name.
        """

        if vector is not None and id is not None:
            raise ValueError("Cannot specify both `id` and `vector`")

        if filter is not None:
            filter_struct = dict_to_proto_struct(filter)
        else:
            filter_struct = None

        sparse_vector = self._parse_sparse_values_arg(sparse_vector)
        args_dict = self._parse_non_empty_args(
            [
                ("vector", vector),
                ("id", id),
                ("namespace", namespace),
                ("top_k", top_k),
                ("filter", filter_struct),
                ("include_values", include_values),
                ("include_metadata", include_metadata),
                ("sparse_vector", sparse_vector),
            ]
        )

        request = QueryRequest(**args_dict)

        timeout = kwargs.pop("timeout", None)
        response = self._wrap_grpc_call(self.stub.Query, request, timeout=timeout)
        json_response = json_format.MessageToDict(response)
        return parse_query_response(json_response, _check_type=False)

    def update(
        self,
        id: str,
        async_req: bool = False,
        values: Optional[List[float]] = None,
        set_metadata: Optional[
            Dict[
                str,
                Union[str, float, int, bool, List[int], List[float], List[str]],
            ]
        ] = None,
        namespace: Optional[str] = None,
        sparse_values: Optional[Union[GRPCSparseValues, SparseVectorTypedDict]] = None,
        **kwargs,
    ) -> Union[UpdateResponse, PineconeGrpcFuture]:
        """
        The Update operation updates vector in a namespace.
        If a value is included, it will overwrite the previous value.
        If a set_metadata is included,
        the values of the fields specified in it will be added or overwrite the previous value.

        Examples:
            >>> index.update(id='id1', values=[1, 2, 3], namespace='my_namespace')
            >>> index.update(id='id1', set_metadata={'key': 'value'}, namespace='my_namespace', async_req=True)
            >>> index.update(id='id1', values=[1, 2, 3], sparse_values={'indices': [1, 2], 'values': [0.2, 0.4]},
            >>>              namespace='my_namespace')
            >>> index.update(id='id1', values=[1, 2, 3], sparse_values=GRPCSparseValues(indices=[1, 2], values=[0.2, 0.4]),
            >>>              namespace='my_namespace')

        Args:
            id (str): Vector's unique id.
            async_req (bool): If True, the update operation will be performed asynchronously.
                              Defaults to False. [optional]
            values (List[float]): vector values to set. [optional]
            set_metadata (Dict[str, Union[str, float, int, bool, List[int], List[float], List[str]]]]):
                metadata to set for vector. [optional]
            namespace (str): Namespace name where to update the vector.. [optional]
            sparse_values: (Dict[str, Union[List[float], List[int]]]): sparse values to update for the vector.
                           Expected to be either a GRPCSparseValues object or a dict of the form:
                           {'indices': List[int], 'values': List[float]} where the lists each have the same length.


        Returns: UpdateResponse (contains no data) or a PineconeGrpcFuture object if async_req is True.
        """
        if set_metadata is not None:
            set_metadata_struct = dict_to_proto_struct(set_metadata)
        else:
            set_metadata_struct = None

        timeout = kwargs.pop("timeout", None)
        sparse_values = self._parse_sparse_values_arg(sparse_values)
        args_dict = self._parse_non_empty_args(
            [
                ("values", values),
                ("set_metadata", set_metadata_struct),
                ("namespace", namespace),
                ("sparse_values", sparse_values),
            ]
        )

        request = UpdateRequest(id=id, **args_dict)
        if async_req:
            future = self._wrap_grpc_call(self.stub.Update.future, request, timeout=timeout)
            return PineconeGrpcFuture(future)
        else:
            return self._wrap_grpc_call(self.stub.Update, request, timeout=timeout)

    def list_paginated(
        self,
        prefix: Optional[str] = None,
        limit: Optional[int] = None,
        pagination_token: Optional[str] = None,
        namespace: Optional[str] = None,
        **kwargs,
    ) -> SimpleListResponse:
        """
        The list_paginated operation finds vectors based on an id prefix within a single namespace.
        It returns matching ids in a paginated form, with a pagination token to fetch the next page of results.
        This id list can then be passed to fetch or delete operations, depending on your use case.

        Consider using the `list` method to avoid having to handle pagination tokens manually.

        Examples:
            >>> results = index.list_paginated(prefix='99', limit=5, namespace='my_namespace')
            >>> [v.id for v in results.vectors]
            ['99', '990', '991', '992', '993']
            >>> results.pagination.next
            eyJza2lwX3Bhc3QiOiI5OTMiLCJwcmVmaXgiOiI5OSJ9
            >>> next_results = index.list_paginated(prefix='99', limit=5, namespace='my_namespace', pagination_token=results.pagination.next)

        Args:
            prefix (Optional[str]): The id prefix to match. If unspecified, an empty string prefix will
                                    be used with the effect of listing all ids in a namespace [optional]
            limit (Optional[int]): The maximum number of ids to return. If unspecified, the server will use a default value. [optional]
            pagination_token (Optional[str]): A token needed to fetch the next page of results. This token is returned
                in the response if additional results are available. [optional]
            namespace (Optional[str]): The namespace to fetch vectors from. If not specified, the default namespace is used. [optional]

        Returns: SimpleListResponse object which contains the list of ids, the namespace name, pagination information, and usage showing the number of read_units consumed.
        """
        args_dict = self._parse_non_empty_args(
            [
                ("prefix", prefix),
                ("limit", limit),
                ("namespace", namespace),
                ("pagination_token", pagination_token),
            ]
        )
        # Pop timeout BEFORE building the request: it is a gRPC call option, not a
        # ListRequest field, and splatting it into the request would raise a
        # ValueError for an unknown protobuf field.
        timeout = kwargs.pop("timeout", None)
        request = ListRequest(**args_dict, **kwargs)
        response = self._wrap_grpc_call(self.stub.List, request, timeout=timeout)

        if response.pagination and response.pagination.next != "":
            pagination = Pagination(next=response.pagination.next)
        else:
            pagination = None

        return SimpleListResponse(
            namespace=response.namespace,
            vectors=response.vectors,
            pagination=pagination,
        )

    def list(self, **kwargs):
        """
        The list operation accepts all of the same arguments as list_paginated, and returns a generator that yields
        a list of the matching vector ids in each page of results. It automatically handles pagination tokens on your
        behalf.

        Examples:
            >>> for ids in index.list(prefix='99', limit=5, namespace='my_namespace'):
            >>>     print(ids)
            ['99', '990', '991', '992', '993']
            ['994', '995', '996', '997', '998']
            ['999']

        Args:
            prefix (Optional[str]): The id prefix to match. If unspecified, an empty string prefix will
                                    be used with the effect of listing all ids in a namespace [optional]
            limit (Optional[int]): The maximum number of ids to return. If unspecified, the server will use a default value. [optional]
            pagination_token (Optional[str]): A token needed to fetch the next page of results. This token is returned
                in the response if additional results are available. [optional]
            namespace (Optional[str]): The namespace to fetch vectors from. If not specified, the default namespace is used. [optional]
        """
        done = False
        while not done:
            # Let any RPC error propagate to the caller unchanged.
            results = self.list_paginated(**kwargs)

            if len(results.vectors) > 0:
                yield [v.id for v in results.vectors]

            if results.pagination and results.pagination.next:
                kwargs.update({"pagination_token": results.pagination.next})
            else:
                done = True

    def describe_index_stats(
        self,
        filter: Optional[Dict[str, Union[str, float, int, bool, List, dict]]] = None,
        **kwargs,
    ) -> DescribeIndexStatsResponse:
        """
        The DescribeIndexStats operation returns statistics about the index's contents.
        For example: The vector count per namespace and the number of dimensions.

        Examples:
            >>> index.describe_index_stats()
            >>> index.describe_index_stats(filter={'key': 'value'})

        Args:
            filter (Dict[str, Union[str, float, int, bool, List, dict]]):
            If this parameter is present, the operation only returns statistics for vectors that satisfy the filter.
            See https://www.pinecone.io/docs/metadata-filtering/.. [optional]

        Returns: DescribeIndexStatsResponse object which contains stats about the index.
        """
        if filter is not None:
            filter_struct = dict_to_proto_struct(filter)
        else:
            filter_struct = None
        args_dict = self._parse_non_empty_args([("filter", filter_struct)])
        timeout = kwargs.pop("timeout", None)

        request = DescribeIndexStatsRequest(**args_dict)
        response = self._wrap_grpc_call(self.stub.DescribeIndexStats, request, timeout=timeout)
        json_response = json_format.MessageToDict(response)
        return parse_stats_response(json_response)

    @staticmethod
    def _parse_non_empty_args(args: List[Tuple[str, Any]]) -> Dict[str, Any]:
        # Drop (name, None) pairs so optional proto fields are simply omitted.
        return {arg_name: val for arg_name, val in args if val is not None}

    @staticmethod
    def _parse_sparse_values_arg(
        sparse_values: Optional[Union[GRPCSparseValues, SparseVectorTypedDict]]
    ) -> Optional[GRPCSparseValues]:
        # Normalize the user-facing sparse-values argument to a GRPCSparseValues
        # proto (or None when absent).
        if sparse_values is None:
            return None

        if isinstance(sparse_values, GRPCSparseValues):
            return sparse_values

        if not isinstance(sparse_values, dict) or "indices" not in sparse_values or "values" not in sparse_values:
            raise ValueError(
                "Invalid sparse values argument. Expected a dict of: {'indices': List[int], 'values': List[float]}."
                f"Received: {sparse_values}"
            )

        return GRPCSparseValues(indices=sparse_values["indices"], values=sparse_values["values"])
58class GRPCIndex(GRPCIndexBase): 59 """A client for interacting with a Pinecone index via GRPC API.""" 60 61 @property 62 def stub_class(self): 63 return VectorServiceStub 64 65 def upsert( 66 self, 67 vectors: Union[List[GRPCVector], List[NonGRPCVector], List[tuple], List[dict]], 68 async_req: bool = False, 69 namespace: Optional[str] = None, 70 batch_size: Optional[int] = None, 71 show_progress: bool = True, 72 **kwargs, 73 ) -> Union[UpsertResponse, PineconeGrpcFuture]: 74 """ 75 The upsert operation writes vectors into a namespace. 76 If a new value is upserted for an existing vector id, it will overwrite the previous value. 77 78 Examples: 79 >>> index.upsert([('id1', [1.0, 2.0, 3.0], {'key': 'value'}), 80 ('id2', [1.0, 2.0, 3.0]) 81 ], 82 namespace='ns1', async_req=True) 83 >>> index.upsert([{'id': 'id1', 'values': [1.0, 2.0, 3.0], 'metadata': {'key': 'value'}}, 84 {'id': 'id2', 85 'values': [1.0, 2.0, 3.0], 86 'sparse_values': {'indices': [1, 8], 'values': [0.2, 0.4]}, 87 ]) 88 >>> index.upsert([GRPCVector(id='id1', values=[1.0, 2.0, 3.0], metadata={'key': 'value'}), 89 GRPCVector(id='id2', values=[1.0, 2.0, 3.0]), 90 GRPCVector(id='id3', 91 values=[1.0, 2.0, 3.0], 92 sparse_values=GRPCSparseValues(indices=[1, 2], values=[0.2, 0.4]))]) 93 94 Args: 95 vectors (Union[List[Vector], List[Tuple]]): A list of vectors to upsert. 96 97 A vector can be represented by a 1) GRPCVector object, a 2) tuple or 3) a dictionary 98 1) if a tuple is used, it must be of the form (id, values, metadata) or (id, values). 99 where id is a string, vector is a list of floats, and metadata is a dict. 
100 Examples: ('id1', [1.0, 2.0, 3.0], {'key': 'value'}), ('id2', [1.0, 2.0, 3.0]) 101 102 2) if a GRPCVector object is used, a GRPCVector object must be of the form 103 GRPCVector(id, values, metadata), where metadata is an optional argument of type 104 Dict[str, Union[str, float, int, bool, List[int], List[float], List[str]]] 105 Examples: GRPCVector(id='id1', values=[1.0, 2.0, 3.0], metadata={'key': 'value'}), 106 GRPCVector(id='id2', values=[1.0, 2.0, 3.0]), 107 GRPCVector(id='id3', 108 values=[1.0, 2.0, 3.0], 109 sparse_values=GRPCSparseValues(indices=[1, 2], values=[0.2, 0.4])) 110 111 3) if a dictionary is used, it must be in the form 112 {'id': str, 'values': List[float], 'sparse_values': {'indices': List[int], 'values': List[float]}, 113 'metadata': dict} 114 115 Note: the dimension of each vector must match the dimension of the index. 116 async_req (bool): If True, the upsert operation will be performed asynchronously. 117 Cannot be used with batch_size. 118 Defaults to False. See: https://docs.pinecone.io/docs/performance-tuning [optional] 119 namespace (str): The namespace to write to. If not specified, the default namespace is used. [optional] 120 batch_size (int): The number of vectors to upsert in each batch. 121 Cannot be used with async_req=True. 122 If not specified, all vectors will be upserted in a single batch. [optional] 123 show_progress (bool): Whether to show a progress bar using tqdm. 124 Applied only if batch_size is provided. Default is True. 125 126 Returns: UpsertResponse, contains the number of vectors upserted 127 """ 128 if async_req and batch_size is not None: 129 raise ValueError( 130 "async_req is not supported when batch_size is provided." 
131 "To upsert in parallel, please follow: " 132 "https://docs.pinecone.io/docs/performance-tuning" 133 ) 134 135 timeout = kwargs.pop("timeout", None) 136 137 vectors = list(map(VectorFactoryGRPC.build, vectors)) 138 if async_req: 139 args_dict = self._parse_non_empty_args([("namespace", namespace)]) 140 request = UpsertRequest(vectors=vectors, **args_dict, **kwargs) 141 future = self._wrap_grpc_call(self.stub.Upsert.future, request, timeout=timeout) 142 return PineconeGrpcFuture(future) 143 144 if batch_size is None: 145 return self._upsert_batch(vectors, namespace, timeout=timeout, **kwargs) 146 147 if not isinstance(batch_size, int) or batch_size <= 0: 148 raise ValueError("batch_size must be a positive integer") 149 150 pbar = tqdm( 151 total=len(vectors), 152 disable=not show_progress, 153 desc="Upserted vectors", 154 ) 155 total_upserted = 0 156 for i in range(0, len(vectors), batch_size): 157 batch_result = self._upsert_batch( 158 vectors[i : i + batch_size], 159 namespace, 160 timeout=timeout, 161 **kwargs, 162 ) 163 pbar.update(batch_result.upserted_count) 164 # we can't use here pbar.n for the case show_progress=False 165 total_upserted += batch_result.upserted_count 166 167 return UpsertResponse(upserted_count=total_upserted) 168 169 def _upsert_batch( 170 self, 171 vectors: List[GRPCVector], 172 namespace: Optional[str], 173 timeout: Optional[float], 174 **kwargs, 175 ) -> UpsertResponse: 176 args_dict = self._parse_non_empty_args([("namespace", namespace)]) 177 request = UpsertRequest(vectors=vectors, **args_dict) 178 return self._wrap_grpc_call(self.stub.Upsert, request, timeout=timeout, **kwargs) 179 180 def upsert_from_dataframe( 181 self, 182 df, 183 namespace: str = "", 184 batch_size: int = 500, 185 use_async_requests: bool = True, 186 show_progress: bool = True, 187 ) -> UpsertResponse: 188 """Upserts a dataframe into the index. 189 190 Args: 191 df: A pandas dataframe with the following columns: id, values, sparse_values, and metadata. 
192 namespace: The namespace to upsert into. 193 batch_size: The number of rows to upsert in a single batch. 194 use_async_requests: Whether to upsert multiple requests at the same time using asynchronous request mechanism. 195 Set to `False` 196 show_progress: Whether to show a progress bar. 197 """ 198 try: 199 import pandas as pd 200 except ImportError: 201 raise RuntimeError( 202 "The `pandas` package is not installed. Please install pandas to use `upsert_from_dataframe()`" 203 ) 204 205 if not isinstance(df, pd.DataFrame): 206 raise ValueError(f"Only pandas dataframes are supported. Found: {type(df)}") 207 208 pbar = tqdm( 209 total=len(df), 210 disable=not show_progress, 211 desc="sending upsert requests", 212 ) 213 results = [] 214 for chunk in self._iter_dataframe(df, batch_size=batch_size): 215 res = self.upsert( 216 vectors=chunk, 217 namespace=namespace, 218 async_req=use_async_requests, 219 ) 220 pbar.update(len(chunk)) 221 results.append(res) 222 223 if use_async_requests: 224 cast_results = cast(List[PineconeGrpcFuture], results) 225 results = [ 226 async_result.result() 227 for async_result in tqdm( 228 cast_results, 229 disable=not show_progress, 230 desc="collecting async responses", 231 ) 232 ] 233 234 upserted_count = 0 235 for res in results: 236 if hasattr(res, "upserted_count") and isinstance(res.upserted_count, int): 237 upserted_count += res.upserted_count 238 239 return UpsertResponse(upserted_count=upserted_count) 240 241 @staticmethod 242 def _iter_dataframe(df, batch_size): 243 for i in range(0, len(df), batch_size): 244 batch = df.iloc[i : i + batch_size].to_dict(orient="records") 245 yield batch 246 247 def delete( 248 self, 249 ids: Optional[List[str]] = None, 250 delete_all: Optional[bool] = None, 251 namespace: Optional[str] = None, 252 filter: Optional[Dict[str, Union[str, float, int, bool, List, dict]]] = None, 253 async_req: bool = False, 254 **kwargs, 255 ) -> Union[DeleteResponse, PineconeGrpcFuture]: 256 """ 257 The Delete 
operation deletes vectors from the index, from a single namespace. 258 No error raised if the vector id does not exist. 259 Note: for any delete call, if namespace is not specified, the default namespace is used. 260 261 Delete can occur in the following mutual exclusive ways: 262 1. Delete by ids from a single namespace 263 2. Delete all vectors from a single namespace by setting delete_all to True 264 3. Delete all vectors from a single namespace by specifying a metadata filter 265 (note that for this option delete all must be set to False) 266 267 Examples: 268 >>> index.delete(ids=['id1', 'id2'], namespace='my_namespace') 269 >>> index.delete(delete_all=True, namespace='my_namespace') 270 >>> index.delete(filter={'key': 'value'}, namespace='my_namespace', async_req=True) 271 272 Args: 273 ids (List[str]): Vector ids to delete [optional] 274 delete_all (bool): This indicates that all vectors in the index namespace should be deleted.. [optional] 275 Default is False. 276 namespace (str): The namespace to delete vectors from [optional] 277 If not specified, the default namespace is used. 278 filter (Dict[str, Union[str, float, int, bool, List, dict]]): 279 If specified, the metadata filter here will be used to select the vectors to delete. 280 This is mutually exclusive with specifying ids to delete in the ids param or using delete_all=True. 281 See https://www.pinecone.io/docs/metadata-filtering/.. [optional] 282 async_req (bool): If True, the delete operation will be performed asynchronously. 283 Defaults to False. [optional] 284 285 Returns: DeleteResponse (contains no data) or a PineconeGrpcFuture object if async_req is True. 
286 """ 287 288 if filter is not None: 289 filter_struct = dict_to_proto_struct(filter) 290 else: 291 filter_struct = None 292 293 args_dict = self._parse_non_empty_args( 294 [ 295 ("ids", ids), 296 ("delete_all", delete_all), 297 ("namespace", namespace), 298 ("filter", filter_struct), 299 ] 300 ) 301 timeout = kwargs.pop("timeout", None) 302 303 request = DeleteRequest(**args_dict, **kwargs) 304 if async_req: 305 future = self._wrap_grpc_call(self.stub.Delete.future, request, timeout=timeout) 306 return PineconeGrpcFuture(future) 307 else: 308 return self._wrap_grpc_call(self.stub.Delete, request, timeout=timeout) 309 310 def fetch( 311 self, 312 ids: Optional[List[str]], 313 namespace: Optional[str] = None, 314 **kwargs, 315 ) -> FetchResponse: 316 """ 317 The fetch operation looks up and returns vectors, by ID, from a single namespace. 318 The returned vectors include the vector data and/or metadata. 319 320 Examples: 321 >>> index.fetch(ids=['id1', 'id2'], namespace='my_namespace') 322 >>> index.fetch(ids=['id1', 'id2']) 323 324 Args: 325 ids (List[str]): The vector IDs to fetch. 326 namespace (str): The namespace to fetch vectors from. 327 If not specified, the default namespace is used. [optional] 328 329 Returns: FetchResponse object which contains the list of Vector objects, and namespace name. 
330 """ 331 timeout = kwargs.pop("timeout", None) 332 333 args_dict = self._parse_non_empty_args([("namespace", namespace)]) 334 335 request = FetchRequest(ids=ids, **args_dict, **kwargs) 336 response = self._wrap_grpc_call(self.stub.Fetch, request, timeout=timeout) 337 json_response = json_format.MessageToDict(response) 338 return parse_fetch_response(json_response) 339 340 def query( 341 self, 342 vector: Optional[List[float]] = None, 343 id: Optional[str] = None, 344 namespace: Optional[str] = None, 345 top_k: Optional[int] = None, 346 filter: Optional[Dict[str, Union[str, float, int, bool, List, dict]]] = None, 347 include_values: Optional[bool] = None, 348 include_metadata: Optional[bool] = None, 349 sparse_vector: Optional[Union[GRPCSparseValues, SparseVectorTypedDict]] = None, 350 **kwargs, 351 ) -> QueryResponse: 352 """ 353 The Query operation searches a namespace, using a query vector. 354 It retrieves the ids of the most similar items in a namespace, along with their similarity scores. 355 356 Examples: 357 >>> index.query(vector=[1, 2, 3], top_k=10, namespace='my_namespace') 358 >>> index.query(id='id1', top_k=10, namespace='my_namespace') 359 >>> index.query(vector=[1, 2, 3], top_k=10, namespace='my_namespace', filter={'key': 'value'}) 360 >>> index.query(id='id1', top_k=10, namespace='my_namespace', include_metadata=True, include_values=True) 361 >>> index.query(vector=[1, 2, 3], sparse_vector={'indices': [1, 2], 'values': [0.2, 0.4]}, 362 >>> top_k=10, namespace='my_namespace') 363 >>> index.query(vector=[1, 2, 3], sparse_vector=GRPCSparseValues([1, 2], [0.2, 0.4]), 364 >>> top_k=10, namespace='my_namespace') 365 366 Args: 367 vector (List[float]): The query vector. This should be the same length as the dimension of the index 368 being queried. Each `query()` request can contain only one of the parameters 369 `id` or `vector`.. [optional] 370 id (str): The unique ID of the vector to be used as a query vector. 
371 Each `query()` request can contain only one of the parameters 372 `vector` or `id`.. [optional] 373 top_k (int): The number of results to return for each query. Must be an integer greater than 1. 374 namespace (str): The namespace to fetch vectors from. 375 If not specified, the default namespace is used. [optional] 376 filter (Dict[str, Union[str, float, int, bool, List, dict]]): 377 The filter to apply. You can use vector metadata to limit your search. 378 See https://www.pinecone.io/docs/metadata-filtering/.. [optional] 379 include_values (bool): Indicates whether vector values are included in the response. 380 If omitted the server will use the default value of False [optional] 381 include_metadata (bool): Indicates whether metadata is included in the response as well as the ids. 382 If omitted the server will use the default value of False [optional] 383 sparse_vector: (Union[SparseValues, Dict[str, Union[List[float], List[int]]]]): sparse values of the query vector. 384 Expected to be either a GRPCSparseValues object or a dict of the form: 385 {'indices': List[int], 'values': List[float]}, where the lists each have the same length. 386 387 Returns: QueryResponse object which contains the list of the closest vectors as ScoredVector objects, 388 and namespace name. 
389 """ 390 391 if vector is not None and id is not None: 392 raise ValueError("Cannot specify both `id` and `vector`") 393 394 if filter is not None: 395 filter_struct = dict_to_proto_struct(filter) 396 else: 397 filter_struct = None 398 399 sparse_vector = self._parse_sparse_values_arg(sparse_vector) 400 args_dict = self._parse_non_empty_args( 401 [ 402 ("vector", vector), 403 ("id", id), 404 ("namespace", namespace), 405 ("top_k", top_k), 406 ("filter", filter_struct), 407 ("include_values", include_values), 408 ("include_metadata", include_metadata), 409 ("sparse_vector", sparse_vector), 410 ] 411 ) 412 413 request = QueryRequest(**args_dict) 414 415 timeout = kwargs.pop("timeout", None) 416 response = self._wrap_grpc_call(self.stub.Query, request, timeout=timeout) 417 json_response = json_format.MessageToDict(response) 418 return parse_query_response(json_response, _check_type=False) 419 420 def update( 421 self, 422 id: str, 423 async_req: bool = False, 424 values: Optional[List[float]] = None, 425 set_metadata: Optional[ 426 Dict[ 427 str, 428 Union[str, float, int, bool, List[int], List[float], List[str]], 429 ] 430 ] = None, 431 namespace: Optional[str] = None, 432 sparse_values: Optional[Union[GRPCSparseValues, SparseVectorTypedDict]] = None, 433 **kwargs, 434 ) -> Union[UpdateResponse, PineconeGrpcFuture]: 435 """ 436 The Update operation updates vector in a namespace. 437 If a value is included, it will overwrite the previous value. 438 If a set_metadata is included, 439 the values of the fields specified in it will be added or overwrite the previous value. 
440 441 Examples: 442 >>> index.update(id='id1', values=[1, 2, 3], namespace='my_namespace') 443 >>> index.update(id='id1', set_metadata={'key': 'value'}, namespace='my_namespace', async_req=True) 444 >>> index.update(id='id1', values=[1, 2, 3], sparse_values={'indices': [1, 2], 'values': [0.2, 0.4]}, 445 >>> namespace='my_namespace') 446 >>> index.update(id='id1', values=[1, 2, 3], sparse_values=GRPCSparseValues(indices=[1, 2], values=[0.2, 0.4]), 447 >>> namespace='my_namespace') 448 449 Args: 450 id (str): Vector's unique id. 451 async_req (bool): If True, the update operation will be performed asynchronously. 452 Defaults to False. [optional] 453 values (List[float]): vector values to set. [optional] 454 set_metadata (Dict[str, Union[str, float, int, bool, List[int], List[float], List[str]]]]): 455 metadata to set for vector. [optional] 456 namespace (str): Namespace name where to update the vector.. [optional] 457 sparse_values: (Dict[str, Union[List[float], List[int]]]): sparse values to update for the vector. 458 Expected to be either a GRPCSparseValues object or a dict of the form: 459 {'indices': List[int], 'values': List[float]} where the lists each have the same length. 460 461 462 Returns: UpdateResponse (contains no data) or a PineconeGrpcFuture object if async_req is True. 
463 """ 464 if set_metadata is not None: 465 set_metadata_struct = dict_to_proto_struct(set_metadata) 466 else: 467 set_metadata_struct = None 468 469 timeout = kwargs.pop("timeout", None) 470 sparse_values = self._parse_sparse_values_arg(sparse_values) 471 args_dict = self._parse_non_empty_args( 472 [ 473 ("values", values), 474 ("set_metadata", set_metadata_struct), 475 ("namespace", namespace), 476 ("sparse_values", sparse_values), 477 ] 478 ) 479 480 request = UpdateRequest(id=id, **args_dict) 481 if async_req: 482 future = self._wrap_grpc_call(self.stub.Update.future, request, timeout=timeout) 483 return PineconeGrpcFuture(future) 484 else: 485 return self._wrap_grpc_call(self.stub.Update, request, timeout=timeout) 486 487 def list_paginated( 488 self, 489 prefix: Optional[str] = None, 490 limit: Optional[int] = None, 491 pagination_token: Optional[str] = None, 492 namespace: Optional[str] = None, 493 **kwargs, 494 ) -> SimpleListResponse: 495 """ 496 The list_paginated operation finds vectors based on an id prefix within a single namespace. 497 It returns matching ids in a paginated form, with a pagination token to fetch the next page of results. 498 This id list can then be passed to fetch or delete operations, depending on your use case. 499 500 Consider using the `list` method to avoid having to handle pagination tokens manually. 501 502 Examples: 503 >>> results = index.list_paginated(prefix='99', limit=5, namespace='my_namespace') 504 >>> [v.id for v in results.vectors] 505 ['99', '990', '991', '992', '993'] 506 >>> results.pagination.next 507 eyJza2lwX3Bhc3QiOiI5OTMiLCJwcmVmaXgiOiI5OSJ9 508 >>> next_results = index.list_paginated(prefix='99', limit=5, namespace='my_namespace', pagination_token=results.pagination.next) 509 510 Args: 511 prefix (Optional[str]): The id prefix to match. 
If unspecified, an empty string prefix will 512 be used with the effect of listing all ids in a namespace [optional] 513 limit (Optional[int]): The maximum number of ids to return. If unspecified, the server will use a default value. [optional] 514 pagination_token (Optional[str]): A token needed to fetch the next page of results. This token is returned 515 in the response if additional results are available. [optional] 516 namespace (Optional[str]): The namespace to fetch vectors from. If not specified, the default namespace is used. [optional] 517 518 Returns: SimpleListResponse object which contains the list of ids, the namespace name, pagination information, and usage showing the number of read_units consumed. 519 """ 520 args_dict = self._parse_non_empty_args( 521 [ 522 ("prefix", prefix), 523 ("limit", limit), 524 ("namespace", namespace), 525 ("pagination_token", pagination_token), 526 ] 527 ) 528 request = ListRequest(**args_dict, **kwargs) 529 timeout = kwargs.pop("timeout", None) 530 response = self._wrap_grpc_call(self.stub.List, request, timeout=timeout) 531 532 if response.pagination and response.pagination.next != "": 533 pagination = Pagination(next=response.pagination.next) 534 else: 535 pagination = None 536 537 return SimpleListResponse( 538 namespace=response.namespace, 539 vectors=response.vectors, 540 pagination=pagination, 541 ) 542 543 def list(self, **kwargs): 544 """ 545 The list operation accepts all of the same arguments as list_paginated, and returns a generator that yields 546 a list of the matching vector ids in each page of results. It automatically handles pagination tokens on your 547 behalf. 548 549 Examples: 550 >>> for ids in index.list(prefix='99', limit=5, namespace='my_namespace'): 551 >>> print(ids) 552 ['99', '990', '991', '992', '993'] 553 ['994', '995', '996', '997', '998'] 554 ['999'] 555 556 Args: 557 prefix (Optional[str]): The id prefix to match. 
If unspecified, an empty string prefix will 558 be used with the effect of listing all ids in a namespace [optional] 559 limit (Optional[int]): The maximum number of ids to return. If unspecified, the server will use a default value. [optional] 560 pagination_token (Optional[str]): A token needed to fetch the next page of results. This token is returned 561 in the response if additional results are available. [optional] 562 namespace (Optional[str]): The namespace to fetch vectors from. If not specified, the default namespace is used. [optional] 563 """ 564 done = False 565 while not done: 566 try: 567 results = self.list_paginated(**kwargs) 568 except Exception as e: 569 raise e 570 571 if len(results.vectors) > 0: 572 yield [v.id for v in results.vectors] 573 574 if results.pagination and results.pagination.next: 575 kwargs.update({"pagination_token": results.pagination.next}) 576 else: 577 done = True 578 579 def describe_index_stats( 580 self, 581 filter: Optional[Dict[str, Union[str, float, int, bool, List, dict]]] = None, 582 **kwargs, 583 ) -> DescribeIndexStatsResponse: 584 """ 585 The DescribeIndexStats operation returns statistics about the index's contents. 586 For example: The vector count per namespace and the number of dimensions. 587 588 Examples: 589 >>> index.describe_index_stats() 590 >>> index.describe_index_stats(filter={'key': 'value'}) 591 592 Args: 593 filter (Dict[str, Union[str, float, int, bool, List, dict]]): 594 If this parameter is present, the operation only returns statistics for vectors that satisfy the filter. 595 See https://www.pinecone.io/docs/metadata-filtering/.. [optional] 596 597 Returns: DescribeIndexStatsResponse object which contains stats about the index. 
598 """ 599 if filter is not None: 600 filter_struct = dict_to_proto_struct(filter) 601 else: 602 filter_struct = None 603 args_dict = self._parse_non_empty_args([("filter", filter_struct)]) 604 timeout = kwargs.pop("timeout", None) 605 606 request = DescribeIndexStatsRequest(**args_dict) 607 response = self._wrap_grpc_call(self.stub.DescribeIndexStats, request, timeout=timeout) 608 json_response = json_format.MessageToDict(response) 609 return parse_stats_response(json_response) 610 611 @staticmethod 612 def _parse_non_empty_args(args: List[Tuple[str, Any]]) -> Dict[str, Any]: 613 return {arg_name: val for arg_name, val in args if val is not None} 614 615 @staticmethod 616 def _parse_sparse_values_arg( 617 sparse_values: Optional[Union[GRPCSparseValues, SparseVectorTypedDict]] 618 ) -> Optional[GRPCSparseValues]: 619 if sparse_values is None: 620 return None 621 622 if isinstance(sparse_values, GRPCSparseValues): 623 return sparse_values 624 625 if not isinstance(sparse_values, dict) or "indices" not in sparse_values or "values" not in sparse_values: 626 raise ValueError( 627 "Invalid sparse values argument. Expected a dict of: {'indices': List[int], 'values': List[float]}." 628 f"Received: {sparse_values}" 629 ) 630 631 return GRPCSparseValues(indices=sparse_values["indices"], values=sparse_values["values"])
A client for interacting with a Pinecone index via GRPC API.
65 def upsert( 66 self, 67 vectors: Union[List[GRPCVector], List[NonGRPCVector], List[tuple], List[dict]], 68 async_req: bool = False, 69 namespace: Optional[str] = None, 70 batch_size: Optional[int] = None, 71 show_progress: bool = True, 72 **kwargs, 73 ) -> Union[UpsertResponse, PineconeGrpcFuture]: 74 """ 75 The upsert operation writes vectors into a namespace. 76 If a new value is upserted for an existing vector id, it will overwrite the previous value. 77 78 Examples: 79 >>> index.upsert([('id1', [1.0, 2.0, 3.0], {'key': 'value'}), 80 ('id2', [1.0, 2.0, 3.0]) 81 ], 82 namespace='ns1', async_req=True) 83 >>> index.upsert([{'id': 'id1', 'values': [1.0, 2.0, 3.0], 'metadata': {'key': 'value'}}, 84 {'id': 'id2', 85 'values': [1.0, 2.0, 3.0], 86 'sparse_values': {'indices': [1, 8], 'values': [0.2, 0.4]}, 87 ]) 88 >>> index.upsert([GRPCVector(id='id1', values=[1.0, 2.0, 3.0], metadata={'key': 'value'}), 89 GRPCVector(id='id2', values=[1.0, 2.0, 3.0]), 90 GRPCVector(id='id3', 91 values=[1.0, 2.0, 3.0], 92 sparse_values=GRPCSparseValues(indices=[1, 2], values=[0.2, 0.4]))]) 93 94 Args: 95 vectors (Union[List[Vector], List[Tuple]]): A list of vectors to upsert. 96 97 A vector can be represented by a 1) GRPCVector object, a 2) tuple or 3) a dictionary 98 1) if a tuple is used, it must be of the form (id, values, metadata) or (id, values). 99 where id is a string, vector is a list of floats, and metadata is a dict. 
100 Examples: ('id1', [1.0, 2.0, 3.0], {'key': 'value'}), ('id2', [1.0, 2.0, 3.0]) 101 102 2) if a GRPCVector object is used, a GRPCVector object must be of the form 103 GRPCVector(id, values, metadata), where metadata is an optional argument of type 104 Dict[str, Union[str, float, int, bool, List[int], List[float], List[str]]] 105 Examples: GRPCVector(id='id1', values=[1.0, 2.0, 3.0], metadata={'key': 'value'}), 106 GRPCVector(id='id2', values=[1.0, 2.0, 3.0]), 107 GRPCVector(id='id3', 108 values=[1.0, 2.0, 3.0], 109 sparse_values=GRPCSparseValues(indices=[1, 2], values=[0.2, 0.4])) 110 111 3) if a dictionary is used, it must be in the form 112 {'id': str, 'values': List[float], 'sparse_values': {'indices': List[int], 'values': List[float]}, 113 'metadata': dict} 114 115 Note: the dimension of each vector must match the dimension of the index. 116 async_req (bool): If True, the upsert operation will be performed asynchronously. 117 Cannot be used with batch_size. 118 Defaults to False. See: https://docs.pinecone.io/docs/performance-tuning [optional] 119 namespace (str): The namespace to write to. If not specified, the default namespace is used. [optional] 120 batch_size (int): The number of vectors to upsert in each batch. 121 Cannot be used with async_req=True. 122 If not specified, all vectors will be upserted in a single batch. [optional] 123 show_progress (bool): Whether to show a progress bar using tqdm. 124 Applied only if batch_size is provided. Default is True. 125 126 Returns: UpsertResponse, contains the number of vectors upserted 127 """ 128 if async_req and batch_size is not None: 129 raise ValueError( 130 "async_req is not supported when batch_size is provided." 
131 "To upsert in parallel, please follow: " 132 "https://docs.pinecone.io/docs/performance-tuning" 133 ) 134 135 timeout = kwargs.pop("timeout", None) 136 137 vectors = list(map(VectorFactoryGRPC.build, vectors)) 138 if async_req: 139 args_dict = self._parse_non_empty_args([("namespace", namespace)]) 140 request = UpsertRequest(vectors=vectors, **args_dict, **kwargs) 141 future = self._wrap_grpc_call(self.stub.Upsert.future, request, timeout=timeout) 142 return PineconeGrpcFuture(future) 143 144 if batch_size is None: 145 return self._upsert_batch(vectors, namespace, timeout=timeout, **kwargs) 146 147 if not isinstance(batch_size, int) or batch_size <= 0: 148 raise ValueError("batch_size must be a positive integer") 149 150 pbar = tqdm( 151 total=len(vectors), 152 disable=not show_progress, 153 desc="Upserted vectors", 154 ) 155 total_upserted = 0 156 for i in range(0, len(vectors), batch_size): 157 batch_result = self._upsert_batch( 158 vectors[i : i + batch_size], 159 namespace, 160 timeout=timeout, 161 **kwargs, 162 ) 163 pbar.update(batch_result.upserted_count) 164 # we can't use here pbar.n for the case show_progress=False 165 total_upserted += batch_result.upserted_count 166 167 return UpsertResponse(upserted_count=total_upserted)
The upsert operation writes vectors into a namespace. If a new value is upserted for an existing vector id, it will overwrite the previous value.
Examples:
>>> index.upsert([('id1', [1.0, 2.0, 3.0], {'key': 'value'}), ('id2', [1.0, 2.0, 3.0]) ], namespace='ns1', async_req=True) >>> index.upsert([{'id': 'id1', 'values': [1.0, 2.0, 3.0], 'metadata': {'key': 'value'}}, {'id': 'id2', 'values': [1.0, 2.0, 3.0], 'sparse_values': {'indices': [1, 8], 'values': [0.2, 0.4]}, ]) >>> index.upsert([GRPCVector(id='id1', values=[1.0, 2.0, 3.0], metadata={'key': 'value'}), GRPCVector(id='id2', values=[1.0, 2.0, 3.0]), GRPCVector(id='id3', values=[1.0, 2.0, 3.0], sparse_values=GRPCSparseValues(indices=[1, 2], values=[0.2, 0.4]))])
Arguments:
vectors (Union[List[Vector], List[Tuple]]): A list of vectors to upsert.
A vector can be represented by a 1) GRPCVector object, a 2) tuple or 3) a dictionary 1) if a tuple is used, it must be of the form (id, values, metadata) or (id, values). where id is a string, vector is a list of floats, and metadata is a dict. Examples: ('id1', [1.0, 2.0, 3.0], {'key': 'value'}), ('id2', [1.0, 2.0, 3.0])
2) if a GRPCVector object is used, a GRPCVector object must be of the form GRPCVector(id, values, metadata), where metadata is an optional argument of type Dict[str, Union[str, float, int, bool, List[int], List[float], List[str]]] Examples: GRPCVector(id='id1', values=[1.0, 2.0, 3.0], metadata={'key': 'value'}), GRPCVector(id='id2', values=[1.0, 2.0, 3.0]), GRPCVector(id='id3', values=[1.0, 2.0, 3.0], sparse_values=GRPCSparseValues(indices=[1, 2], values=[0.2, 0.4]))
3) if a dictionary is used, it must be in the form {'id': str, 'values': List[float], 'sparse_values': {'indices': List[int], 'values': List[float]}, 'metadata': dict}
Note: the dimension of each vector must match the dimension of the index.
- async_req (bool): If True, the upsert operation will be performed asynchronously. Cannot be used with batch_size. Defaults to False. See: https://docs.pinecone.io/docs/performance-tuning [optional]
- namespace (str): The namespace to write to. If not specified, the default namespace is used. [optional]
- batch_size (int): The number of vectors to upsert in each batch. Cannot be used with async_req=True. If not specified, all vectors will be upserted in a single batch. [optional]
- show_progress (bool): Whether to show a progress bar using tqdm. Applied only if batch_size is provided. Default is True.
Returns: UpsertResponse, contains the number of vectors upserted
180 def upsert_from_dataframe( 181 self, 182 df, 183 namespace: str = "", 184 batch_size: int = 500, 185 use_async_requests: bool = True, 186 show_progress: bool = True, 187 ) -> UpsertResponse: 188 """Upserts a dataframe into the index. 189 190 Args: 191 df: A pandas dataframe with the following columns: id, values, sparse_values, and metadata. 192 namespace: The namespace to upsert into. 193 batch_size: The number of rows to upsert in a single batch. 194 use_async_requests: Whether to upsert multiple requests at the same time using asynchronous request mechanism. 195 Set to `False` 196 show_progress: Whether to show a progress bar. 197 """ 198 try: 199 import pandas as pd 200 except ImportError: 201 raise RuntimeError( 202 "The `pandas` package is not installed. Please install pandas to use `upsert_from_dataframe()`" 203 ) 204 205 if not isinstance(df, pd.DataFrame): 206 raise ValueError(f"Only pandas dataframes are supported. Found: {type(df)}") 207 208 pbar = tqdm( 209 total=len(df), 210 disable=not show_progress, 211 desc="sending upsert requests", 212 ) 213 results = [] 214 for chunk in self._iter_dataframe(df, batch_size=batch_size): 215 res = self.upsert( 216 vectors=chunk, 217 namespace=namespace, 218 async_req=use_async_requests, 219 ) 220 pbar.update(len(chunk)) 221 results.append(res) 222 223 if use_async_requests: 224 cast_results = cast(List[PineconeGrpcFuture], results) 225 results = [ 226 async_result.result() 227 for async_result in tqdm( 228 cast_results, 229 disable=not show_progress, 230 desc="collecting async responses", 231 ) 232 ] 233 234 upserted_count = 0 235 for res in results: 236 if hasattr(res, "upserted_count") and isinstance(res.upserted_count, int): 237 upserted_count += res.upserted_count 238 239 return UpsertResponse(upserted_count=upserted_count)
Upserts a dataframe into the index.
Arguments:
- df: A pandas dataframe with the following columns: id, values, sparse_values, and metadata.
- namespace: The namespace to upsert into.
- batch_size: The number of rows to upsert in a single batch.
- use_async_requests: Whether to upsert multiple requests at the same time using an asynchronous request mechanism. Set to `False` to send each batch synchronously.
- show_progress: Whether to show a progress bar.
247 def delete( 248 self, 249 ids: Optional[List[str]] = None, 250 delete_all: Optional[bool] = None, 251 namespace: Optional[str] = None, 252 filter: Optional[Dict[str, Union[str, float, int, bool, List, dict]]] = None, 253 async_req: bool = False, 254 **kwargs, 255 ) -> Union[DeleteResponse, PineconeGrpcFuture]: 256 """ 257 The Delete operation deletes vectors from the index, from a single namespace. 258 No error raised if the vector id does not exist. 259 Note: for any delete call, if namespace is not specified, the default namespace is used. 260 261 Delete can occur in the following mutual exclusive ways: 262 1. Delete by ids from a single namespace 263 2. Delete all vectors from a single namespace by setting delete_all to True 264 3. Delete all vectors from a single namespace by specifying a metadata filter 265 (note that for this option delete all must be set to False) 266 267 Examples: 268 >>> index.delete(ids=['id1', 'id2'], namespace='my_namespace') 269 >>> index.delete(delete_all=True, namespace='my_namespace') 270 >>> index.delete(filter={'key': 'value'}, namespace='my_namespace', async_req=True) 271 272 Args: 273 ids (List[str]): Vector ids to delete [optional] 274 delete_all (bool): This indicates that all vectors in the index namespace should be deleted.. [optional] 275 Default is False. 276 namespace (str): The namespace to delete vectors from [optional] 277 If not specified, the default namespace is used. 278 filter (Dict[str, Union[str, float, int, bool, List, dict]]): 279 If specified, the metadata filter here will be used to select the vectors to delete. 280 This is mutually exclusive with specifying ids to delete in the ids param or using delete_all=True. 281 See https://www.pinecone.io/docs/metadata-filtering/.. [optional] 282 async_req (bool): If True, the delete operation will be performed asynchronously. 283 Defaults to False. [optional] 284 285 Returns: DeleteResponse (contains no data) or a PineconeGrpcFuture object if async_req is True. 
286 """ 287 288 if filter is not None: 289 filter_struct = dict_to_proto_struct(filter) 290 else: 291 filter_struct = None 292 293 args_dict = self._parse_non_empty_args( 294 [ 295 ("ids", ids), 296 ("delete_all", delete_all), 297 ("namespace", namespace), 298 ("filter", filter_struct), 299 ] 300 ) 301 timeout = kwargs.pop("timeout", None) 302 303 request = DeleteRequest(**args_dict, **kwargs) 304 if async_req: 305 future = self._wrap_grpc_call(self.stub.Delete.future, request, timeout=timeout) 306 return PineconeGrpcFuture(future) 307 else: 308 return self._wrap_grpc_call(self.stub.Delete, request, timeout=timeout)
The Delete operation deletes vectors from the index, from a single namespace. No error raised if the vector id does not exist. Note: for any delete call, if namespace is not specified, the default namespace is used.
Delete can occur in the following mutual exclusive ways:
- Delete by ids from a single namespace
- Delete all vectors from a single namespace by setting delete_all to True
- Delete all vectors from a single namespace by specifying a metadata filter (note that for this option delete all must be set to False)
Examples:
>>> index.delete(ids=['id1', 'id2'], namespace='my_namespace') >>> index.delete(delete_all=True, namespace='my_namespace') >>> index.delete(filter={'key': 'value'}, namespace='my_namespace', async_req=True)
Arguments:
- ids (List[str]): Vector ids to delete [optional]
- delete_all (bool): This indicates that all vectors in the index namespace should be deleted.. [optional] Default is False.
- namespace (str): The namespace to delete vectors from [optional] If not specified, the default namespace is used.
- filter (Dict[str, Union[str, float, int, bool, List, dict]]): If specified, the metadata filter here will be used to select the vectors to delete. This is mutually exclusive with specifying ids to delete in the ids param or using delete_all=True. See https://www.pinecone.io/docs/metadata-filtering/. [optional]
- async_req (bool): If True, the delete operation will be performed asynchronously. Defaults to False. [optional]
Returns: DeleteResponse (contains no data) or a PineconeGrpcFuture object if async_req is True.
310 def fetch( 311 self, 312 ids: Optional[List[str]], 313 namespace: Optional[str] = None, 314 **kwargs, 315 ) -> FetchResponse: 316 """ 317 The fetch operation looks up and returns vectors, by ID, from a single namespace. 318 The returned vectors include the vector data and/or metadata. 319 320 Examples: 321 >>> index.fetch(ids=['id1', 'id2'], namespace='my_namespace') 322 >>> index.fetch(ids=['id1', 'id2']) 323 324 Args: 325 ids (List[str]): The vector IDs to fetch. 326 namespace (str): The namespace to fetch vectors from. 327 If not specified, the default namespace is used. [optional] 328 329 Returns: FetchResponse object which contains the list of Vector objects, and namespace name. 330 """ 331 timeout = kwargs.pop("timeout", None) 332 333 args_dict = self._parse_non_empty_args([("namespace", namespace)]) 334 335 request = FetchRequest(ids=ids, **args_dict, **kwargs) 336 response = self._wrap_grpc_call(self.stub.Fetch, request, timeout=timeout) 337 json_response = json_format.MessageToDict(response) 338 return parse_fetch_response(json_response)
The fetch operation looks up and returns vectors, by ID, from a single namespace. The returned vectors include the vector data and/or metadata.
Examples:
>>> index.fetch(ids=['id1', 'id2'], namespace='my_namespace') >>> index.fetch(ids=['id1', 'id2'])
Arguments:
- ids (List[str]): The vector IDs to fetch.
- namespace (str): The namespace to fetch vectors from. If not specified, the default namespace is used. [optional]
Returns: FetchResponse object which contains the list of Vector objects, and namespace name.
340 def query( 341 self, 342 vector: Optional[List[float]] = None, 343 id: Optional[str] = None, 344 namespace: Optional[str] = None, 345 top_k: Optional[int] = None, 346 filter: Optional[Dict[str, Union[str, float, int, bool, List, dict]]] = None, 347 include_values: Optional[bool] = None, 348 include_metadata: Optional[bool] = None, 349 sparse_vector: Optional[Union[GRPCSparseValues, SparseVectorTypedDict]] = None, 350 **kwargs, 351 ) -> QueryResponse: 352 """ 353 The Query operation searches a namespace, using a query vector. 354 It retrieves the ids of the most similar items in a namespace, along with their similarity scores. 355 356 Examples: 357 >>> index.query(vector=[1, 2, 3], top_k=10, namespace='my_namespace') 358 >>> index.query(id='id1', top_k=10, namespace='my_namespace') 359 >>> index.query(vector=[1, 2, 3], top_k=10, namespace='my_namespace', filter={'key': 'value'}) 360 >>> index.query(id='id1', top_k=10, namespace='my_namespace', include_metadata=True, include_values=True) 361 >>> index.query(vector=[1, 2, 3], sparse_vector={'indices': [1, 2], 'values': [0.2, 0.4]}, 362 >>> top_k=10, namespace='my_namespace') 363 >>> index.query(vector=[1, 2, 3], sparse_vector=GRPCSparseValues([1, 2], [0.2, 0.4]), 364 >>> top_k=10, namespace='my_namespace') 365 366 Args: 367 vector (List[float]): The query vector. This should be the same length as the dimension of the index 368 being queried. Each `query()` request can contain only one of the parameters 369 `id` or `vector`.. [optional] 370 id (str): The unique ID of the vector to be used as a query vector. 371 Each `query()` request can contain only one of the parameters 372 `vector` or `id`.. [optional] 373 top_k (int): The number of results to return for each query. Must be an integer greater than 1. 374 namespace (str): The namespace to fetch vectors from. 375 If not specified, the default namespace is used. [optional] 376 filter (Dict[str, Union[str, float, int, bool, List, dict]]): 377 The filter to apply. 
You can use vector metadata to limit your search. 378 See https://www.pinecone.io/docs/metadata-filtering/.. [optional] 379 include_values (bool): Indicates whether vector values are included in the response. 380 If omitted the server will use the default value of False [optional] 381 include_metadata (bool): Indicates whether metadata is included in the response as well as the ids. 382 If omitted the server will use the default value of False [optional] 383 sparse_vector: (Union[SparseValues, Dict[str, Union[List[float], List[int]]]]): sparse values of the query vector. 384 Expected to be either a GRPCSparseValues object or a dict of the form: 385 {'indices': List[int], 'values': List[float]}, where the lists each have the same length. 386 387 Returns: QueryResponse object which contains the list of the closest vectors as ScoredVector objects, 388 and namespace name. 389 """ 390 391 if vector is not None and id is not None: 392 raise ValueError("Cannot specify both `id` and `vector`") 393 394 if filter is not None: 395 filter_struct = dict_to_proto_struct(filter) 396 else: 397 filter_struct = None 398 399 sparse_vector = self._parse_sparse_values_arg(sparse_vector) 400 args_dict = self._parse_non_empty_args( 401 [ 402 ("vector", vector), 403 ("id", id), 404 ("namespace", namespace), 405 ("top_k", top_k), 406 ("filter", filter_struct), 407 ("include_values", include_values), 408 ("include_metadata", include_metadata), 409 ("sparse_vector", sparse_vector), 410 ] 411 ) 412 413 request = QueryRequest(**args_dict) 414 415 timeout = kwargs.pop("timeout", None) 416 response = self._wrap_grpc_call(self.stub.Query, request, timeout=timeout) 417 json_response = json_format.MessageToDict(response) 418 return parse_query_response(json_response, _check_type=False)
The Query operation searches a namespace, using a query vector. It retrieves the ids of the most similar items in a namespace, along with their similarity scores.
Examples:
>>> index.query(vector=[1, 2, 3], top_k=10, namespace='my_namespace') >>> index.query(id='id1', top_k=10, namespace='my_namespace') >>> index.query(vector=[1, 2, 3], top_k=10, namespace='my_namespace', filter={'key': 'value'}) >>> index.query(id='id1', top_k=10, namespace='my_namespace', include_metadata=True, include_values=True) >>> index.query(vector=[1, 2, 3], sparse_vector={'indices': [1, 2], 'values': [0.2, 0.4]}, >>> top_k=10, namespace='my_namespace') >>> index.query(vector=[1, 2, 3], sparse_vector=GRPCSparseValues([1, 2], [0.2, 0.4]), >>> top_k=10, namespace='my_namespace')
Arguments:
- vector (List[float]): The query vector. This should be the same length as the dimension of the index
  being queried. Each `query()` request can contain only one of the parameters `id` or `vector`. [optional]
- id (str): The unique ID of the vector to be used as a query vector.
  Each `query()` request can contain only one of the parameters `vector` or `id`. [optional]
- top_k (int): The number of results to return for each query. Must be an integer greater than 1.
- namespace (str): The namespace to fetch vectors from. If not specified, the default namespace is used. [optional]
- filter (Dict[str, Union[str, float, int, bool, List, dict]]): The filter to apply. You can use vector metadata to limit your search. See https://www.pinecone.io/docs/metadata-filtering/. [optional]
- include_values (bool): Indicates whether vector values are included in the response. If omitted the server will use the default value of False [optional]
- include_metadata (bool): Indicates whether metadata is included in the response as well as the ids. If omitted the server will use the default value of False [optional]
- sparse_vector: (Union[SparseValues, Dict[str, Union[List[float], List[int]]]]): sparse values of the query vector. Expected to be either a GRPCSparseValues object or a dict of the form: {'indices': List[int], 'values': List[float]}, where the lists each have the same length.
Returns: QueryResponse object which contains the list of the closest vectors as ScoredVector objects, and namespace name.
420 def update( 421 self, 422 id: str, 423 async_req: bool = False, 424 values: Optional[List[float]] = None, 425 set_metadata: Optional[ 426 Dict[ 427 str, 428 Union[str, float, int, bool, List[int], List[float], List[str]], 429 ] 430 ] = None, 431 namespace: Optional[str] = None, 432 sparse_values: Optional[Union[GRPCSparseValues, SparseVectorTypedDict]] = None, 433 **kwargs, 434 ) -> Union[UpdateResponse, PineconeGrpcFuture]: 435 """ 436 The Update operation updates vector in a namespace. 437 If a value is included, it will overwrite the previous value. 438 If a set_metadata is included, 439 the values of the fields specified in it will be added or overwrite the previous value. 440 441 Examples: 442 >>> index.update(id='id1', values=[1, 2, 3], namespace='my_namespace') 443 >>> index.update(id='id1', set_metadata={'key': 'value'}, namespace='my_namespace', async_req=True) 444 >>> index.update(id='id1', values=[1, 2, 3], sparse_values={'indices': [1, 2], 'values': [0.2, 0.4]}, 445 >>> namespace='my_namespace') 446 >>> index.update(id='id1', values=[1, 2, 3], sparse_values=GRPCSparseValues(indices=[1, 2], values=[0.2, 0.4]), 447 >>> namespace='my_namespace') 448 449 Args: 450 id (str): Vector's unique id. 451 async_req (bool): If True, the update operation will be performed asynchronously. 452 Defaults to False. [optional] 453 values (List[float]): vector values to set. [optional] 454 set_metadata (Dict[str, Union[str, float, int, bool, List[int], List[float], List[str]]]]): 455 metadata to set for vector. [optional] 456 namespace (str): Namespace name where to update the vector.. [optional] 457 sparse_values: (Dict[str, Union[List[float], List[int]]]): sparse values to update for the vector. 458 Expected to be either a GRPCSparseValues object or a dict of the form: 459 {'indices': List[int], 'values': List[float]} where the lists each have the same length. 460 461 462 Returns: UpdateResponse (contains no data) or a PineconeGrpcFuture object if async_req is True. 
463 """ 464 if set_metadata is not None: 465 set_metadata_struct = dict_to_proto_struct(set_metadata) 466 else: 467 set_metadata_struct = None 468 469 timeout = kwargs.pop("timeout", None) 470 sparse_values = self._parse_sparse_values_arg(sparse_values) 471 args_dict = self._parse_non_empty_args( 472 [ 473 ("values", values), 474 ("set_metadata", set_metadata_struct), 475 ("namespace", namespace), 476 ("sparse_values", sparse_values), 477 ] 478 ) 479 480 request = UpdateRequest(id=id, **args_dict) 481 if async_req: 482 future = self._wrap_grpc_call(self.stub.Update.future, request, timeout=timeout) 483 return PineconeGrpcFuture(future) 484 else: 485 return self._wrap_grpc_call(self.stub.Update, request, timeout=timeout)
The Update operation updates vector in a namespace. If a value is included, it will overwrite the previous value. If a set_metadata is included, the values of the fields specified in it will be added or overwrite the previous value.
Examples:
>>> index.update(id='id1', values=[1, 2, 3], namespace='my_namespace') >>> index.update(id='id1', set_metadata={'key': 'value'}, namespace='my_namespace', async_req=True) >>> index.update(id='id1', values=[1, 2, 3], sparse_values={'indices': [1, 2], 'values': [0.2, 0.4]}, >>> namespace='my_namespace') >>> index.update(id='id1', values=[1, 2, 3], sparse_values=GRPCSparseValues(indices=[1, 2], values=[0.2, 0.4]), >>> namespace='my_namespace')
Arguments:
- id (str): Vector's unique id.
- async_req (bool): If True, the update operation will be performed asynchronously. Defaults to False. [optional]
- values (List[float]): vector values to set. [optional]
- set_metadata (Dict[str, Union[str, float, int, bool, List[int], List[float], List[str]]]]): metadata to set for vector. [optional]
- namespace (str): Namespace name where to update the vector.. [optional]
- sparse_values: (Dict[str, Union[List[float], List[int]]]): sparse values to update for the vector. Expected to be either a GRPCSparseValues object or a dict of the form: {'indices': List[int], 'values': List[float]} where the lists each have the same length.
Returns: UpdateResponse (contains no data) or a PineconeGrpcFuture object if async_req is True.
487 def list_paginated( 488 self, 489 prefix: Optional[str] = None, 490 limit: Optional[int] = None, 491 pagination_token: Optional[str] = None, 492 namespace: Optional[str] = None, 493 **kwargs, 494 ) -> SimpleListResponse: 495 """ 496 The list_paginated operation finds vectors based on an id prefix within a single namespace. 497 It returns matching ids in a paginated form, with a pagination token to fetch the next page of results. 498 This id list can then be passed to fetch or delete operations, depending on your use case. 499 500 Consider using the `list` method to avoid having to handle pagination tokens manually. 501 502 Examples: 503 >>> results = index.list_paginated(prefix='99', limit=5, namespace='my_namespace') 504 >>> [v.id for v in results.vectors] 505 ['99', '990', '991', '992', '993'] 506 >>> results.pagination.next 507 eyJza2lwX3Bhc3QiOiI5OTMiLCJwcmVmaXgiOiI5OSJ9 508 >>> next_results = index.list_paginated(prefix='99', limit=5, namespace='my_namespace', pagination_token=results.pagination.next) 509 510 Args: 511 prefix (Optional[str]): The id prefix to match. If unspecified, an empty string prefix will 512 be used with the effect of listing all ids in a namespace [optional] 513 limit (Optional[int]): The maximum number of ids to return. If unspecified, the server will use a default value. [optional] 514 pagination_token (Optional[str]): A token needed to fetch the next page of results. This token is returned 515 in the response if additional results are available. [optional] 516 namespace (Optional[str]): The namespace to fetch vectors from. If not specified, the default namespace is used. [optional] 517 518 Returns: SimpleListResponse object which contains the list of ids, the namespace name, pagination information, and usage showing the number of read_units consumed. 
519 """ 520 args_dict = self._parse_non_empty_args( 521 [ 522 ("prefix", prefix), 523 ("limit", limit), 524 ("namespace", namespace), 525 ("pagination_token", pagination_token), 526 ] 527 ) 528 request = ListRequest(**args_dict, **kwargs) 529 timeout = kwargs.pop("timeout", None) 530 response = self._wrap_grpc_call(self.stub.List, request, timeout=timeout) 531 532 if response.pagination and response.pagination.next != "": 533 pagination = Pagination(next=response.pagination.next) 534 else: 535 pagination = None 536 537 return SimpleListResponse( 538 namespace=response.namespace, 539 vectors=response.vectors, 540 pagination=pagination, 541 )
The list_paginated operation finds vectors based on an id prefix within a single namespace. It returns matching ids in a paginated form, with a pagination token to fetch the next page of results. This id list can then be passed to fetch or delete operations, depending on your use case.
Consider using the `list` method to avoid having to handle pagination tokens manually.
Examples:
>>> results = index.list_paginated(prefix='99', limit=5, namespace='my_namespace') >>> [v.id for v in results.vectors] ['99', '990', '991', '992', '993'] >>> results.pagination.next eyJza2lwX3Bhc3QiOiI5OTMiLCJwcmVmaXgiOiI5OSJ9 >>> next_results = index.list_paginated(prefix='99', limit=5, namespace='my_namespace', pagination_token=results.pagination.next)
Arguments:
- prefix (Optional[str]): The id prefix to match. If unspecified, an empty string prefix will be used with the effect of listing all ids in a namespace [optional]
- limit (Optional[int]): The maximum number of ids to return. If unspecified, the server will use a default value. [optional]
- pagination_token (Optional[str]): A token needed to fetch the next page of results. This token is returned in the response if additional results are available. [optional]
- namespace (Optional[str]): The namespace to fetch vectors from. If not specified, the default namespace is used. [optional]
Returns: SimpleListResponse object which contains the list of ids, the namespace name, pagination information, and usage showing the number of read_units consumed.
543 def list(self, **kwargs): 544 """ 545 The list operation accepts all of the same arguments as list_paginated, and returns a generator that yields 546 a list of the matching vector ids in each page of results. It automatically handles pagination tokens on your 547 behalf. 548 549 Examples: 550 >>> for ids in index.list(prefix='99', limit=5, namespace='my_namespace'): 551 >>> print(ids) 552 ['99', '990', '991', '992', '993'] 553 ['994', '995', '996', '997', '998'] 554 ['999'] 555 556 Args: 557 prefix (Optional[str]): The id prefix to match. If unspecified, an empty string prefix will 558 be used with the effect of listing all ids in a namespace [optional] 559 limit (Optional[int]): The maximum number of ids to return. If unspecified, the server will use a default value. [optional] 560 pagination_token (Optional[str]): A token needed to fetch the next page of results. This token is returned 561 in the response if additional results are available. [optional] 562 namespace (Optional[str]): The namespace to fetch vectors from. If not specified, the default namespace is used. [optional] 563 """ 564 done = False 565 while not done: 566 try: 567 results = self.list_paginated(**kwargs) 568 except Exception as e: 569 raise e 570 571 if len(results.vectors) > 0: 572 yield [v.id for v in results.vectors] 573 574 if results.pagination and results.pagination.next: 575 kwargs.update({"pagination_token": results.pagination.next}) 576 else: 577 done = True
The list operation accepts all of the same arguments as list_paginated, and returns a generator that yields a list of the matching vector ids in each page of results. It automatically handles pagination tokens on your behalf.
Examples:
>>> for ids in index.list(prefix='99', limit=5, namespace='my_namespace'): >>> print(ids) ['99', '990', '991', '992', '993'] ['994', '995', '996', '997', '998'] ['999']
Arguments:
- prefix (Optional[str]): The id prefix to match. If unspecified, an empty string prefix will be used with the effect of listing all ids in a namespace [optional]
- limit (Optional[int]): The maximum number of ids to return. If unspecified, the server will use a default value. [optional]
- pagination_token (Optional[str]): A token needed to fetch the next page of results. This token is returned in the response if additional results are available. [optional]
- namespace (Optional[str]): The namespace to fetch vectors from. If not specified, the default namespace is used. [optional]
579 def describe_index_stats( 580 self, 581 filter: Optional[Dict[str, Union[str, float, int, bool, List, dict]]] = None, 582 **kwargs, 583 ) -> DescribeIndexStatsResponse: 584 """ 585 The DescribeIndexStats operation returns statistics about the index's contents. 586 For example: The vector count per namespace and the number of dimensions. 587 588 Examples: 589 >>> index.describe_index_stats() 590 >>> index.describe_index_stats(filter={'key': 'value'}) 591 592 Args: 593 filter (Dict[str, Union[str, float, int, bool, List, dict]]): 594 If this parameter is present, the operation only returns statistics for vectors that satisfy the filter. 595 See https://www.pinecone.io/docs/metadata-filtering/.. [optional] 596 597 Returns: DescribeIndexStatsResponse object which contains stats about the index. 598 """ 599 if filter is not None: 600 filter_struct = dict_to_proto_struct(filter) 601 else: 602 filter_struct = None 603 args_dict = self._parse_non_empty_args([("filter", filter_struct)]) 604 timeout = kwargs.pop("timeout", None) 605 606 request = DescribeIndexStatsRequest(**args_dict) 607 response = self._wrap_grpc_call(self.stub.DescribeIndexStats, request, timeout=timeout) 608 json_response = json_format.MessageToDict(response) 609 return parse_stats_response(json_response)
The DescribeIndexStats operation returns statistics about the index's contents. For example: The vector count per namespace and the number of dimensions.
Examples:
>>> index.describe_index_stats() >>> index.describe_index_stats(filter={'key': 'value'})
Arguments:
- filter (Dict[str, Union[str, float, int, bool, List, dict]]): If this parameter is present, the operation only returns statistics for vectors that satisfy the filter. See https://www.pinecone.io/docs/metadata-filtering/. [optional]
Returns: DescribeIndexStatsResponse object which contains stats about the index.