pinecone.grpc.index_grpc
import logging
from typing import Optional, Dict, Union, List, Tuple, Any, TypedDict, cast

from google.protobuf import json_format

from tqdm.autonotebook import tqdm

from .utils import (
    dict_to_proto_struct,
    parse_fetch_response,
    parse_query_response,
    parse_stats_response,
)
from .vector_factory_grpc import VectorFactoryGRPC

from pinecone.core.openapi.data.models import (
    FetchResponse,
    QueryResponse,
    DescribeIndexStatsResponse,
)
from pinecone.models.list_response import (
    ListResponse as SimpleListResponse,
    Pagination,
)
from pinecone.core.grpc.protos.vector_service_pb2 import (
    Vector as GRPCVector,
    QueryVector as GRPCQueryVector,
    UpsertRequest,
    UpsertResponse,
    DeleteRequest,
    QueryRequest,
    FetchRequest,
    UpdateRequest,
    ListRequest,
    ListResponse,
    DescribeIndexStatsRequest,
    DeleteResponse,
    UpdateResponse,
    SparseValues as GRPCSparseValues,
)
from pinecone import Vector as NonGRPCVector
from pinecone.core.grpc.protos.vector_service_pb2_grpc import VectorServiceStub
from .base import GRPCIndexBase
from .future import PineconeGrpcFuture


__all__ = ["GRPCIndex", "GRPCVector", "GRPCQueryVector", "GRPCSparseValues"]

_logger = logging.getLogger(__name__)


class SparseVectorTypedDict(TypedDict):
    # Parallel lists: indices[i] is the dimension of sparse value values[i];
    # both lists must have the same length.
    indices: List[int]
    values: List[float]


class GRPCIndex(GRPCIndexBase):
    """A client for interacting with a Pinecone index via GRPC API."""

    @property
    def stub_class(self):
        """The gRPC stub class used by the base class to open the channel."""
        return VectorServiceStub

    def upsert(
        self,
        vectors: Union[List[GRPCVector], List[NonGRPCVector], List[tuple], List[dict]],
        async_req: bool = False,
        namespace: Optional[str] = None,
        batch_size: Optional[int] = None,
        show_progress: bool = True,
        **kwargs,
    ) -> Union[UpsertResponse, PineconeGrpcFuture]:
        """
        The upsert operation writes vectors into a namespace.
        If a new value is upserted for an existing vector id, it will overwrite the previous value.

        Examples:
            >>> index.upsert([('id1', [1.0, 2.0, 3.0], {'key': 'value'}),
                              ('id2', [1.0, 2.0, 3.0])
                              ],
                              namespace='ns1', async_req=True)
            >>> index.upsert([{'id': 'id1', 'values': [1.0, 2.0, 3.0], 'metadata': {'key': 'value'}},
                              {'id': 'id2',
                               'values': [1.0, 2.0, 3.0],
                               'sparse_values': {'indices': [1, 8], 'values': [0.2, 0.4]}},
                              ])
            >>> index.upsert([GRPCVector(id='id1', values=[1.0, 2.0, 3.0], metadata={'key': 'value'}),
                              GRPCVector(id='id2', values=[1.0, 2.0, 3.0]),
                              GRPCVector(id='id3',
                                         values=[1.0, 2.0, 3.0],
                                         sparse_values=GRPCSparseValues(indices=[1, 2], values=[0.2, 0.4]))])

        Args:
            vectors (Union[List[Vector], List[Tuple]]): A list of vectors to upsert.

                     A vector can be represented by a 1) GRPCVector object, a 2) tuple or 3) a dictionary
                     1) if a tuple is used, it must be of the form (id, values, metadata) or (id, values).
                        where id is a string, vector is a list of floats, and metadata is a dict.
                        Examples: ('id1', [1.0, 2.0, 3.0], {'key': 'value'}), ('id2', [1.0, 2.0, 3.0])

                     2) if a GRPCVector object is used, a GRPCVector object must be of the form
                        GRPCVector(id, values, metadata), where metadata is an optional argument of type
                        Dict[str, Union[str, float, int, bool, List[int], List[float], List[str]]]
                        Examples: GRPCVector(id='id1', values=[1.0, 2.0, 3.0], metadata={'key': 'value'}),
                                  GRPCVector(id='id2', values=[1.0, 2.0, 3.0]),
                                  GRPCVector(id='id3',
                                             values=[1.0, 2.0, 3.0],
                                             sparse_values=GRPCSparseValues(indices=[1, 2], values=[0.2, 0.4]))

                     3) if a dictionary is used, it must be in the form
                        {'id': str, 'values': List[float], 'sparse_values': {'indices': List[int], 'values': List[float]},
                         'metadata': dict}

                    Note: the dimension of each vector must match the dimension of the index.
            async_req (bool): If True, the upsert operation will be performed asynchronously.
                              Cannot be used with batch_size.
                              Defaults to False. See: https://docs.pinecone.io/docs/performance-tuning [optional]
            namespace (str): The namespace to write to. If not specified, the default namespace is used. [optional]
            batch_size (int): The number of vectors to upsert in each batch.
                              Cannot be used with async_req=True.
                              If not specified, all vectors will be upserted in a single batch. [optional]
            show_progress (bool): Whether to show a progress bar using tqdm.
                                  Applied only if batch_size is provided. Default is True.

        Returns: UpsertResponse, contains the number of vectors upserted
        """
        if async_req and batch_size is not None:
            raise ValueError(
                "async_req is not supported when batch_size is provided."
                "To upsert in parallel, please follow: "
                "https://docs.pinecone.io/docs/performance-tuning"
            )

        timeout = kwargs.pop("timeout", None)

        vectors = list(map(VectorFactoryGRPC.build, vectors))
        if async_req:
            args_dict = self._parse_non_empty_args([("namespace", namespace)])
            request = UpsertRequest(vectors=vectors, **args_dict, **kwargs)
            future = self._wrap_grpc_call(self.stub.Upsert.future, request, timeout=timeout)
            return PineconeGrpcFuture(future)

        if batch_size is None:
            return self._upsert_batch(vectors, namespace, timeout=timeout, **kwargs)

        if not isinstance(batch_size, int) or batch_size <= 0:
            raise ValueError("batch_size must be a positive integer")

        pbar = tqdm(
            total=len(vectors),
            disable=not show_progress,
            desc="Upserted vectors",
        )
        total_upserted = 0
        for i in range(0, len(vectors), batch_size):
            batch_result = self._upsert_batch(
                vectors[i : i + batch_size],
                namespace,
                timeout=timeout,
                **kwargs,
            )
            pbar.update(batch_result.upserted_count)
            # we can't use here pbar.n for the case show_progress=False
            total_upserted += batch_result.upserted_count

        return UpsertResponse(upserted_count=total_upserted)

    def _upsert_batch(
        self,
        vectors: List[GRPCVector],
        namespace: Optional[str],
        timeout: Optional[float],
        **kwargs,
    ) -> UpsertResponse:
        # Issue a single blocking Upsert RPC for one batch of already-built GRPCVectors.
        args_dict = self._parse_non_empty_args([("namespace", namespace)])
        request = UpsertRequest(vectors=vectors, **args_dict)
        return self._wrap_grpc_call(self.stub.Upsert, request, timeout=timeout, **kwargs)

    def upsert_from_dataframe(
        self,
        df,
        namespace: str = "",
        batch_size: int = 500,
        use_async_requests: bool = True,
        show_progress: bool = True,
    ) -> UpsertResponse:
        """Upserts a dataframe into the index.

        Args:
            df: A pandas dataframe with the following columns: id, values, sparse_values, and metadata.
            namespace: The namespace to upsert into.
            batch_size: The number of rows to upsert in a single batch.
            use_async_requests: Whether to upsert multiple requests at the same time using asynchronous request mechanism.
                                Set to `False`
            show_progress: Whether to show a progress bar.
        """
        try:
            import pandas as pd
        except ImportError:
            raise RuntimeError(
                "The `pandas` package is not installed. Please install pandas to use `upsert_from_dataframe()`"
            )

        if not isinstance(df, pd.DataFrame):
            raise ValueError(f"Only pandas dataframes are supported. Found: {type(df)}")

        pbar = tqdm(
            total=len(df),
            disable=not show_progress,
            desc="sending upsert requests",
        )
        results = []
        for chunk in self._iter_dataframe(df, batch_size=batch_size):
            res = self.upsert(
                vectors=chunk,
                namespace=namespace,
                async_req=use_async_requests,
            )
            pbar.update(len(chunk))
            results.append(res)

        if use_async_requests:
            # With async_req=True each result is a future; resolve them all here.
            cast_results = cast(List[PineconeGrpcFuture], results)
            results = [
                async_result.result()
                for async_result in tqdm(
                    cast_results,
                    disable=not show_progress,
                    desc="collecting async responses",
                )
            ]

        upserted_count = 0
        for res in results:
            if hasattr(res, "upserted_count") and isinstance(res.upserted_count, int):
                upserted_count += res.upserted_count

        return UpsertResponse(upserted_count=upserted_count)

    @staticmethod
    def _iter_dataframe(df, batch_size):
        # Yield successive slices of the dataframe as lists of row dicts.
        for i in range(0, len(df), batch_size):
            batch = df.iloc[i : i + batch_size].to_dict(orient="records")
            yield batch

    def delete(
        self,
        ids: Optional[List[str]] = None,
        delete_all: Optional[bool] = None,
        namespace: Optional[str] = None,
        filter: Optional[Dict[str, Union[str, float, int, bool, List, dict]]] = None,
        async_req: bool = False,
        **kwargs,
    ) -> Union[DeleteResponse, PineconeGrpcFuture]:
        """
        The Delete operation deletes vectors from the index, from a single namespace.
        No error raised if the vector id does not exist.
        Note: for any delete call, if namespace is not specified, the default namespace is used.

        Delete can occur in the following mutual exclusive ways:
        1. Delete by ids from a single namespace
        2. Delete all vectors from a single namespace by setting delete_all to True
        3. Delete all vectors from a single namespace by specifying a metadata filter
           (note that for this option delete all must be set to False)

        Examples:
            >>> index.delete(ids=['id1', 'id2'], namespace='my_namespace')
            >>> index.delete(delete_all=True, namespace='my_namespace')
            >>> index.delete(filter={'key': 'value'}, namespace='my_namespace', async_req=True)

        Args:
            ids (List[str]): Vector ids to delete [optional]
            delete_all (bool): This indicates that all vectors in the index namespace should be deleted.. [optional]
                               Default is False.
            namespace (str): The namespace to delete vectors from [optional]
                             If not specified, the default namespace is used.
            filter (Dict[str, Union[str, float, int, bool, List, dict]]):
                    If specified, the metadata filter here will be used to select the vectors to delete.
                    This is mutually exclusive with specifying ids to delete in the ids param or using delete_all=True.
                    See https://www.pinecone.io/docs/metadata-filtering/.. [optional]
            async_req (bool): If True, the delete operation will be performed asynchronously.
                              Defaults to False. [optional]

        Returns: DeleteResponse (contains no data) or a PineconeGrpcFuture object if async_req is True.
        """

        if filter is not None:
            filter_struct = dict_to_proto_struct(filter)
        else:
            filter_struct = None

        args_dict = self._parse_non_empty_args(
            [
                ("ids", ids),
                ("delete_all", delete_all),
                ("namespace", namespace),
                ("filter", filter_struct),
            ]
        )
        timeout = kwargs.pop("timeout", None)

        request = DeleteRequest(**args_dict, **kwargs)
        if async_req:
            future = self._wrap_grpc_call(self.stub.Delete.future, request, timeout=timeout)
            return PineconeGrpcFuture(future)
        else:
            return self._wrap_grpc_call(self.stub.Delete, request, timeout=timeout)

    def fetch(
        self,
        ids: Optional[List[str]],
        namespace: Optional[str] = None,
        **kwargs,
    ) -> FetchResponse:
        """
        The fetch operation looks up and returns vectors, by ID, from a single namespace.
        The returned vectors include the vector data and/or metadata.

        Examples:
            >>> index.fetch(ids=['id1', 'id2'], namespace='my_namespace')
            >>> index.fetch(ids=['id1', 'id2'])

        Args:
            ids (List[str]): The vector IDs to fetch.
            namespace (str): The namespace to fetch vectors from.
                             If not specified, the default namespace is used. [optional]

        Returns: FetchResponse object which contains the list of Vector objects, and namespace name.
        """
        timeout = kwargs.pop("timeout", None)

        args_dict = self._parse_non_empty_args([("namespace", namespace)])

        request = FetchRequest(ids=ids, **args_dict, **kwargs)
        response = self._wrap_grpc_call(self.stub.Fetch, request, timeout=timeout)
        json_response = json_format.MessageToDict(response)
        return parse_fetch_response(json_response)

    def query(
        self,
        vector: Optional[List[float]] = None,
        id: Optional[str] = None,
        namespace: Optional[str] = None,
        top_k: Optional[int] = None,
        filter: Optional[Dict[str, Union[str, float, int, bool, List, dict]]] = None,
        include_values: Optional[bool] = None,
        include_metadata: Optional[bool] = None,
        sparse_vector: Optional[Union[GRPCSparseValues, SparseVectorTypedDict]] = None,
        **kwargs,
    ) -> QueryResponse:
        """
        The Query operation searches a namespace, using a query vector.
        It retrieves the ids of the most similar items in a namespace, along with their similarity scores.

        Examples:
            >>> index.query(vector=[1, 2, 3], top_k=10, namespace='my_namespace')
            >>> index.query(id='id1', top_k=10, namespace='my_namespace')
            >>> index.query(vector=[1, 2, 3], top_k=10, namespace='my_namespace', filter={'key': 'value'})
            >>> index.query(id='id1', top_k=10, namespace='my_namespace', include_metadata=True, include_values=True)
            >>> index.query(vector=[1, 2, 3], sparse_vector={'indices': [1, 2], 'values': [0.2, 0.4]},
            >>>             top_k=10, namespace='my_namespace')
            >>> index.query(vector=[1, 2, 3], sparse_vector=GRPCSparseValues([1, 2], [0.2, 0.4]),
            >>>             top_k=10, namespace='my_namespace')

        Args:
            vector (List[float]): The query vector. This should be the same length as the dimension of the index
                                  being queried. Each `query()` request can contain only one of the parameters
                                  `id` or `vector`.. [optional]
            id (str): The unique ID of the vector to be used as a query vector.
                      Each `query()` request can contain only one of the parameters
                      `vector` or `id`.. [optional]
            top_k (int): The number of results to return for each query. Must be an integer greater than 1.
            namespace (str): The namespace to fetch vectors from.
                             If not specified, the default namespace is used. [optional]
            filter (Dict[str, Union[str, float, int, bool, List, dict]]):
                    The filter to apply. You can use vector metadata to limit your search.
                    See https://www.pinecone.io/docs/metadata-filtering/.. [optional]
            include_values (bool): Indicates whether vector values are included in the response.
                                   If omitted the server will use the default value of False [optional]
            include_metadata (bool): Indicates whether metadata is included in the response as well as the ids.
                                     If omitted the server will use the default value of False [optional]
            sparse_vector: (Union[SparseValues, Dict[str, Union[List[float], List[int]]]]): sparse values of the query vector.
                           Expected to be either a GRPCSparseValues object or a dict of the form:
                           {'indices': List[int], 'values': List[float]}, where the lists each have the same length.

        Returns: QueryResponse object which contains the list of the closest vectors as ScoredVector objects,
                 and namespace name.
        """

        if vector is not None and id is not None:
            raise ValueError("Cannot specify both `id` and `vector`")

        if filter is not None:
            filter_struct = dict_to_proto_struct(filter)
        else:
            filter_struct = None

        sparse_vector = self._parse_sparse_values_arg(sparse_vector)
        args_dict = self._parse_non_empty_args(
            [
                ("vector", vector),
                ("id", id),
                ("namespace", namespace),
                ("top_k", top_k),
                ("filter", filter_struct),
                ("include_values", include_values),
                ("include_metadata", include_metadata),
                ("sparse_vector", sparse_vector),
            ]
        )

        request = QueryRequest(**args_dict)

        timeout = kwargs.pop("timeout", None)
        response = self._wrap_grpc_call(self.stub.Query, request, timeout=timeout)
        json_response = json_format.MessageToDict(response)
        return parse_query_response(json_response, _check_type=False)

    def update(
        self,
        id: str,
        async_req: bool = False,
        values: Optional[List[float]] = None,
        set_metadata: Optional[
            Dict[
                str,
                Union[str, float, int, bool, List[int], List[float], List[str]],
            ]
        ] = None,
        namespace: Optional[str] = None,
        sparse_values: Optional[Union[GRPCSparseValues, SparseVectorTypedDict]] = None,
        **kwargs,
    ) -> Union[UpdateResponse, PineconeGrpcFuture]:
        """
        The Update operation updates vector in a namespace.
        If a value is included, it will overwrite the previous value.
        If a set_metadata is included,
        the values of the fields specified in it will be added or overwrite the previous value.

        Examples:
            >>> index.update(id='id1', values=[1, 2, 3], namespace='my_namespace')
            >>> index.update(id='id1', set_metadata={'key': 'value'}, namespace='my_namespace', async_req=True)
            >>> index.update(id='id1', values=[1, 2, 3], sparse_values={'indices': [1, 2], 'values': [0.2, 0.4]},
            >>>              namespace='my_namespace')
            >>> index.update(id='id1', values=[1, 2, 3], sparse_values=GRPCSparseValues(indices=[1, 2], values=[0.2, 0.4]),
            >>>              namespace='my_namespace')

        Args:
            id (str): Vector's unique id.
            async_req (bool): If True, the update operation will be performed asynchronously.
                              Defaults to False. [optional]
            values (List[float]): vector values to set. [optional]
            set_metadata (Dict[str, Union[str, float, int, bool, List[int], List[float], List[str]]]]):
                metadata to set for vector. [optional]
            namespace (str): Namespace name where to update the vector.. [optional]
            sparse_values: (Dict[str, Union[List[float], List[int]]]): sparse values to update for the vector.
                           Expected to be either a GRPCSparseValues object or a dict of the form:
                           {'indices': List[int], 'values': List[float]} where the lists each have the same length.


        Returns: UpdateResponse (contains no data) or a PineconeGrpcFuture object if async_req is True.
        """
        if set_metadata is not None:
            set_metadata_struct = dict_to_proto_struct(set_metadata)
        else:
            set_metadata_struct = None

        timeout = kwargs.pop("timeout", None)
        sparse_values = self._parse_sparse_values_arg(sparse_values)
        args_dict = self._parse_non_empty_args(
            [
                ("values", values),
                ("set_metadata", set_metadata_struct),
                ("namespace", namespace),
                ("sparse_values", sparse_values),
            ]
        )

        request = UpdateRequest(id=id, **args_dict)
        if async_req:
            future = self._wrap_grpc_call(self.stub.Update.future, request, timeout=timeout)
            return PineconeGrpcFuture(future)
        else:
            return self._wrap_grpc_call(self.stub.Update, request, timeout=timeout)

    def list_paginated(
        self,
        prefix: Optional[str] = None,
        limit: Optional[int] = None,
        pagination_token: Optional[str] = None,
        namespace: Optional[str] = None,
        **kwargs,
    ) -> SimpleListResponse:
        """
        The list_paginated operation finds vectors based on an id prefix within a single namespace.
        It returns matching ids in a paginated form, with a pagination token to fetch the next page of results.
        This id list can then be passed to fetch or delete operations, depending on your use case.

        Consider using the `list` method to avoid having to handle pagination tokens manually.

        Examples:
            >>> results = index.list_paginated(prefix='99', limit=5, namespace='my_namespace')
            >>> [v.id for v in results.vectors]
            ['99', '990', '991', '992', '993']
            >>> results.pagination.next
            eyJza2lwX3Bhc3QiOiI5OTMiLCJwcmVmaXgiOiI5OSJ9
            >>> next_results = index.list_paginated(prefix='99', limit=5, namespace='my_namespace', pagination_token=results.pagination.next)

        Args:
            prefix (Optional[str]): The id prefix to match. If unspecified, an empty string prefix will
                                    be used with the effect of listing all ids in a namespace [optional]
            limit (Optional[int]): The maximum number of ids to return. If unspecified, the server will use a default value. [optional]
            pagination_token (Optional[str]): A token needed to fetch the next page of results. This token is returned
                in the response if additional results are available. [optional]
            namespace (Optional[str]): The namespace to fetch vectors from. If not specified, the default namespace is used. [optional]

        Returns: SimpleListResponse object which contains the list of ids, the namespace name, pagination information, and usage showing the number of read_units consumed.
        """
        args_dict = self._parse_non_empty_args(
            [
                ("prefix", prefix),
                ("limit", limit),
                ("namespace", namespace),
                ("pagination_token", pagination_token),
            ]
        )
        # Pop timeout BEFORE building the request: it is a gRPC call option, not a
        # ListRequest field, and splatting it into the request would raise a
        # ValueError for an unknown protobuf field.
        timeout = kwargs.pop("timeout", None)
        request = ListRequest(**args_dict, **kwargs)
        response = self._wrap_grpc_call(self.stub.List, request, timeout=timeout)

        if response.pagination and response.pagination.next != "":
            pagination = Pagination(next=response.pagination.next)
        else:
            pagination = None

        return SimpleListResponse(
            namespace=response.namespace,
            vectors=response.vectors,
            pagination=pagination,
        )

    def list(self, **kwargs):
        """
        The list operation accepts all of the same arguments as list_paginated, and returns a generator that yields
        a list of the matching vector ids in each page of results. It automatically handles pagination tokens on your
        behalf.

        Examples:
            >>> for ids in index.list(prefix='99', limit=5, namespace='my_namespace'):
            >>>     print(ids)
            ['99', '990', '991', '992', '993']
            ['994', '995', '996', '997', '998']
            ['999']

        Args:
            prefix (Optional[str]): The id prefix to match. If unspecified, an empty string prefix will
                                    be used with the effect of listing all ids in a namespace [optional]
            limit (Optional[int]): The maximum number of ids to return. If unspecified, the server will use a default value. [optional]
            pagination_token (Optional[str]): A token needed to fetch the next page of results. This token is returned
                in the response if additional results are available. [optional]
            namespace (Optional[str]): The namespace to fetch vectors from. If not specified, the default namespace is used. [optional]
        """
        done = False
        while not done:
            # Let any RPC error propagate to the caller unchanged.
            results = self.list_paginated(**kwargs)

            if len(results.vectors) > 0:
                yield [v.id for v in results.vectors]

            if results.pagination and results.pagination.next:
                kwargs.update({"pagination_token": results.pagination.next})
            else:
                done = True

    def describe_index_stats(
        self,
        filter: Optional[Dict[str, Union[str, float, int, bool, List, dict]]] = None,
        **kwargs,
    ) -> DescribeIndexStatsResponse:
        """
        The DescribeIndexStats operation returns statistics about the index's contents.
        For example: The vector count per namespace and the number of dimensions.

        Examples:
            >>> index.describe_index_stats()
            >>> index.describe_index_stats(filter={'key': 'value'})

        Args:
            filter (Dict[str, Union[str, float, int, bool, List, dict]]):
            If this parameter is present, the operation only returns statistics for vectors that satisfy the filter.
            See https://www.pinecone.io/docs/metadata-filtering/.. [optional]

        Returns: DescribeIndexStatsResponse object which contains stats about the index.
        """
        if filter is not None:
            filter_struct = dict_to_proto_struct(filter)
        else:
            filter_struct = None
        args_dict = self._parse_non_empty_args([("filter", filter_struct)])
        timeout = kwargs.pop("timeout", None)

        request = DescribeIndexStatsRequest(**args_dict)
        response = self._wrap_grpc_call(self.stub.DescribeIndexStats, request, timeout=timeout)
        json_response = json_format.MessageToDict(response)
        return parse_stats_response(json_response)

    @staticmethod
    def _parse_non_empty_args(args: List[Tuple[str, Any]]) -> Dict[str, Any]:
        # Drop (name, None) pairs so optional proto fields are simply omitted.
        return {arg_name: val for arg_name, val in args if val is not None}

    @staticmethod
    def _parse_sparse_values_arg(
        sparse_values: Optional[Union[GRPCSparseValues, SparseVectorTypedDict]]
    ) -> Optional[GRPCSparseValues]:
        # Normalize the user-facing sparse-values argument to a GRPCSparseValues
        # proto (or None when absent).
        if sparse_values is None:
            return None

        if isinstance(sparse_values, GRPCSparseValues):
            return sparse_values

        if not isinstance(sparse_values, dict) or "indices" not in sparse_values or "values" not in sparse_values:
            raise ValueError(
                "Invalid sparse values argument. Expected a dict of: {'indices': List[int], 'values': List[float]}."
                f"Received: {sparse_values}"
            )

        return GRPCSparseValues(indices=sparse_values["indices"], values=sparse_values["values"])
58class GRPCIndex(GRPCIndexBase): 59 """A client for interacting with a Pinecone index via GRPC API.""" 60 61 @property 62 def stub_class(self): 63 return VectorServiceStub 64 65 def upsert( 66 self, 67 vectors: Union[List[GRPCVector], List[NonGRPCVector], List[tuple], List[dict]], 68 async_req: bool = False, 69 namespace: Optional[str] = None, 70 batch_size: Optional[int] = None, 71 show_progress: bool = True, 72 **kwargs, 73 ) -> Union[UpsertResponse, PineconeGrpcFuture]: 74 """ 75 The upsert operation writes vectors into a namespace. 76 If a new value is upserted for an existing vector id, it will overwrite the previous value. 77 78 Examples: 79 >>> index.upsert([('id1', [1.0, 2.0, 3.0], {'key': 'value'}), 80 ('id2', [1.0, 2.0, 3.0]) 81 ], 82 namespace='ns1', async_req=True) 83 >>> index.upsert([{'id': 'id1', 'values': [1.0, 2.0, 3.0], 'metadata': {'key': 'value'}}, 84 {'id': 'id2', 85 'values': [1.0, 2.0, 3.0], 86 'sparse_values': {'indices': [1, 8], 'values': [0.2, 0.4]}, 87 ]) 88 >>> index.upsert([GRPCVector(id='id1', values=[1.0, 2.0, 3.0], metadata={'key': 'value'}), 89 GRPCVector(id='id2', values=[1.0, 2.0, 3.0]), 90 GRPCVector(id='id3', 91 values=[1.0, 2.0, 3.0], 92 sparse_values=GRPCSparseValues(indices=[1, 2], values=[0.2, 0.4]))]) 93 94 Args: 95 vectors (Union[List[Vector], List[Tuple]]): A list of vectors to upsert. 96 97 A vector can be represented by a 1) GRPCVector object, a 2) tuple or 3) a dictionary 98 1) if a tuple is used, it must be of the form (id, values, metadata) or (id, values). 99 where id is a string, vector is a list of floats, and metadata is a dict. 
100 Examples: ('id1', [1.0, 2.0, 3.0], {'key': 'value'}), ('id2', [1.0, 2.0, 3.0]) 101 102 2) if a GRPCVector object is used, a GRPCVector object must be of the form 103 GRPCVector(id, values, metadata), where metadata is an optional argument of type 104 Dict[str, Union[str, float, int, bool, List[int], List[float], List[str]]] 105 Examples: GRPCVector(id='id1', values=[1.0, 2.0, 3.0], metadata={'key': 'value'}), 106 GRPCVector(id='id2', values=[1.0, 2.0, 3.0]), 107 GRPCVector(id='id3', 108 values=[1.0, 2.0, 3.0], 109 sparse_values=GRPCSparseValues(indices=[1, 2], values=[0.2, 0.4])) 110 111 3) if a dictionary is used, it must be in the form 112 {'id': str, 'values': List[float], 'sparse_values': {'indices': List[int], 'values': List[float]}, 113 'metadata': dict} 114 115 Note: the dimension of each vector must match the dimension of the index. 116 async_req (bool): If True, the upsert operation will be performed asynchronously. 117 Cannot be used with batch_size. 118 Defaults to False. See: https://docs.pinecone.io/docs/performance-tuning [optional] 119 namespace (str): The namespace to write to. If not specified, the default namespace is used. [optional] 120 batch_size (int): The number of vectors to upsert in each batch. 121 Cannot be used with async_req=True. 122 If not specified, all vectors will be upserted in a single batch. [optional] 123 show_progress (bool): Whether to show a progress bar using tqdm. 124 Applied only if batch_size is provided. Default is True. 125 126 Returns: UpsertResponse, contains the number of vectors upserted 127 """ 128 if async_req and batch_size is not None: 129 raise ValueError( 130 "async_req is not supported when batch_size is provided." 
131 "To upsert in parallel, please follow: " 132 "https://docs.pinecone.io/docs/performance-tuning" 133 ) 134 135 timeout = kwargs.pop("timeout", None) 136 137 vectors = list(map(VectorFactoryGRPC.build, vectors)) 138 if async_req: 139 args_dict = self._parse_non_empty_args([("namespace", namespace)]) 140 request = UpsertRequest(vectors=vectors, **args_dict, **kwargs) 141 future = self._wrap_grpc_call(self.stub.Upsert.future, request, timeout=timeout) 142 return PineconeGrpcFuture(future) 143 144 if batch_size is None: 145 return self._upsert_batch(vectors, namespace, timeout=timeout, **kwargs) 146 147 if not isinstance(batch_size, int) or batch_size <= 0: 148 raise ValueError("batch_size must be a positive integer") 149 150 pbar = tqdm( 151 total=len(vectors), 152 disable=not show_progress, 153 desc="Upserted vectors", 154 ) 155 total_upserted = 0 156 for i in range(0, len(vectors), batch_size): 157 batch_result = self._upsert_batch( 158 vectors[i : i + batch_size], 159 namespace, 160 timeout=timeout, 161 **kwargs, 162 ) 163 pbar.update(batch_result.upserted_count) 164 # we can't use here pbar.n for the case show_progress=False 165 total_upserted += batch_result.upserted_count 166 167 return UpsertResponse(upserted_count=total_upserted) 168 169 def _upsert_batch( 170 self, 171 vectors: List[GRPCVector], 172 namespace: Optional[str], 173 timeout: Optional[float], 174 **kwargs, 175 ) -> UpsertResponse: 176 args_dict = self._parse_non_empty_args([("namespace", namespace)]) 177 request = UpsertRequest(vectors=vectors, **args_dict) 178 return self._wrap_grpc_call(self.stub.Upsert, request, timeout=timeout, **kwargs) 179 180 def upsert_from_dataframe( 181 self, 182 df, 183 namespace: str = "", 184 batch_size: int = 500, 185 use_async_requests: bool = True, 186 show_progress: bool = True, 187 ) -> UpsertResponse: 188 """Upserts a dataframe into the index. 189 190 Args: 191 df: A pandas dataframe with the following columns: id, values, sparse_values, and metadata. 
192 namespace: The namespace to upsert into. 193 batch_size: The number of rows to upsert in a single batch. 194 use_async_requests: Whether to upsert multiple requests at the same time using asynchronous request mechanism. 195 Set to `False` 196 show_progress: Whether to show a progress bar. 197 """ 198 try: 199 import pandas as pd 200 except ImportError: 201 raise RuntimeError( 202 "The `pandas` package is not installed. Please install pandas to use `upsert_from_dataframe()`" 203 ) 204 205 if not isinstance(df, pd.DataFrame): 206 raise ValueError(f"Only pandas dataframes are supported. Found: {type(df)}") 207 208 pbar = tqdm( 209 total=len(df), 210 disable=not show_progress, 211 desc="sending upsert requests", 212 ) 213 results = [] 214 for chunk in self._iter_dataframe(df, batch_size=batch_size): 215 res = self.upsert( 216 vectors=chunk, 217 namespace=namespace, 218 async_req=use_async_requests, 219 ) 220 pbar.update(len(chunk)) 221 results.append(res) 222 223 if use_async_requests: 224 cast_results = cast(List[PineconeGrpcFuture], results) 225 results = [ 226 async_result.result() 227 for async_result in tqdm( 228 cast_results, 229 disable=not show_progress, 230 desc="collecting async responses", 231 ) 232 ] 233 234 upserted_count = 0 235 for res in results: 236 if hasattr(res, "upserted_count") and isinstance(res.upserted_count, int): 237 upserted_count += res.upserted_count 238 239 return UpsertResponse(upserted_count=upserted_count) 240 241 @staticmethod 242 def _iter_dataframe(df, batch_size): 243 for i in range(0, len(df), batch_size): 244 batch = df.iloc[i : i + batch_size].to_dict(orient="records") 245 yield batch 246 247 def delete( 248 self, 249 ids: Optional[List[str]] = None, 250 delete_all: Optional[bool] = None, 251 namespace: Optional[str] = None, 252 filter: Optional[Dict[str, Union[str, float, int, bool, List, dict]]] = None, 253 async_req: bool = False, 254 **kwargs, 255 ) -> Union[DeleteResponse, PineconeGrpcFuture]: 256 """ 257 The Delete 
operation deletes vectors from the index, from a single namespace. 258 No error raised if the vector id does not exist. 259 Note: for any delete call, if namespace is not specified, the default namespace is used. 260 261 Delete can occur in the following mutual exclusive ways: 262 1. Delete by ids from a single namespace 263 2. Delete all vectors from a single namespace by setting delete_all to True 264 3. Delete all vectors from a single namespace by specifying a metadata filter 265 (note that for this option delete all must be set to False) 266 267 Examples: 268 >>> index.delete(ids=['id1', 'id2'], namespace='my_namespace') 269 >>> index.delete(delete_all=True, namespace='my_namespace') 270 >>> index.delete(filter={'key': 'value'}, namespace='my_namespace', async_req=True) 271 272 Args: 273 ids (List[str]): Vector ids to delete [optional] 274 delete_all (bool): This indicates that all vectors in the index namespace should be deleted.. [optional] 275 Default is False. 276 namespace (str): The namespace to delete vectors from [optional] 277 If not specified, the default namespace is used. 278 filter (Dict[str, Union[str, float, int, bool, List, dict]]): 279 If specified, the metadata filter here will be used to select the vectors to delete. 280 This is mutually exclusive with specifying ids to delete in the ids param or using delete_all=True. 281 See https://www.pinecone.io/docs/metadata-filtering/.. [optional] 282 async_req (bool): If True, the delete operation will be performed asynchronously. 283 Defaults to False. [optional] 284 285 Returns: DeleteResponse (contains no data) or a PineconeGrpcFuture object if async_req is True. 
286 """ 287 288 if filter is not None: 289 filter_struct = dict_to_proto_struct(filter) 290 else: 291 filter_struct = None 292 293 args_dict = self._parse_non_empty_args( 294 [ 295 ("ids", ids), 296 ("delete_all", delete_all), 297 ("namespace", namespace), 298 ("filter", filter_struct), 299 ] 300 ) 301 timeout = kwargs.pop("timeout", None) 302 303 request = DeleteRequest(**args_dict, **kwargs) 304 if async_req: 305 future = self._wrap_grpc_call(self.stub.Delete.future, request, timeout=timeout) 306 return PineconeGrpcFuture(future) 307 else: 308 return self._wrap_grpc_call(self.stub.Delete, request, timeout=timeout) 309 310 def fetch( 311 self, 312 ids: Optional[List[str]], 313 namespace: Optional[str] = None, 314 **kwargs, 315 ) -> FetchResponse: 316 """ 317 The fetch operation looks up and returns vectors, by ID, from a single namespace. 318 The returned vectors include the vector data and/or metadata. 319 320 Examples: 321 >>> index.fetch(ids=['id1', 'id2'], namespace='my_namespace') 322 >>> index.fetch(ids=['id1', 'id2']) 323 324 Args: 325 ids (List[str]): The vector IDs to fetch. 326 namespace (str): The namespace to fetch vectors from. 327 If not specified, the default namespace is used. [optional] 328 329 Returns: FetchResponse object which contains the list of Vector objects, and namespace name. 
330 """ 331 timeout = kwargs.pop("timeout", None) 332 333 args_dict = self._parse_non_empty_args([("namespace", namespace)]) 334 335 request = FetchRequest(ids=ids, **args_dict, **kwargs) 336 response = self._wrap_grpc_call(self.stub.Fetch, request, timeout=timeout) 337 json_response = json_format.MessageToDict(response) 338 return parse_fetch_response(json_response) 339 340 def query( 341 self, 342 vector: Optional[List[float]] = None, 343 id: Optional[str] = None, 344 namespace: Optional[str] = None, 345 top_k: Optional[int] = None, 346 filter: Optional[Dict[str, Union[str, float, int, bool, List, dict]]] = None, 347 include_values: Optional[bool] = None, 348 include_metadata: Optional[bool] = None, 349 sparse_vector: Optional[Union[GRPCSparseValues, SparseVectorTypedDict]] = None, 350 **kwargs, 351 ) -> QueryResponse: 352 """ 353 The Query operation searches a namespace, using a query vector. 354 It retrieves the ids of the most similar items in a namespace, along with their similarity scores. 355 356 Examples: 357 >>> index.query(vector=[1, 2, 3], top_k=10, namespace='my_namespace') 358 >>> index.query(id='id1', top_k=10, namespace='my_namespace') 359 >>> index.query(vector=[1, 2, 3], top_k=10, namespace='my_namespace', filter={'key': 'value'}) 360 >>> index.query(id='id1', top_k=10, namespace='my_namespace', include_metadata=True, include_values=True) 361 >>> index.query(vector=[1, 2, 3], sparse_vector={'indices': [1, 2], 'values': [0.2, 0.4]}, 362 >>> top_k=10, namespace='my_namespace') 363 >>> index.query(vector=[1, 2, 3], sparse_vector=GRPCSparseValues([1, 2], [0.2, 0.4]), 364 >>> top_k=10, namespace='my_namespace') 365 366 Args: 367 vector (List[float]): The query vector. This should be the same length as the dimension of the index 368 being queried. Each `query()` request can contain only one of the parameters 369 `id` or `vector`.. [optional] 370 id (str): The unique ID of the vector to be used as a query vector. 
371 Each `query()` request can contain only one of the parameters 372 `vector` or `id`.. [optional] 373 top_k (int): The number of results to return for each query. Must be an integer greater than 1. 374 namespace (str): The namespace to fetch vectors from. 375 If not specified, the default namespace is used. [optional] 376 filter (Dict[str, Union[str, float, int, bool, List, dict]]): 377 The filter to apply. You can use vector metadata to limit your search. 378 See https://www.pinecone.io/docs/metadata-filtering/.. [optional] 379 include_values (bool): Indicates whether vector values are included in the response. 380 If omitted the server will use the default value of False [optional] 381 include_metadata (bool): Indicates whether metadata is included in the response as well as the ids. 382 If omitted the server will use the default value of False [optional] 383 sparse_vector: (Union[SparseValues, Dict[str, Union[List[float], List[int]]]]): sparse values of the query vector. 384 Expected to be either a GRPCSparseValues object or a dict of the form: 385 {'indices': List[int], 'values': List[float]}, where the lists each have the same length. 386 387 Returns: QueryResponse object which contains the list of the closest vectors as ScoredVector objects, 388 and namespace name. 
389 """ 390 391 if vector is not None and id is not None: 392 raise ValueError("Cannot specify both `id` and `vector`") 393 394 if filter is not None: 395 filter_struct = dict_to_proto_struct(filter) 396 else: 397 filter_struct = None 398 399 sparse_vector = self._parse_sparse_values_arg(sparse_vector) 400 args_dict = self._parse_non_empty_args( 401 [ 402 ("vector", vector), 403 ("id", id), 404 ("namespace", namespace), 405 ("top_k", top_k), 406 ("filter", filter_struct), 407 ("include_values", include_values), 408 ("include_metadata", include_metadata), 409 ("sparse_vector", sparse_vector), 410 ] 411 ) 412 413 request = QueryRequest(**args_dict) 414 415 timeout = kwargs.pop("timeout", None) 416 response = self._wrap_grpc_call(self.stub.Query, request, timeout=timeout) 417 json_response = json_format.MessageToDict(response) 418 return parse_query_response(json_response, _check_type=False) 419 420 def update( 421 self, 422 id: str, 423 async_req: bool = False, 424 values: Optional[List[float]] = None, 425 set_metadata: Optional[ 426 Dict[ 427 str, 428 Union[str, float, int, bool, List[int], List[float], List[str]], 429 ] 430 ] = None, 431 namespace: Optional[str] = None, 432 sparse_values: Optional[Union[GRPCSparseValues, SparseVectorTypedDict]] = None, 433 **kwargs, 434 ) -> Union[UpdateResponse, PineconeGrpcFuture]: 435 """ 436 The Update operation updates vector in a namespace. 437 If a value is included, it will overwrite the previous value. 438 If a set_metadata is included, 439 the values of the fields specified in it will be added or overwrite the previous value. 
440 441 Examples: 442 >>> index.update(id='id1', values=[1, 2, 3], namespace='my_namespace') 443 >>> index.update(id='id1', set_metadata={'key': 'value'}, namespace='my_namespace', async_req=True) 444 >>> index.update(id='id1', values=[1, 2, 3], sparse_values={'indices': [1, 2], 'values': [0.2, 0.4]}, 445 >>> namespace='my_namespace') 446 >>> index.update(id='id1', values=[1, 2, 3], sparse_values=GRPCSparseValues(indices=[1, 2], values=[0.2, 0.4]), 447 >>> namespace='my_namespace') 448 449 Args: 450 id (str): Vector's unique id. 451 async_req (bool): If True, the update operation will be performed asynchronously. 452 Defaults to False. [optional] 453 values (List[float]): vector values to set. [optional] 454 set_metadata (Dict[str, Union[str, float, int, bool, List[int], List[float], List[str]]]]): 455 metadata to set for vector. [optional] 456 namespace (str): Namespace name where to update the vector.. [optional] 457 sparse_values: (Dict[str, Union[List[float], List[int]]]): sparse values to update for the vector. 458 Expected to be either a GRPCSparseValues object or a dict of the form: 459 {'indices': List[int], 'values': List[float]} where the lists each have the same length. 460 461 462 Returns: UpdateResponse (contains no data) or a PineconeGrpcFuture object if async_req is True. 
463 """ 464 if set_metadata is not None: 465 set_metadata_struct = dict_to_proto_struct(set_metadata) 466 else: 467 set_metadata_struct = None 468 469 timeout = kwargs.pop("timeout", None) 470 sparse_values = self._parse_sparse_values_arg(sparse_values) 471 args_dict = self._parse_non_empty_args( 472 [ 473 ("values", values), 474 ("set_metadata", set_metadata_struct), 475 ("namespace", namespace), 476 ("sparse_values", sparse_values), 477 ] 478 ) 479 480 request = UpdateRequest(id=id, **args_dict) 481 if async_req: 482 future = self._wrap_grpc_call(self.stub.Update.future, request, timeout=timeout) 483 return PineconeGrpcFuture(future) 484 else: 485 return self._wrap_grpc_call(self.stub.Update, request, timeout=timeout) 486 487 def list_paginated( 488 self, 489 prefix: Optional[str] = None, 490 limit: Optional[int] = None, 491 pagination_token: Optional[str] = None, 492 namespace: Optional[str] = None, 493 **kwargs, 494 ) -> SimpleListResponse: 495 """ 496 The list_paginated operation finds vectors based on an id prefix within a single namespace. 497 It returns matching ids in a paginated form, with a pagination token to fetch the next page of results. 498 This id list can then be passed to fetch or delete operations, depending on your use case. 499 500 Consider using the `list` method to avoid having to handle pagination tokens manually. 501 502 Examples: 503 >>> results = index.list_paginated(prefix='99', limit=5, namespace='my_namespace') 504 >>> [v.id for v in results.vectors] 505 ['99', '990', '991', '992', '993'] 506 >>> results.pagination.next 507 eyJza2lwX3Bhc3QiOiI5OTMiLCJwcmVmaXgiOiI5OSJ9 508 >>> next_results = index.list_paginated(prefix='99', limit=5, namespace='my_namespace', pagination_token=results.pagination.next) 509 510 Args: 511 prefix (Optional[str]): The id prefix to match. 
If unspecified, an empty string prefix will 512 be used with the effect of listing all ids in a namespace [optional] 513 limit (Optional[int]): The maximum number of ids to return. If unspecified, the server will use a default value. [optional] 514 pagination_token (Optional[str]): A token needed to fetch the next page of results. This token is returned 515 in the response if additional results are available. [optional] 516 namespace (Optional[str]): The namespace to fetch vectors from. If not specified, the default namespace is used. [optional] 517 518 Returns: SimpleListResponse object which contains the list of ids, the namespace name, pagination information, and usage showing the number of read_units consumed. 519 """ 520 args_dict = self._parse_non_empty_args( 521 [ 522 ("prefix", prefix), 523 ("limit", limit), 524 ("namespace", namespace), 525 ("pagination_token", pagination_token), 526 ] 527 ) 528 request = ListRequest(**args_dict, **kwargs) 529 timeout = kwargs.pop("timeout", None) 530 response = self._wrap_grpc_call(self.stub.List, request, timeout=timeout) 531 532 if response.pagination and response.pagination.next != "": 533 pagination = Pagination(next=response.pagination.next) 534 else: 535 pagination = None 536 537 return SimpleListResponse( 538 namespace=response.namespace, 539 vectors=response.vectors, 540 pagination=pagination, 541 ) 542 543 def list(self, **kwargs): 544 """ 545 The list operation accepts all of the same arguments as list_paginated, and returns a generator that yields 546 a list of the matching vector ids in each page of results. It automatically handles pagination tokens on your 547 behalf. 548 549 Examples: 550 >>> for ids in index.list(prefix='99', limit=5, namespace='my_namespace'): 551 >>> print(ids) 552 ['99', '990', '991', '992', '993'] 553 ['994', '995', '996', '997', '998'] 554 ['999'] 555 556 Args: 557 prefix (Optional[str]): The id prefix to match. 
If unspecified, an empty string prefix will 558 be used with the effect of listing all ids in a namespace [optional] 559 limit (Optional[int]): The maximum number of ids to return. If unspecified, the server will use a default value. [optional] 560 pagination_token (Optional[str]): A token needed to fetch the next page of results. This token is returned 561 in the response if additional results are available. [optional] 562 namespace (Optional[str]): The namespace to fetch vectors from. If not specified, the default namespace is used. [optional] 563 """ 564 done = False 565 while not done: 566 try: 567 results = self.list_paginated(**kwargs) 568 except Exception as e: 569 raise e 570 571 if len(results.vectors) > 0: 572 yield [v.id for v in results.vectors] 573 574 if results.pagination and results.pagination.next: 575 kwargs.update({"pagination_token": results.pagination.next}) 576 else: 577 done = True 578 579 def describe_index_stats( 580 self, 581 filter: Optional[Dict[str, Union[str, float, int, bool, List, dict]]] = None, 582 **kwargs, 583 ) -> DescribeIndexStatsResponse: 584 """ 585 The DescribeIndexStats operation returns statistics about the index's contents. 586 For example: The vector count per namespace and the number of dimensions. 587 588 Examples: 589 >>> index.describe_index_stats() 590 >>> index.describe_index_stats(filter={'key': 'value'}) 591 592 Args: 593 filter (Dict[str, Union[str, float, int, bool, List, dict]]): 594 If this parameter is present, the operation only returns statistics for vectors that satisfy the filter. 595 See https://www.pinecone.io/docs/metadata-filtering/.. [optional] 596 597 Returns: DescribeIndexStatsResponse object which contains stats about the index. 
598 """ 599 if filter is not None: 600 filter_struct = dict_to_proto_struct(filter) 601 else: 602 filter_struct = None 603 args_dict = self._parse_non_empty_args([("filter", filter_struct)]) 604 timeout = kwargs.pop("timeout", None) 605 606 request = DescribeIndexStatsRequest(**args_dict) 607 response = self._wrap_grpc_call(self.stub.DescribeIndexStats, request, timeout=timeout) 608 json_response = json_format.MessageToDict(response) 609 return parse_stats_response(json_response) 610 611 @staticmethod 612 def _parse_non_empty_args(args: List[Tuple[str, Any]]) -> Dict[str, Any]: 613 return {arg_name: val for arg_name, val in args if val is not None} 614 615 @staticmethod 616 def _parse_sparse_values_arg( 617 sparse_values: Optional[Union[GRPCSparseValues, SparseVectorTypedDict]] 618 ) -> Optional[GRPCSparseValues]: 619 if sparse_values is None: 620 return None 621 622 if isinstance(sparse_values, GRPCSparseValues): 623 return sparse_values 624 625 if not isinstance(sparse_values, dict) or "indices" not in sparse_values or "values" not in sparse_values: 626 raise ValueError( 627 "Invalid sparse values argument. Expected a dict of: {'indices': List[int], 'values': List[float]}." 628 f"Received: {sparse_values}" 629 ) 630 631 return GRPCSparseValues(indices=sparse_values["indices"], values=sparse_values["values"])
A client for interacting with a Pinecone index via GRPC API.
65 def upsert( 66 self, 67 vectors: Union[List[GRPCVector], List[NonGRPCVector], List[tuple], List[dict]], 68 async_req: bool = False, 69 namespace: Optional[str] = None, 70 batch_size: Optional[int] = None, 71 show_progress: bool = True, 72 **kwargs, 73 ) -> Union[UpsertResponse, PineconeGrpcFuture]: 74 """ 75 The upsert operation writes vectors into a namespace. 76 If a new value is upserted for an existing vector id, it will overwrite the previous value. 77 78 Examples: 79 >>> index.upsert([('id1', [1.0, 2.0, 3.0], {'key': 'value'}), 80 ('id2', [1.0, 2.0, 3.0]) 81 ], 82 namespace='ns1', async_req=True) 83 >>> index.upsert([{'id': 'id1', 'values': [1.0, 2.0, 3.0], 'metadata': {'key': 'value'}}, 84 {'id': 'id2', 85 'values': [1.0, 2.0, 3.0], 86 'sparse_values': {'indices': [1, 8], 'values': [0.2, 0.4]}, 87 ]) 88 >>> index.upsert([GRPCVector(id='id1', values=[1.0, 2.0, 3.0], metadata={'key': 'value'}), 89 GRPCVector(id='id2', values=[1.0, 2.0, 3.0]), 90 GRPCVector(id='id3', 91 values=[1.0, 2.0, 3.0], 92 sparse_values=GRPCSparseValues(indices=[1, 2], values=[0.2, 0.4]))]) 93 94 Args: 95 vectors (Union[List[Vector], List[Tuple]]): A list of vectors to upsert. 96 97 A vector can be represented by a 1) GRPCVector object, a 2) tuple or 3) a dictionary 98 1) if a tuple is used, it must be of the form (id, values, metadata) or (id, values). 99 where id is a string, vector is a list of floats, and metadata is a dict. 
100 Examples: ('id1', [1.0, 2.0, 3.0], {'key': 'value'}), ('id2', [1.0, 2.0, 3.0]) 101 102 2) if a GRPCVector object is used, a GRPCVector object must be of the form 103 GRPCVector(id, values, metadata), where metadata is an optional argument of type 104 Dict[str, Union[str, float, int, bool, List[int], List[float], List[str]]] 105 Examples: GRPCVector(id='id1', values=[1.0, 2.0, 3.0], metadata={'key': 'value'}), 106 GRPCVector(id='id2', values=[1.0, 2.0, 3.0]), 107 GRPCVector(id='id3', 108 values=[1.0, 2.0, 3.0], 109 sparse_values=GRPCSparseValues(indices=[1, 2], values=[0.2, 0.4])) 110 111 3) if a dictionary is used, it must be in the form 112 {'id': str, 'values': List[float], 'sparse_values': {'indices': List[int], 'values': List[float]}, 113 'metadata': dict} 114 115 Note: the dimension of each vector must match the dimension of the index. 116 async_req (bool): If True, the upsert operation will be performed asynchronously. 117 Cannot be used with batch_size. 118 Defaults to False. See: https://docs.pinecone.io/docs/performance-tuning [optional] 119 namespace (str): The namespace to write to. If not specified, the default namespace is used. [optional] 120 batch_size (int): The number of vectors to upsert in each batch. 121 Cannot be used with async_req=True. 122 If not specified, all vectors will be upserted in a single batch. [optional] 123 show_progress (bool): Whether to show a progress bar using tqdm. 124 Applied only if batch_size is provided. Default is True. 125 126 Returns: UpsertResponse, contains the number of vectors upserted 127 """ 128 if async_req and batch_size is not None: 129 raise ValueError( 130 "async_req is not supported when batch_size is provided." 
131 "To upsert in parallel, please follow: " 132 "https://docs.pinecone.io/docs/performance-tuning" 133 ) 134 135 timeout = kwargs.pop("timeout", None) 136 137 vectors = list(map(VectorFactoryGRPC.build, vectors)) 138 if async_req: 139 args_dict = self._parse_non_empty_args([("namespace", namespace)]) 140 request = UpsertRequest(vectors=vectors, **args_dict, **kwargs) 141 future = self._wrap_grpc_call(self.stub.Upsert.future, request, timeout=timeout) 142 return PineconeGrpcFuture(future) 143 144 if batch_size is None: 145 return self._upsert_batch(vectors, namespace, timeout=timeout, **kwargs) 146 147 if not isinstance(batch_size, int) or batch_size <= 0: 148 raise ValueError("batch_size must be a positive integer") 149 150 pbar = tqdm( 151 total=len(vectors), 152 disable=not show_progress, 153 desc="Upserted vectors", 154 ) 155 total_upserted = 0 156 for i in range(0, len(vectors), batch_size): 157 batch_result = self._upsert_batch( 158 vectors[i : i + batch_size], 159 namespace, 160 timeout=timeout, 161 **kwargs, 162 ) 163 pbar.update(batch_result.upserted_count) 164 # we can't use here pbar.n for the case show_progress=False 165 total_upserted += batch_result.upserted_count 166 167 return UpsertResponse(upserted_count=total_upserted)
The upsert operation writes vectors into a namespace. If a new value is upserted for an existing vector id, it will overwrite the previous value.
Examples:
>>> index.upsert([('id1', [1.0, 2.0, 3.0], {'key': 'value'}), ('id2', [1.0, 2.0, 3.0]) ], namespace='ns1', async_req=True) >>> index.upsert([{'id': 'id1', 'values': [1.0, 2.0, 3.0], 'metadata': {'key': 'value'}}, {'id': 'id2', 'values': [1.0, 2.0, 3.0], 'sparse_values': {'indices': [1, 8], 'values': [0.2, 0.4]}, ]) >>> index.upsert([GRPCVector(id='id1', values=[1.0, 2.0, 3.0], metadata={'key': 'value'}), GRPCVector(id='id2', values=[1.0, 2.0, 3.0]), GRPCVector(id='id3', values=[1.0, 2.0, 3.0], sparse_values=GRPCSparseValues(indices=[1, 2], values=[0.2, 0.4]))])
Arguments:
vectors (Union[List[Vector], List[Tuple]]): A list of vectors to upsert.
A vector can be represented by a 1) GRPCVector object, a 2) tuple or 3) a dictionary 1) if a tuple is used, it must be of the form (id, values, metadata) or (id, values). where id is a string, vector is a list of floats, and metadata is a dict. Examples: ('id1', [1.0, 2.0, 3.0], {'key': 'value'}), ('id2', [1.0, 2.0, 3.0])
2) if a GRPCVector object is used, a GRPCVector object must be of the form GRPCVector(id, values, metadata), where metadata is an optional argument of type Dict[str, Union[str, float, int, bool, List[int], List[float], List[str]]] Examples: GRPCVector(id='id1', values=[1.0, 2.0, 3.0], metadata={'key': 'value'}), GRPCVector(id='id2', values=[1.0, 2.0, 3.0]), GRPCVector(id='id3', values=[1.0, 2.0, 3.0], sparse_values=GRPCSparseValues(indices=[1, 2], values=[0.2, 0.4]))
3) if a dictionary is used, it must be in the form {'id': str, 'values': List[float], 'sparse_values': {'indices': List[int], 'values': List[float]}, 'metadata': dict}
Note: the dimension of each vector must match the dimension of the index.
- async_req (bool): If True, the upsert operation will be performed asynchronously. Cannot be used with batch_size. Defaults to False. See: https://docs.pinecone.io/docs/performance-tuning [optional]
- namespace (str): The namespace to write to. If not specified, the default namespace is used. [optional]
- batch_size (int): The number of vectors to upsert in each batch. Cannot be used with async_req=True. If not specified, all vectors will be upserted in a single batch. [optional]
- show_progress (bool): Whether to show a progress bar using tqdm. Applied only if batch_size is provided. Default is True.
Returns: UpsertResponse, contains the number of vectors upserted
180 def upsert_from_dataframe( 181 self, 182 df, 183 namespace: str = "", 184 batch_size: int = 500, 185 use_async_requests: bool = True, 186 show_progress: bool = True, 187 ) -> UpsertResponse: 188 """Upserts a dataframe into the index. 189 190 Args: 191 df: A pandas dataframe with the following columns: id, values, sparse_values, and metadata. 192 namespace: The namespace to upsert into. 193 batch_size: The number of rows to upsert in a single batch. 194 use_async_requests: Whether to upsert multiple requests at the same time using asynchronous request mechanism. 195 Set to `False` 196 show_progress: Whether to show a progress bar. 197 """ 198 try: 199 import pandas as pd 200 except ImportError: 201 raise RuntimeError( 202 "The `pandas` package is not installed. Please install pandas to use `upsert_from_dataframe()`" 203 ) 204 205 if not isinstance(df, pd.DataFrame): 206 raise ValueError(f"Only pandas dataframes are supported. Found: {type(df)}") 207 208 pbar = tqdm( 209 total=len(df), 210 disable=not show_progress, 211 desc="sending upsert requests", 212 ) 213 results = [] 214 for chunk in self._iter_dataframe(df, batch_size=batch_size): 215 res = self.upsert( 216 vectors=chunk, 217 namespace=namespace, 218 async_req=use_async_requests, 219 ) 220 pbar.update(len(chunk)) 221 results.append(res) 222 223 if use_async_requests: 224 cast_results = cast(List[PineconeGrpcFuture], results) 225 results = [ 226 async_result.result() 227 for async_result in tqdm( 228 cast_results, 229 disable=not show_progress, 230 desc="collecting async responses", 231 ) 232 ] 233 234 upserted_count = 0 235 for res in results: 236 if hasattr(res, "upserted_count") and isinstance(res.upserted_count, int): 237 upserted_count += res.upserted_count 238 239 return UpsertResponse(upserted_count=upserted_count)
Upserts a dataframe into the index.
Arguments:
- df: A pandas dataframe with the following columns: id, values, sparse_values, and metadata.
- namespace: The namespace to upsert into.
- batch_size: The number of rows to upsert in a single batch.
- use_async_requests: Whether to upsert multiple requests at the same time using an asynchronous request mechanism. Set to `False` to send each batch synchronously.
- show_progress: Whether to show a progress bar.
247 def delete( 248 self, 249 ids: Optional[List[str]] = None, 250 delete_all: Optional[bool] = None, 251 namespace: Optional[str] = None, 252 filter: Optional[Dict[str, Union[str, float, int, bool, List, dict]]] = None, 253 async_req: bool = False, 254 **kwargs, 255 ) -> Union[DeleteResponse, PineconeGrpcFuture]: 256 """ 257 The Delete operation deletes vectors from the index, from a single namespace. 258 No error raised if the vector id does not exist. 259 Note: for any delete call, if namespace is not specified, the default namespace is used. 260 261 Delete can occur in the following mutual exclusive ways: 262 1. Delete by ids from a single namespace 263 2. Delete all vectors from a single namespace by setting delete_all to True 264 3. Delete all vectors from a single namespace by specifying a metadata filter 265 (note that for this option delete all must be set to False) 266 267 Examples: 268 >>> index.delete(ids=['id1', 'id2'], namespace='my_namespace') 269 >>> index.delete(delete_all=True, namespace='my_namespace') 270 >>> index.delete(filter={'key': 'value'}, namespace='my_namespace', async_req=True) 271 272 Args: 273 ids (List[str]): Vector ids to delete [optional] 274 delete_all (bool): This indicates that all vectors in the index namespace should be deleted.. [optional] 275 Default is False. 276 namespace (str): The namespace to delete vectors from [optional] 277 If not specified, the default namespace is used. 278 filter (Dict[str, Union[str, float, int, bool, List, dict]]): 279 If specified, the metadata filter here will be used to select the vectors to delete. 280 This is mutually exclusive with specifying ids to delete in the ids param or using delete_all=True. 281 See https://www.pinecone.io/docs/metadata-filtering/.. [optional] 282 async_req (bool): If True, the delete operation will be performed asynchronously. 283 Defaults to False. [optional] 284 285 Returns: DeleteResponse (contains no data) or a PineconeGrpcFuture object if async_req is True. 
286 """ 287 288 if filter is not None: 289 filter_struct = dict_to_proto_struct(filter) 290 else: 291 filter_struct = None 292 293 args_dict = self._parse_non_empty_args( 294 [ 295 ("ids", ids), 296 ("delete_all", delete_all), 297 ("namespace", namespace), 298 ("filter", filter_struct), 299 ] 300 ) 301 timeout = kwargs.pop("timeout", None) 302 303 request = DeleteRequest(**args_dict, **kwargs) 304 if async_req: 305 future = self._wrap_grpc_call(self.stub.Delete.future, request, timeout=timeout) 306 return PineconeGrpcFuture(future) 307 else: 308 return self._wrap_grpc_call(self.stub.Delete, request, timeout=timeout)
The Delete operation deletes vectors from the index, from a single namespace. No error raised if the vector id does not exist. Note: for any delete call, if namespace is not specified, the default namespace is used.
Delete can occur in the following mutual exclusive ways:
- Delete by ids from a single namespace
- Delete all vectors from a single namespace by setting delete_all to True
- Delete all vectors from a single namespace by specifying a metadata filter (note that for this option delete all must be set to False)
Examples:
>>> index.delete(ids=['id1', 'id2'], namespace='my_namespace') >>> index.delete(delete_all=True, namespace='my_namespace') >>> index.delete(filter={'key': 'value'}, namespace='my_namespace', async_req=True)
Arguments:
- ids (List[str]): Vector ids to delete [optional]
- delete_all (bool): This indicates that all vectors in the index namespace should be deleted.. [optional] Default is False.
- namespace (str): The namespace to delete vectors from [optional] If not specified, the default namespace is used.
- filter (Dict[str, Union[str, float, int, bool, List, dict]]): If specified, the metadata filter here will be used to select the vectors to delete. This is mutually exclusive with specifying ids to delete in the ids param or using delete_all=True. See https://www.pinecone.io/docs/metadata-filtering/. [optional]
- async_req (bool): If True, the delete operation will be performed asynchronously. Defaults to False. [optional]
Returns: DeleteResponse (contains no data) or a PineconeGrpcFuture object if async_req is True.
310 def fetch( 311 self, 312 ids: Optional[List[str]], 313 namespace: Optional[str] = None, 314 **kwargs, 315 ) -> FetchResponse: 316 """ 317 The fetch operation looks up and returns vectors, by ID, from a single namespace. 318 The returned vectors include the vector data and/or metadata. 319 320 Examples: 321 >>> index.fetch(ids=['id1', 'id2'], namespace='my_namespace') 322 >>> index.fetch(ids=['id1', 'id2']) 323 324 Args: 325 ids (List[str]): The vector IDs to fetch. 326 namespace (str): The namespace to fetch vectors from. 327 If not specified, the default namespace is used. [optional] 328 329 Returns: FetchResponse object which contains the list of Vector objects, and namespace name. 330 """ 331 timeout = kwargs.pop("timeout", None) 332 333 args_dict = self._parse_non_empty_args([("namespace", namespace)]) 334 335 request = FetchRequest(ids=ids, **args_dict, **kwargs) 336 response = self._wrap_grpc_call(self.stub.Fetch, request, timeout=timeout) 337 json_response = json_format.MessageToDict(response) 338 return parse_fetch_response(json_response)
The fetch operation looks up and returns vectors, by ID, from a single namespace. The returned vectors include the vector data and/or metadata.
Examples:
>>> index.fetch(ids=['id1', 'id2'], namespace='my_namespace') >>> index.fetch(ids=['id1', 'id2'])
Arguments:
- ids (List[str]): The vector IDs to fetch.
- namespace (str): The namespace to fetch vectors from. If not specified, the default namespace is used. [optional]
Returns: FetchResponse object which contains the list of Vector objects, and namespace name.
340 def query( 341 self, 342 vector: Optional[List[float]] = None, 343 id: Optional[str] = None, 344 namespace: Optional[str] = None, 345 top_k: Optional[int] = None, 346 filter: Optional[Dict[str, Union[str, float, int, bool, List, dict]]] = None, 347 include_values: Optional[bool] = None, 348 include_metadata: Optional[bool] = None, 349 sparse_vector: Optional[Union[GRPCSparseValues, SparseVectorTypedDict]] = None, 350 **kwargs, 351 ) -> QueryResponse: 352 """ 353 The Query operation searches a namespace, using a query vector. 354 It retrieves the ids of the most similar items in a namespace, along with their similarity scores. 355 356 Examples: 357 >>> index.query(vector=[1, 2, 3], top_k=10, namespace='my_namespace') 358 >>> index.query(id='id1', top_k=10, namespace='my_namespace') 359 >>> index.query(vector=[1, 2, 3], top_k=10, namespace='my_namespace', filter={'key': 'value'}) 360 >>> index.query(id='id1', top_k=10, namespace='my_namespace', include_metadata=True, include_values=True) 361 >>> index.query(vector=[1, 2, 3], sparse_vector={'indices': [1, 2], 'values': [0.2, 0.4]}, 362 >>> top_k=10, namespace='my_namespace') 363 >>> index.query(vector=[1, 2, 3], sparse_vector=GRPCSparseValues([1, 2], [0.2, 0.4]), 364 >>> top_k=10, namespace='my_namespace') 365 366 Args: 367 vector (List[float]): The query vector. This should be the same length as the dimension of the index 368 being queried. Each `query()` request can contain only one of the parameters 369 `id` or `vector`.. [optional] 370 id (str): The unique ID of the vector to be used as a query vector. 371 Each `query()` request can contain only one of the parameters 372 `vector` or `id`.. [optional] 373 top_k (int): The number of results to return for each query. Must be an integer greater than 1. 374 namespace (str): The namespace to fetch vectors from. 375 If not specified, the default namespace is used. [optional] 376 filter (Dict[str, Union[str, float, int, bool, List, dict]]): 377 The filter to apply. 
You can use vector metadata to limit your search. 378 See https://www.pinecone.io/docs/metadata-filtering/.. [optional] 379 include_values (bool): Indicates whether vector values are included in the response. 380 If omitted the server will use the default value of False [optional] 381 include_metadata (bool): Indicates whether metadata is included in the response as well as the ids. 382 If omitted the server will use the default value of False [optional] 383 sparse_vector: (Union[SparseValues, Dict[str, Union[List[float], List[int]]]]): sparse values of the query vector. 384 Expected to be either a GRPCSparseValues object or a dict of the form: 385 {'indices': List[int], 'values': List[float]}, where the lists each have the same length. 386 387 Returns: QueryResponse object which contains the list of the closest vectors as ScoredVector objects, 388 and namespace name. 389 """ 390 391 if vector is not None and id is not None: 392 raise ValueError("Cannot specify both `id` and `vector`") 393 394 if filter is not None: 395 filter_struct = dict_to_proto_struct(filter) 396 else: 397 filter_struct = None 398 399 sparse_vector = self._parse_sparse_values_arg(sparse_vector) 400 args_dict = self._parse_non_empty_args( 401 [ 402 ("vector", vector), 403 ("id", id), 404 ("namespace", namespace), 405 ("top_k", top_k), 406 ("filter", filter_struct), 407 ("include_values", include_values), 408 ("include_metadata", include_metadata), 409 ("sparse_vector", sparse_vector), 410 ] 411 ) 412 413 request = QueryRequest(**args_dict) 414 415 timeout = kwargs.pop("timeout", None) 416 response = self._wrap_grpc_call(self.stub.Query, request, timeout=timeout) 417 json_response = json_format.MessageToDict(response) 418 return parse_query_response(json_response, _check_type=False)
The Query operation searches a namespace, using a query vector. It retrieves the ids of the most similar items in a namespace, along with their similarity scores.
Examples:
>>> index.query(vector=[1, 2, 3], top_k=10, namespace='my_namespace') >>> index.query(id='id1', top_k=10, namespace='my_namespace') >>> index.query(vector=[1, 2, 3], top_k=10, namespace='my_namespace', filter={'key': 'value'}) >>> index.query(id='id1', top_k=10, namespace='my_namespace', include_metadata=True, include_values=True) >>> index.query(vector=[1, 2, 3], sparse_vector={'indices': [1, 2], 'values': [0.2, 0.4]}, >>> top_k=10, namespace='my_namespace') >>> index.query(vector=[1, 2, 3], sparse_vector=GRPCSparseValues([1, 2], [0.2, 0.4]), >>> top_k=10, namespace='my_namespace')
Arguments:
- vector (List[float]): The query vector. This should be the same length as the dimension of the index
  being queried. Each `query()` request can contain only one of the parameters `id` or `vector`. [optional]
- id (str): The unique ID of the vector to be used as a query vector.
  Each `query()` request can contain only one of the parameters `vector` or `id`. [optional]
- top_k (int): The number of results to return for each query. Must be an integer greater than 1.
- namespace (str): The namespace to fetch vectors from. If not specified, the default namespace is used. [optional]
- filter (Dict[str, Union[str, float, int, bool, List, dict]]): The filter to apply. You can use vector metadata to limit your search. See https://www.pinecone.io/docs/metadata-filtering/. [optional]
- include_values (bool): Indicates whether vector values are included in the response. If omitted the server will use the default value of False [optional]
- include_metadata (bool): Indicates whether metadata is included in the response as well as the ids. If omitted the server will use the default value of False [optional]
- sparse_vector: (Union[SparseValues, Dict[str, Union[List[float], List[int]]]]): sparse values of the query vector. Expected to be either a GRPCSparseValues object or a dict of the form: {'indices': List[int], 'values': List[float]}, where the lists each have the same length.
Returns: QueryResponse object which contains the list of the closest vectors as ScoredVector objects, and namespace name.
420 def update( 421 self, 422 id: str, 423 async_req: bool = False, 424 values: Optional[List[float]] = None, 425 set_metadata: Optional[ 426 Dict[ 427 str, 428 Union[str, float, int, bool, List[int], List[float], List[str]], 429 ] 430 ] = None, 431 namespace: Optional[str] = None, 432 sparse_values: Optional[Union[GRPCSparseValues, SparseVectorTypedDict]] = None, 433 **kwargs, 434 ) -> Union[UpdateResponse, PineconeGrpcFuture]: 435 """ 436 The Update operation updates vector in a namespace. 437 If a value is included, it will overwrite the previous value. 438 If a set_metadata is included, 439 the values of the fields specified in it will be added or overwrite the previous value. 440 441 Examples: 442 >>> index.update(id='id1', values=[1, 2, 3], namespace='my_namespace') 443 >>> index.update(id='id1', set_metadata={'key': 'value'}, namespace='my_namespace', async_req=True) 444 >>> index.update(id='id1', values=[1, 2, 3], sparse_values={'indices': [1, 2], 'values': [0.2, 0.4]}, 445 >>> namespace='my_namespace') 446 >>> index.update(id='id1', values=[1, 2, 3], sparse_values=GRPCSparseValues(indices=[1, 2], values=[0.2, 0.4]), 447 >>> namespace='my_namespace') 448 449 Args: 450 id (str): Vector's unique id. 451 async_req (bool): If True, the update operation will be performed asynchronously. 452 Defaults to False. [optional] 453 values (List[float]): vector values to set. [optional] 454 set_metadata (Dict[str, Union[str, float, int, bool, List[int], List[float], List[str]]]]): 455 metadata to set for vector. [optional] 456 namespace (str): Namespace name where to update the vector.. [optional] 457 sparse_values: (Dict[str, Union[List[float], List[int]]]): sparse values to update for the vector. 458 Expected to be either a GRPCSparseValues object or a dict of the form: 459 {'indices': List[int], 'values': List[float]} where the lists each have the same length. 460 461 462 Returns: UpdateResponse (contains no data) or a PineconeGrpcFuture object if async_req is True. 
463 """ 464 if set_metadata is not None: 465 set_metadata_struct = dict_to_proto_struct(set_metadata) 466 else: 467 set_metadata_struct = None 468 469 timeout = kwargs.pop("timeout", None) 470 sparse_values = self._parse_sparse_values_arg(sparse_values) 471 args_dict = self._parse_non_empty_args( 472 [ 473 ("values", values), 474 ("set_metadata", set_metadata_struct), 475 ("namespace", namespace), 476 ("sparse_values", sparse_values), 477 ] 478 ) 479 480 request = UpdateRequest(id=id, **args_dict) 481 if async_req: 482 future = self._wrap_grpc_call(self.stub.Update.future, request, timeout=timeout) 483 return PineconeGrpcFuture(future) 484 else: 485 return self._wrap_grpc_call(self.stub.Update, request, timeout=timeout)
The Update operation updates vector in a namespace. If a value is included, it will overwrite the previous value. If a set_metadata is included, the values of the fields specified in it will be added or overwrite the previous value.
Examples:
>>> index.update(id='id1', values=[1, 2, 3], namespace='my_namespace') >>> index.update(id='id1', set_metadata={'key': 'value'}, namespace='my_namespace', async_req=True) >>> index.update(id='id1', values=[1, 2, 3], sparse_values={'indices': [1, 2], 'values': [0.2, 0.4]}, >>> namespace='my_namespace') >>> index.update(id='id1', values=[1, 2, 3], sparse_values=GRPCSparseValues(indices=[1, 2], values=[0.2, 0.4]), >>> namespace='my_namespace')
Arguments:
- id (str): Vector's unique id.
- async_req (bool): If True, the update operation will be performed asynchronously. Defaults to False. [optional]
- values (List[float]): vector values to set. [optional]
- set_metadata (Dict[str, Union[str, float, int, bool, List[int], List[float], List[str]]]]): metadata to set for vector. [optional]
- namespace (str): Namespace name where to update the vector.. [optional]
- sparse_values: (Dict[str, Union[List[float], List[int]]]): sparse values to update for the vector. Expected to be either a GRPCSparseValues object or a dict of the form: {'indices': List[int], 'values': List[float]} where the lists each have the same length.
Returns: UpdateResponse (contains no data) or a PineconeGrpcFuture object if async_req is True.
487 def list_paginated( 488 self, 489 prefix: Optional[str] = None, 490 limit: Optional[int] = None, 491 pagination_token: Optional[str] = None, 492 namespace: Optional[str] = None, 493 **kwargs, 494 ) -> SimpleListResponse: 495 """ 496 The list_paginated operation finds vectors based on an id prefix within a single namespace. 497 It returns matching ids in a paginated form, with a pagination token to fetch the next page of results. 498 This id list can then be passed to fetch or delete operations, depending on your use case. 499 500 Consider using the `list` method to avoid having to handle pagination tokens manually. 501 502 Examples: 503 >>> results = index.list_paginated(prefix='99', limit=5, namespace='my_namespace') 504 >>> [v.id for v in results.vectors] 505 ['99', '990', '991', '992', '993'] 506 >>> results.pagination.next 507 eyJza2lwX3Bhc3QiOiI5OTMiLCJwcmVmaXgiOiI5OSJ9 508 >>> next_results = index.list_paginated(prefix='99', limit=5, namespace='my_namespace', pagination_token=results.pagination.next) 509 510 Args: 511 prefix (Optional[str]): The id prefix to match. If unspecified, an empty string prefix will 512 be used with the effect of listing all ids in a namespace [optional] 513 limit (Optional[int]): The maximum number of ids to return. If unspecified, the server will use a default value. [optional] 514 pagination_token (Optional[str]): A token needed to fetch the next page of results. This token is returned 515 in the response if additional results are available. [optional] 516 namespace (Optional[str]): The namespace to fetch vectors from. If not specified, the default namespace is used. [optional] 517 518 Returns: SimpleListResponse object which contains the list of ids, the namespace name, pagination information, and usage showing the number of read_units consumed. 
519 """ 520 args_dict = self._parse_non_empty_args( 521 [ 522 ("prefix", prefix), 523 ("limit", limit), 524 ("namespace", namespace), 525 ("pagination_token", pagination_token), 526 ] 527 ) 528 request = ListRequest(**args_dict, **kwargs) 529 timeout = kwargs.pop("timeout", None) 530 response = self._wrap_grpc_call(self.stub.List, request, timeout=timeout) 531 532 if response.pagination and response.pagination.next != "": 533 pagination = Pagination(next=response.pagination.next) 534 else: 535 pagination = None 536 537 return SimpleListResponse( 538 namespace=response.namespace, 539 vectors=response.vectors, 540 pagination=pagination, 541 )
The list_paginated operation finds vectors based on an id prefix within a single namespace. It returns matching ids in a paginated form, with a pagination token to fetch the next page of results. This id list can then be passed to fetch or delete operations, depending on your use case.
Consider using the `list` method to avoid having to handle pagination tokens manually.
Examples:
>>> results = index.list_paginated(prefix='99', limit=5, namespace='my_namespace') >>> [v.id for v in results.vectors] ['99', '990', '991', '992', '993'] >>> results.pagination.next eyJza2lwX3Bhc3QiOiI5OTMiLCJwcmVmaXgiOiI5OSJ9 >>> next_results = index.list_paginated(prefix='99', limit=5, namespace='my_namespace', pagination_token=results.pagination.next)
Arguments:
- prefix (Optional[str]): The id prefix to match. If unspecified, an empty string prefix will be used with the effect of listing all ids in a namespace [optional]
- limit (Optional[int]): The maximum number of ids to return. If unspecified, the server will use a default value. [optional]
- pagination_token (Optional[str]): A token needed to fetch the next page of results. This token is returned in the response if additional results are available. [optional]
- namespace (Optional[str]): The namespace to fetch vectors from. If not specified, the default namespace is used. [optional]
Returns: SimpleListResponse object which contains the list of ids, the namespace name, pagination information, and usage showing the number of read_units consumed.
543 def list(self, **kwargs): 544 """ 545 The list operation accepts all of the same arguments as list_paginated, and returns a generator that yields 546 a list of the matching vector ids in each page of results. It automatically handles pagination tokens on your 547 behalf. 548 549 Examples: 550 >>> for ids in index.list(prefix='99', limit=5, namespace='my_namespace'): 551 >>> print(ids) 552 ['99', '990', '991', '992', '993'] 553 ['994', '995', '996', '997', '998'] 554 ['999'] 555 556 Args: 557 prefix (Optional[str]): The id prefix to match. If unspecified, an empty string prefix will 558 be used with the effect of listing all ids in a namespace [optional] 559 limit (Optional[int]): The maximum number of ids to return. If unspecified, the server will use a default value. [optional] 560 pagination_token (Optional[str]): A token needed to fetch the next page of results. This token is returned 561 in the response if additional results are available. [optional] 562 namespace (Optional[str]): The namespace to fetch vectors from. If not specified, the default namespace is used. [optional] 563 """ 564 done = False 565 while not done: 566 try: 567 results = self.list_paginated(**kwargs) 568 except Exception as e: 569 raise e 570 571 if len(results.vectors) > 0: 572 yield [v.id for v in results.vectors] 573 574 if results.pagination and results.pagination.next: 575 kwargs.update({"pagination_token": results.pagination.next}) 576 else: 577 done = True
The list operation accepts all of the same arguments as list_paginated, and returns a generator that yields a list of the matching vector ids in each page of results. It automatically handles pagination tokens on your behalf.
Examples:
>>> for ids in index.list(prefix='99', limit=5, namespace='my_namespace'): >>> print(ids) ['99', '990', '991', '992', '993'] ['994', '995', '996', '997', '998'] ['999']
Arguments:
- prefix (Optional[str]): The id prefix to match. If unspecified, an empty string prefix will be used with the effect of listing all ids in a namespace [optional]
- limit (Optional[int]): The maximum number of ids to return. If unspecified, the server will use a default value. [optional]
- pagination_token (Optional[str]): A token needed to fetch the next page of results. This token is returned in the response if additional results are available. [optional]
- namespace (Optional[str]): The namespace to fetch vectors from. If not specified, the default namespace is used. [optional]
579 def describe_index_stats( 580 self, 581 filter: Optional[Dict[str, Union[str, float, int, bool, List, dict]]] = None, 582 **kwargs, 583 ) -> DescribeIndexStatsResponse: 584 """ 585 The DescribeIndexStats operation returns statistics about the index's contents. 586 For example: The vector count per namespace and the number of dimensions. 587 588 Examples: 589 >>> index.describe_index_stats() 590 >>> index.describe_index_stats(filter={'key': 'value'}) 591 592 Args: 593 filter (Dict[str, Union[str, float, int, bool, List, dict]]): 594 If this parameter is present, the operation only returns statistics for vectors that satisfy the filter. 595 See https://www.pinecone.io/docs/metadata-filtering/.. [optional] 596 597 Returns: DescribeIndexStatsResponse object which contains stats about the index. 598 """ 599 if filter is not None: 600 filter_struct = dict_to_proto_struct(filter) 601 else: 602 filter_struct = None 603 args_dict = self._parse_non_empty_args([("filter", filter_struct)]) 604 timeout = kwargs.pop("timeout", None) 605 606 request = DescribeIndexStatsRequest(**args_dict) 607 response = self._wrap_grpc_call(self.stub.DescribeIndexStats, request, timeout=timeout) 608 json_response = json_format.MessageToDict(response) 609 return parse_stats_response(json_response)
The DescribeIndexStats operation returns statistics about the index's contents. For example: The vector count per namespace and the number of dimensions.
Examples:
>>> index.describe_index_stats() >>> index.describe_index_stats(filter={'key': 'value'})
Arguments:
- filter (Dict[str, Union[str, float, int, bool, List, dict]]): If this parameter is present, the operation only returns statistics for vectors that satisfy the filter. See https://www.pinecone.io/docs/metadata-filtering/. [optional]
Returns: DescribeIndexStatsResponse object which contains stats about the index.