pinecone.data.interfaces
from abc import ABC, abstractmethod
from multiprocessing.pool import ApplyResult
from typing import Union, List, Optional, Dict, Any

from pinecone.core.openapi.db_data.models import (
    FetchResponse,
    QueryResponse,
    IndexDescription as DescribeIndexStatsResponse,
    UpsertResponse,
    Vector,
    ListResponse,
    SparseValues,
    SearchRecordsResponse,
)
from .query_results_aggregator import QueryNamespacesResults
from .types import (
    VectorTypedDict,
    SparseVectorTypedDict,
    VectorMetadataTypedDict,
    FilterTypedDict,
    VectorTuple,
    VectorTupleWithMetadata,
    SearchQueryTypedDict,
    SearchRerankTypedDict,
)
from .dataclasses import SearchQuery, SearchRerank


class IndexInterface(ABC):
    """Abstract interface declaring the data operations available on an index.

    Every method here is abstract: this class only fixes the signatures and
    documents the expected behavior. Concrete subclasses provide the actual
    implementation.
    """

    @abstractmethod
    def upsert(
        self,
        vectors: Union[
            List[Vector], List[VectorTuple], List[VectorTupleWithMetadata], List[VectorTypedDict]
        ],
        namespace: Optional[str] = None,
        batch_size: Optional[int] = None,
        show_progress: bool = True,
        **kwargs,
    ) -> UpsertResponse:
        """
        Write vectors into a namespace.

        Args:
            vectors (Union[List[Vector], List[VectorTuple], List[VectorTupleWithMetadata], List[VectorTypedDict]]): A list of vectors to upsert.
            namespace (str): The namespace to write to. If not specified, the default namespace is used. [optional]
            batch_size (int): The number of vectors to upsert in each batch.
                               If not specified, all vectors will be upserted in a single batch. [optional]
            show_progress (bool): Whether to show a progress bar using tqdm.
                                  Applied only if batch_size is provided. Default is True.

        Returns:
            `UpsertResponse`, includes the number of vectors upserted.


        The upsert operation writes vectors into a namespace.
        If a new value is upserted for an existing vector id, it will overwrite the previous value.

        To upsert in parallel follow: https://docs.pinecone.io/docs/insert-data#sending-upserts-in-parallel

        ## Upserting dense vectors

        **Note:** the dimension of each dense vector must match the dimension of the index.

        A vector can be represented in a variety of ways.

        ```python
        from pinecone import Pinecone, Vector

        pc = Pinecone()
        idx = pc.Index("index-name")

        # A Vector object
        idx.upsert(
            namespace = 'my-namespace',
            vectors = [
                Vector(id='id1', values=[0.1, 0.2, 0.3, 0.4], metadata={'metadata_key': 'metadata_value'}),
            ]
        )

        # A vector tuple
        idx.upsert(
            namespace = 'my-namespace',
            vectors = [
                ('id1', [0.1, 0.2, 0.3, 0.4]),
            ]
        )

        # A vector tuple with metadata
        idx.upsert(
            namespace = 'my-namespace',
            vectors = [
                ('id1', [0.1, 0.2, 0.3, 0.4], {'metadata_key': 'metadata_value'}),
            ]
        )

        # A vector dictionary
        idx.upsert(
            namespace = 'my-namespace',
            vectors = [
                {"id": "id1", "values": [0.1, 0.2, 0.3, 0.4], "metadata": {"metadata_key": "metadata_value"}},
            ]
        )
        ```

        ## Upserting sparse vectors

        ```python
        from pinecone import Pinecone, Vector, SparseValues

        pc = Pinecone()
        idx = pc.Index("index-name")

        # A Vector object
        idx.upsert(
            namespace = 'my-namespace',
            vectors = [
                Vector(id='id1', sparse_values=SparseValues(indices=[1, 2], values=[0.2, 0.4])),
            ]
        )

        # A dictionary
        idx.upsert(
            namespace = 'my-namespace',
            vectors = [
                {"id": "id1", "sparse_values": {"indices": [1, 2], "values": [0.2, 0.4]}},
            ]
        )
        ```

        ## Batch upsert

        If you have a large number of vectors, you can upsert them in batches.

        ```python
        from pinecone import Pinecone, Vector

        pc = Pinecone()
        idx = pc.Index("index-name")

        idx.upsert(
            namespace = 'my-namespace',
            vectors = [
                {'id': 'id1', 'values': [0.1, 0.2, 0.3, 0.4]},
                {'id': 'id2', 'values': [0.2, 0.3, 0.4, 0.5]},
                {'id': 'id3', 'values': [0.3, 0.4, 0.5, 0.6]},
                {'id': 'id4', 'values': [0.4, 0.5, 0.6, 0.7]},
                {'id': 'id5', 'values': [0.5, 0.6, 0.7, 0.8]},
                # More vectors here
            ],
            batch_size = 50
        )
        ```

        ## Visual progress bar with tqdm

        To see a progress bar when upserting in batches, you will need to separately install the `tqdm` package.
        If `tqdm` is present, the client will detect and use it to display progress when `show_progress=True`.
        """
        pass

    @abstractmethod
    def upsert_from_dataframe(
        self, df, namespace: Optional[str] = None, batch_size: int = 500, show_progress: bool = True
    ):
        """Upserts a dataframe into the index.

        Args:
            df: A pandas dataframe with the following columns: id, values, sparse_values, and metadata.
            namespace: The namespace to upsert into.
            batch_size: The number of rows to upsert in a single batch.
            show_progress: Whether to show a progress bar.
        """
        pass

    @abstractmethod
    def upsert_records(self, namespace: str, records: List[Dict]):
        """
        Upsert records to a namespace.

        :param namespace: The namespace of the index to upsert records to.
        :type namespace: str, required
        :param records: The records to upsert into the index.
        :type records: List[Dict], required

        A record is a dictionary that contains either an `id` or `_id`
        field along with other fields that will be stored as metadata. The `id` or `_id` field is used
        as the unique identifier for the record. At least one field in the record should correspond to
        a field mapping in the index's embed configuration.

        When records are upserted, Pinecone converts mapped fields into embeddings and upserts them into
        the specified namespace of the index.

        ```python
        from pinecone import (
            Pinecone,
            CloudProvider,
            AwsRegion,
            EmbedModel,
            IndexEmbed
        )

        pc = Pinecone(api_key="<<PINECONE_API_KEY>>")

        # Create an index for your embedding model
        index_model = pc.create_index_for_model(
            name="my-model-index",
            cloud=CloudProvider.AWS,
            region=AwsRegion.US_WEST_2,
            embed=IndexEmbed(
                model=EmbedModel.Multilingual_E5_Large,
                field_map={"text": "my_text_field"}
            )
        )

        # Instantiate the index client
        idx = pc.Index(host=index_model.host)

        # upsert records
        idx.upsert_records(
            namespace="my-namespace",
            records=[
                {
                    "_id": "test1",
                    "my_text_field": "Apple is a popular fruit known for its sweetness and crisp texture.",
                },
                {
                    "_id": "test2",
                    "my_text_field": "The tech company Apple is known for its innovative products like the iPhone.",
                },
                {
                    "_id": "test3",
                    "my_text_field": "Many people enjoy eating apples as a healthy snack.",
                },
                {
                    "_id": "test4",
                    "my_text_field": "Apple Inc. has revolutionized the tech industry with its sleek designs and user-friendly interfaces.",
                },
                {
                    "_id": "test5",
                    "my_text_field": "An apple a day keeps the doctor away, as the saying goes.",
                },
                {
                    "_id": "test6",
                    "my_text_field": "Apple Computer Company was founded on April 1, 1976, by Steve Jobs, Steve Wozniak, and Ronald Wayne as a partnership.",
                },
            ],
        )

        from pinecone import SearchQuery, SearchRerank, RerankModel

        # search for similar records
        response = idx.search_records(
            namespace="my-namespace",
            query=SearchQuery(
                inputs={
                    "text": "Apple corporation",
                },
                top_k=3,
            ),
            rerank=SearchRerank(
                model=RerankModel.Bge_Reranker_V2_M3,
                rank_fields=["my_text_field"],
                top_n=3,
            ),
        )
        ```
        """
        pass

    @abstractmethod
    def search(
        self,
        namespace: str,
        query: Union[SearchQueryTypedDict, SearchQuery],
        rerank: Optional[Union[SearchRerankTypedDict, SearchRerank]] = None,
        fields: Optional[List[str]] = ["*"],  # Default to returning all fields
    ) -> SearchRecordsResponse:
        """
        Search for records.

        :param namespace: The namespace in the index to search.
        :type namespace: str, required
        :param query: The SearchQuery to use for the search.
        :type query: Union[Dict, SearchQuery], required
        :param rerank: The SearchRerank to use with the search request.
        :type rerank: Union[Dict, SearchRerank], optional
        :param fields: The record fields to return. Defaults to `["*"]`, which returns all fields.
        :type fields: List[str], optional
        :return: The records that match the search.

        This operation converts a query to a vector embedding and then searches a namespace. You
        can optionally provide a reranking operation as part of the search.

        ```python
        from pinecone import (
            Pinecone,
            CloudProvider,
            AwsRegion,
            EmbedModel,
            IndexEmbed
        )

        pc = Pinecone(api_key="<<PINECONE_API_KEY>>")

        # Create an index for your embedding model
        index_model = pc.create_index_for_model(
            name="my-model-index",
            cloud=CloudProvider.AWS,
            region=AwsRegion.US_WEST_2,
            embed=IndexEmbed(
                model=EmbedModel.Multilingual_E5_Large,
                field_map={"text": "my_text_field"}
            )
        )

        # Instantiate the index client
        idx = pc.Index(host=index_model.host)

        # upsert records
        idx.upsert_records(
            namespace="my-namespace",
            records=[
                {
                    "_id": "test1",
                    "my_text_field": "Apple is a popular fruit known for its sweetness and crisp texture.",
                },
                {
                    "_id": "test2",
                    "my_text_field": "The tech company Apple is known for its innovative products like the iPhone.",
                },
                {
                    "_id": "test3",
                    "my_text_field": "Many people enjoy eating apples as a healthy snack.",
                },
                {
                    "_id": "test4",
                    "my_text_field": "Apple Inc. has revolutionized the tech industry with its sleek designs and user-friendly interfaces.",
                },
                {
                    "_id": "test5",
                    "my_text_field": "An apple a day keeps the doctor away, as the saying goes.",
                },
                {
                    "_id": "test6",
                    "my_text_field": "Apple Computer Company was founded on April 1, 1976, by Steve Jobs, Steve Wozniak, and Ronald Wayne as a partnership.",
                },
            ],
        )

        from pinecone import SearchQuery, SearchRerank, RerankModel

        # search for similar records
        response = idx.search_records(
            namespace="my-namespace",
            query=SearchQuery(
                inputs={
                    "text": "Apple corporation",
                },
                top_k=3,
            ),
            rerank=SearchRerank(
                model=RerankModel.Bge_Reranker_V2_M3,
                rank_fields=["my_text_field"],
                top_n=3,
            ),
        )
        ```
        """
        pass

    @abstractmethod
    def search_records(
        self,
        namespace: str,
        query: Union[SearchQueryTypedDict, SearchQuery],
        rerank: Optional[Union[SearchRerankTypedDict, SearchRerank]] = None,
        fields: Optional[List[str]] = ["*"],  # Default to returning all fields
    ) -> SearchRecordsResponse:
        """Alias of the search() method."""
        pass

    @abstractmethod
    def delete(
        self,
        ids: Optional[List[str]] = None,
        delete_all: Optional[bool] = None,
        namespace: Optional[str] = None,
        filter: Optional[FilterTypedDict] = None,
        **kwargs,
    ) -> Dict[str, Any]:
        """
        Delete vectors from a single namespace of the index.

        Args:
            ids (List[str]): Vector ids to delete [optional]
            delete_all (bool): This indicates that all vectors in the index namespace should be deleted. [optional]
                                Default is False.
            namespace (str): The namespace to delete vectors from [optional]
                            If not specified, the default namespace is used.
            filter (Dict[str, Union[str, float, int, bool, List, dict]]):
                    If specified, the metadata filter here will be used to select the vectors to delete.
                    This is mutually exclusive with specifying ids to delete in the ids param or using delete_all=True.
                    See https://www.pinecone.io/docs/metadata-filtering/. [optional]


        The Delete operation deletes vectors from the index, from a single namespace.

        No error is raised if the vector id does not exist.

        Note: For any delete call, if namespace is not specified, the default namespace `""` is used.
        Since the delete operation does not error when ids are not present, this means you may not receive
        an error if you delete from the wrong namespace.

        Delete can occur in the following mutually exclusive ways:
        1. Delete by ids from a single namespace
        2. Delete all vectors from a single namespace by setting delete_all to True
        3. Delete all vectors from a single namespace by specifying a metadata filter
            (note that for this option delete all must be set to False)

        API reference: https://docs.pinecone.io/reference/delete_post

        Examples:
            >>> index.delete(ids=['id1', 'id2'], namespace='my_namespace')
            >>> index.delete(delete_all=True, namespace='my_namespace')
            >>> index.delete(filter={'key': 'value'}, namespace='my_namespace')


        Returns: An empty dictionary if the delete operation was successful.
        """
        pass

    @abstractmethod
    def fetch(self, ids: List[str], namespace: Optional[str] = None, **kwargs) -> FetchResponse:
        """
        The fetch operation looks up and returns vectors, by ID, from a single namespace.
        The returned vectors include the vector data and/or metadata.

        API reference: https://docs.pinecone.io/reference/fetch

        Examples:
            >>> index.fetch(ids=['id1', 'id2'], namespace='my_namespace')
            >>> index.fetch(ids=['id1', 'id2'])

        Args:
            ids (List[str]): The vector IDs to fetch.
            namespace (str): The namespace to fetch vectors from.
                             If not specified, the default namespace is used. [optional]

        Returns: FetchResponse object which contains the list of Vector objects, and namespace name.
        """
        pass

    @abstractmethod
    def query(
        self,
        *args,
        top_k: int,
        vector: Optional[List[float]] = None,
        id: Optional[str] = None,
        namespace: Optional[str] = None,
        filter: Optional[FilterTypedDict] = None,
        include_values: Optional[bool] = None,
        include_metadata: Optional[bool] = None,
        sparse_vector: Optional[Union[SparseValues, SparseVectorTypedDict]] = None,
        **kwargs,
    ) -> Union[QueryResponse, ApplyResult]:
        """
        The Query operation searches a namespace, using a query vector.
        It retrieves the ids of the most similar items in a namespace, along with their similarity scores.

        API reference: https://docs.pinecone.io/reference/query

        Examples:
            >>> index.query(vector=[1, 2, 3], top_k=10, namespace='my_namespace')
            >>> index.query(id='id1', top_k=10, namespace='my_namespace')
            >>> index.query(vector=[1, 2, 3], top_k=10, namespace='my_namespace', filter={'key': 'value'})
            >>> index.query(id='id1', top_k=10, namespace='my_namespace', include_metadata=True, include_values=True)
            >>> index.query(vector=[1, 2, 3], sparse_vector={'indices': [1, 2], 'values': [0.2, 0.4]},
            ...             top_k=10, namespace='my_namespace')
            >>> index.query(vector=[1, 2, 3], sparse_vector=SparseValues([1, 2], [0.2, 0.4]),
            ...             top_k=10, namespace='my_namespace')

        Args:
            vector (List[float]): The query vector. This should be the same length as the dimension of the index
                                  being queried. Each `query()` request can contain only one of the parameters
                                  `id` or `vector`. [optional]
            id (str): The unique ID of the vector to be used as a query vector.
                      Each `query()` request can contain only one of the parameters
                      `vector` or `id`. [optional]
            top_k (int): The number of results to return for each query. Must be an integer greater than 1.
            namespace (str): The namespace to query vectors from.
                             If not specified, the default namespace is used. [optional]
            filter (Dict[str, Union[str, float, int, bool, List, dict]]):
                    The filter to apply. You can use vector metadata to limit your search.
                    See https://www.pinecone.io/docs/metadata-filtering/. [optional]
            include_values (bool): Indicates whether vector values are included in the response.
                                   If omitted the server will use the default value of False [optional]
            include_metadata (bool): Indicates whether metadata is included in the response as well as the ids.
                                     If omitted the server will use the default value of False [optional]
            sparse_vector: (Union[SparseValues, Dict[str, Union[List[float], List[int]]]]): sparse values of the query vector.
                            Expected to be either a SparseValues object or a dict of the form:
                             {'indices': List[int], 'values': List[float]}, where the lists each have the same length.

        Returns: QueryResponse object which contains the list of the closest vectors as ScoredVector objects,
                 and namespace name.
        """
        pass

    @abstractmethod
    def query_namespaces(
        self,
        vector: List[float],
        namespaces: List[str],
        top_k: Optional[int] = None,
        filter: Optional[FilterTypedDict] = None,
        include_values: Optional[bool] = None,
        include_metadata: Optional[bool] = None,
        sparse_vector: Optional[Union[SparseValues, SparseVectorTypedDict]] = None,
        **kwargs,
    ) -> QueryNamespacesResults:
        """The query_namespaces() method is used to make a query to multiple namespaces in parallel and combine the results into one result set.

        Since several asynchronous calls are made on your behalf when calling this method, you will need to tune the pool_threads and connection_pool_maxsize parameter of the Index constructor to suit your workload.

        Examples:

        ```python
        from pinecone import Pinecone

        pc = Pinecone(api_key="your-api-key")
        index = pc.Index(
            host="index-name",
            pool_threads=32,
            connection_pool_maxsize=32
        )

        query_vec = [0.1, 0.2, 0.3] # An embedding that matches the index dimension
        combined_results = index.query_namespaces(
            vector=query_vec,
            namespaces=['ns1', 'ns2', 'ns3', 'ns4'],
            metric="cosine",
            top_k=10,
            filter={'genre': {"$eq": "drama"}},
            include_values=True,
            include_metadata=True
        )
        for vec in combined_results.matches:
            print(vec.id, vec.score)
        print(combined_results.usage)
        ```

        Args:
            vector (List[float]): The query vector, must be the same length as the dimension of the index being queried.
            namespaces (List[str]): The list of namespaces to query.
            top_k (Optional[int], optional): The number of results you would like to request from each namespace. Defaults to 10.
            metric (str): Must be one of 'cosine', 'euclidean', 'dotproduct'. This is needed in order to merge results across namespaces, since the interpretation of score depends on the index metric type. It is not a named parameter of this interface method; pass it as a keyword argument so it is received through `**kwargs`.
            filter (Optional[Dict[str, Union[str, float, int, bool, List, dict]]], optional): Pass an optional filter to filter results based on metadata. Defaults to None.
            include_values (Optional[bool], optional): Boolean field indicating whether vector values should be included with results. Defaults to None.
            include_metadata (Optional[bool], optional): Boolean field indicating whether vector metadata should be included with results. Defaults to None.
            sparse_vector (Optional[ Union[SparseValues, Dict[str, Union[List[float], List[int]]]] ], optional): If you are working with a dotproduct index, you can pass a sparse vector as part of your hybrid search. Defaults to None.

        Returns:
            QueryNamespacesResults: A QueryNamespacesResults object containing the combined results from all namespaces, as well as the combined usage cost in read units.
        """
        pass

    @abstractmethod
    def update(
        self,
        id: str,
        values: Optional[List[float]] = None,
        set_metadata: Optional[VectorMetadataTypedDict] = None,
        namespace: Optional[str] = None,
        sparse_values: Optional[Union[SparseValues, SparseVectorTypedDict]] = None,
        **kwargs,
    ) -> Dict[str, Any]:
        """
        The Update operation updates a vector in a namespace.
        If a value is included, it will overwrite the previous value.
        If a set_metadata is included,
        the values of the fields specified in it will be added or overwrite the previous value.

        API reference: https://docs.pinecone.io/reference/update

        Examples:
            >>> index.update(id='id1', values=[1, 2, 3], namespace='my_namespace')
            >>> index.update(id='id1', set_metadata={'key': 'value'}, namespace='my_namespace')
            >>> index.update(id='id1', values=[1, 2, 3], sparse_values={'indices': [1, 2], 'values': [0.2, 0.4]},
            ...              namespace='my_namespace')
            >>> index.update(id='id1', values=[1, 2, 3], sparse_values=SparseValues(indices=[1, 2], values=[0.2, 0.4]),
            ...              namespace='my_namespace')

        Args:
            id (str): Vector's unique id.
            values (List[float]): vector values to set. [optional]
            set_metadata (Dict[str, Union[str, float, int, bool, List[int], List[float], List[str]]]):
                metadata to set for vector. [optional]
            namespace (str): Namespace name where to update the vector. [optional]
            sparse_values: (Dict[str, Union[List[float], List[int]]]): sparse values to update for the vector.
                           Expected to be either a SparseValues object or a dict of the form:
                           {'indices': List[int], 'values': List[float]} where the lists each have the same length.


        Returns: An empty dictionary if the update was successful.
        """
        pass

    @abstractmethod
    def describe_index_stats(
        self, filter: Optional[FilterTypedDict] = None, **kwargs
    ) -> DescribeIndexStatsResponse:
        """
        The DescribeIndexStats operation returns statistics about the index's contents.
        For example: The vector count per namespace and the number of dimensions.

        API reference: https://docs.pinecone.io/reference/describe_index_stats_post

        Examples:
            >>> index.describe_index_stats()
            >>> index.describe_index_stats(filter={'key': 'value'})

        Args:
            filter (Dict[str, Union[str, float, int, bool, List, dict]]):
            If this parameter is present, the operation only returns statistics for vectors that satisfy the filter.
            See https://www.pinecone.io/docs/metadata-filtering/. [optional]

        Returns: DescribeIndexStatsResponse object which contains stats about the index.
        """
        pass

    @abstractmethod
    def list_paginated(
        self,
        prefix: Optional[str] = None,
        limit: Optional[int] = None,
        pagination_token: Optional[str] = None,
        namespace: Optional[str] = None,
        **kwargs,
    ) -> ListResponse:
        """
        The list_paginated operation finds vectors based on an id prefix within a single namespace.
        It returns matching ids in a paginated form, with a pagination token to fetch the next page of results.
        This id list can then be passed to fetch or delete operations, depending on your use case.

        Consider using the `list` method to avoid having to handle pagination tokens manually.

        Examples:
            >>> results = index.list_paginated(prefix='99', limit=5, namespace='my_namespace')
            >>> [v.id for v in results.vectors]
            ['99', '990', '991', '992', '993']
            >>> results.pagination.next
            eyJza2lwX3Bhc3QiOiI5OTMiLCJwcmVmaXgiOiI5OSJ9
            >>> next_results = index.list_paginated(prefix='99', limit=5, namespace='my_namespace', pagination_token=results.pagination.next)

        Args:
            prefix (Optional[str]): The id prefix to match. If unspecified, an empty string prefix will
                                    be used with the effect of listing all ids in a namespace [optional]
            limit (Optional[int]): The maximum number of ids to return. If unspecified, the server will use a default value. [optional]
            pagination_token (Optional[str]): A token needed to fetch the next page of results. This token is returned
                in the response if additional results are available. [optional]
            namespace (Optional[str]): The namespace to fetch vectors from. If not specified, the default namespace is used. [optional]

        Returns: ListResponse object which contains the list of ids, the namespace name, pagination information, and usage showing the number of read_units consumed.
        """
        pass

    @abstractmethod
    def list(self, **kwargs):
        """
        The list operation accepts all of the same arguments as list_paginated, and returns a generator that yields
        a list of the matching vector ids in each page of results. It automatically handles pagination tokens on your
        behalf.

        Examples:
            >>> for ids in index.list(prefix='99', limit=5, namespace='my_namespace'):
            ...     print(ids)
            ['99', '990', '991', '992', '993']
            ['994', '995', '996', '997', '998']
            ['999']

        Args:
            prefix (Optional[str]): The id prefix to match. If unspecified, an empty string prefix will
                                    be used with the effect of listing all ids in a namespace [optional]
            limit (Optional[int]): The maximum number of ids to return. If unspecified, the server will use a default value. [optional]
            pagination_token (Optional[str]): A token needed to fetch the next page of results. This token is returned
                in the response if additional results are available. [optional]
            namespace (Optional[str]): The namespace to fetch vectors from. If not specified, the default namespace is used. [optional]
        """
        pass
30class IndexInterface(ABC): 31 @abstractmethod 32 def upsert( 33 self, 34 vectors: Union[ 35 List[Vector], List[VectorTuple], List[VectorTupleWithMetadata], List[VectorTypedDict] 36 ], 37 namespace: Optional[str] = None, 38 batch_size: Optional[int] = None, 39 show_progress: bool = True, 40 **kwargs, 41 ) -> UpsertResponse: 42 """ 43 Args: 44 vectors (Union[List[Vector], List[VectorTuple], List[VectorTupleWithMetadata], List[VectorTypedDict]]): A list of vectors to upsert. 45 namespace (str): The namespace to write to. If not specified, the default namespace is used. [optional] 46 batch_size (int): The number of vectors to upsert in each batch. 47 If not specified, all vectors will be upserted in a single batch. [optional] 48 show_progress (bool): Whether to show a progress bar using tqdm. 49 Applied only if batch_size is provided. Default is True. 50 51 Returns: 52 `UpsertResponse`, includes the number of vectors upserted. 53 54 55 The upsert operation writes vectors into a namespace. 56 If a new value is upserted for an existing vector id, it will overwrite the previous value. 57 58 To upsert in parallel follow: https://docs.pinecone.io/docs/insert-data#sending-upserts-in-parallel 59 60 ## Upserting dense vectors 61 62 **Note:** the dimension of each dense vector must match the dimension of the index. 63 64 A vector can be represented in a variety of ways. 
65 66 ```python 67 from pinecone import Pinecone, Vector 68 69 pc = Pinecone() 70 idx = pc.Index("index-name") 71 72 # A Vector object 73 idx.upsert( 74 namespace = 'my-namespace', 75 vectors = [ 76 Vector(id='id1', values=[0.1, 0.2, 0.3, 0.4], metadata={'metadata_key': 'metadata_value'}), 77 ] 78 ) 79 80 # A vector tuple 81 idx.upsert( 82 namespace = 'my-namespace', 83 vectors = [ 84 ('id1', [0.1, 0.2, 0.3, 0.4]), 85 ] 86 ) 87 88 # A vector tuple with metadata 89 idx.upsert( 90 namespace = 'my-namespace', 91 vectors = [ 92 ('id1', [0.1, 0.2, 0.3, 0.4], {'metadata_key': 'metadata_value'}), 93 ] 94 ) 95 96 # A vector dictionary 97 idx.upsert( 98 namespace = 'my-namespace', 99 vectors = [ 100 {"id": 1, "values": [0.1, 0.2, 0.3, 0.4], "metadata": {"metadata_key": "metadata_value"}}, 101 ] 102 ``` 103 104 ## Upserting sparse vectors 105 106 ```python 107 from pinecone import Pinecone, Vector, SparseValues 108 109 pc = Pinecone() 110 idx = pc.Index("index-name") 111 112 # A Vector object 113 idx.upsert( 114 namespace = 'my-namespace', 115 vectors = [ 116 Vector(id='id1', sparse_values=SparseValues(indices=[1, 2], values=[0.2, 0.4])), 117 ] 118 ) 119 120 # A dictionary 121 idx.upsert( 122 namespace = 'my-namespace', 123 vectors = [ 124 {"id": 1, "sparse_values": {"indices": [1, 2], "values": [0.2, 0.4]}}, 125 ] 126 ) 127 ``` 128 129 ## Batch upsert 130 131 If you have a large number of vectors, you can upsert them in batches. 
132 133 ```python 134 from pinecone import Pinecone, Vector 135 136 pc = Pinecone() 137 idx = pc.Index("index-name") 138 139 idx.upsert( 140 namespace = 'my-namespace', 141 vectors = [ 142 {'id': 'id1', 'values': [0.1, 0.2, 0.3, 0.4]}, 143 {'id': 'id2', 'values': [0.2, 0.3, 0.4, 0.5]}, 144 {'id': 'id3', 'values': [0.3, 0.4, 0.5, 0.6]}, 145 {'id': 'id4', 'values': [0.4, 0.5, 0.6, 0.7]}, 146 {'id': 'id5', 'values': [0.5, 0.6, 0.7, 0.8]}, 147 # More vectors here 148 ], 149 batch_size = 50 150 ) 151 ``` 152 153 ## Visual progress bar with tqdm 154 155 To see a progress bar when upserting in batches, you will need to separately install the `tqdm` package. 156 If `tqdm` is present, the client will detect and use it to display progress when `show_progress=True`. 157 """ 158 pass 159 160 @abstractmethod 161 def upsert_from_dataframe( 162 self, df, namespace: Optional[str] = None, batch_size: int = 500, show_progress: bool = True 163 ): 164 """Upserts a dataframe into the index. 165 166 Args: 167 df: A pandas dataframe with the following columns: id, values, sparse_values, and metadata. 168 namespace: The namespace to upsert into. 169 batch_size: The number of rows to upsert in a single batch. 170 show_progress: Whether to show a progress bar. 171 """ 172 pass 173 174 @abstractmethod 175 def upsert_records(self, namespace: str, records: List[Dict]): 176 """ 177 :param namespace: The namespace of the index to upsert records to. 178 :type namespace: str, required 179 :param records: The records to upsert into the index. 180 :type records: List[Dict], required 181 182 Upsert records to a namespace. A record is a dictionary that contains eitiher an `id` or `_id` 183 field along with other fields that will be stored as metadata. The `id` or `_id` field is used 184 as the unique identifier for the record. At least one field in the record should correspond to 185 a field mapping in the index's embed configuration. 
186 187 When records are upserted, Pinecone converts mapped fields into embeddings and upserts them into 188 the specified namespacce of the index. 189 190 ```python 191 from pinecone import ( 192 Pinecone, 193 CloudProvider, 194 AwsRegion, 195 EmbedModel 196 IndexEmbed 197 ) 198 199 pc = Pinecone(api_key="<<PINECONE_API_KEY>>") 200 201 # Create an index for your embedding model 202 index_model = pc.create_index_for_model( 203 name="my-model-index", 204 cloud=CloudProvider.AWS, 205 region=AwsRegion.US_WEST_2, 206 embed=IndexEmbed( 207 model=EmbedModel.Multilingual_E5_Large, 208 field_map={"text": "my_text_field"} 209 ) 210 ) 211 212 # Instantiate the index client 213 idx = pc.Index(host=index_model.host) 214 215 # upsert records 216 idx.upsert_records( 217 namespace="my-namespace", 218 records=[ 219 { 220 "_id": "test1", 221 "my_text_field": "Apple is a popular fruit known for its sweetness and crisp texture.", 222 }, 223 { 224 "_id": "test2", 225 "my_text_field": "The tech company Apple is known for its innovative products like the iPhone.", 226 }, 227 { 228 "_id": "test3", 229 "my_text_field": "Many people enjoy eating apples as a healthy snack.", 230 }, 231 { 232 "_id": "test4", 233 "my_text_field": "Apple Inc. 
has revolutionized the tech industry with its sleek designs and user-friendly interfaces.", 234 }, 235 { 236 "_id": "test5", 237 "my_text_field": "An apple a day keeps the doctor away, as the saying goes.", 238 }, 239 { 240 "_id": "test6", 241 "my_text_field": "Apple Computer Company was founded on April 1, 1976, by Steve Jobs, Steve Wozniak, and Ronald Wayne as a partnership.", 242 }, 243 ], 244 ) 245 246 from pinecone import SearchQuery, SearchRerank, RerankModel 247 248 # search for similar records 249 response = idx.search_records( 250 namespace="my-namespace", 251 query=SearchQuery( 252 inputs={ 253 "text": "Apple corporation", 254 }, 255 top_k=3, 256 ), 257 rerank=SearchRerank( 258 model=RerankModel.Bge_Reranker_V2_M3, 259 rank_fields=["my_text_field"], 260 top_n=3, 261 ), 262 ) 263 ``` 264 """ 265 pass 266 267 @abstractmethod 268 def search( 269 self, 270 namespace: str, 271 query: Union[SearchQueryTypedDict, SearchQuery], 272 rerank: Optional[Union[SearchRerankTypedDict, SearchRerank]] = None, 273 fields: Optional[List[str]] = ["*"], # Default to returning all fields 274 ) -> SearchRecordsResponse: 275 """ 276 :param namespace: The namespace in the index to search. 277 :type namespace: str, required 278 :param query: The SearchQuery to use for the search. 279 :type query: Union[Dict, SearchQuery], required 280 :param rerank: The SearchRerank to use with the search request. 281 :type rerank: Union[Dict, SearchRerank], optional 282 :return: The records that match the search. 283 284 Search for records. 285 286 This operation converts a query to a vector embedding and then searches a namespace. You 287 can optionally provide a reranking operation as part of the search. 
288 289 ```python 290 from pinecone import ( 291 Pinecone, 292 CloudProvider, 293 AwsRegion, 294 EmbedModel 295 IndexEmbed 296 ) 297 298 pc = Pinecone(api_key="<<PINECONE_API_KEY>>") 299 300 # Create an index for your embedding model 301 index_model = pc.create_index_for_model( 302 name="my-model-index", 303 cloud=CloudProvider.AWS, 304 region=AwsRegion.US_WEST_2, 305 embed=IndexEmbed( 306 model=EmbedModel.Multilingual_E5_Large, 307 field_map={"text": "my_text_field"} 308 ) 309 ) 310 311 # Instantiate the index client 312 idx = pc.Index(host=index_model.host) 313 314 # upsert records 315 idx.upsert_records( 316 namespace="my-namespace", 317 records=[ 318 { 319 "_id": "test1", 320 "my_text_field": "Apple is a popular fruit known for its sweetness and crisp texture.", 321 }, 322 { 323 "_id": "test2", 324 "my_text_field": "The tech company Apple is known for its innovative products like the iPhone.", 325 }, 326 { 327 "_id": "test3", 328 "my_text_field": "Many people enjoy eating apples as a healthy snack.", 329 }, 330 { 331 "_id": "test4", 332 "my_text_field": "Apple Inc. 
has revolutionized the tech industry with its sleek designs and user-friendly interfaces.", 333 }, 334 { 335 "_id": "test5", 336 "my_text_field": "An apple a day keeps the doctor away, as the saying goes.", 337 }, 338 { 339 "_id": "test6", 340 "my_text_field": "Apple Computer Company was founded on April 1, 1976, by Steve Jobs, Steve Wozniak, and Ronald Wayne as a partnership.", 341 }, 342 ], 343 ) 344 345 from pinecone import SearchQuery, SearchRerank, RerankModel 346 347 # search for similar records 348 response = idx.search_records( 349 namespace="my-namespace", 350 query=SearchQuery( 351 inputs={ 352 "text": "Apple corporation", 353 }, 354 top_k=3, 355 ), 356 rerank=SearchRerank( 357 model=RerankModel.Bge_Reranker_V2_M3, 358 rank_fields=["my_text_field"], 359 top_n=3, 360 ), 361 ) 362 ``` 363 """ 364 pass 365 366 @abstractmethod 367 def search_records( 368 self, 369 namespace: str, 370 query: Union[SearchQueryTypedDict, SearchQuery], 371 rerank: Optional[Union[SearchRerankTypedDict, SearchRerank]] = None, 372 fields: Optional[List[str]] = ["*"], # Default to returning all fields 373 ) -> SearchRecordsResponse: 374 """Alias of the search() method.""" 375 pass 376 377 @abstractmethod 378 def delete( 379 self, 380 ids: Optional[List[str]] = None, 381 delete_all: Optional[bool] = None, 382 namespace: Optional[str] = None, 383 filter: Optional[FilterTypedDict] = None, 384 **kwargs, 385 ) -> Dict[str, Any]: 386 """ 387 Args: 388 ids (List[str]): Vector ids to delete [optional] 389 delete_all (bool): This indicates that all vectors in the index namespace should be deleted.. [optional] 390 Default is False. 391 namespace (str): The namespace to delete vectors from [optional] 392 If not specified, the default namespace is used. 393 filter (Dict[str, Union[str, float, int, bool, List, dict]]): 394 If specified, the metadata filter here will be used to select the vectors to delete. 
395 This is mutually exclusive with specifying ids to delete in the ids param or using delete_all=True. 396 See https://www.pinecone.io/docs/metadata-filtering/.. [optional] 397 398 399 The Delete operation deletes vectors from the index, from a single namespace. 400 401 No error is raised if the vector id does not exist. 402 403 Note: For any delete call, if namespace is not specified, the default namespace `""` is used. 404 Since the delete operation does not error when ids are not present, this means you may not receive 405 an error if you delete from the wrong namespace. 406 407 Delete can occur in the following mutual exclusive ways: 408 1. Delete by ids from a single namespace 409 2. Delete all vectors from a single namespace by setting delete_all to True 410 3. Delete all vectors from a single namespace by specifying a metadata filter 411 (note that for this option delete all must be set to False) 412 413 API reference: https://docs.pinecone.io/reference/delete_post 414 415 Examples: 416 >>> index.delete(ids=['id1', 'id2'], namespace='my_namespace') 417 >>> index.delete(delete_all=True, namespace='my_namespace') 418 >>> index.delete(filter={'key': 'value'}, namespace='my_namespace') 419 420 421 Returns: An empty dictionary if the delete operation was successful. 422 """ 423 pass 424 425 @abstractmethod 426 def fetch(self, ids: List[str], namespace: Optional[str] = None, **kwargs) -> FetchResponse: 427 """ 428 The fetch operation looks up and returns vectors, by ID, from a single namespace. 429 The returned vectors include the vector data and/or metadata. 430 431 API reference: https://docs.pinecone.io/reference/fetch 432 433 Examples: 434 >>> index.fetch(ids=['id1', 'id2'], namespace='my_namespace') 435 >>> index.fetch(ids=['id1', 'id2']) 436 437 Args: 438 ids (List[str]): The vector IDs to fetch. 439 namespace (str): The namespace to fetch vectors from. 440 If not specified, the default namespace is used. 
[optional] 441 442 Returns: FetchResponse object which contains the list of Vector objects, and namespace name. 443 """ 444 pass 445 446 @abstractmethod 447 def query( 448 self, 449 *args, 450 top_k: int, 451 vector: Optional[List[float]] = None, 452 id: Optional[str] = None, 453 namespace: Optional[str] = None, 454 filter: Optional[FilterTypedDict] = None, 455 include_values: Optional[bool] = None, 456 include_metadata: Optional[bool] = None, 457 sparse_vector: Optional[Union[SparseValues, SparseVectorTypedDict]] = None, 458 **kwargs, 459 ) -> Union[QueryResponse, ApplyResult]: 460 """ 461 The Query operation searches a namespace, using a query vector. 462 It retrieves the ids of the most similar items in a namespace, along with their similarity scores. 463 464 API reference: https://docs.pinecone.io/reference/query 465 466 Examples: 467 >>> index.query(vector=[1, 2, 3], top_k=10, namespace='my_namespace') 468 >>> index.query(id='id1', top_k=10, namespace='my_namespace') 469 >>> index.query(vector=[1, 2, 3], top_k=10, namespace='my_namespace', filter={'key': 'value'}) 470 >>> index.query(id='id1', top_k=10, namespace='my_namespace', include_metadata=True, include_values=True) 471 >>> index.query(vector=[1, 2, 3], sparse_vector={'indices': [1, 2], 'values': [0.2, 0.4]}, 472 >>> top_k=10, namespace='my_namespace') 473 >>> index.query(vector=[1, 2, 3], sparse_vector=SparseValues([1, 2], [0.2, 0.4]), 474 >>> top_k=10, namespace='my_namespace') 475 476 Args: 477 vector (List[float]): The query vector. This should be the same length as the dimension of the index 478 being queried. Each `query()` request can contain only one of the parameters 479 `id` or `vector`.. [optional] 480 id (str): The unique ID of the vector to be used as a query vector. 481 Each `query()` request can contain only one of the parameters 482 `vector` or `id`. [optional] 483 top_k (int): The number of results to return for each query. Must be an integer greater than 1. 
484 namespace (str): The namespace to query vectors from. 485 If not specified, the default namespace is used. [optional] 486 filter (Dict[str, Union[str, float, int, bool, List, dict]): 487 The filter to apply. You can use vector metadata to limit your search. 488 See https://www.pinecone.io/docs/metadata-filtering/.. [optional] 489 include_values (bool): Indicates whether vector values are included in the response. 490 If omitted the server will use the default value of False [optional] 491 include_metadata (bool): Indicates whether metadata is included in the response as well as the ids. 492 If omitted the server will use the default value of False [optional] 493 sparse_vector: (Union[SparseValues, Dict[str, Union[List[float], List[int]]]]): sparse values of the query vector. 494 Expected to be either a SparseValues object or a dict of the form: 495 {'indices': List[int], 'values': List[float]}, where the lists each have the same length. 496 497 Returns: QueryResponse object which contains the list of the closest vectors as ScoredVector objects, 498 and namespace name. 499 """ 500 pass 501 502 @abstractmethod 503 def query_namespaces( 504 self, 505 vector: List[float], 506 namespaces: List[str], 507 top_k: Optional[int] = None, 508 filter: Optional[FilterTypedDict] = None, 509 include_values: Optional[bool] = None, 510 include_metadata: Optional[bool] = None, 511 sparse_vector: Optional[Union[SparseValues, SparseVectorTypedDict]] = None, 512 **kwargs, 513 ) -> QueryNamespacesResults: 514 """The query_namespaces() method is used to make a query to multiple namespaces in parallel and combine the results into one result set. 515 516 Since several asynchronous calls are made on your behalf when calling this method, you will need to tune the pool_threads and connection_pool_maxsize parameter of the Index constructor to suite your workload. 
517 518 Examples: 519 520 ```python 521 from pinecone import Pinecone 522 523 pc = Pinecone(api_key="your-api-key") 524 index = pc.Index( 525 host="index-name", 526 pool_threads=32, 527 connection_pool_maxsize=32 528 ) 529 530 query_vec = [0.1, 0.2, 0.3] # An embedding that matches the index dimension 531 combined_results = index.query_namespaces( 532 vector=query_vec, 533 namespaces=['ns1', 'ns2', 'ns3', 'ns4'], 534 metric="cosine", 535 top_k=10, 536 filter={'genre': {"$eq": "drama"}}, 537 include_values=True, 538 include_metadata=True 539 ) 540 for vec in combined_results.matches: 541 print(vec.id, vec.score) 542 print(combined_results.usage) 543 ``` 544 545 Args: 546 vector (List[float]): The query vector, must be the same length as the dimension of the index being queried. 547 namespaces (List[str]): The list of namespaces to query. 548 top_k (Optional[int], optional): The number of results you would like to request from each namespace. Defaults to 10. 549 metric (str): Must be one of 'cosine', 'euclidean', 'dotproduct'. This is needed in order to merge results across namespaces, since the interpretation of score depends on the index metric type. 550 filter (Optional[Dict[str, Union[str, float, int, bool, List, dict]]], optional): Pass an optional filter to filter results based on metadata. Defaults to None. 551 include_values (Optional[bool], optional): Boolean field indicating whether vector values should be included with results. Defaults to None. 552 include_metadata (Optional[bool], optional): Boolean field indicating whether vector metadata should be included with results. Defaults to None. 553 sparse_vector (Optional[ Union[SparseValues, Dict[str, Union[List[float], List[int]]]] ], optional): If you are working with a dotproduct index, you can pass a sparse vector as part of your hybrid search. Defaults to None. 
554 555 Returns: 556 QueryNamespacesResults: A QueryNamespacesResults object containing the combined results from all namespaces, as well as the combined usage cost in read units. 557 """ 558 pass 559 560 @abstractmethod 561 def update( 562 self, 563 id: str, 564 values: Optional[List[float]] = None, 565 set_metadata: Optional[VectorMetadataTypedDict] = None, 566 namespace: Optional[str] = None, 567 sparse_values: Optional[Union[SparseValues, SparseVectorTypedDict]] = None, 568 **kwargs, 569 ) -> Dict[str, Any]: 570 """ 571 The Update operation updates vector in a namespace. 572 If a value is included, it will overwrite the previous value. 573 If a set_metadata is included, 574 the values of the fields specified in it will be added or overwrite the previous value. 575 576 API reference: https://docs.pinecone.io/reference/update 577 578 Examples: 579 >>> index.update(id='id1', values=[1, 2, 3], namespace='my_namespace') 580 >>> index.update(id='id1', set_metadata={'key': 'value'}, namespace='my_namespace') 581 >>> index.update(id='id1', values=[1, 2, 3], sparse_values={'indices': [1, 2], 'values': [0.2, 0.4]}, 582 >>> namespace='my_namespace') 583 >>> index.update(id='id1', values=[1, 2, 3], sparse_values=SparseValues(indices=[1, 2], values=[0.2, 0.4]), 584 >>> namespace='my_namespace') 585 586 Args: 587 id (str): Vector's unique id. 588 values (List[float]): vector values to set. [optional] 589 set_metadata (Dict[str, Union[str, float, int, bool, List[int], List[float], List[str]]]]): 590 metadata to set for vector. [optional] 591 namespace (str): Namespace name where to update the vector.. [optional] 592 sparse_values: (Dict[str, Union[List[float], List[int]]]): sparse values to update for the vector. 593 Expected to be either a SparseValues object or a dict of the form: 594 {'indices': List[int], 'values': List[float]} where the lists each have the same length. 595 596 597 Returns: An empty dictionary if the update was successful. 
598 """ 599 pass 600 601 @abstractmethod 602 def describe_index_stats( 603 self, filter: Optional[FilterTypedDict] = None, **kwargs 604 ) -> DescribeIndexStatsResponse: 605 """ 606 The DescribeIndexStats operation returns statistics about the index's contents. 607 For example: The vector count per namespace and the number of dimensions. 608 609 API reference: https://docs.pinecone.io/reference/describe_index_stats_post 610 611 Examples: 612 >>> index.describe_index_stats() 613 >>> index.describe_index_stats(filter={'key': 'value'}) 614 615 Args: 616 filter (Dict[str, Union[str, float, int, bool, List, dict]]): 617 If this parameter is present, the operation only returns statistics for vectors that satisfy the filter. 618 See https://www.pinecone.io/docs/metadata-filtering/.. [optional] 619 620 Returns: DescribeIndexStatsResponse object which contains stats about the index. 621 """ 622 pass 623 624 @abstractmethod 625 def list_paginated( 626 self, 627 prefix: Optional[str] = None, 628 limit: Optional[int] = None, 629 pagination_token: Optional[str] = None, 630 namespace: Optional[str] = None, 631 **kwargs, 632 ) -> ListResponse: 633 """ 634 The list_paginated operation finds vectors based on an id prefix within a single namespace. 635 It returns matching ids in a paginated form, with a pagination token to fetch the next page of results. 636 This id list can then be passed to fetch or delete operations, depending on your use case. 637 638 Consider using the `list` method to avoid having to handle pagination tokens manually. 
639 640 Examples: 641 >>> results = index.list_paginated(prefix='99', limit=5, namespace='my_namespace') 642 >>> [v.id for v in results.vectors] 643 ['99', '990', '991', '992', '993'] 644 >>> results.pagination.next 645 eyJza2lwX3Bhc3QiOiI5OTMiLCJwcmVmaXgiOiI5OSJ9 646 >>> next_results = index.list_paginated(prefix='99', limit=5, namespace='my_namespace', pagination_token=results.pagination.next) 647 648 Args: 649 prefix (Optional[str]): The id prefix to match. If unspecified, an empty string prefix will 650 be used with the effect of listing all ids in a namespace [optional] 651 limit (Optional[int]): The maximum number of ids to return. If unspecified, the server will use a default value. [optional] 652 pagination_token (Optional[str]): A token needed to fetch the next page of results. This token is returned 653 in the response if additional results are available. [optional] 654 namespace (Optional[str]): The namespace to fetch vectors from. If not specified, the default namespace is used. [optional] 655 656 Returns: ListResponse object which contains the list of ids, the namespace name, pagination information, and usage showing the number of read_units consumed. 657 """ 658 pass 659 660 @abstractmethod 661 def list(self, **kwargs): 662 """ 663 The list operation accepts all of the same arguments as list_paginated, and returns a generator that yields 664 a list of the matching vector ids in each page of results. It automatically handles pagination tokens on your 665 behalf. 666 667 Examples: 668 >>> for ids in index.list(prefix='99', limit=5, namespace='my_namespace'): 669 >>> print(ids) 670 ['99', '990', '991', '992', '993'] 671 ['994', '995', '996', '997', '998'] 672 ['999'] 673 674 Args: 675 prefix (Optional[str]): The id prefix to match. If unspecified, an empty string prefix will 676 be used with the effect of listing all ids in a namespace [optional] 677 limit (Optional[int]): The maximum number of ids to return. 
If unspecified, the server will use a default value. [optional] 678 pagination_token (Optional[str]): A token needed to fetch the next page of results. This token is returned 679 in the response if additional results are available. [optional] 680 namespace (Optional[str]): The namespace to fetch vectors from. If not specified, the default namespace is used. [optional] 681 """ 682 pass
Helper class that provides a standard way to create an ABC using inheritance.
@abstractmethod
def upsert(
    self,
    vectors: Union[
        List[Vector], List[VectorTuple], List[VectorTupleWithMetadata], List[VectorTypedDict]
    ],
    namespace: Optional[str] = None,
    batch_size: Optional[int] = None,
    show_progress: bool = True,
    **kwargs,
) -> UpsertResponse:
    """
    Write vectors into a namespace, overwriting any existing vector with the same id.

    Args:
        vectors (Union[List[Vector], List[VectorTuple], List[VectorTupleWithMetadata], List[VectorTypedDict]]): A list of vectors to upsert.
        namespace (str): The namespace to write to. If not specified, the default namespace is used. [optional]
        batch_size (int): The number of vectors to upsert in each batch.
                           If not specified, all vectors will be upserted in a single batch. [optional]
        show_progress (bool): Whether to show a progress bar using tqdm.
                              Applied only if batch_size is provided. Default is True.

    Returns:
        `UpsertResponse`, includes the number of vectors upserted.


    The upsert operation writes vectors into a namespace.
    If a new value is upserted for an existing vector id, it will overwrite the previous value.

    To upsert in parallel follow: https://docs.pinecone.io/docs/insert-data#sending-upserts-in-parallel

    ## Upserting dense vectors

    **Note:** the dimension of each dense vector must match the dimension of the index.

    A vector can be represented in a variety of ways.

    ```python
    from pinecone import Pinecone, Vector

    pc = Pinecone()
    idx = pc.Index("index-name")

    # A Vector object
    idx.upsert(
        namespace = 'my-namespace',
        vectors = [
            Vector(id='id1', values=[0.1, 0.2, 0.3, 0.4], metadata={'metadata_key': 'metadata_value'}),
        ]
    )

    # A vector tuple
    idx.upsert(
        namespace = 'my-namespace',
        vectors = [
            ('id1', [0.1, 0.2, 0.3, 0.4]),
        ]
    )

    # A vector tuple with metadata
    idx.upsert(
        namespace = 'my-namespace',
        vectors = [
            ('id1', [0.1, 0.2, 0.3, 0.4], {'metadata_key': 'metadata_value'}),
        ]
    )

    # A vector dictionary
    idx.upsert(
        namespace = 'my-namespace',
        vectors = [
            {"id": "id1", "values": [0.1, 0.2, 0.3, 0.4], "metadata": {"metadata_key": "metadata_value"}},
        ]
    )
    ```

    ## Upserting sparse vectors

    ```python
    from pinecone import Pinecone, Vector, SparseValues

    pc = Pinecone()
    idx = pc.Index("index-name")

    # A Vector object
    idx.upsert(
        namespace = 'my-namespace',
        vectors = [
            Vector(id='id1', sparse_values=SparseValues(indices=[1, 2], values=[0.2, 0.4])),
        ]
    )

    # A dictionary
    idx.upsert(
        namespace = 'my-namespace',
        vectors = [
            {"id": "id1", "sparse_values": {"indices": [1, 2], "values": [0.2, 0.4]}},
        ]
    )
    ```

    ## Batch upsert

    If you have a large number of vectors, you can upsert them in batches.

    ```python
    from pinecone import Pinecone, Vector

    pc = Pinecone()
    idx = pc.Index("index-name")

    idx.upsert(
        namespace = 'my-namespace',
        vectors = [
            {'id': 'id1', 'values': [0.1, 0.2, 0.3, 0.4]},
            {'id': 'id2', 'values': [0.2, 0.3, 0.4, 0.5]},
            {'id': 'id3', 'values': [0.3, 0.4, 0.5, 0.6]},
            {'id': 'id4', 'values': [0.4, 0.5, 0.6, 0.7]},
            {'id': 'id5', 'values': [0.5, 0.6, 0.7, 0.8]},
            # More vectors here
        ],
        batch_size = 50
    )
    ```

    ## Visual progress bar with tqdm

    To see a progress bar when upserting in batches, you will need to separately install the `tqdm` package.
    If `tqdm` is present, the client will detect and use it to display progress when `show_progress=True`.
    """
    pass
Arguments:
- vectors (Union[List[Vector], List[VectorTuple], List[VectorTupleWithMetadata], List[VectorTypedDict]]): A list of vectors to upsert.
- namespace (str): The namespace to write to. If not specified, the default namespace is used. [optional]
- batch_size (int): The number of vectors to upsert in each batch. If not specified, all vectors will be upserted in a single batch. [optional]
- show_progress (bool): Whether to show a progress bar using tqdm. Applied only if batch_size is provided. Default is True.
Returns:
`UpsertResponse`, includes the number of vectors upserted.
The upsert operation writes vectors into a namespace. If a new value is upserted for an existing vector id, it will overwrite the previous value.
To upsert in parallel follow: https://docs.pinecone.io/docs/insert-data#sending-upserts-in-parallel
Upserting dense vectors
Note: the dimension of each dense vector must match the dimension of the index.
A vector can be represented in a variety of ways.
from pinecone import Pinecone, Vector
pc = Pinecone()
idx = pc.Index("index-name")
# A Vector object
idx.upsert(
namespace = 'my-namespace',
vectors = [
Vector(id='id1', values=[0.1, 0.2, 0.3, 0.4], metadata={'metadata_key': 'metadata_value'}),
]
)
# A vector tuple
idx.upsert(
namespace = 'my-namespace',
vectors = [
('id1', [0.1, 0.2, 0.3, 0.4]),
]
)
# A vector tuple with metadata
idx.upsert(
namespace = 'my-namespace',
vectors = [
('id1', [0.1, 0.2, 0.3, 0.4], {'metadata_key': 'metadata_value'}),
]
)
# A vector dictionary
idx.upsert(
namespace = 'my-namespace',
vectors = [
{"id": "id1", "values": [0.1, 0.2, 0.3, 0.4], "metadata": {"metadata_key": "metadata_value"}},
]
)
Upserting sparse vectors
from pinecone import Pinecone, Vector, SparseValues
pc = Pinecone()
idx = pc.Index("index-name")
# A Vector object
idx.upsert(
namespace = 'my-namespace',
vectors = [
Vector(id='id1', sparse_values=SparseValues(indices=[1, 2], values=[0.2, 0.4])),
]
)
# A dictionary
idx.upsert(
namespace = 'my-namespace',
vectors = [
{"id": "id1", "sparse_values": {"indices": [1, 2], "values": [0.2, 0.4]}},
]
)
Batch upsert
If you have a large number of vectors, you can upsert them in batches.
from pinecone import Pinecone, Vector
pc = Pinecone()
idx = pc.Index("index-name")
idx.upsert(
namespace = 'my-namespace',
vectors = [
{'id': 'id1', 'values': [0.1, 0.2, 0.3, 0.4]},
{'id': 'id2', 'values': [0.2, 0.3, 0.4, 0.5]},
{'id': 'id3', 'values': [0.3, 0.4, 0.5, 0.6]},
{'id': 'id4', 'values': [0.4, 0.5, 0.6, 0.7]},
{'id': 'id5', 'values': [0.5, 0.6, 0.7, 0.8]},
# More vectors here
],
batch_size = 50
)
Visual progress bar with tqdm
To see a progress bar when upserting in batches, you will need to separately install the `tqdm` package.
If `tqdm` is present, the client will detect and use it to display progress when `show_progress=True`.
@abstractmethod
def upsert_from_dataframe(
    self,
    df,
    namespace: Optional[str] = None,
    batch_size: int = 500,
    show_progress: bool = True,
):
    """Upsert the rows of a dataframe into the index.

    Args:
        df: A pandas dataframe with the following columns: id, values, sparse_values, and metadata.
        namespace: The namespace to upsert into.
        batch_size: The number of rows to upsert in a single batch.
        show_progress: Whether to show a progress bar.
    """
Upserts a dataframe into the index.
Arguments:
- df: A pandas dataframe with the following columns: id, values, sparse_values, and metadata.
- namespace: The namespace to upsert into.
- batch_size: The number of rows to upsert in a single batch.
- show_progress: Whether to show a progress bar.
@abstractmethod
def upsert_records(self, namespace: str, records: List[Dict]):
    """
    Upsert records to a namespace of an index that has an embed configuration.

    :param namespace: The namespace of the index to upsert records to.
    :type namespace: str, required
    :param records: The records to upsert into the index.
    :type records: List[Dict], required

    Upsert records to a namespace. A record is a dictionary that contains either an `id` or `_id`
    field along with other fields that will be stored as metadata. The `id` or `_id` field is used
    as the unique identifier for the record. At least one field in the record should correspond to
    a field mapping in the index's embed configuration.

    When records are upserted, Pinecone converts mapped fields into embeddings and upserts them into
    the specified namespace of the index.

    ```python
    from pinecone import (
        Pinecone,
        CloudProvider,
        AwsRegion,
        EmbedModel,
        IndexEmbed
    )

    pc = Pinecone(api_key="<<PINECONE_API_KEY>>")

    # Create an index for your embedding model
    index_model = pc.create_index_for_model(
        name="my-model-index",
        cloud=CloudProvider.AWS,
        region=AwsRegion.US_WEST_2,
        embed=IndexEmbed(
            model=EmbedModel.Multilingual_E5_Large,
            field_map={"text": "my_text_field"}
        )
    )

    # Instantiate the index client
    idx = pc.Index(host=index_model.host)

    # upsert records
    idx.upsert_records(
        namespace="my-namespace",
        records=[
            {
                "_id": "test1",
                "my_text_field": "Apple is a popular fruit known for its sweetness and crisp texture.",
            },
            {
                "_id": "test2",
                "my_text_field": "The tech company Apple is known for its innovative products like the iPhone.",
            },
            {
                "_id": "test3",
                "my_text_field": "Many people enjoy eating apples as a healthy snack.",
            },
            {
                "_id": "test4",
                "my_text_field": "Apple Inc. has revolutionized the tech industry with its sleek designs and user-friendly interfaces.",
            },
            {
                "_id": "test5",
                "my_text_field": "An apple a day keeps the doctor away, as the saying goes.",
            },
            {
                "_id": "test6",
                "my_text_field": "Apple Computer Company was founded on April 1, 1976, by Steve Jobs, Steve Wozniak, and Ronald Wayne as a partnership.",
            },
        ],
    )

    from pinecone import SearchQuery, SearchRerank, RerankModel

    # search for similar records
    response = idx.search_records(
        namespace="my-namespace",
        query=SearchQuery(
            inputs={
                "text": "Apple corporation",
            },
            top_k=3,
        ),
        rerank=SearchRerank(
            model=RerankModel.Bge_Reranker_V2_M3,
            rank_fields=["my_text_field"],
            top_n=3,
        ),
    )
    ```
    """
    pass
Parameters
- namespace: The namespace of the index to upsert records to.
- records: The records to upsert into the index.
Upsert records to a namespace. A record is a dictionary that contains either an `id` or `_id` field along with other fields that will be stored as metadata. The `id` or `_id` field is used as the unique identifier for the record. At least one field in the record should correspond to a field mapping in the index's embed configuration.
When records are upserted, Pinecone converts mapped fields into embeddings and upserts them into the specified namespace of the index.
from pinecone import (
Pinecone,
CloudProvider,
AwsRegion,
EmbedModel,
IndexEmbed
)
pc = Pinecone(api_key="<<PINECONE_API_KEY>>")
# Create an index for your embedding model
index_model = pc.create_index_for_model(
name="my-model-index",
cloud=CloudProvider.AWS,
region=AwsRegion.US_WEST_2,
embed=IndexEmbed(
model=EmbedModel.Multilingual_E5_Large,
field_map={"text": "my_text_field"}
)
)
# Instantiate the index client
idx = pc.Index(host=index_model.host)
# upsert records
idx.upsert_records(
namespace="my-namespace",
records=[
{
"_id": "test1",
"my_text_field": "Apple is a popular fruit known for its sweetness and crisp texture.",
},
{
"_id": "test2",
"my_text_field": "The tech company Apple is known for its innovative products like the iPhone.",
},
{
"_id": "test3",
"my_text_field": "Many people enjoy eating apples as a healthy snack.",
},
{
"_id": "test4",
"my_text_field": "Apple Inc. has revolutionized the tech industry with its sleek designs and user-friendly interfaces.",
},
{
"_id": "test5",
"my_text_field": "An apple a day keeps the doctor away, as the saying goes.",
},
{
"_id": "test6",
"my_text_field": "Apple Computer Company was founded on April 1, 1976, by Steve Jobs, Steve Wozniak, and Ronald Wayne as a partnership.",
},
],
)
from pinecone import SearchQuery, SearchRerank, RerankModel
# search for similar records
response = idx.search_records(
namespace="my-namespace",
query=SearchQuery(
inputs={
"text": "Apple corporation",
},
top_k=3,
),
rerank=SearchRerank(
model=RerankModel.Bge_Reranker_V2_M3,
rank_fields=["my_text_field"],
top_n=3,
),
)
@abstractmethod
def search(
    self,
    namespace: str,
    query: Union[SearchQueryTypedDict, SearchQuery],
    rerank: Optional[Union[SearchRerankTypedDict, SearchRerank]] = None,
    # NOTE(review): list default kept for interface compatibility; it is a single shared
    # object, so implementations should treat it as read-only.
    fields: Optional[List[str]] = ["*"],  # Default to returning all fields
) -> SearchRecordsResponse:
    """
    Search a namespace for records, optionally reranking the results.

    :param namespace: The namespace in the index to search.
    :type namespace: str, required
    :param query: The SearchQuery to use for the search.
    :type query: Union[Dict, SearchQuery], required
    :param rerank: The SearchRerank to use with the search request.
    :type rerank: Union[Dict, SearchRerank], optional
    :param fields: The fields to return. Defaults to ["*"], which returns all fields.
    :type fields: List[str], optional
    :return: The records that match the search.

    Search for records.

    This operation converts a query to a vector embedding and then searches a namespace. You
    can optionally provide a reranking operation as part of the search.

    ```python
    from pinecone import (
        Pinecone,
        CloudProvider,
        AwsRegion,
        EmbedModel,
        IndexEmbed
    )

    pc = Pinecone(api_key="<<PINECONE_API_KEY>>")

    # Create an index for your embedding model
    index_model = pc.create_index_for_model(
        name="my-model-index",
        cloud=CloudProvider.AWS,
        region=AwsRegion.US_WEST_2,
        embed=IndexEmbed(
            model=EmbedModel.Multilingual_E5_Large,
            field_map={"text": "my_text_field"}
        )
    )

    # Instantiate the index client
    idx = pc.Index(host=index_model.host)

    # upsert records
    idx.upsert_records(
        namespace="my-namespace",
        records=[
            {
                "_id": "test1",
                "my_text_field": "Apple is a popular fruit known for its sweetness and crisp texture.",
            },
            {
                "_id": "test2",
                "my_text_field": "The tech company Apple is known for its innovative products like the iPhone.",
            },
            {
                "_id": "test3",
                "my_text_field": "Many people enjoy eating apples as a healthy snack.",
            },
            {
                "_id": "test4",
                "my_text_field": "Apple Inc. has revolutionized the tech industry with its sleek designs and user-friendly interfaces.",
            },
            {
                "_id": "test5",
                "my_text_field": "An apple a day keeps the doctor away, as the saying goes.",
            },
            {
                "_id": "test6",
                "my_text_field": "Apple Computer Company was founded on April 1, 1976, by Steve Jobs, Steve Wozniak, and Ronald Wayne as a partnership.",
            },
        ],
    )

    from pinecone import SearchQuery, SearchRerank, RerankModel

    # search for similar records
    response = idx.search_records(
        namespace="my-namespace",
        query=SearchQuery(
            inputs={
                "text": "Apple corporation",
            },
            top_k=3,
        ),
        rerank=SearchRerank(
            model=RerankModel.Bge_Reranker_V2_M3,
            rank_fields=["my_text_field"],
            top_n=3,
        ),
    )
    ```
    """
    pass
Parameters
- namespace: The namespace in the index to search.
- query: The SearchQuery to use for the search.
- rerank: The SearchRerank to use with the search request.
Returns
The records that match the search.
Search for records.
This operation converts a query to a vector embedding and then searches a namespace. You can optionally provide a reranking operation as part of the search.
from pinecone import (
Pinecone,
CloudProvider,
AwsRegion,
EmbedModel,
IndexEmbed
)
pc = Pinecone(api_key="<<PINECONE_API_KEY>>")
# Create an index for your embedding model
index_model = pc.create_index_for_model(
name="my-model-index",
cloud=CloudProvider.AWS,
region=AwsRegion.US_WEST_2,
embed=IndexEmbed(
model=EmbedModel.Multilingual_E5_Large,
field_map={"text": "my_text_field"}
)
)
# Instantiate the index client
idx = pc.Index(host=index_model.host)
# upsert records
idx.upsert_records(
namespace="my-namespace",
records=[
{
"_id": "test1",
"my_text_field": "Apple is a popular fruit known for its sweetness and crisp texture.",
},
{
"_id": "test2",
"my_text_field": "The tech company Apple is known for its innovative products like the iPhone.",
},
{
"_id": "test3",
"my_text_field": "Many people enjoy eating apples as a healthy snack.",
},
{
"_id": "test4",
"my_text_field": "Apple Inc. has revolutionized the tech industry with its sleek designs and user-friendly interfaces.",
},
{
"_id": "test5",
"my_text_field": "An apple a day keeps the doctor away, as the saying goes.",
},
{
"_id": "test6",
"my_text_field": "Apple Computer Company was founded on April 1, 1976, by Steve Jobs, Steve Wozniak, and Ronald Wayne as a partnership.",
},
],
)
from pinecone import SearchQuery, SearchRerank, RerankModel
# search for similar records
response = idx.search_records(
namespace="my-namespace",
query=SearchQuery(
inputs={
"text": "Apple corporation",
},
top_k=3,
),
rerank=SearchRerank(
model=RerankModel.Bge_Reranker_V2_M3,
rank_fields=["my_text_field"],
top_n=3,
),
)
@abstractmethod
def search_records(
    self,
    namespace: str,
    query: Union[SearchQueryTypedDict, SearchQuery],
    rerank: Optional[Union[SearchRerankTypedDict, SearchRerank]] = None,
    fields: Optional[List[str]] = ["*"],  # Default to returning all fields
) -> SearchRecordsResponse:
    """Alias of the search() method.

    The signature mirrors search(): it takes the same parameters with the same
    defaults and is annotated with the same return type.

    :param namespace: The namespace in the index to search.
    :type namespace: str, required
    :param query: The SearchQuery to use for the search.
    :type query: Union[Dict, SearchQuery], required
    :param rerank: The SearchRerank to use with the search request.
    :type rerank: Union[Dict, SearchRerank], optional
    :param fields: The record fields to return. The signature default is ``["*"]``,
                   which returns all fields.
    :type fields: List[str], optional
    :return: The records that match the search.
    """
    pass
Alias of the search() method.
@abstractmethod
def delete(
    self,
    ids: Optional[List[str]] = None,
    delete_all: Optional[bool] = None,
    namespace: Optional[str] = None,
    filter: Optional[FilterTypedDict] = None,
    **kwargs,
) -> Dict[str, Any]:
    """
    The Delete operation deletes vectors from the index, from a single namespace.

    No error is raised if the vector id does not exist.

    Note: For any delete call, if namespace is not specified, the default namespace `""` is used.
    Since the delete operation does not error when ids are not present, this means you may not receive
    an error if you delete from the wrong namespace.

    Delete can occur in the following mutually exclusive ways:
    1. Delete by ids from a single namespace
    2. Delete all vectors from a single namespace by setting delete_all to True
    3. Delete all vectors from a single namespace by specifying a metadata filter
       (note that for this option delete_all must be set to False)

    API reference: https://docs.pinecone.io/reference/delete_post

    Examples:
        >>> index.delete(ids=['id1', 'id2'], namespace='my_namespace')
        >>> index.delete(delete_all=True, namespace='my_namespace')
        >>> index.delete(filter={'key': 'value'}, namespace='my_namespace')

    Args:
        ids (List[str]): Vector ids to delete. [optional]
        delete_all (bool): This indicates that all vectors in the index namespace should be deleted. [optional]
                            Default is False.
        namespace (str): The namespace to delete vectors from. [optional]
                         If not specified, the default namespace is used.
        filter (Dict[str, Union[str, float, int, bool, List, dict]]):
                If specified, the metadata filter here will be used to select the vectors to delete.
                This is mutually exclusive with specifying ids to delete in the ids param or using delete_all=True.
                See https://www.pinecone.io/docs/metadata-filtering/. [optional]

    Returns: An empty dictionary if the delete operation was successful.
    """
    pass
Arguments:
- ids (List[str]): Vector ids to delete [optional]
- delete_all (bool): This indicates that all vectors in the index namespace should be deleted. [optional] Default is False.
- namespace (str): The namespace to delete vectors from [optional] If not specified, the default namespace is used.
- filter (Dict[str, Union[str, float, int, bool, List, dict]]): If specified, the metadata filter here will be used to select the vectors to delete. This is mutually exclusive with specifying ids to delete in the ids param or using delete_all=True. See https://www.pinecone.io/docs/metadata-filtering/. [optional]
The Delete operation deletes vectors from the index, from a single namespace.
No error is raised if the vector id does not exist.
Note: For any delete call, if namespace is not specified, the default namespace `""` is used.
Since the delete operation does not error when ids are not present, this means you may not receive
an error if you delete from the wrong namespace.
Delete can occur in the following mutually exclusive ways:
- Delete by ids from a single namespace
- Delete all vectors from a single namespace by setting delete_all to True
- Delete all vectors from a single namespace by specifying a metadata filter (note that for this option delete_all must be set to False)
API reference: https://docs.pinecone.io/reference/delete_post
Examples:
>>> index.delete(ids=['id1', 'id2'], namespace='my_namespace') >>> index.delete(delete_all=True, namespace='my_namespace') >>> index.delete(filter={'key': 'value'}, namespace='my_namespace')
Returns: An empty dictionary if the delete operation was successful.
@abstractmethod
def fetch(self, ids: List[str], namespace: Optional[str] = None, **kwargs) -> FetchResponse:
    """
    Look up and return vectors, by ID, from a single namespace.

    The returned vectors include the vector data and/or metadata.

    API reference: https://docs.pinecone.io/reference/fetch

    Examples:
        >>> index.fetch(ids=['id1', 'id2'], namespace='my_namespace')
        >>> index.fetch(ids=['id1', 'id2'])

    Args:
        ids (List[str]): The vector IDs to fetch.
        namespace (str): The namespace to fetch vectors from.
                         If not specified, the default namespace is used. [optional]

    Returns: FetchResponse object which contains the list of Vector objects, and namespace name.
    """
    pass
The fetch operation looks up and returns vectors, by ID, from a single namespace. The returned vectors include the vector data and/or metadata.
API reference: https://docs.pinecone.io/reference/fetch
Examples:
>>> index.fetch(ids=['id1', 'id2'], namespace='my_namespace') >>> index.fetch(ids=['id1', 'id2'])
Arguments:
- ids (List[str]): The vector IDs to fetch.
- namespace (str): The namespace to fetch vectors from. If not specified, the default namespace is used. [optional]
Returns: FetchResponse object which contains the list of Vector objects, and namespace name.
@abstractmethod
def query(
    self,
    *args,
    top_k: int,
    vector: Optional[List[float]] = None,
    id: Optional[str] = None,
    namespace: Optional[str] = None,
    filter: Optional[FilterTypedDict] = None,
    include_values: Optional[bool] = None,
    include_metadata: Optional[bool] = None,
    sparse_vector: Optional[Union[SparseValues, SparseVectorTypedDict]] = None,
    **kwargs,
) -> Union[QueryResponse, ApplyResult]:
    """
    The Query operation searches a namespace, using a query vector.
    It retrieves the ids of the most similar items in a namespace, along with their similarity scores.

    All named parameters follow `*args` in the signature, so they are keyword-only.

    API reference: https://docs.pinecone.io/reference/query

    Examples:
        >>> index.query(vector=[1, 2, 3], top_k=10, namespace='my_namespace')
        >>> index.query(id='id1', top_k=10, namespace='my_namespace')
        >>> index.query(vector=[1, 2, 3], top_k=10, namespace='my_namespace', filter={'key': 'value'})
        >>> index.query(id='id1', top_k=10, namespace='my_namespace', include_metadata=True, include_values=True)
        >>> index.query(vector=[1, 2, 3], sparse_vector={'indices': [1, 2], 'values': [0.2, 0.4]},
        ...             top_k=10, namespace='my_namespace')
        >>> index.query(vector=[1, 2, 3], sparse_vector=SparseValues([1, 2], [0.2, 0.4]),
        ...             top_k=10, namespace='my_namespace')

    Args:
        vector (List[float]): The query vector. This should be the same length as the dimension of the index
                              being queried. Each `query()` request can contain only one of the parameters
                              `id` or `vector`. [optional]
        id (str): The unique ID of the vector to be used as a query vector.
                  Each `query()` request can contain only one of the parameters
                  `vector` or `id`. [optional]
        top_k (int): The number of results to return for each query. Must be an integer greater than 1.
        namespace (str): The namespace to query vectors from.
                         If not specified, the default namespace is used. [optional]
        filter (Dict[str, Union[str, float, int, bool, List, dict]]):
                The filter to apply. You can use vector metadata to limit your search.
                See https://www.pinecone.io/docs/metadata-filtering/. [optional]
        include_values (bool): Indicates whether vector values are included in the response.
                               If omitted the server will use the default value of False [optional]
        include_metadata (bool): Indicates whether metadata is included in the response as well as the ids.
                                 If omitted the server will use the default value of False [optional]
        sparse_vector: (Union[SparseValues, Dict[str, Union[List[float], List[int]]]]): sparse values of the query vector.
                        Expected to be either a SparseValues object or a dict of the form:
                         {'indices': List[int], 'values': List[float]}, where the lists each have the same length.

    Returns: QueryResponse object which contains the list of the closest vectors as ScoredVector objects,
             and namespace name. The return annotation also permits an ApplyResult; presumably this is
             for asynchronous requests - confirm against the concrete implementation.
    """
    pass
The Query operation searches a namespace, using a query vector. It retrieves the ids of the most similar items in a namespace, along with their similarity scores.
API reference: https://docs.pinecone.io/reference/query
Examples:
>>> index.query(vector=[1, 2, 3], top_k=10, namespace='my_namespace') >>> index.query(id='id1', top_k=10, namespace='my_namespace') >>> index.query(vector=[1, 2, 3], top_k=10, namespace='my_namespace', filter={'key': 'value'}) >>> index.query(id='id1', top_k=10, namespace='my_namespace', include_metadata=True, include_values=True) >>> index.query(vector=[1, 2, 3], sparse_vector={'indices': [1, 2], 'values': [0.2, 0.4]}, >>> top_k=10, namespace='my_namespace') >>> index.query(vector=[1, 2, 3], sparse_vector=SparseValues([1, 2], [0.2, 0.4]), >>> top_k=10, namespace='my_namespace')
Arguments:
- vector (List[float]): The query vector. This should be the same length as the dimension of the index being queried. Each `query()` request can contain only one of the parameters `id` or `vector`. [optional]
- id (str): The unique ID of the vector to be used as a query vector. Each `query()` request can contain only one of the parameters `vector` or `id`. [optional]
- top_k (int): The number of results to return for each query. Must be an integer greater than 1.
- namespace (str): The namespace to query vectors from. If not specified, the default namespace is used. [optional]
- filter (Dict[str, Union[str, float, int, bool, List, dict]]): The filter to apply. You can use vector metadata to limit your search. See https://www.pinecone.io/docs/metadata-filtering/. [optional]
- include_values (bool): Indicates whether vector values are included in the response. If omitted the server will use the default value of False [optional]
- include_metadata (bool): Indicates whether metadata is included in the response as well as the ids. If omitted the server will use the default value of False [optional]
- sparse_vector: (Union[SparseValues, Dict[str, Union[List[float], List[int]]]]): sparse values of the query vector. Expected to be either a SparseValues object or a dict of the form: {'indices': List[int], 'values': List[float]}, where the lists each have the same length.
Returns: QueryResponse object which contains the list of the closest vectors as ScoredVector objects, and namespace name.
@abstractmethod
def query_namespaces(
    self,
    vector: List[float],
    namespaces: List[str],
    top_k: Optional[int] = None,
    filter: Optional[FilterTypedDict] = None,
    include_values: Optional[bool] = None,
    include_metadata: Optional[bool] = None,
    sparse_vector: Optional[Union[SparseValues, SparseVectorTypedDict]] = None,
    **kwargs,
) -> QueryNamespacesResults:
    """The query_namespaces() method is used to make a query to multiple namespaces in parallel and combine the results into one result set.

    Since several asynchronous calls are made on your behalf when calling this method, you will need to tune the pool_threads and connection_pool_maxsize parameters of the Index constructor to suit your workload.

    Examples:

    ```python
    from pinecone import Pinecone

    pc = Pinecone(api_key="your-api-key")
    index = pc.Index(
        host="index-name",
        pool_threads=32,
        connection_pool_maxsize=32
    )

    query_vec = [0.1, 0.2, 0.3] # An embedding that matches the index dimension
    combined_results = index.query_namespaces(
        vector=query_vec,
        namespaces=['ns1', 'ns2', 'ns3', 'ns4'],
        metric="cosine",
        top_k=10,
        filter={'genre': {"$eq": "drama"}},
        include_values=True,
        include_metadata=True
    )
    for vec in combined_results.matches:
        print(vec.id, vec.score)
    print(combined_results.usage)
    ```

    Args:
        vector (List[float]): The query vector, must be the same length as the dimension of the index being queried.
        namespaces (List[str]): The list of namespaces to query.
        top_k (Optional[int], optional): The number of results you would like to request from each namespace. Defaults to 10. (The default in this interface signature is None.)
        metric (str): Must be one of 'cosine', 'euclidean', 'dotproduct'. This is needed in order to merge results across namespaces, since the interpretation of score depends on the index metric type. This is not a named parameter in this interface signature; when passed as in the example above it is received through **kwargs.
        filter (Optional[Dict[str, Union[str, float, int, bool, List, dict]]], optional): Pass an optional filter to filter results based on metadata. Defaults to None.
        include_values (Optional[bool], optional): Boolean field indicating whether vector values should be included with results. Defaults to None.
        include_metadata (Optional[bool], optional): Boolean field indicating whether vector metadata should be included with results. Defaults to None.
        sparse_vector (Optional[ Union[SparseValues, Dict[str, Union[List[float], List[int]]]] ], optional): If you are working with a dotproduct index, you can pass a sparse vector as part of your hybrid search. Defaults to None.

    Returns:
        QueryNamespacesResults: A QueryNamespacesResults object containing the combined results from all namespaces, as well as the combined usage cost in read units.
    """
    pass
The query_namespaces() method is used to make a query to multiple namespaces in parallel and combine the results into one result set.
Since several asynchronous calls are made on your behalf when calling this method, you will need to tune the pool_threads and connection_pool_maxsize parameters of the Index constructor to suit your workload.
Examples:
from pinecone import Pinecone
pc = Pinecone(api_key="your-api-key")
index = pc.Index(
host="index-name",
pool_threads=32,
connection_pool_maxsize=32
)
query_vec = [0.1, 0.2, 0.3] # An embedding that matches the index dimension
combined_results = index.query_namespaces(
vector=query_vec,
namespaces=['ns1', 'ns2', 'ns3', 'ns4'],
metric="cosine",
top_k=10,
filter={'genre': {"$eq": "drama"}},
include_values=True,
include_metadata=True
)
for vec in combined_results.matches:
print(vec.id, vec.score)
print(combined_results.usage)
Arguments:
- vector (List[float]): The query vector, must be the same length as the dimension of the index being queried.
- namespaces (List[str]): The list of namespaces to query.
- top_k (Optional[int], optional): The number of results you would like to request from each namespace. Defaults to 10.
- metric (str): Must be one of 'cosine', 'euclidean', 'dotproduct'. This is needed in order to merge results across namespaces, since the interpretation of score depends on the index metric type.
- filter (Optional[Dict[str, Union[str, float, int, bool, List, dict]]], optional): Pass an optional filter to filter results based on metadata. Defaults to None.
- include_values (Optional[bool], optional): Boolean field indicating whether vector values should be included with results. Defaults to None.
- include_metadata (Optional[bool], optional): Boolean field indicating whether vector metadata should be included with results. Defaults to None.
- sparse_vector (Optional[ Union[SparseValues, Dict[str, Union[List[float], List[int]]]] ], optional): If you are working with a dotproduct index, you can pass a sparse vector as part of your hybrid search. Defaults to None.
Returns:
QueryNamespacesResults: A QueryNamespacesResults object containing the combined results from all namespaces, as well as the combined usage cost in read units.
@abstractmethod
def update(
    self,
    id: str,
    values: Optional[List[float]] = None,
    set_metadata: Optional[VectorMetadataTypedDict] = None,
    namespace: Optional[str] = None,
    sparse_values: Optional[Union[SparseValues, SparseVectorTypedDict]] = None,
    **kwargs,
) -> Dict[str, Any]:
    """
    The Update operation updates a vector in a namespace.
    If a value is included, it will overwrite the previous value.
    If a set_metadata is included,
    the values of the fields specified in it will be added or overwrite the previous value.

    API reference: https://docs.pinecone.io/reference/update

    Examples:
        >>> index.update(id='id1', values=[1, 2, 3], namespace='my_namespace')
        >>> index.update(id='id1', set_metadata={'key': 'value'}, namespace='my_namespace')
        >>> index.update(id='id1', values=[1, 2, 3], sparse_values={'indices': [1, 2], 'values': [0.2, 0.4]},
        ...              namespace='my_namespace')
        >>> index.update(id='id1', values=[1, 2, 3], sparse_values=SparseValues(indices=[1, 2], values=[0.2, 0.4]),
        ...              namespace='my_namespace')

    Args:
        id (str): Vector's unique id.
        values (List[float]): vector values to set. [optional]
        set_metadata (Dict[str, Union[str, float, int, bool, List[int], List[float], List[str]]]):
            metadata to set for vector. [optional]
        namespace (str): Namespace name where to update the vector. [optional]
        sparse_values: (Union[SparseValues, Dict[str, Union[List[float], List[int]]]]): sparse values to update for the vector.
                       Expected to be either a SparseValues object or a dict of the form:
                       {'indices': List[int], 'values': List[float]} where the lists each have the same length.

    Returns: An empty dictionary if the update was successful.
    """
    pass
The Update operation updates a vector in a namespace. If a value is included, it will overwrite the previous value. If a set_metadata is included, the values of the fields specified in it will be added or overwrite the previous value.
API reference: https://docs.pinecone.io/reference/update
Examples:
>>> index.update(id='id1', values=[1, 2, 3], namespace='my_namespace') >>> index.update(id='id1', set_metadata={'key': 'value'}, namespace='my_namespace') >>> index.update(id='id1', values=[1, 2, 3], sparse_values={'indices': [1, 2], 'values': [0.2, 0.4]}, >>> namespace='my_namespace') >>> index.update(id='id1', values=[1, 2, 3], sparse_values=SparseValues(indices=[1, 2], values=[0.2, 0.4]), >>> namespace='my_namespace')
Arguments:
- id (str): Vector's unique id.
- values (List[float]): vector values to set. [optional]
- set_metadata (Dict[str, Union[str, float, int, bool, List[int], List[float], List[str]]]): metadata to set for vector. [optional]
- namespace (str): Namespace name where to update the vector. [optional]
- sparse_values: (Dict[str, Union[List[float], List[int]]]): sparse values to update for the vector. Expected to be either a SparseValues object or a dict of the form: {'indices': List[int], 'values': List[float]} where the lists each have the same length.
Returns: An empty dictionary if the update was successful.
@abstractmethod
def describe_index_stats(
    self, filter: Optional[FilterTypedDict] = None, **kwargs
) -> DescribeIndexStatsResponse:
    """
    The DescribeIndexStats operation returns statistics about the index's contents.
    For example: The vector count per namespace and the number of dimensions.

    API reference: https://docs.pinecone.io/reference/describe_index_stats_post

    Examples:
        >>> index.describe_index_stats()
        >>> index.describe_index_stats(filter={'key': 'value'})

    Args:
        filter (Dict[str, Union[str, float, int, bool, List, dict]]):
            If this parameter is present, the operation only returns statistics for vectors that satisfy the filter.
            See https://www.pinecone.io/docs/metadata-filtering/. [optional]

    Returns: DescribeIndexStatsResponse object which contains stats about the index.
    """
    pass
The DescribeIndexStats operation returns statistics about the index's contents. For example: The vector count per namespace and the number of dimensions.
API reference: https://docs.pinecone.io/reference/describe_index_stats_post
Examples:
>>> index.describe_index_stats() >>> index.describe_index_stats(filter={'key': 'value'})
Arguments:
- filter (Dict[str, Union[str, float, int, bool, List, dict]]): If this parameter is present, the operation only returns statistics for vectors that satisfy the filter. See https://www.pinecone.io/docs/metadata-filtering/. [optional]
Returns: DescribeIndexStatsResponse object which contains stats about the index.
@abstractmethod
def list_paginated(
    self,
    prefix: Optional[str] = None,
    limit: Optional[int] = None,
    pagination_token: Optional[str] = None,
    namespace: Optional[str] = None,
    **kwargs,
) -> ListResponse:
    """
    Return one page of vector ids that match an id prefix within a single namespace.

    The list_paginated operation finds vectors based on an id prefix within a single namespace.
    It returns matching ids in a paginated form, with a pagination token to fetch the next page of results.
    This id list can then be passed to fetch or delete operations, depending on your use case.

    Consider using the `list` method to avoid having to handle pagination tokens manually.

    Examples:
        >>> results = index.list_paginated(prefix='99', limit=5, namespace='my_namespace')
        >>> [v.id for v in results.vectors]
        ['99', '990', '991', '992', '993']
        >>> results.pagination.next
        eyJza2lwX3Bhc3QiOiI5OTMiLCJwcmVmaXgiOiI5OSJ9
        >>> next_results = index.list_paginated(prefix='99', limit=5, namespace='my_namespace', pagination_token=results.pagination.next)

    Args:
        prefix (Optional[str]): The id prefix to match. If unspecified, an empty string prefix will
                                be used with the effect of listing all ids in a namespace [optional]
        limit (Optional[int]): The maximum number of ids to return. If unspecified, the server will use a default value. [optional]
        pagination_token (Optional[str]): A token needed to fetch the next page of results. This token is returned
            in the response if additional results are available. [optional]
        namespace (Optional[str]): The namespace to fetch vectors from. If not specified, the default namespace is used. [optional]

    Returns: ListResponse object which contains the list of ids, the namespace name, pagination information, and usage showing the number of read_units consumed.
    """
    pass
The list_paginated operation finds vectors based on an id prefix within a single namespace. It returns matching ids in a paginated form, with a pagination token to fetch the next page of results. This id list can then be passed to fetch or delete operations, depending on your use case.
Consider using the `list` method to avoid having to handle pagination tokens manually.
Examples:
>>> results = index.list_paginated(prefix='99', limit=5, namespace='my_namespace') >>> [v.id for v in results.vectors] ['99', '990', '991', '992', '993'] >>> results.pagination.next eyJza2lwX3Bhc3QiOiI5OTMiLCJwcmVmaXgiOiI5OSJ9 >>> next_results = index.list_paginated(prefix='99', limit=5, namespace='my_namespace', pagination_token=results.pagination.next)
Arguments:
- prefix (Optional[str]): The id prefix to match. If unspecified, an empty string prefix will be used with the effect of listing all ids in a namespace [optional]
- limit (Optional[int]): The maximum number of ids to return. If unspecified, the server will use a default value. [optional]
- pagination_token (Optional[str]): A token needed to fetch the next page of results. This token is returned in the response if additional results are available. [optional]
- namespace (Optional[str]): The namespace to fetch vectors from. If not specified, the default namespace is used. [optional]
Returns: ListResponse object which contains the list of ids, the namespace name, pagination information, and usage showing the number of read_units consumed.
@abstractmethod
def list(self, **kwargs):
    """
    The list operation accepts all of the same arguments as list_paginated, and returns a generator that yields
    a list of the matching vector ids in each page of results. It automatically handles pagination tokens on your
    behalf.

    The interface signature only declares `**kwargs`, so the arguments below are passed by keyword.

    Examples:
        >>> for ids in index.list(prefix='99', limit=5, namespace='my_namespace'):
        ...     print(ids)
        ['99', '990', '991', '992', '993']
        ['994', '995', '996', '997', '998']
        ['999']

    Args:
        prefix (Optional[str]): The id prefix to match. If unspecified, an empty string prefix will
                                be used with the effect of listing all ids in a namespace [optional]
        limit (Optional[int]): The maximum number of ids to return. If unspecified, the server will use a default value. [optional]
        pagination_token (Optional[str]): A token needed to fetch the next page of results. This token is returned
            in the response if additional results are available. [optional]
        namespace (Optional[str]): The namespace to fetch vectors from. If not specified, the default namespace is used. [optional]
    """
    pass
The list operation accepts all of the same arguments as list_paginated, and returns a generator that yields a list of the matching vector ids in each page of results. It automatically handles pagination tokens on your behalf.
Examples:
>>> for ids in index.list(prefix='99', limit=5, namespace='my_namespace'): >>> print(ids) ['99', '990', '991', '992', '993'] ['994', '995', '996', '997', '998'] ['999']
Arguments:
- prefix (Optional[str]): The id prefix to match. If unspecified, an empty string prefix will be used with the effect of listing all ids in a namespace [optional]
- limit (Optional[int]): The maximum number of ids to return. If unspecified, the server will use a default value. [optional]
- pagination_token (Optional[str]): A token needed to fetch the next page of results. This token is returned in the response if additional results are available. [optional]
- namespace (Optional[str]): The namespace to fetch vectors from. If not specified, the default namespace is used. [optional]