pinecone.data.index_asyncio_interface
from abc import ABC, abstractmethod
from typing import Union, List, Optional, Dict, Any

from pinecone.core.openapi.db_data.models import (
    FetchResponse,
    QueryResponse,
    IndexDescription as DescribeIndexStatsResponse,
    UpsertResponse,
    Vector,
    ListResponse,
    SparseValues,
    SearchRecordsResponse,
)
from .query_results_aggregator import QueryNamespacesResults
from .types import (
    VectorTypedDict,
    SparseVectorTypedDict,
    VectorMetadataTypedDict,
    FilterTypedDict,
    VectorTuple,
    VectorTupleWithMetadata,
    SearchQueryTypedDict,
    SearchRerankTypedDict,
)
from .dataclasses import SearchQuery, SearchRerank


class IndexAsyncioInterface(ABC):
    """Abstract asyncio interface for the data operations of a Pinecone index.

    Every method here is an abstract ``async def`` stub whose body is only a
    docstring; concrete subclasses provide the behavior. The docstrings
    describe the contract that implementations are expected to follow.
    """

    @abstractmethod
    async def upsert(
        self,
        vectors: Union[
            List[Vector], List[VectorTuple], List[VectorTupleWithMetadata], List[VectorTypedDict]
        ],
        namespace: Optional[str] = None,
        batch_size: Optional[int] = None,
        show_progress: bool = True,
        **kwargs,
    ) -> UpsertResponse:
        """
        Args:
            vectors (Union[List[Vector], List[VectorTuple], List[VectorTupleWithMetadata], List[VectorTypedDict]]): A list of vectors to upsert.
            namespace (str): The namespace to write to. If not specified, the default namespace is used. [optional]
            batch_size (int): The number of vectors to upsert in each batch.
                               If not specified, all vectors will be upserted in a single batch. [optional]
            show_progress (bool): Whether to show a progress bar using tqdm.
                                  Applied only if batch_size is provided. Default is True.

        Returns:
            `UpsertResponse`, includes the number of vectors upserted.


        The upsert operation writes vectors into a namespace.
        If a new value is upserted for an existing vector id, it will overwrite the previous value.

        To upsert in parallel follow: https://docs.pinecone.io/docs/insert-data#sending-upserts-in-parallel

        ## Upserting dense vectors

        **Note:** the dimension of each dense vector must match the dimension of the index.

        A vector can be represented in a variety of ways.

        ```python
        import asyncio
        from pinecone import Pinecone, Vector

        async def main():
            pc = Pinecone()
            async with pc.IndexAsyncio(host="example-dojoi3u.svc.aped-4627-b74a.pinecone.io") as idx:
                # A Vector object
                await idx.upsert(
                    namespace = 'my-namespace',
                    vectors = [
                        Vector(id='id1', values=[0.1, 0.2, 0.3, 0.4], metadata={'metadata_key': 'metadata_value'}),
                    ]
                )

                # A vector tuple
                await idx.upsert(
                    namespace = 'my-namespace',
                    vectors = [
                        ('id1', [0.1, 0.2, 0.3, 0.4]),
                    ]
                )

                # A vector tuple with metadata
                await idx.upsert(
                    namespace = 'my-namespace',
                    vectors = [
                        ('id1', [0.1, 0.2, 0.3, 0.4], {'metadata_key': 'metadata_value'}),
                    ]
                )

                # A vector dictionary
                await idx.upsert(
                    namespace = 'my-namespace',
                    vectors = [
                        {"id": "id1", "values": [0.1, 0.2, 0.3, 0.4], "metadata": {"metadata_key": "metadata_value"}},
                    ]
                )

        asyncio.run(main())
        ```

        ## Upserting sparse vectors

        ```python
        import asyncio
        from pinecone import Pinecone, Vector, SparseValues

        async def main():
            pc = Pinecone()
            async with pc.IndexAsyncio(host="example-dojoi3u.svc.aped-4627-b74a.pinecone.io") as idx:
                # A Vector object
                await idx.upsert(
                    namespace = 'my-namespace',
                    vectors = [
                        Vector(id='id1', sparse_values=SparseValues(indices=[1, 2], values=[0.2, 0.4])),
                    ]
                )

                # A dictionary
                await idx.upsert(
                    namespace = 'my-namespace',
                    vectors = [
                        {"id": "id1", "sparse_values": {"indices": [1, 2], "values": [0.2, 0.4]}},
                    ]
                )

        asyncio.run(main())
        ```

        ## Batch upsert

        If you have a large number of vectors, you can upsert them in batches.

        ```python
        import asyncio
        from pinecone import Pinecone

        async def main():
            pc = Pinecone()
            async with pc.IndexAsyncio(host="example-dojoi3u.svc.aped-4627-b74a.pinecone.io") as idx:

                await idx.upsert(
                    namespace = 'my-namespace',
                    vectors = [
                        {'id': 'id1', 'values': [0.1, 0.2, 0.3, 0.4]},
                        {'id': 'id2', 'values': [0.2, 0.3, 0.4, 0.5]},
                        {'id': 'id3', 'values': [0.3, 0.4, 0.5, 0.6]},
                        {'id': 'id4', 'values': [0.4, 0.5, 0.6, 0.7]},
                        {'id': 'id5', 'values': [0.5, 0.6, 0.7, 0.8]},
                        # More vectors here
                    ],
                    batch_size = 50
                )

        asyncio.run(main())
        ```

        ## Visual progress bar with tqdm

        To see a progress bar when upserting in batches, you will need to separately install the `tqdm` package.
        If `tqdm` is present, the client will detect and use it to display progress when `show_progress=True`.
        """
        pass

    @abstractmethod
    async def upsert_from_dataframe(
        self, df, namespace: Optional[str] = None, batch_size: int = 500, show_progress: bool = True
    ):
        """This method has not been implemented yet for the IndexAsyncio class."""
        pass

    @abstractmethod
    async def delete(
        self,
        ids: Optional[List[str]] = None,
        delete_all: Optional[bool] = None,
        namespace: Optional[str] = None,
        filter: Optional[FilterTypedDict] = None,
        **kwargs,
    ) -> Dict[str, Any]:
        """
        Args:
            ids (List[str]): Vector ids to delete. [optional]
            delete_all (bool): Indicates that all vectors in the index namespace should be deleted.
                               If omitted, it is treated as False. [optional]
            namespace (str): The namespace to delete vectors from.
                             If not specified, the default namespace is used. [optional]
            filter (Dict[str, Union[str, float, int, bool, List, dict]]):
                    If specified, the metadata filter here will be used to select the vectors to delete.
                    This is mutually exclusive with specifying ids to delete in the ids param or using delete_all=True.
                    See https://www.pinecone.io/docs/metadata-filtering/. [optional]


        The Delete operation deletes vectors from the index, from a single namespace.

        No error is raised if the vector id does not exist.

        Note: For any delete call, if namespace is not specified, the default namespace `""` is used.
        Since the delete operation does not error when ids are not present, this means you may not receive
        an error if you delete from the wrong namespace.

        Delete can occur in the following mutually exclusive ways:
        1. Delete by ids from a single namespace
        2. Delete all vectors from a single namespace by setting delete_all to True
        3. Delete all vectors from a single namespace that match a metadata filter
           (note that for this option delete_all must be set to False)

        API reference: https://docs.pinecone.io/reference/delete_post

        ```python
        import asyncio
        from pinecone import Pinecone

        async def main():
            pc = Pinecone()
            async with pc.IndexAsyncio(host="example-dojoi3u.svc.aped-4627-b74a.pinecone.io") as idx:
                # Delete specific ids
                await idx.delete(
                    ids=['id1', 'id2'],
                    namespace='my_namespace'
                )

                # Delete everything in a namespace
                await idx.delete(
                    delete_all=True,
                    namespace='my_namespace'
                )

                # Delete by metadata filter
                await idx.delete(
                    filter={'key': 'value'},
                    namespace='my_namespace'
                )

        asyncio.run(main())
        ```

        Returns: An empty dictionary if the delete operation was successful.
        """
        pass

    @abstractmethod
    async def fetch(
        self, ids: List[str], namespace: Optional[str] = None, **kwargs
    ) -> FetchResponse:
        """
        The fetch operation looks up and returns vectors, by ID, from a single namespace.
        The returned vectors include the vector data and/or metadata.

        API reference: https://docs.pinecone.io/reference/fetch

        ```python
        import asyncio
        from pinecone import Pinecone

        async def main():
            pc = Pinecone()
            async with pc.IndexAsyncio(host="example-dojoi3u.svc.aped-4627-b74a.pinecone.io") as idx:
                # Fetch specific ids in namespace
                fetched = await idx.fetch(
                    ids=['id1', 'id2'],
                    namespace='my_namespace'
                )
                for vec_id in fetched.vectors:
                    vector = fetched.vectors[vec_id]
                    print(vector.id)
                    print(vector.metadata)
                    print(vector.values)

        asyncio.run(main())
        ```

        Args:
            ids (List[str]): The vector IDs to fetch.
            namespace (str): The namespace to fetch vectors from.
                             If not specified, the default namespace is used. [optional]

        Returns: FetchResponse object which contains the list of Vector objects, and namespace name.
        """
        pass

    @abstractmethod
    async def query(
        self,
        *args,
        top_k: int,
        vector: Optional[List[float]] = None,
        id: Optional[str] = None,
        namespace: Optional[str] = None,
        filter: Optional[FilterTypedDict] = None,
        include_values: Optional[bool] = None,
        include_metadata: Optional[bool] = None,
        sparse_vector: Optional[Union[SparseValues, SparseVectorTypedDict]] = None,
        **kwargs,
    ) -> QueryResponse:
        """
        The Query operation searches a namespace, using a query vector.
        It retrieves the ids of the most similar items in a namespace, along with their similarity scores.

        `top_k` and every other named parameter are declared after `*args`, so they
        must be passed by keyword.

        API reference: https://docs.pinecone.io/reference/query

        ## Querying with dense vectors

        ```python
        import asyncio
        from pinecone import Pinecone

        async def main():
            pc = Pinecone()
            async with pc.IndexAsyncio(host="example-dojoi3u.svc.aped-4627-b74a.pinecone.io") as idx:
                query_embedding = [0.1, 0.2, 0.3, ...] # An embedding that matches the index dimension

                # Query by vector values
                results = await idx.query(
                    vector=query_embedding,
                    top_k=10,
                    filter={'genre': {"$eq": "drama"}}, # Optionally filter by metadata
                    namespace='my_namespace',
                    include_values=False,
                    include_metadata=True
                )

                # Query using vector id (the values from this stored vector will be used to query)
                results = await idx.query(
                    id='1',
                    top_k=10,
                    filter={"year": {"$gt": 2000}},
                    namespace='my_namespace',
                )

        asyncio.run(main())
        ```

        ## Query with sparse vectors

        ```python
        import asyncio
        from pinecone import Pinecone, SparseValues

        async def main():
            pc = Pinecone()
            async with pc.IndexAsyncio(host="example-dojoi3u.svc.aped-4627-b74a.pinecone.io") as idx:
                query_embedding = [0.1, 0.2, 0.3, ...] # An embedding that matches the index dimension

                # Sparse values given as a dictionary
                results = await idx.query(
                    vector=query_embedding,
                    sparse_vector={'indices': [1, 2], 'values': [0.2, 0.4]},
                    top_k=10,
                    namespace='my_namespace',
                )

                # Sparse values given as a SparseValues object
                results = await idx.query(
                    vector=query_embedding,
                    sparse_vector=SparseValues(indices=[1, 2], values=[0.2, 0.4]),
                    top_k=10,
                    namespace='my_namespace',
                )

        asyncio.run(main())
        ```

        ## More call shapes

        ```python
        # Inside a coroutine, with `idx` opened as in the examples above
        await idx.query(vector=[1, 2, 3], top_k=10, namespace='my_namespace')
        await idx.query(id='id1', top_k=10, namespace='my_namespace')
        await idx.query(vector=[1, 2, 3], top_k=10, namespace='my_namespace', filter={'key': 'value'})
        await idx.query(id='id1', top_k=10, namespace='my_namespace', include_metadata=True, include_values=True)
        ```

        Args:
            vector (List[float]): The query vector. This should be the same length as the dimension of the index
                                  being queried. Each `query()` request can contain only one of the parameters
                                  `id` or `vector`. [optional]
            id (str): The unique ID of the vector to be used as a query vector.
                      Each `query()` request can contain only one of the parameters
                      `vector` or `id`. [optional]
            top_k (int): The number of results to return for each query. Must be an integer greater than or equal to 1.
            namespace (str): The namespace to fetch vectors from.
                             If not specified, the default namespace is used. [optional]
            filter (Dict[str, Union[str, float, int, bool, List, dict]]):
                    The filter to apply. You can use vector metadata to limit your search.
                    See https://www.pinecone.io/docs/metadata-filtering/. [optional]
            include_values (bool): Indicates whether vector values are included in the response.
                                   If omitted the server will use the default value of False. [optional]
            include_metadata (bool): Indicates whether metadata is included in the response as well as the ids.
                                     If omitted the server will use the default value of False. [optional]
            sparse_vector (Union[SparseValues, Dict[str, Union[List[float], List[int]]]]): Sparse values of the query vector.
                           Expected to be either a SparseValues object or a dict of the form:
                           {'indices': List[int], 'values': List[float]}, where the lists each have the same length. [optional]

        Returns: QueryResponse object which contains the list of the closest vectors as ScoredVector objects,
                 and namespace name.
        """
        pass

    @abstractmethod
    async def query_namespaces(
        self,
        namespaces: List[str],
        top_k: Optional[int] = None,
        filter: Optional[FilterTypedDict] = None,
        include_values: Optional[bool] = None,
        include_metadata: Optional[bool] = None,
        vector: Optional[List[float]] = None,
        sparse_vector: Optional[Union[SparseValues, SparseVectorTypedDict]] = None,
        **kwargs,
    ) -> QueryNamespacesResults:
        """The query_namespaces() method is used to make a query to multiple namespaces in parallel and combine the results into one result set.

        Args:
            vector (List[float]): The query vector, must be the same length as the dimension of the index being queried.
            namespaces (List[str]): The list of namespaces to query.
            top_k (Optional[int], optional): The number of results you would like to request from each namespace. If not provided, a default of 10 is used.
            filter (Optional[Dict[str, Union[str, float, int, bool, List, dict]]], optional): Pass an optional filter to filter results based on metadata. Defaults to None.
            include_values (Optional[bool], optional): Boolean field indicating whether vector values should be included with results. Defaults to None.
            include_metadata (Optional[bool], optional): Boolean field indicating whether vector metadata should be included with results. Defaults to None.
            sparse_vector (Optional[ Union[SparseValues, Dict[str, Union[List[float], List[int]]]] ], optional): If you are working with a dotproduct index, you can pass a sparse vector as part of your hybrid search. Defaults to None.

        Returns:
            QueryNamespacesResults: A QueryNamespacesResults object containing the combined results from all namespaces, as well as the combined usage cost in read units.

        Examples:

        ```python
        import asyncio
        from pinecone import Pinecone

        async def main():
            pc = Pinecone(api_key="your-api-key")
            idx = pc.IndexAsyncio(
                host="example-dojoi3u.svc.aped-4627-b74a.pinecone.io",
            )

            query_vec = [0.1, 0.2, 0.3] # An embedding that matches the index dimension
            combined_results = await idx.query_namespaces(
                vector=query_vec,
                namespaces=['ns1', 'ns2', 'ns3', 'ns4'],
                top_k=10,
                filter={'genre': {"$eq": "drama"}},
                include_values=True,
                include_metadata=True
            )
            for vec in combined_results.matches:
                print(vec.id, vec.score)
            print(combined_results.usage)

            await idx.close()

        asyncio.run(main())
        ```
        """
        pass

    @abstractmethod
    async def update(
        self,
        id: str,
        values: Optional[List[float]] = None,
        set_metadata: Optional[VectorMetadataTypedDict] = None,
        namespace: Optional[str] = None,
        sparse_values: Optional[Union[SparseValues, SparseVectorTypedDict]] = None,
        **kwargs,
    ) -> Dict[str, Any]:
        """
        The Update operation updates a vector in a namespace.

        Args:
            id (str): Vector's unique id.
            values (List[float]): Vector values to set. [optional]
            set_metadata (Dict[str, Union[str, float, int, bool, List[int], List[float], List[str]]]):
                Metadata to set for the vector. [optional]
            namespace (str): Namespace name where to update the vector. [optional]
            sparse_values (Union[SparseValues, Dict[str, Union[List[float], List[int]]]]): Sparse values to update for the vector.
                           Expected to be either a SparseValues object or a dict of the form:
                           {'indices': List[int], 'values': List[float]} where the lists each have the same length. [optional]

        If a value is included, it will overwrite the previous value.
        If set_metadata is included,
        the values of the fields specified in it will be added or overwrite the previous value.

        API reference: https://docs.pinecone.io/reference/update

        Examples:
        ```python
        import asyncio
        from pinecone import Pinecone, SparseValues

        async def main():
            pc = Pinecone()
            async with pc.IndexAsyncio(host="example-dojoi3u.svc.aped-4627-b74a.pinecone.io") as idx:
                # Update vector values
                await idx.update(
                    id='id1',
                    values=[0.1, 0.2, 0.3, ...],
                    namespace='my_namespace'
                )

                # Update metadata
                await idx.update(
                    id='id1',
                    set_metadata={'key': 'value'},
                    namespace='my_namespace'
                )

                # Update sparse values
                await idx.update(
                    id='id1',
                    sparse_values={'indices': [1, 2], 'values': [0.2, 0.4]},
                    namespace='my_namespace'
                )

                # Update sparse values with SparseValues object
                await idx.update(
                    id='id1',
                    sparse_values=SparseValues(indices=[234781, 5432], values=[0.2, 0.4]),
                    namespace='my_namespace'
                )

        asyncio.run(main())
        ```

        """
        pass

    @abstractmethod
    async def describe_index_stats(
        self, filter: Optional[FilterTypedDict] = None, **kwargs
    ) -> DescribeIndexStatsResponse:
        """
        The DescribeIndexStats operation returns statistics about the index's contents.
        For example: The vector count per namespace and the number of dimensions.

        API reference: https://docs.pinecone.io/reference/describe_index_stats_post

        Args:
            filter (Dict[str, Union[str, float, int, bool, List, dict]]):
                If this parameter is present, the operation only returns statistics for vectors that satisfy the filter.
                See https://www.pinecone.io/docs/metadata-filtering/. [optional]

        Returns: DescribeIndexStatsResponse object which contains stats about the index.

        ```python
        import asyncio
        from pinecone import Pinecone

        async def main():
            pc = Pinecone()
            async with pc.IndexAsyncio(host="example-dojoi3u.svc.aped-4627-b74a.pinecone.io") as idx:
                print(await idx.describe_index_stats())

        asyncio.run(main())
        ```
        """
        pass

    @abstractmethod
    async def list_paginated(
        self,
        prefix: Optional[str] = None,
        limit: Optional[int] = None,
        pagination_token: Optional[str] = None,
        namespace: Optional[str] = None,
        **kwargs,
    ) -> ListResponse:
        """
        The list_paginated operation finds vectors based on an id prefix within a single namespace.
        It returns matching ids in a paginated form, with a pagination token to fetch the next page of results.
        This id list can then be passed to fetch or delete operations, depending on your use case.

        Consider using the `list` method to avoid having to handle pagination tokens manually.

        Examples:

        ```python
        # Inside a coroutine, with `idx` being an open index client
        results = await idx.list_paginated(prefix='99', limit=5, namespace='my_namespace')
        [v.id for v in results.vectors]
        # ['99', '990', '991', '992', '993']
        results.pagination.next
        # eyJza2lwX3Bhc3QiOiI5OTMiLCJwcmVmaXgiOiI5OSJ9
        next_results = await idx.list_paginated(
            prefix='99',
            limit=5,
            namespace='my_namespace',
            pagination_token=results.pagination.next,
        )
        ```

        Args:
            prefix (Optional[str]): The id prefix to match. If unspecified, an empty string prefix will
                                    be used with the effect of listing all ids in a namespace. [optional]
            limit (Optional[int]): The maximum number of ids to return. If unspecified, the server will use a default value. [optional]
            pagination_token (Optional[str]): A token needed to fetch the next page of results. This token is returned
                in the response if additional results are available. [optional]
            namespace (Optional[str]): The namespace to fetch vectors from. If not specified, the default namespace is used. [optional]

        Returns: ListResponse object which contains the list of ids, the namespace name, pagination information, and usage showing the number of read_units consumed.
        """
        pass

    @abstractmethod
    async def list(self, **kwargs):
        """
        The list operation accepts all of the same arguments as list_paginated, and returns an asynchronous
        generator that yields a list of the matching vector ids in each page of results. It automatically
        handles pagination tokens on your behalf.

        Examples:

        ```python
        # Inside a coroutine, with `idx` being an open index client
        async for ids in idx.list(prefix='99', limit=5, namespace='my_namespace'):
            print(ids)
        # ['99', '990', '991', '992', '993']
        # ['994', '995', '996', '997', '998']
        # ['999']
        ```

        Args:
            prefix (Optional[str]): The id prefix to match. If unspecified, an empty string prefix will
                                    be used with the effect of listing all ids in a namespace. [optional]
            limit (Optional[int]): The maximum number of ids to return. If unspecified, the server will use a default value. [optional]
            pagination_token (Optional[str]): A token needed to fetch the next page of results. This token is returned
                in the response if additional results are available. [optional]
            namespace (Optional[str]): The namespace to fetch vectors from. If not specified, the default namespace is used. [optional]
        """
        pass

    @abstractmethod
    async def upsert_records(self, namespace: str, records: List[Dict]):
        """
        :param namespace: The namespace of the index to upsert records to.
        :type namespace: str, required
        :param records: The records to upsert into the index.
        :type records: List[Dict], required

        Upsert records to a namespace. A record is a dictionary that contains either an `id` or `_id`
        field along with other fields that will be stored as metadata. The `id` or `_id` field is used
        as the unique identifier for the record. At least one field in the record should correspond to
        a field mapping in the index's embed configuration.

        When records are upserted, Pinecone converts mapped fields into embeddings and upserts them into
        the specified namespace of the index.

        ```python
        import asyncio
        from pinecone import (
            Pinecone,
            CloudProvider,
            AwsRegion,
            EmbedModel,
            IndexEmbed
        )

        async def main():
            pc = Pinecone()
            async with pc.IndexAsyncio(host="example-dojoi3u.svc.aped-4627-b74a.pinecone.io") as idx:
                # upsert records
                await idx.upsert_records(
                    namespace="my-namespace",
                    records=[
                        {
                            "_id": "test1",
                            "my_text_field": "Apple is a popular fruit known for its sweetness and crisp texture.",
                        },
                        {
                            "_id": "test2",
                            "my_text_field": "The tech company Apple is known for its innovative products like the iPhone.",
                        },
                        {
                            "_id": "test3",
                            "my_text_field": "Many people enjoy eating apples as a healthy snack.",
                        },
                        {
                            "_id": "test4",
                            "my_text_field": "Apple Inc. has revolutionized the tech industry with its sleek designs and user-friendly interfaces.",
                        },
                        {
                            "_id": "test5",
                            "my_text_field": "An apple a day keeps the doctor away, as the saying goes.",
                        },
                        {
                            "_id": "test6",
                            "my_text_field": "Apple Computer Company was founded on April 1, 1976, by Steve Jobs, Steve Wozniak, and Ronald Wayne as a partnership.",
                        },
                    ],
                )

                from pinecone import SearchQuery, SearchRerank, RerankModel

                # search for similar records
                response = await idx.search_records(
                    namespace="my-namespace",
                    query=SearchQuery(
                        inputs={
                            "text": "Apple corporation",
                        },
                        top_k=3,
                    ),
                    rerank=SearchRerank(
                        model=RerankModel.Bge_Reranker_V2_M3,
                        rank_fields=["my_text_field"],
                        top_n=3,
                    ),
                )

        asyncio.run(main())
        ```
        """
        pass

    @abstractmethod
    async def search(
        self,
        namespace: str,
        query: Union[SearchQueryTypedDict, SearchQuery],
        rerank: Optional[Union[SearchRerankTypedDict, SearchRerank]] = None,
        # NOTE: this default list object is shared between calls (mutable default),
        # so implementations must treat it as read-only.
        fields: Optional[List[str]] = ["*"],  # Default to returning all fields
    ) -> SearchRecordsResponse:
        """
        :param namespace: The namespace in the index to search.
        :type namespace: str, required
        :param query: The SearchQuery to use for the search.
        :type query: Union[Dict, SearchQuery], required
        :param rerank: The SearchRerank to use with the search request.
        :type rerank: Union[Dict, SearchRerank], optional
        :param fields: The record fields to return. Defaults to `["*"]`, which returns all fields.
        :type fields: List[str], optional
        :return: The records that match the search.

        Search for records.

        This operation converts a query to a vector embedding and then searches a namespace. You
        can optionally provide a reranking operation as part of the search.

        ```python
        import asyncio
        from pinecone import (
            Pinecone,
            SearchQuery,
            SearchRerank,
            RerankModel
        )

        async def main():
            pc = Pinecone()
            async with pc.IndexAsyncio(host="example-dojoi3u.svc.aped-4627-b74a.pinecone.io") as idx:
                # upsert records
                await idx.upsert_records(
                    namespace="my-namespace",
                    records=[
                        {
                            "_id": "test1",
                            "my_text_field": "Apple is a popular fruit known for its sweetness and crisp texture.",
                        },
                        {
                            "_id": "test2",
                            "my_text_field": "The tech company Apple is known for its innovative products like the iPhone.",
                        },
                        {
                            "_id": "test3",
                            "my_text_field": "Many people enjoy eating apples as a healthy snack.",
                        },
                    ],
                )

                # search for similar records
                response = await idx.search(
                    namespace="my-namespace",
                    query=SearchQuery(
                        inputs={
                            "text": "Apple corporation",
                        },
                        top_k=3,
                    ),
                    rerank=SearchRerank(
                        model=RerankModel.Bge_Reranker_V2_M3,
                        rank_fields=["my_text_field"],
                        top_n=3,
                    ),
                )

        asyncio.run(main())
        ```

        """
        pass

    @abstractmethod
    async def search_records(
        self,
        namespace: str,
        query: Union[SearchQueryTypedDict, SearchQuery],
        rerank: Optional[Union[SearchRerankTypedDict, SearchRerank]] = None,
        # NOTE: this default list object is shared between calls (mutable default),
        # so implementations must treat it as read-only.
        fields: Optional[List[str]] = ["*"],  # Default to returning all fields
    ) -> SearchRecordsResponse:
        """Alias of the search() method."""
        pass
29class IndexAsyncioInterface(ABC): 30 @abstractmethod 31 async def upsert( 32 self, 33 vectors: Union[ 34 List[Vector], List[VectorTuple], List[VectorTupleWithMetadata], List[VectorTypedDict] 35 ], 36 namespace: Optional[str] = None, 37 batch_size: Optional[int] = None, 38 show_progress: bool = True, 39 **kwargs, 40 ) -> UpsertResponse: 41 """ 42 Args: 43 vectors (Union[List[Vector], List[VectorTuple], List[VectorTupleWithMetadata], List[VectorTypedDict]]): A list of vectors to upsert. 44 namespace (str): The namespace to write to. If not specified, the default namespace is used. [optional] 45 batch_size (int): The number of vectors to upsert in each batch. 46 If not specified, all vectors will be upserted in a single batch. [optional] 47 show_progress (bool): Whether to show a progress bar using tqdm. 48 Applied only if batch_size is provided. Default is True. 49 50 Returns: 51 `UpsertResponse`, includes the number of vectors upserted. 52 53 54 The upsert operation writes vectors into a namespace. 55 If a new value is upserted for an existing vector id, it will overwrite the previous value. 56 57 To upsert in parallel follow: https://docs.pinecone.io/docs/insert-data#sending-upserts-in-parallel 58 59 ## Upserting dense vectors 60 61 **Note:** the dimension of each dense vector must match the dimension of the index. 62 63 A vector can be represented in a variety of ways. 
64 65 ```python 66 import asyncio 67 from pinecone import Pinecone, Vector 68 69 async def main(): 70 pc = Pinecone() 71 async with pc.IndexAsyncio(host="example-dojoi3u.svc.aped-4627-b74a.pinecone.io") as idx: 72 # A Vector object 73 await idx.upsert( 74 namespace = 'my-namespace', 75 vectors = [ 76 Vector(id='id1', values=[0.1, 0.2, 0.3, 0.4], metadata={'metadata_key': 'metadata_value'}), 77 ] 78 ) 79 80 # A vector tuple 81 await idx.upsert( 82 namespace = 'my-namespace', 83 vectors = [ 84 ('id1', [0.1, 0.2, 0.3, 0.4]), 85 ] 86 ) 87 88 # A vector tuple with metadata 89 await idx.upsert( 90 namespace = 'my-namespace', 91 vectors = [ 92 ('id1', [0.1, 0.2, 0.3, 0.4], {'metadata_key': 'metadata_value'}), 93 ] 94 ) 95 96 # A vector dictionary 97 await idx.upsert( 98 namespace = 'my-namespace', 99 vectors = [ 100 {"id": 1, "values": [0.1, 0.2, 0.3, 0.4], "metadata": {"metadata_key": "metadata_value"}}, 101 ] 102 103 asyncio.run(main()) 104 ``` 105 106 ## Upserting sparse vectors 107 108 ```python 109 import asyncio 110 from pinecone import Pinecone, Vector, SparseValues 111 112 async def main(): 113 pc = Pinecone() 114 async with pc.IndexAsyncio(host="example-dojoi3u.svc.aped-4627-b74a.pinecone.io") as idx: 115 # A Vector object 116 await idx.upsert( 117 namespace = 'my-namespace', 118 vectors = [ 119 Vector(id='id1', sparse_values=SparseValues(indices=[1, 2], values=[0.2, 0.4])), 120 ] 121 ) 122 123 # A dictionary 124 await idx.upsert( 125 namespace = 'my-namespace', 126 vectors = [ 127 {"id": 1, "sparse_values": {"indices": [1, 2], "values": [0.2, 0.4]}}, 128 ] 129 ) 130 131 asyncio.run(main()) 132 ``` 133 134 ## Batch upsert 135 136 If you have a large number of vectors, you can upsert them in batches. 
137 138 ```python 139 import asyncio 140 from pinecone import Pinecone, Vector, SparseValues 141 142 async def main(): 143 pc = Pinecone() 144 async with pc.IndexAsyncio(host="example-dojoi3u.svc.aped-4627-b74a.pinecone.io") as idx: 145 146 await idx.upsert( 147 namespace = 'my-namespace', 148 vectors = [ 149 {'id': 'id1', 'values': [0.1, 0.2, 0.3, 0.4]}, 150 {'id': 'id2', 'values': [0.2, 0.3, 0.4, 0.5]}, 151 {'id': 'id3', 'values': [0.3, 0.4, 0.5, 0.6]}, 152 {'id': 'id4', 'values': [0.4, 0.5, 0.6, 0.7]}, 153 {'id': 'id5', 'values': [0.5, 0.6, 0.7, 0.8]}, 154 # More vectors here 155 ], 156 batch_size = 50 157 ) 158 159 asyncio.run(main()) 160 ``` 161 162 ## Visual progress bar with tqdm 163 164 To see a progress bar when upserting in batches, you will need to separately install the `tqdm` package. 165 If `tqdm` is present, the client will detect and use it to display progress when `show_progress=True`. 166 """ 167 pass 168 169 @abstractmethod 170 async def upsert_from_dataframe( 171 self, df, namespace: Optional[str] = None, batch_size: int = 500, show_progress: bool = True 172 ): 173 """This method has not been implemented yet for the IndexAsyncio class.""" 174 pass 175 176 @abstractmethod 177 async def delete( 178 self, 179 ids: Optional[List[str]] = None, 180 delete_all: Optional[bool] = None, 181 namespace: Optional[str] = None, 182 filter: Optional[FilterTypedDict] = None, 183 **kwargs, 184 ) -> Dict[str, Any]: 185 """ 186 Args: 187 ids (List[str]): Vector ids to delete [optional] 188 delete_all (bool): This indicates that all vectors in the index namespace should be deleted.. [optional] 189 Default is False. 190 namespace (str): The namespace to delete vectors from [optional] 191 If not specified, the default namespace is used. 192 filter (Dict[str, Union[str, float, int, bool, List, dict]]): 193 If specified, the metadata filter here will be used to select the vectors to delete. 
194 This is mutually exclusive with specifying ids to delete in the ids param or using delete_all=True. 195 See https://www.pinecone.io/docs/metadata-filtering/.. [optional] 196 197 198 The Delete operation deletes vectors from the index, from a single namespace. 199 200 No error is raised if the vector id does not exist. 201 202 Note: For any delete call, if namespace is not specified, the default namespace `""` is used. 203 Since the delete operation does not error when ids are not present, this means you may not receive 204 an error if you delete from the wrong namespace. 205 206 Delete can occur in the following mutual exclusive ways: 207 1. Delete by ids from a single namespace 208 2. Delete all vectors from a single namespace by setting delete_all to True 209 3. Delete all vectors from a single namespace by specifying a metadata filter 210 (note that for this option delete all must be set to False) 211 212 API reference: https://docs.pinecone.io/reference/delete_post 213 214 ```python 215 import asyncio 216 from pinecone import Pinecone, Vector, SparseValues 217 218 async def main(): 219 pc = Pinecone() 220 async with pc.IndexAsyncio(host="example-dojoi3u.svc.aped-4627-b74a.pinecone.io") as idx: 221 # Delete specific ids 222 await idx.delete( 223 ids=['id1', 'id2'], 224 namespace='my_namespace' 225 ) 226 227 # Delete everything in a namespace 228 await idx.delete( 229 delete_all=True, 230 namespace='my_namespace' 231 ) 232 233 # Delete by metadata filter 234 await idx.delete( 235 filter={'key': 'value'}, 236 namespace='my_namespace' 237 ) 238 239 asyncio.run(main()) 240 ``` 241 242 Returns: An empty dictionary if the delete operation was successful. 243 """ 244 pass 245 246 @abstractmethod 247 async def fetch( 248 self, ids: List[str], namespace: Optional[str] = None, **kwargs 249 ) -> FetchResponse: 250 """ 251 The fetch operation looks up and returns vectors, by ID, from a single namespace. 252 The returned vectors include the vector data and/or metadata. 
253 254 API reference: https://docs.pinecone.io/reference/fetch 255 256 ``` 257 import asyncio 258 from pinecone import Pinecone, Vector, SparseValues 259 260 async def main(): 261 pc = Pinecone() 262 async with pc.IndexAsyncio(host="example-dojoi3u.svc.aped-4627-b74a.pinecone.io") as idx: 263 # Fetch specific ids in namespace 264 fetched = await idx.fetch( 265 ids=['id1', 'id2'], 266 namespace='my_namespace' 267 ) 268 for vec_id in fetched.vectors: 269 vector = fetched.vectors[vec_id] 270 print(vector.id) 271 print(vector.metadata) 272 print(vector.values) 273 274 asyncio.run(main()) 275 ``` 276 277 Args: 278 ids (List[str]): The vector IDs to fetch. 279 namespace (str): The namespace to fetch vectors from. 280 If not specified, the default namespace is used. [optional] 281 282 Returns: FetchResponse object which contains the list of Vector objects, and namespace name. 283 """ 284 pass 285 286 @abstractmethod 287 async def query( 288 self, 289 *args, 290 top_k: int, 291 vector: Optional[List[float]] = None, 292 id: Optional[str] = None, 293 namespace: Optional[str] = None, 294 filter: Optional[FilterTypedDict] = None, 295 include_values: Optional[bool] = None, 296 include_metadata: Optional[bool] = None, 297 sparse_vector: Optional[Union[SparseValues, SparseVectorTypedDict]] = None, 298 **kwargs, 299 ) -> QueryResponse: 300 """ 301 The Query operation searches a namespace, using a query vector. 302 It retrieves the ids of the most similar items in a namespace, along with their similarity scores. 303 304 API reference: https://docs.pinecone.io/reference/query 305 306 ## Querying with dense vectors 307 308 ```python 309 import asyncio 310 from pinecone import Pinecone, Vector, SparseValues 311 312 async def main(): 313 pc = Pinecone() 314 async with pc.IndexAsyncio(host="example-dojoi3u.svc.aped-4627-b74a.pinecone.io") as idx: 315 query_embedding = [0.1, 0.2, 0.3, ...] 
# An embedding that matches the index dimension 316 317 # Query by vector values 318 results = await idx.query( 319 vector=query_embedding, 320 top_k=10, 321 filter={'genre': {"$eq": "drama"}}, # Optionally filter by metadata 322 namespace='my_namespace', 323 include_values=False, 324 include_metadata=True 325 ) 326 327 # Query using vector id (the values from this stored vector will be used to query) 328 results = await idx.query( 329 id='1', 330 top_k=10, 331 filter={"year": {"$gt": 2000}}, 332 namespace='my_namespace', 333 ) 334 335 asyncio.run(main()) 336 ``` 337 338 ## Query with sparse vectors 339 340 ```python 341 import asyncio 342 from pinecone import Pinecone, Vector, SparseValues 343 344 async def main(): 345 pc = Pinecone() 346 async with pc.IndexAsyncio(host="example-dojoi3u.svc.aped-4627-b74a.pinecone.io") as idx: 347 query_embedding = [0.1, 0.2, 0.3, ...] # An embedding that matches the index dimension 348 349 # Query by vector values 350 results = await idx.query( 351 vector=query_embedding, 352 top_k=10, 353 filter={'genre': {"$eq": "drama"}}, # Optionally filter by metadata 354 namespace='my_namespace', 355 include_values=False, 356 include_metadata=True 357 ) 358 359 # Query using vector id (the values from this stored vector will be used to query) 360 results = await idx.query( 361 id='1', 362 top_k=10, 363 filter={"year": {"$gt": 2000}}, 364 namespace='my_namespace', 365 ) 366 367 asyncio.run(main()) 368 ``` 369 370 Examples: 371 >>> index.query(vector=[1, 2, 3], top_k=10, namespace='my_namespace') 372 >>> index.query(id='id1', top_k=10, namespace='my_namespace') 373 >>> index.query(vector=[1, 2, 3], top_k=10, namespace='my_namespace', filter={'key': 'value'}) 374 >>> index.query(id='id1', top_k=10, namespace='my_namespace', include_metadata=True, include_values=True) 375 >>> index.query(vector=[1, 2, 3], sparse_vector={'indices': [1, 2], 'values': [0.2, 0.4]}, 376 >>> top_k=10, namespace='my_namespace') 377 >>> index.query(vector=[1, 2, 3], 
sparse_vector=SparseValues([1, 2], [0.2, 0.4]), 378 >>> top_k=10, namespace='my_namespace') 379 380 Args: 381 vector (List[float]): The query vector. This should be the same length as the dimension of the index 382 being queried. Each `query()` request can contain only one of the parameters 383 `id` or `vector`.. [optional] 384 id (str): The unique ID of the vector to be used as a query vector. 385 Each `query()` request can contain only one of the parameters 386 `vector` or `id`. [optional] 387 top_k (int): The number of results to return for each query. Must be an integer greater than 1. 388 namespace (str): The namespace to fetch vectors from. 389 If not specified, the default namespace is used. [optional] 390 filter (Dict[str, Union[str, float, int, bool, List, dict]): 391 The filter to apply. You can use vector metadata to limit your search. 392 See https://www.pinecone.io/docs/metadata-filtering/.. [optional] 393 include_values (bool): Indicates whether vector values are included in the response. 394 If omitted the server will use the default value of False [optional] 395 include_metadata (bool): Indicates whether metadata is included in the response as well as the ids. 396 If omitted the server will use the default value of False [optional] 397 sparse_vector: (Union[SparseValues, Dict[str, Union[List[float], List[int]]]]): sparse values of the query vector. 398 Expected to be either a SparseValues object or a dict of the form: 399 {'indices': List[int], 'values': List[float]}, where the lists each have the same length. 400 401 Returns: QueryResponse object which contains the list of the closest vectors as ScoredVector objects, 402 and namespace name. 
403 """ 404 pass 405 406 @abstractmethod 407 async def query_namespaces( 408 self, 409 namespaces: List[str], 410 top_k: Optional[int] = None, 411 filter: Optional[FilterTypedDict] = None, 412 include_values: Optional[bool] = None, 413 include_metadata: Optional[bool] = None, 414 vector: Optional[List[float]] = None, 415 sparse_vector: Optional[Union[SparseValues, SparseVectorTypedDict]] = None, 416 **kwargs, 417 ) -> QueryNamespacesResults: 418 """The query_namespaces() method is used to make a query to multiple namespaces in parallel and combine the results into one result set. 419 420 Args: 421 vector (List[float]): The query vector, must be the same length as the dimension of the index being queried. 422 namespaces (List[str]): The list of namespaces to query. 423 top_k (Optional[int], optional): The number of results you would like to request from each namespace. Defaults to 10. 424 filter (Optional[Dict[str, Union[str, float, int, bool, List, dict]]], optional): Pass an optional filter to filter results based on metadata. Defaults to None. 425 include_values (Optional[bool], optional): Boolean field indicating whether vector values should be included with results. Defaults to None. 426 include_metadata (Optional[bool], optional): Boolean field indicating whether vector metadata should be included with results. Defaults to None. 427 sparse_vector (Optional[ Union[SparseValues, Dict[str, Union[List[float], List[int]]]] ], optional): If you are working with a dotproduct index, you can pass a sparse vector as part of your hybrid search. Defaults to None. 428 429 Returns: 430 QueryNamespacesResults: A QueryNamespacesResults object containing the combined results from all namespaces, as well as the combined usage cost in read units. 
431 432 Examples: 433 434 ```python 435 import asyncio 436 from pinecone import Pinecone 437 438 async def main(): 439 pc = Pinecone(api_key="your-api-key") 440 idx = pc.IndexAsyncio( 441 host="example-dojoi3u.svc.aped-4627-b74a.pinecone.io", 442 ) 443 444 query_vec = [0.1, 0.2, 0.3] # An embedding that matches the index dimension 445 combined_results = await idx.query_namespaces( 446 vector=query_vec, 447 namespaces=['ns1', 'ns2', 'ns3', 'ns4'], 448 top_k=10, 449 filter={'genre': {"$eq": "drama"}}, 450 include_values=True, 451 include_metadata=True 452 ) 453 for vec in combined_results.matches: 454 print(vec.id, vec.score) 455 print(combined_results.usage) 456 457 await idx.close() 458 459 asyncio.run(main()) 460 ``` 461 """ 462 pass 463 464 @abstractmethod 465 async def update( 466 self, 467 id: str, 468 values: Optional[List[float]] = None, 469 set_metadata: Optional[VectorMetadataTypedDict] = None, 470 namespace: Optional[str] = None, 471 sparse_values: Optional[Union[SparseValues, SparseVectorTypedDict]] = None, 472 **kwargs, 473 ) -> Dict[str, Any]: 474 """ 475 The Update operation updates vector in a namespace. 476 477 Args: 478 id (str): Vector's unique id. 479 values (List[float]): vector values to set. [optional] 480 set_metadata (Dict[str, Union[str, float, int, bool, List[int], List[float], List[str]]]]): 481 metadata to set for vector. [optional] 482 namespace (str): Namespace name where to update the vector.. [optional] 483 sparse_values: (Dict[str, Union[List[float], List[int]]]): sparse values to update for the vector. 484 Expected to be either a SparseValues object or a dict of the form: 485 {'indices': List[int], 'values': List[float]} where the lists each have the same length. 486 487 If a value is included, it will overwrite the previous value. 488 If a set_metadata is included, 489 the values of the fields specified in it will be added or overwrite the previous value. 
490 491 API reference: https://docs.pinecone.io/reference/update 492 493 Examples: 494 ```python 495 import asyncio 496 from pinecone import Pinecone, Vector, SparseValues 497 498 async def main(): 499 pc = Pinecone() 500 async with pc.IndexAsyncio(host="example-dojoi3u.svc.aped-4627-b74a.pinecone.io") as idx: 501 # Update vector values 502 await idx.update( 503 id='id1', 504 values=[0.1, 0.2, 0.3, ...], 505 namespace='my_namespace' 506 ) 507 508 # Update metadata 509 await idx.update( 510 id='id1', 511 set_metadata={'key': 'value'}, 512 namespace='my_namespace' 513 ) 514 515 # Update sparse values 516 await idx.update( 517 id='id1', 518 sparse_values={'indices': [1, 2], 'values': [0.2, 0.4]}, 519 namespace='my_namespace' 520 ) 521 522 # Update sparse values with SparseValues object 523 await idx.update( 524 id='id1', 525 sparse_values=SparseValues(indices=[234781, 5432], values=[0.2, 0.4]), 526 namespace='my_namespace' 527 ) 528 529 asyncio.run(main()) 530 ``` 531 532 """ 533 pass 534 535 @abstractmethod 536 async def describe_index_stats( 537 self, filter: Optional[FilterTypedDict] = None, **kwargs 538 ) -> DescribeIndexStatsResponse: 539 """ 540 The DescribeIndexStats operation returns statistics about the index's contents. 541 For example: The vector count per namespace and the number of dimensions. 542 543 API reference: https://docs.pinecone.io/reference/describe_index_stats_post 544 545 Args: 546 filter (Dict[str, Union[str, float, int, bool, List, dict]]): 547 If this parameter is present, the operation only returns statistics for vectors that satisfy the filter. 548 See https://www.pinecone.io/docs/metadata-filtering/.. [optional] 549 550 Returns: DescribeIndexStatsResponse object which contains stats about the index. 
551 552 ```python 553 import asyncio 554 from pinecone import Pinecone, Vector, SparseValues 555 556 async def main(): 557 pc = Pinecone() 558 async with pc.IndexAsyncio(host="example-dojoi3u.svc.aped-4627-b74a.pinecone.io") as idx: 559 print(await idx.describe_index_stats()) 560 561 asyncio.run(main()) 562 ``` 563 """ 564 pass 565 566 @abstractmethod 567 async def list_paginated( 568 self, 569 prefix: Optional[str] = None, 570 limit: Optional[int] = None, 571 pagination_token: Optional[str] = None, 572 namespace: Optional[str] = None, 573 **kwargs, 574 ) -> ListResponse: 575 """ 576 The list_paginated operation finds vectors based on an id prefix within a single namespace. 577 It returns matching ids in a paginated form, with a pagination token to fetch the next page of results. 578 This id list can then be passed to fetch or delete operations, depending on your use case. 579 580 Consider using the `list` method to avoid having to handle pagination tokens manually. 581 582 Examples: 583 >>> results = index.list_paginated(prefix='99', limit=5, namespace='my_namespace') 584 >>> [v.id for v in results.vectors] 585 ['99', '990', '991', '992', '993'] 586 >>> results.pagination.next 587 eyJza2lwX3Bhc3QiOiI5OTMiLCJwcmVmaXgiOiI5OSJ9 588 >>> next_results = index.list_paginated(prefix='99', limit=5, namespace='my_namespace', pagination_token=results.pagination.next) 589 590 Args: 591 prefix (Optional[str]): The id prefix to match. If unspecified, an empty string prefix will 592 be used with the effect of listing all ids in a namespace [optional] 593 limit (Optional[int]): The maximum number of ids to return. If unspecified, the server will use a default value. [optional] 594 pagination_token (Optional[str]): A token needed to fetch the next page of results. This token is returned 595 in the response if additional results are available. [optional] 596 namespace (Optional[str]): The namespace to fetch vectors from. If not specified, the default namespace is used. 
[optional] 597 598 Returns: ListResponse object which contains the list of ids, the namespace name, pagination information, and usage showing the number of read_units consumed. 599 """ 600 pass 601 602 @abstractmethod 603 async def list(self, **kwargs): 604 """ 605 The list operation accepts all of the same arguments as list_paginated, and returns a generator that yields 606 a list of the matching vector ids in each page of results. It automatically handles pagination tokens on your 607 behalf. 608 609 Examples: 610 >>> for ids in index.list(prefix='99', limit=5, namespace='my_namespace'): 611 >>> print(ids) 612 ['99', '990', '991', '992', '993'] 613 ['994', '995', '996', '997', '998'] 614 ['999'] 615 616 Args: 617 prefix (Optional[str]): The id prefix to match. If unspecified, an empty string prefix will 618 be used with the effect of listing all ids in a namespace [optional] 619 limit (Optional[int]): The maximum number of ids to return. If unspecified, the server will use a default value. [optional] 620 pagination_token (Optional[str]): A token needed to fetch the next page of results. This token is returned 621 in the response if additional results are available. [optional] 622 namespace (Optional[str]): The namespace to fetch vectors from. If not specified, the default namespace is used. [optional] 623 """ 624 pass 625 626 @abstractmethod 627 async def upsert_records(self, namespace: str, records: List[Dict]): 628 """ 629 :param namespace: The namespace of the index to upsert records to. 630 :type namespace: str, required 631 :param records: The records to upsert into the index. 632 :type records: List[Dict], required 633 634 Upsert records to a namespace. A record is a dictionary that contains eitiher an `id` or `_id` 635 field along with other fields that will be stored as metadata. The `id` or `_id` field is used 636 as the unique identifier for the record. 
At least one field in the record should correspond to 637 a field mapping in the index's embed configuration. 638 639 When records are upserted, Pinecone converts mapped fields into embeddings and upserts them into 640 the specified namespacce of the index. 641 642 ```python 643 import asyncio 644 from pinecone import ( 645 Pinecone, 646 CloudProvider, 647 AwsRegion, 648 EmbedModel 649 IndexEmbed 650 ) 651 652 async def main(): 653 pc = Pinecone() 654 async with pc.IndexAsyncio(host="example-dojoi3u.svc.aped-4627-b74a.pinecone.io") as idx: 655 # upsert records 656 await idx.upsert_records( 657 namespace="my-namespace", 658 records=[ 659 { 660 "_id": "test1", 661 "my_text_field": "Apple is a popular fruit known for its sweetness and crisp texture.", 662 }, 663 { 664 "_id": "test2", 665 "my_text_field": "The tech company Apple is known for its innovative products like the iPhone.", 666 }, 667 { 668 "_id": "test3", 669 "my_text_field": "Many people enjoy eating apples as a healthy snack.", 670 }, 671 { 672 "_id": "test4", 673 "my_text_field": "Apple Inc. 
has revolutionized the tech industry with its sleek designs and user-friendly interfaces.", 674 }, 675 { 676 "_id": "test5", 677 "my_text_field": "An apple a day keeps the doctor away, as the saying goes.", 678 }, 679 { 680 "_id": "test6", 681 "my_text_field": "Apple Computer Company was founded on April 1, 1976, by Steve Jobs, Steve Wozniak, and Ronald Wayne as a partnership.", 682 }, 683 ], 684 ) 685 686 from pinecone import SearchQuery, SearchRerank, RerankModel 687 688 # search for similar records 689 response = await idx.search_records( 690 namespace="my-namespace", 691 query=SearchQuery( 692 inputs={ 693 "text": "Apple corporation", 694 }, 695 top_k=3, 696 ), 697 rerank=SearchRerank( 698 model=RerankModel.Bge_Reranker_V2_M3, 699 rank_fields=["my_text_field"], 700 top_n=3, 701 ), 702 ) 703 704 asyncio.run(main()) 705 ``` 706 """ 707 pass 708 709 @abstractmethod 710 async def search( 711 self, 712 namespace: str, 713 query: Union[SearchQueryTypedDict, SearchQuery], 714 rerank: Optional[Union[SearchRerankTypedDict, SearchRerank]] = None, 715 fields: Optional[List[str]] = ["*"], # Default to returning all fields 716 ) -> SearchRecordsResponse: 717 """ 718 :param namespace: The namespace in the index to search. 719 :type namespace: str, required 720 :param query: The SearchQuery to use for the search. 721 :type query: Union[Dict, SearchQuery], required 722 :param rerank: The SearchRerank to use with the search request. 723 :type rerank: Union[Dict, SearchRerank], optional 724 :return: The records that match the search. 725 726 Search for records. 727 728 This operation converts a query to a vector embedding and then searches a namespace. You 729 can optionally provide a reranking operation as part of the search. 
730 731 ```python 732 import asyncio 733 from pinecone import ( 734 Pinecone, 735 CloudProvider, 736 AwsRegion, 737 EmbedModel 738 IndexEmbed 739 ) 740 741 async def main(): 742 pc = Pinecone() 743 async with pc.IndexAsyncio(host="example-dojoi3u.svc.aped-4627-b74a.pinecone.io") as idx: 744 # upsert records 745 await idx.upsert_records( 746 namespace="my-namespace", 747 records=[ 748 { 749 "_id": "test1", 750 "my_text_field": "Apple is a popular fruit known for its sweetness and crisp texture.", 751 }, 752 { 753 "_id": "test2", 754 "my_text_field": "The tech company Apple is known for its innovative products like the iPhone.", 755 }, 756 { 757 "_id": "test3", 758 "my_text_field": "Many people enjoy eating apples as a healthy snack.", 759 }, 760 { 761 "_id": "test4", 762 "my_text_field": "Apple Inc. has revolutionized the tech industry with its sleek designs and user-friendly interfaces.", 763 }, 764 { 765 "_id": "test5", 766 "my_text_field": "An apple a day keeps the doctor away, as the saying goes.", 767 }, 768 { 769 "_id": "test6", 770 "my_text_field": "Apple Computer Company was founded on April 1, 1976, by Steve Jobs, Steve Wozniak, and Ronald Wayne as a partnership.", 771 }, 772 ], 773 ) 774 775 from pinecone import SearchQuery, SearchRerank, RerankModel 776 777 # search for similar records 778 response = await idx.search_records( 779 namespace="my-namespace", 780 query=SearchQuery( 781 inputs={ 782 "text": "Apple corporation", 783 }, 784 top_k=3, 785 ), 786 rerank=SearchRerank( 787 model=RerankModel.Bge_Reranker_V2_M3, 788 rank_fields=["my_text_field"], 789 top_n=3, 790 ), 791 ) 792 793 asyncio.run(main()) 794 ``` 795 796 """ 797 pass 798 799 @abstractmethod 800 async def search_records( 801 self, 802 namespace: str, 803 query: Union[SearchQueryTypedDict, SearchQuery], 804 rerank: Optional[Union[SearchRerankTypedDict, SearchRerank]] = None, 805 fields: Optional[List[str]] = ["*"], # Default to returning all fields 806 ) -> SearchRecordsResponse: 807 """Alias 
of the search() method.""" 808 pass
Helper class that provides a standard way to create an ABC using inheritance.
@abstractmethod
async def upsert(
    self,
    vectors: Union[
        List[Vector], List[VectorTuple], List[VectorTupleWithMetadata], List[VectorTypedDict]
    ],
    namespace: Optional[str] = None,
    batch_size: Optional[int] = None,
    show_progress: bool = True,
    **kwargs,
) -> UpsertResponse:
    """
    Write vectors into a namespace of the index.

    Args:
        vectors (Union[List[Vector], List[VectorTuple], List[VectorTupleWithMetadata], List[VectorTypedDict]]): A list of vectors to upsert.
        namespace (str): The namespace to write to. If not specified, the default namespace is used. [optional]
        batch_size (int): The number of vectors to upsert in each batch.
                          If not specified, all vectors will be upserted in a single batch. [optional]
        show_progress (bool): Whether to show a progress bar using tqdm.
                              Applied only if batch_size is provided. Default is True.

    Returns:
        `UpsertResponse`, includes the number of vectors upserted.


    The upsert operation writes vectors into a namespace.
    If a new value is upserted for an existing vector id, it will overwrite the previous value.

    To upsert in parallel follow: https://docs.pinecone.io/docs/insert-data#sending-upserts-in-parallel

    ## Upserting dense vectors

    **Note:** the dimension of each dense vector must match the dimension of the index.

    A vector can be represented in a variety of ways.

    ```python
    import asyncio
    from pinecone import Pinecone, Vector

    async def main():
        pc = Pinecone()
        async with pc.IndexAsyncio(host="example-dojoi3u.svc.aped-4627-b74a.pinecone.io") as idx:
            # A Vector object
            await idx.upsert(
                namespace = 'my-namespace',
                vectors = [
                    Vector(id='id1', values=[0.1, 0.2, 0.3, 0.4], metadata={'metadata_key': 'metadata_value'}),
                ]
            )

            # A vector tuple
            await idx.upsert(
                namespace = 'my-namespace',
                vectors = [
                    ('id1', [0.1, 0.2, 0.3, 0.4]),
                ]
            )

            # A vector tuple with metadata
            await idx.upsert(
                namespace = 'my-namespace',
                vectors = [
                    ('id1', [0.1, 0.2, 0.3, 0.4], {'metadata_key': 'metadata_value'}),
                ]
            )

            # A vector dictionary
            await idx.upsert(
                namespace = 'my-namespace',
                vectors = [
                    {"id": "id1", "values": [0.1, 0.2, 0.3, 0.4], "metadata": {"metadata_key": "metadata_value"}},
                ]
            )

    asyncio.run(main())
    ```

    ## Upserting sparse vectors

    ```python
    import asyncio
    from pinecone import Pinecone, Vector, SparseValues

    async def main():
        pc = Pinecone()
        async with pc.IndexAsyncio(host="example-dojoi3u.svc.aped-4627-b74a.pinecone.io") as idx:
            # A Vector object
            await idx.upsert(
                namespace = 'my-namespace',
                vectors = [
                    Vector(id='id1', sparse_values=SparseValues(indices=[1, 2], values=[0.2, 0.4])),
                ]
            )

            # A dictionary
            await idx.upsert(
                namespace = 'my-namespace',
                vectors = [
                    {"id": "id1", "sparse_values": {"indices": [1, 2], "values": [0.2, 0.4]}},
                ]
            )

    asyncio.run(main())
    ```

    ## Batch upsert

    If you have a large number of vectors, you can upsert them in batches.

    ```python
    import asyncio
    from pinecone import Pinecone, Vector, SparseValues

    async def main():
        pc = Pinecone()
        async with pc.IndexAsyncio(host="example-dojoi3u.svc.aped-4627-b74a.pinecone.io") as idx:

            await idx.upsert(
                namespace = 'my-namespace',
                vectors = [
                    {'id': 'id1', 'values': [0.1, 0.2, 0.3, 0.4]},
                    {'id': 'id2', 'values': [0.2, 0.3, 0.4, 0.5]},
                    {'id': 'id3', 'values': [0.3, 0.4, 0.5, 0.6]},
                    {'id': 'id4', 'values': [0.4, 0.5, 0.6, 0.7]},
                    {'id': 'id5', 'values': [0.5, 0.6, 0.7, 0.8]},
                    # More vectors here
                ],
                batch_size = 50
            )

    asyncio.run(main())
    ```

    ## Visual progress bar with tqdm

    To see a progress bar when upserting in batches, you will need to separately install the `tqdm` package.
    If `tqdm` is present, the client will detect and use it to display progress when `show_progress=True`.
    """
    pass
Arguments:
- vectors (Union[List[Vector], List[VectorTuple], List[VectorTupleWithMetadata], List[VectorTypedDict]]): A list of vectors to upsert.
- namespace (str): The namespace to write to. If not specified, the default namespace is used. [optional]
- batch_size (int): The number of vectors to upsert in each batch. If not specified, all vectors will be upserted in a single batch. [optional]
- show_progress (bool): Whether to show a progress bar using tqdm. Applied only if batch_size is provided. Default is True.
Returns:
UpsertResponse, which includes the number of vectors upserted.
The upsert operation writes vectors into a namespace. If a new value is upserted for an existing vector id, it will overwrite the previous value.
To upsert in parallel follow: https://docs.pinecone.io/docs/insert-data#sending-upserts-in-parallel
Upserting dense vectors
Note: the dimension of each dense vector must match the dimension of the index.
A vector can be represented in a variety of ways.
import asyncio
from pinecone import Pinecone, Vector
async def main():
pc = Pinecone()
async with pc.IndexAsyncio(host="example-dojoi3u.svc.aped-4627-b74a.pinecone.io") as idx:
# A Vector object
await idx.upsert(
namespace = 'my-namespace',
vectors = [
Vector(id='id1', values=[0.1, 0.2, 0.3, 0.4], metadata={'metadata_key': 'metadata_value'}),
]
)
# A vector tuple
await idx.upsert(
namespace = 'my-namespace',
vectors = [
('id1', [0.1, 0.2, 0.3, 0.4]),
]
)
# A vector tuple with metadata
await idx.upsert(
namespace = 'my-namespace',
vectors = [
('id1', [0.1, 0.2, 0.3, 0.4], {'metadata_key': 'metadata_value'}),
]
)
# A vector dictionary
await idx.upsert(
namespace = 'my-namespace',
vectors = [
{"id": "id1", "values": [0.1, 0.2, 0.3, 0.4], "metadata": {"metadata_key": "metadata_value"}},
]
)
asyncio.run(main())
Upserting sparse vectors
import asyncio
from pinecone import Pinecone, Vector, SparseValues
async def main():
pc = Pinecone()
async with pc.IndexAsyncio(host="example-dojoi3u.svc.aped-4627-b74a.pinecone.io") as idx:
# A Vector object
await idx.upsert(
namespace = 'my-namespace',
vectors = [
Vector(id='id1', sparse_values=SparseValues(indices=[1, 2], values=[0.2, 0.4])),
]
)
# A dictionary
await idx.upsert(
namespace = 'my-namespace',
vectors = [
{"id": "id1", "sparse_values": {"indices": [1, 2], "values": [0.2, 0.4]}},
]
)
asyncio.run(main())
Batch upsert
If you have a large number of vectors, you can upsert them in batches.
import asyncio
from pinecone import Pinecone, Vector, SparseValues
async def main():
pc = Pinecone()
async with pc.IndexAsyncio(host="example-dojoi3u.svc.aped-4627-b74a.pinecone.io") as idx:
await idx.upsert(
namespace = 'my-namespace',
vectors = [
{'id': 'id1', 'values': [0.1, 0.2, 0.3, 0.4]},
{'id': 'id2', 'values': [0.2, 0.3, 0.4, 0.5]},
{'id': 'id3', 'values': [0.3, 0.4, 0.5, 0.6]},
{'id': 'id4', 'values': [0.4, 0.5, 0.6, 0.7]},
{'id': 'id5', 'values': [0.5, 0.6, 0.7, 0.8]},
# More vectors here
],
batch_size = 50
)
asyncio.run(main())
Visual progress bar with tqdm
To see a progress bar when upserting in batches, you will need to separately install the tqdm package.
If tqdm is present, the client will detect and use it to display progress when show_progress=True.
@abstractmethod
async def upsert_from_dataframe(
    self,
    df,
    namespace: Optional[str] = None,
    batch_size: int = 500,
    show_progress: bool = True,
):
    """
    Placeholder for upserting vectors from a dataframe.

    This method has not been implemented yet for the IndexAsyncio class.
    """
    ...
This method has not been implemented yet for the IndexAsyncio class.
@abstractmethod
async def delete(
    self,
    ids: Optional[List[str]] = None,
    delete_all: Optional[bool] = None,
    namespace: Optional[str] = None,
    filter: Optional[FilterTypedDict] = None,
    **kwargs,
) -> Dict[str, Any]:
    """
    The Delete operation deletes vectors from the index, from a single namespace.

    No error is raised if the vector id does not exist.

    Note: For any delete call, if namespace is not specified, the default namespace `""` is used.
    Since the delete operation does not error when ids are not present, this means you may not receive
    an error if you delete from the wrong namespace.

    Delete can occur in the following mutually exclusive ways:

    1. Delete by ids from a single namespace
    2. Delete all vectors from a single namespace by setting `delete_all` to True
    3. Delete all vectors from a single namespace by specifying a metadata filter
       (note that for this option `delete_all` must be set to False)

    API reference: https://docs.pinecone.io/reference/delete_post

    Args:
        ids (List[str]): Vector ids to delete. [optional]
        delete_all (bool): This indicates that all vectors in the index namespace should be deleted.
                           Documented default is False (the signature default is None). [optional]
        namespace (str): The namespace to delete vectors from.
                         If not specified, the default namespace is used. [optional]
        filter (Dict[str, Union[str, float, int, bool, List, dict]]):
                If specified, the metadata filter here will be used to select the vectors to delete.
                This is mutually exclusive with specifying ids to delete in the ids param or using delete_all=True.
                See https://www.pinecone.io/docs/metadata-filtering/ [optional]

    ```python
    import asyncio
    from pinecone import Pinecone, Vector, SparseValues

    async def main():
        pc = Pinecone()
        async with pc.IndexAsyncio(host="example-dojoi3u.svc.aped-4627-b74a.pinecone.io") as idx:
            # Delete specific ids
            await idx.delete(
                ids=['id1', 'id2'],
                namespace='my_namespace'
            )

            # Delete everything in a namespace
            await idx.delete(
                delete_all=True,
                namespace='my_namespace'
            )

            # Delete by metadata filter
            await idx.delete(
                filter={'key': 'value'},
                namespace='my_namespace'
            )

    asyncio.run(main())
    ```

    Returns: An empty dictionary if the delete operation was successful.
    """
    ...
Arguments:
- ids (List[str]): Vector ids to delete [optional]
- delete_all (bool): This indicates that all vectors in the index namespace should be deleted. [optional] Default is False.
- namespace (str): The namespace to delete vectors from [optional] If not specified, the default namespace is used.
- filter (Dict[str, Union[str, float, int, bool, List, dict]]): If specified, the metadata filter here will be used to select the vectors to delete. This is mutually exclusive with specifying ids to delete in the ids param or using delete_all=True. See https://www.pinecone.io/docs/metadata-filtering/ [optional]
The Delete operation deletes vectors from the index, from a single namespace.
No error is raised if the vector id does not exist.
Note: For any delete call, if namespace is not specified, the default namespace "" is used. Since the delete operation does not error when ids are not present, this means you may not receive an error if you delete from the wrong namespace.
Delete can occur in the following mutually exclusive ways:
- Delete by ids from a single namespace
- Delete all vectors from a single namespace by setting delete_all to True
- Delete all vectors from a single namespace by specifying a metadata filter (note that for this option delete_all must be set to False)
API reference: https://docs.pinecone.io/reference/delete_post
import asyncio
from pinecone import Pinecone, Vector, SparseValues
async def main():
pc = Pinecone()
async with pc.IndexAsyncio(host="example-dojoi3u.svc.aped-4627-b74a.pinecone.io") as idx:
# Delete specific ids
await idx.delete(
ids=['id1', 'id2'],
namespace='my_namespace'
)
# Delete everything in a namespace
await idx.delete(
delete_all=True,
namespace='my_namespace'
)
# Delete by metadata filter
await idx.delete(
filter={'key': 'value'},
namespace='my_namespace'
)
asyncio.run(main())
Returns: An empty dictionary if the delete operation was successful.
@abstractmethod
async def fetch(
    self,
    ids: List[str],
    namespace: Optional[str] = None,
    **kwargs,
) -> FetchResponse:
    """
    Look up vectors by ID within a single namespace and return them.

    The returned vectors include the vector data and/or metadata.

    API reference: https://docs.pinecone.io/reference/fetch

    Args:
        ids (List[str]): The vector IDs to fetch.
        namespace (str): The namespace to fetch vectors from.
            If not specified, the default namespace is used. [optional]

    Returns: FetchResponse object which contains the fetched Vector objects
        (indexed by vector id in the example below) and the namespace name.

    Example:

    ```python
    import asyncio
    from pinecone import Pinecone, Vector, SparseValues

    async def main():
        pc = Pinecone()
        async with pc.IndexAsyncio(host="example-dojoi3u.svc.aped-4627-b74a.pinecone.io") as idx:
            # Fetch specific ids in namespace
            fetched = await idx.fetch(
                ids=['id1', 'id2'],
                namespace='my_namespace'
            )
            for vec_id in fetched.vectors:
                vector = fetched.vectors[vec_id]
                print(vector.id)
                print(vector.metadata)
                print(vector.values)

    asyncio.run(main())
    ```
    """
    ...
The fetch operation looks up and returns vectors, by ID, from a single namespace. The returned vectors include the vector data and/or metadata.
API reference: https://docs.pinecone.io/reference/fetch
import asyncio
from pinecone import Pinecone, Vector, SparseValues
async def main():
pc = Pinecone()
async with pc.IndexAsyncio(host="example-dojoi3u.svc.aped-4627-b74a.pinecone.io") as idx:
# Fetch specific ids in namespace
fetched = await idx.fetch(
ids=['id1', 'id2'],
namespace='my_namespace'
)
for vec_id in fetched.vectors:
vector = fetched.vectors[vec_id]
print(vector.id)
print(vector.metadata)
print(vector.values)
asyncio.run(main())
Arguments:
- ids (List[str]): The vector IDs to fetch.
- namespace (str): The namespace to fetch vectors from. If not specified, the default namespace is used. [optional]
Returns: FetchResponse object which contains the list of Vector objects, and namespace name.
@abstractmethod
async def query(
    self,
    *args,
    top_k: int,
    vector: Optional[List[float]] = None,
    id: Optional[str] = None,
    namespace: Optional[str] = None,
    filter: Optional[FilterTypedDict] = None,
    include_values: Optional[bool] = None,
    include_metadata: Optional[bool] = None,
    sparse_vector: Optional[Union[SparseValues, SparseVectorTypedDict]] = None,
    **kwargs,
) -> QueryResponse:
    """
    The Query operation searches a namespace, using a query vector.
    It retrieves the ids of the most similar items in a namespace, along with their similarity scores.

    API reference: https://docs.pinecone.io/reference/query

    ## Querying with dense vectors

    ```python
    import asyncio
    from pinecone import Pinecone, Vector, SparseValues

    async def main():
        pc = Pinecone()
        async with pc.IndexAsyncio(host="example-dojoi3u.svc.aped-4627-b74a.pinecone.io") as idx:
            query_embedding = [0.1, 0.2, 0.3, ...] # An embedding that matches the index dimension

            # Query by vector values
            results = await idx.query(
                vector=query_embedding,
                top_k=10,
                filter={'genre': {"$eq": "drama"}}, # Optionally filter by metadata
                namespace='my_namespace',
                include_values=False,
                include_metadata=True
            )

            # Query using vector id (the values from this stored vector will be used to query)
            results = await idx.query(
                id='1',
                top_k=10,
                filter={"year": {"$gt": 2000}},
                namespace='my_namespace',
            )

    asyncio.run(main())
    ```

    ## Querying with sparse vectors

    ```python
    import asyncio
    from pinecone import Pinecone, Vector, SparseValues

    async def main():
        pc = Pinecone()
        async with pc.IndexAsyncio(host="example-dojoi3u.svc.aped-4627-b74a.pinecone.io") as idx:
            query_embedding = [0.1, 0.2, 0.3, ...] # An embedding that matches the index dimension

            # Query with dense values plus sparse values (SparseValues object)
            results = await idx.query(
                vector=query_embedding,
                sparse_vector=SparseValues(indices=[1, 2], values=[0.2, 0.4]),
                top_k=10,
                filter={'genre': {"$eq": "drama"}}, # Optionally filter by metadata
                namespace='my_namespace',
                include_values=False,
                include_metadata=True
            )

            # The sparse part may also be given as a plain dict
            results = await idx.query(
                vector=query_embedding,
                sparse_vector={'indices': [1, 2], 'values': [0.2, 0.4]},
                top_k=10,
                namespace='my_namespace',
            )

    asyncio.run(main())
    ```

    Examples:
        >>> await index.query(vector=[1, 2, 3], top_k=10, namespace='my_namespace')
        >>> await index.query(id='id1', top_k=10, namespace='my_namespace')
        >>> await index.query(vector=[1, 2, 3], top_k=10, namespace='my_namespace', filter={'key': 'value'})
        >>> await index.query(id='id1', top_k=10, namespace='my_namespace', include_metadata=True, include_values=True)
        >>> await index.query(vector=[1, 2, 3], sparse_vector={'indices': [1, 2], 'values': [0.2, 0.4]},
        ...                   top_k=10, namespace='my_namespace')
        >>> await index.query(vector=[1, 2, 3], sparse_vector=SparseValues([1, 2], [0.2, 0.4]),
        ...                   top_k=10, namespace='my_namespace')

    Args:
        vector (List[float]): The query vector. This should be the same length as the dimension of the index
                              being queried. Each `query()` request can contain only one of the parameters
                              `id` or `vector`. [optional]
        id (str): The unique ID of the vector to be used as a query vector.
                  Each `query()` request can contain only one of the parameters
                  `vector` or `id`. [optional]
        top_k (int): The number of results to return for each query. Must be an integer
                     greater than or equal to 1. Keyword-only and required.
        namespace (str): The namespace to fetch vectors from.
                         If not specified, the default namespace is used. [optional]
        filter (Dict[str, Union[str, float, int, bool, List, dict]]):
                The filter to apply. You can use vector metadata to limit your search.
                See https://www.pinecone.io/docs/metadata-filtering/. [optional]
        include_values (bool): Indicates whether vector values are included in the response.
                               If omitted the server will use the default value of False [optional]
        include_metadata (bool): Indicates whether metadata is included in the response as well as the ids.
                                 If omitted the server will use the default value of False [optional]
        sparse_vector (Union[SparseValues, Dict[str, Union[List[float], List[int]]]]): sparse values of the query vector.
                       Expected to be either a SparseValues object or a dict of the form:
                       {'indices': List[int], 'values': List[float]}, where the lists each have the same length.
                       [optional]

    Returns: QueryResponse object which contains the list of the closest vectors as ScoredVector objects,
             and namespace name.
    """
    pass
The Query operation searches a namespace, using a query vector. It retrieves the ids of the most similar items in a namespace, along with their similarity scores.
API reference: https://docs.pinecone.io/reference/query
Querying with dense vectors
import asyncio
from pinecone import Pinecone, Vector, SparseValues
async def main():
pc = Pinecone()
async with pc.IndexAsyncio(host="example-dojoi3u.svc.aped-4627-b74a.pinecone.io") as idx:
query_embedding = [0.1, 0.2, 0.3, ...] # An embedding that matches the index dimension
# Query by vector values
results = await idx.query(
vector=query_embedding,
top_k=10,
filter={'genre': {"$eq": "drama"}}, # Optionally filter by metadata
namespace='my_namespace',
include_values=False,
include_metadata=True
)
# Query using vector id (the values from this stored vector will be used to query)
results = await idx.query(
id='1',
top_k=10,
filter={"year": {"$gt": 2000}},
namespace='my_namespace',
)
asyncio.run(main())
Query with sparse vectors
import asyncio
from pinecone import Pinecone, Vector, SparseValues
async def main():
pc = Pinecone()
async with pc.IndexAsyncio(host="example-dojoi3u.svc.aped-4627-b74a.pinecone.io") as idx:
query_embedding = [0.1, 0.2, 0.3, ...] # An embedding that matches the index dimension
# Query by vector values
results = await idx.query(
vector=query_embedding,
top_k=10,
filter={'genre': {"$eq": "drama"}}, # Optionally filter by metadata
namespace='my_namespace',
include_values=False,
include_metadata=True
)
# Query using vector id (the values from this stored vector will be used to query)
results = await idx.query(
id='1',
top_k=10,
filter={"year": {"$gt": 2000}},
namespace='my_namespace',
)
asyncio.run(main())
Examples:
>>> await index.query(vector=[1, 2, 3], top_k=10, namespace='my_namespace')
>>> await index.query(id='id1', top_k=10, namespace='my_namespace')
>>> await index.query(vector=[1, 2, 3], top_k=10, namespace='my_namespace', filter={'key': 'value'})
>>> await index.query(id='id1', top_k=10, namespace='my_namespace', include_metadata=True, include_values=True)
>>> await index.query(vector=[1, 2, 3], sparse_vector={'indices': [1, 2], 'values': [0.2, 0.4]},
...                   top_k=10, namespace='my_namespace')
>>> await index.query(vector=[1, 2, 3], sparse_vector=SparseValues([1, 2], [0.2, 0.4]),
...                   top_k=10, namespace='my_namespace')
Arguments:
- vector (List[float]): The query vector. This should be the same length as the dimension of the index being queried. Each `query()` request can contain only one of the parameters `id` or `vector`. [optional]
- id (str): The unique ID of the vector to be used as a query vector. Each `query()` request can contain only one of the parameters `vector` or `id`. [optional]
- top_k (int): The number of results to return for each query. Must be an integer greater than or equal to 1.
- namespace (str): The namespace to fetch vectors from. If not specified, the default namespace is used. [optional]
- filter (Dict[str, Union[str, float, int, bool, List, dict]]): The filter to apply. You can use vector metadata to limit your search. See https://www.pinecone.io/docs/metadata-filtering/. [optional]
- include_values (bool): Indicates whether vector values are included in the response. If omitted the server will use the default value of False [optional]
- include_metadata (bool): Indicates whether metadata is included in the response as well as the ids. If omitted the server will use the default value of False [optional]
- sparse_vector: (Union[SparseValues, Dict[str, Union[List[float], List[int]]]]): sparse values of the query vector. Expected to be either a SparseValues object or a dict of the form: {'indices': List[int], 'values': List[float]}, where the lists each have the same length.
Returns: QueryResponse object which contains the list of the closest vectors as ScoredVector objects, and namespace name.
@abstractmethod
async def query_namespaces(
    self,
    namespaces: List[str],
    top_k: Optional[int] = None,
    filter: Optional[FilterTypedDict] = None,
    include_values: Optional[bool] = None,
    include_metadata: Optional[bool] = None,
    vector: Optional[List[float]] = None,
    sparse_vector: Optional[Union[SparseValues, SparseVectorTypedDict]] = None,
    **kwargs,
) -> QueryNamespacesResults:
    """Query several namespaces in parallel and merge the matches into one result set.

    Args:
        vector (List[float]): The query vector, must be the same length as the dimension of the index being queried.
        namespaces (List[str]): The list of namespaces to query.
        top_k (Optional[int], optional): The number of results you would like to request from each namespace. Defaults to 10.
        filter (Optional[Dict[str, Union[str, float, int, bool, List, dict]]], optional): Pass an optional filter to filter results based on metadata. Defaults to None.
        include_values (Optional[bool], optional): Boolean field indicating whether vector values should be included with results. Defaults to None.
        include_metadata (Optional[bool], optional): Boolean field indicating whether vector metadata should be included with results. Defaults to None.
        sparse_vector (Optional[Union[SparseValues, Dict[str, Union[List[float], List[int]]]]], optional): If you are working with a dotproduct index, you can pass a sparse vector as part of your hybrid search. Defaults to None.

    Returns:
        QueryNamespacesResults: A QueryNamespacesResults object containing the combined results from all namespaces, as well as the combined usage cost in read units.

    Examples:

    ```python
    import asyncio
    from pinecone import Pinecone

    async def main():
        pc = Pinecone(api_key="your-api-key")
        idx = pc.IndexAsyncio(
            host="example-dojoi3u.svc.aped-4627-b74a.pinecone.io",
        )

        query_vec = [0.1, 0.2, 0.3] # An embedding that matches the index dimension
        combined_results = await idx.query_namespaces(
            vector=query_vec,
            namespaces=['ns1', 'ns2', 'ns3', 'ns4'],
            top_k=10,
            filter={'genre': {"$eq": "drama"}},
            include_values=True,
            include_metadata=True
        )
        for vec in combined_results.matches:
            print(vec.id, vec.score)
        print(combined_results.usage)

        await idx.close()

    asyncio.run(main())
    ```
    """
    ...
The query_namespaces() method is used to make a query to multiple namespaces in parallel and combine the results into one result set.
Arguments:
- vector (List[float]): The query vector, must be the same length as the dimension of the index being queried.
- namespaces (List[str]): The list of namespaces to query.
- top_k (Optional[int], optional): The number of results you would like to request from each namespace. Defaults to 10.
- filter (Optional[Dict[str, Union[str, float, int, bool, List, dict]]], optional): Pass an optional filter to filter results based on metadata. Defaults to None.
- include_values (Optional[bool], optional): Boolean field indicating whether vector values should be included with results. Defaults to None.
- include_metadata (Optional[bool], optional): Boolean field indicating whether vector metadata should be included with results. Defaults to None.
- sparse_vector (Optional[ Union[SparseValues, Dict[str, Union[List[float], List[int]]]] ], optional): If you are working with a dotproduct index, you can pass a sparse vector as part of your hybrid search. Defaults to None.
Returns:
QueryNamespacesResults: A QueryNamespacesResults object containing the combined results from all namespaces, as well as the combined usage cost in read units.
Examples:
import asyncio
from pinecone import Pinecone
async def main():
pc = Pinecone(api_key="your-api-key")
idx = pc.IndexAsyncio(
host="example-dojoi3u.svc.aped-4627-b74a.pinecone.io",
)
query_vec = [0.1, 0.2, 0.3] # An embedding that matches the index dimension
combined_results = await idx.query_namespaces(
vector=query_vec,
namespaces=['ns1', 'ns2', 'ns3', 'ns4'],
top_k=10,
filter={'genre': {"$eq": "drama"}},
include_values=True,
include_metadata=True
)
for vec in combined_results.matches:
print(vec.id, vec.score)
print(combined_results.usage)
await idx.close()
asyncio.run(main())
@abstractmethod
async def update(
    self,
    id: str,
    values: Optional[List[float]] = None,
    set_metadata: Optional[VectorMetadataTypedDict] = None,
    namespace: Optional[str] = None,
    sparse_values: Optional[Union[SparseValues, SparseVectorTypedDict]] = None,
    **kwargs,
) -> Dict[str, Any]:
    """
    Update a single vector in a namespace.

    If a value is included, it will overwrite the previous value.
    If a set_metadata is included,
    the values of the fields specified in it will be added or overwrite the previous value.

    API reference: https://docs.pinecone.io/reference/update

    Args:
        id (str): Vector's unique id.
        values (List[float]): vector values to set. [optional]
        set_metadata (Dict[str, Union[str, float, int, bool, List[int], List[float], List[str]]]):
            metadata to set for vector. [optional]
        namespace (str): Namespace name where to update the vector. [optional]
        sparse_values (Union[SparseValues, Dict[str, Union[List[float], List[int]]]]): sparse values to update for the vector.
            Expected to be either a SparseValues object or a dict of the form:
            {'indices': List[int], 'values': List[float]} where the lists each have the same length.
            [optional]

    Examples:

    ```python
    import asyncio
    from pinecone import Pinecone, Vector, SparseValues

    async def main():
        pc = Pinecone()
        async with pc.IndexAsyncio(host="example-dojoi3u.svc.aped-4627-b74a.pinecone.io") as idx:
            # Update vector values
            await idx.update(
                id='id1',
                values=[0.1, 0.2, 0.3, ...],
                namespace='my_namespace'
            )

            # Update metadata
            await idx.update(
                id='id1',
                set_metadata={'key': 'value'},
                namespace='my_namespace'
            )

            # Update sparse values
            await idx.update(
                id='id1',
                sparse_values={'indices': [1, 2], 'values': [0.2, 0.4]},
                namespace='my_namespace'
            )

            # Update sparse values with SparseValues object
            await idx.update(
                id='id1',
                sparse_values=SparseValues(indices=[234781, 5432], values=[0.2, 0.4]),
                namespace='my_namespace'
            )

    asyncio.run(main())
    ```
    """
    ...
The Update operation updates a vector in a namespace.
Arguments:
- id (str): Vector's unique id.
- values (List[float]): vector values to set. [optional]
- set_metadata (Dict[str, Union[str, float, int, bool, List[int], List[float], List[str]]]]): metadata to set for vector. [optional]
- namespace (str): Namespace name where to update the vector. [optional]
- sparse_values: (Dict[str, Union[List[float], List[int]]]): sparse values to update for the vector. Expected to be either a SparseValues object or a dict of the form: {'indices': List[int], 'values': List[float]} where the lists each have the same length.
If a value is included, it will overwrite the previous value. If a set_metadata is included, the values of the fields specified in it will be added or overwrite the previous value.
API reference: https://docs.pinecone.io/reference/update
Examples:
import asyncio
from pinecone import Pinecone, Vector, SparseValues
async def main():
pc = Pinecone()
async with pc.IndexAsyncio(host="example-dojoi3u.svc.aped-4627-b74a.pinecone.io") as idx:
# Update vector values
await idx.update(
id='id1',
values=[0.1, 0.2, 0.3, ...],
namespace='my_namespace'
)
# Update metadata
await idx.update(
id='id1',
set_metadata={'key': 'value'},
namespace='my_namespace'
)
# Update sparse values
await idx.update(
id='id1',
sparse_values={'indices': [1, 2], 'values': [0.2, 0.4]},
namespace='my_namespace'
)
# Update sparse values with SparseValues object
await idx.update(
id='id1',
sparse_values=SparseValues(indices=[234781, 5432], values=[0.2, 0.4]),
namespace='my_namespace'
)
asyncio.run(main())
@abstractmethod
async def describe_index_stats(
    self,
    filter: Optional[FilterTypedDict] = None,
    **kwargs,
) -> DescribeIndexStatsResponse:
    """
    Return statistics about the index's contents.

    For example: The vector count per namespace and the number of dimensions.

    API reference: https://docs.pinecone.io/reference/describe_index_stats_post

    Args:
        filter (Dict[str, Union[str, float, int, bool, List, dict]]):
            If this parameter is present, the operation only returns statistics for
            vectors that satisfy the filter.
            See https://www.pinecone.io/docs/metadata-filtering/. [optional]

    Returns: DescribeIndexStatsResponse object which contains stats about the index.

    Example:

    ```python
    import asyncio
    from pinecone import Pinecone, Vector, SparseValues

    async def main():
        pc = Pinecone()
        async with pc.IndexAsyncio(host="example-dojoi3u.svc.aped-4627-b74a.pinecone.io") as idx:
            print(await idx.describe_index_stats())

    asyncio.run(main())
    ```
    """
    ...
The DescribeIndexStats operation returns statistics about the index's contents. For example: The vector count per namespace and the number of dimensions.
API reference: https://docs.pinecone.io/reference/describe_index_stats_post
Arguments:
- filter (Dict[str, Union[str, float, int, bool, List, dict]]): If this parameter is present, the operation only returns statistics for vectors that satisfy the filter. See https://www.pinecone.io/docs/metadata-filtering/. [optional]
Returns: DescribeIndexStatsResponse object which contains stats about the index.
import asyncio
from pinecone import Pinecone, Vector, SparseValues
async def main():
pc = Pinecone()
async with pc.IndexAsyncio(host="example-dojoi3u.svc.aped-4627-b74a.pinecone.io") as idx:
print(await idx.describe_index_stats())
asyncio.run(main())
@abstractmethod
async def list_paginated(
    self,
    prefix: Optional[str] = None,
    limit: Optional[int] = None,
    pagination_token: Optional[str] = None,
    namespace: Optional[str] = None,
    **kwargs,
) -> ListResponse:
    """
    The list_paginated operation finds vectors based on an id prefix within a single namespace.
    It returns matching ids in a paginated form, with a pagination token to fetch the next page of results.
    This id list can then be passed to fetch or delete operations, depending on your use case.

    Consider using the `list` method to avoid having to handle pagination tokens manually.

    Examples:
        This method is a coroutine, so each call must be awaited.

        >>> results = await index.list_paginated(prefix='99', limit=5, namespace='my_namespace')
        >>> [v.id for v in results.vectors]
        ['99', '990', '991', '992', '993']
        >>> results.pagination.next
        eyJza2lwX3Bhc3QiOiI5OTMiLCJwcmVmaXgiOiI5OSJ9
        >>> next_results = await index.list_paginated(prefix='99', limit=5, namespace='my_namespace', pagination_token=results.pagination.next)

    Args:
        prefix (Optional[str]): The id prefix to match. If unspecified, an empty string prefix will
                                be used with the effect of listing all ids in a namespace [optional]
        limit (Optional[int]): The maximum number of ids to return. If unspecified, the server will use a default value. [optional]
        pagination_token (Optional[str]): A token needed to fetch the next page of results. This token is returned
            in the response if additional results are available. [optional]
        namespace (Optional[str]): The namespace to fetch vectors from. If not specified, the default namespace is used. [optional]

    Returns: ListResponse object which contains the list of ids, the namespace name, pagination information, and usage showing the number of read_units consumed.
    """
    pass
The list_paginated operation finds vectors based on an id prefix within a single namespace. It returns matching ids in a paginated form, with a pagination token to fetch the next page of results. This id list can then be passed to fetch or delete operations, depending on your use case.
Consider using the `list` method to avoid having to handle pagination tokens manually.
Examples:
>>> results = await index.list_paginated(prefix='99', limit=5, namespace='my_namespace')
>>> [v.id for v in results.vectors]
['99', '990', '991', '992', '993']
>>> results.pagination.next
eyJza2lwX3Bhc3QiOiI5OTMiLCJwcmVmaXgiOiI5OSJ9
>>> next_results = await index.list_paginated(prefix='99', limit=5, namespace='my_namespace', pagination_token=results.pagination.next)
Arguments:
- prefix (Optional[str]): The id prefix to match. If unspecified, an empty string prefix will be used with the effect of listing all ids in a namespace [optional]
- limit (Optional[int]): The maximum number of ids to return. If unspecified, the server will use a default value. [optional]
- pagination_token (Optional[str]): A token needed to fetch the next page of results. This token is returned in the response if additional results are available. [optional]
- namespace (Optional[str]): The namespace to fetch vectors from. If not specified, the default namespace is used. [optional]
Returns: ListResponse object which contains the list of ids, the namespace name, pagination information, and usage showing the number of read_units consumed.
@abstractmethod
async def list(self, **kwargs):
    """
    The list operation accepts all of the same arguments as list_paginated, and returns a generator that yields
    a list of the matching vector ids in each page of results. It automatically handles pagination tokens on your
    behalf.

    Examples:
        Because this is an asyncio interface, the pages are consumed with
        `async for` rather than a plain `for` loop.

        >>> async for ids in index.list(prefix='99', limit=5, namespace='my_namespace'):
        ...     print(ids)
        ['99', '990', '991', '992', '993']
        ['994', '995', '996', '997', '998']
        ['999']

    Args:
        prefix (Optional[str]): The id prefix to match. If unspecified, an empty string prefix will
                                be used with the effect of listing all ids in a namespace [optional]
        limit (Optional[int]): The maximum number of ids to return. If unspecified, the server will use a default value. [optional]
        pagination_token (Optional[str]): A token needed to fetch the next page of results. This token is returned
            in the response if additional results are available. [optional]
        namespace (Optional[str]): The namespace to fetch vectors from. If not specified, the default namespace is used. [optional]
    """
    # NOTE(review): declared here as a plain coroutine stub; concrete
    # implementations are presumably async generators - confirm.
    pass
The list operation accepts all of the same arguments as list_paginated, and returns a generator that yields a list of the matching vector ids in each page of results. It automatically handles pagination tokens on your behalf.
Examples:
>>> async for ids in index.list(prefix='99', limit=5, namespace='my_namespace'):
...     print(ids)
['99', '990', '991', '992', '993']
['994', '995', '996', '997', '998']
['999']
Arguments:
- prefix (Optional[str]): The id prefix to match. If unspecified, an empty string prefix will be used with the effect of listing all ids in a namespace [optional]
- limit (Optional[int]): The maximum number of ids to return. If unspecified, the server will use a default value. [optional]
- pagination_token (Optional[str]): A token needed to fetch the next page of results. This token is returned in the response if additional results are available. [optional]
- namespace (Optional[str]): The namespace to fetch vectors from. If not specified, the default namespace is used. [optional]
@abstractmethod
async def upsert_records(self, namespace: str, records: List[Dict]):
    """
    :param namespace: The namespace of the index to upsert records to.
    :type namespace: str, required
    :param records: The records to upsert into the index.
    :type records: List[Dict], required

    Upsert records to a namespace. A record is a dictionary that contains either an `id` or `_id`
    field along with other fields that will be stored as metadata. The `id` or `_id` field is used
    as the unique identifier for the record. At least one field in the record should correspond to
    a field mapping in the index's embed configuration.

    When records are upserted, Pinecone converts mapped fields into embeddings and upserts them into
    the specified namespace of the index.

    ```python
    import asyncio
    from pinecone import (
        Pinecone,
        CloudProvider,
        AwsRegion,
        EmbedModel,
        IndexEmbed
    )

    async def main():
        pc = Pinecone()
        async with pc.IndexAsyncio(host="example-dojoi3u.svc.aped-4627-b74a.pinecone.io") as idx:
            # upsert records
            await idx.upsert_records(
                namespace="my-namespace",
                records=[
                    {
                        "_id": "test1",
                        "my_text_field": "Apple is a popular fruit known for its sweetness and crisp texture.",
                    },
                    {
                        "_id": "test2",
                        "my_text_field": "The tech company Apple is known for its innovative products like the iPhone.",
                    },
                    {
                        "_id": "test3",
                        "my_text_field": "Many people enjoy eating apples as a healthy snack.",
                    },
                    {
                        "_id": "test4",
                        "my_text_field": "Apple Inc. has revolutionized the tech industry with its sleek designs and user-friendly interfaces.",
                    },
                    {
                        "_id": "test5",
                        "my_text_field": "An apple a day keeps the doctor away, as the saying goes.",
                    },
                    {
                        "_id": "test6",
                        "my_text_field": "Apple Computer Company was founded on April 1, 1976, by Steve Jobs, Steve Wozniak, and Ronald Wayne as a partnership.",
                    },
                ],
            )

            from pinecone import SearchQuery, SearchRerank, RerankModel

            # search for similar records
            response = await idx.search_records(
                namespace="my-namespace",
                query=SearchQuery(
                    inputs={
                        "text": "Apple corporation",
                    },
                    top_k=3,
                ),
                rerank=SearchRerank(
                    model=RerankModel.Bge_Reranker_V2_M3,
                    rank_fields=["my_text_field"],
                    top_n=3,
                ),
            )

    asyncio.run(main())
    ```
    """
    pass
Parameters
- namespace: The namespace of the index to upsert records to.
- records: The records to upsert into the index.
Upsert records to a namespace. A record is a dictionary that contains either an `id` or `_id` field along with other fields that will be stored as metadata. The `id` or `_id` field is used as the unique identifier for the record. At least one field in the record should correspond to a field mapping in the index's embed configuration.
When records are upserted, Pinecone converts mapped fields into embeddings and upserts them into the specified namespace of the index.
import asyncio
from pinecone import (
Pinecone,
CloudProvider,
AwsRegion,
EmbedModel,
IndexEmbed
)
async def main():
pc = Pinecone()
async with pc.IndexAsyncio(host="example-dojoi3u.svc.aped-4627-b74a.pinecone.io") as idx:
# upsert records
await idx.upsert_records(
namespace="my-namespace",
records=[
{
"_id": "test1",
"my_text_field": "Apple is a popular fruit known for its sweetness and crisp texture.",
},
{
"_id": "test2",
"my_text_field": "The tech company Apple is known for its innovative products like the iPhone.",
},
{
"_id": "test3",
"my_text_field": "Many people enjoy eating apples as a healthy snack.",
},
{
"_id": "test4",
"my_text_field": "Apple Inc. has revolutionized the tech industry with its sleek designs and user-friendly interfaces.",
},
{
"_id": "test5",
"my_text_field": "An apple a day keeps the doctor away, as the saying goes.",
},
{
"_id": "test6",
"my_text_field": "Apple Computer Company was founded on April 1, 1976, by Steve Jobs, Steve Wozniak, and Ronald Wayne as a partnership.",
},
],
)
from pinecone import SearchQuery, SearchRerank, RerankModel
# search for similar records
response = await idx.search_records(
namespace="my-namespace",
query=SearchQuery(
inputs={
"text": "Apple corporation",
},
top_k=3,
),
rerank=SearchRerank(
model=RerankModel.Bge_Reranker_V2_M3,
rank_fields=["my_text_field"],
top_n=3,
),
)
asyncio.run(main())
@abstractmethod
async def search(
    self,
    namespace: str,
    query: Union[SearchQueryTypedDict, SearchQuery],
    rerank: Optional[Union[SearchRerankTypedDict, SearchRerank]] = None,
    fields: Optional[List[str]] = ["*"],  # Default to returning all fields
) -> SearchRecordsResponse:
    """
    :param namespace: The namespace in the index to search.
    :type namespace: str, required
    :param query: The SearchQuery to use for the search.
    :type query: Union[Dict, SearchQuery], required
    :param rerank: The SearchRerank to use with the search request.
    :type rerank: Union[Dict, SearchRerank], optional
    :param fields: The record fields to return. Defaults to ``["*"]``, which returns all fields.
    :type fields: List[str], optional
    :return: The records that match the search.

    Search for records.

    This operation converts a query to a vector embedding and then searches a namespace. You
    can optionally provide a reranking operation as part of the search.

    ```python
    import asyncio
    from pinecone import (
        Pinecone,
        CloudProvider,
        AwsRegion,
        EmbedModel,
        IndexEmbed
    )

    async def main():
        pc = Pinecone()
        async with pc.IndexAsyncio(host="example-dojoi3u.svc.aped-4627-b74a.pinecone.io") as idx:
            # upsert records
            await idx.upsert_records(
                namespace="my-namespace",
                records=[
                    {
                        "_id": "test1",
                        "my_text_field": "Apple is a popular fruit known for its sweetness and crisp texture.",
                    },
                    {
                        "_id": "test2",
                        "my_text_field": "The tech company Apple is known for its innovative products like the iPhone.",
                    },
                    {
                        "_id": "test3",
                        "my_text_field": "Many people enjoy eating apples as a healthy snack.",
                    },
                    {
                        "_id": "test4",
                        "my_text_field": "Apple Inc. has revolutionized the tech industry with its sleek designs and user-friendly interfaces.",
                    },
                    {
                        "_id": "test5",
                        "my_text_field": "An apple a day keeps the doctor away, as the saying goes.",
                    },
                    {
                        "_id": "test6",
                        "my_text_field": "Apple Computer Company was founded on April 1, 1976, by Steve Jobs, Steve Wozniak, and Ronald Wayne as a partnership.",
                    },
                ],
            )

            from pinecone import SearchQuery, SearchRerank, RerankModel

            # search for similar records
            response = await idx.search_records(
                namespace="my-namespace",
                query=SearchQuery(
                    inputs={
                        "text": "Apple corporation",
                    },
                    top_k=3,
                ),
                rerank=SearchRerank(
                    model=RerankModel.Bge_Reranker_V2_M3,
                    rank_fields=["my_text_field"],
                    top_n=3,
                ),
            )

    asyncio.run(main())
    ```

    """
    # NOTE(review): the `fields` default is a shared list object;
    # implementations should treat it as read-only.
    pass
Parameters
- namespace: The namespace in the index to search.
- query: The SearchQuery to use for the search.
- rerank: The SearchRerank to use with the search request.
Returns
The records that match the search.
Search for records.
This operation converts a query to a vector embedding and then searches a namespace. You can optionally provide a reranking operation as part of the search.
import asyncio
from pinecone import (
Pinecone,
CloudProvider,
AwsRegion,
EmbedModel,
IndexEmbed
)
async def main():
pc = Pinecone()
async with pc.IndexAsyncio(host="example-dojoi3u.svc.aped-4627-b74a.pinecone.io") as idx:
# upsert records
await idx.upsert_records(
namespace="my-namespace",
records=[
{
"_id": "test1",
"my_text_field": "Apple is a popular fruit known for its sweetness and crisp texture.",
},
{
"_id": "test2",
"my_text_field": "The tech company Apple is known for its innovative products like the iPhone.",
},
{
"_id": "test3",
"my_text_field": "Many people enjoy eating apples as a healthy snack.",
},
{
"_id": "test4",
"my_text_field": "Apple Inc. has revolutionized the tech industry with its sleek designs and user-friendly interfaces.",
},
{
"_id": "test5",
"my_text_field": "An apple a day keeps the doctor away, as the saying goes.",
},
{
"_id": "test6",
"my_text_field": "Apple Computer Company was founded on April 1, 1976, by Steve Jobs, Steve Wozniak, and Ronald Wayne as a partnership.",
},
],
)
from pinecone import SearchQuery, SearchRerank, RerankModel
# search for similar records
response = await idx.search_records(
namespace="my-namespace",
query=SearchQuery(
inputs={
"text": "Apple corporation",
},
top_k=3,
),
rerank=SearchRerank(
model=RerankModel.Bge_Reranker_V2_M3,
rank_fields=["my_text_field"],
top_n=3,
),
)
asyncio.run(main())
@abstractmethod
async def search_records(
    self,
    namespace: str,
    query: Union[SearchQueryTypedDict, SearchQuery],
    rerank: Optional[Union[SearchRerankTypedDict, SearchRerank]] = None,
    fields: Optional[List[str]] = ["*"],  # Default to returning all fields
) -> SearchRecordsResponse:
    """Alias of the search() method; it takes the same arguments."""
    ...
Alias of the search() method.