From e41b382e1c4ed24e25eb07f196370cb2f201d740 Mon Sep 17 00:00:00 2001 From: 0xcha05 <103983696+0xcha05@users.noreply.github.com> Date: Mon, 3 Jul 2023 00:16:19 +0530 Subject: [PATCH] Added filter and delete all option to delete function in Pinecone integration, updated base VectorStore's delete function (#6876) ### Description: Updated the delete function in the Pinecone integration to allow for deletion of vectors by specifying a filter condition, and to delete all vectors in a namespace. Made the ids parameter optional in the delete function in the base VectorStore class and allowed for additional keyword arguments. Updated the delete function in several classes (Redis, Chroma, Supabase, Deeplake, Elastic, Weaviate, and Cassandra) to match the changes made in the base VectorStore class. This involved making the ids parameter optional and allowing for additional keyword arguments. --- langchain/vectorstores/base.py | 9 +++--- langchain/vectorstores/cassandra.py | 9 ++++-- langchain/vectorstores/chroma.py | 2 +- langchain/vectorstores/deeplake.py | 23 +++++--------- .../vectorstores/elastic_vector_search.py | 5 ++- langchain/vectorstores/pinecone.py | 31 ++++++++++++++----- langchain/vectorstores/redis.py | 2 +- langchain/vectorstores/supabase.py | 6 +++- langchain/vectorstores/weaviate.py | 5 ++- 9 files changed, 58 insertions(+), 34 deletions(-) diff --git a/langchain/vectorstores/base.py b/langchain/vectorstores/base.py index 1e574af5d..0328a3bcb 100644 --- a/langchain/vectorstores/base.py +++ b/langchain/vectorstores/base.py @@ -53,20 +53,19 @@ class VectorStore(ABC): List of ids from adding the texts into the vectorstore. """ - def delete(self, ids: List[str]) -> Optional[bool]: - """Delete by vector ID. + def delete(self, ids: Optional[List[str]] = None, **kwargs: Any) -> Optional[bool]: + """Delete by vector ID or other criteria. Args: ids: List of ids to delete. + **kwargs: Other keyword arguments that subclasses might use. Returns: Optional[bool]: True if deletion is successful, False otherwise, None if not implemented. """ - raise NotImplementedError( - "delete_by_id method must be implemented by subclass." - ) + raise NotImplementedError("delete method must be implemented by subclass.") async def aadd_texts( self, diff --git a/langchain/vectorstores/cassandra.py b/langchain/vectorstores/cassandra.py index 68bbf2850..6b752c76a 100644 --- a/langchain/vectorstores/cassandra.py +++ b/langchain/vectorstores/cassandra.py @@ -91,8 +91,9 @@ class Cassandra(VectorStore): def delete_by_document_id(self, document_id: str) -> None: return self.table.delete(document_id) - def delete(self, ids: List[str]) -> Optional[bool]: - """Delete by vector ID. + def delete(self, ids: Optional[List[str]] = None, **kwargs: Any) -> Optional[bool]: + """Delete by vector IDs. + Args: ids: List of ids to delete. @@ -101,6 +102,10 @@ class Cassandra(VectorStore): Optional[bool]: True if deletion is successful, False otherwise, None if not implemented. """ + + if ids is None: + raise ValueError("No ids provided to delete.") + for document_id in ids: self.delete_by_document_id(document_id) return True diff --git a/langchain/vectorstores/chroma.py b/langchain/vectorstores/chroma.py index 394a6026f..6ca60def7 100644 --- a/langchain/vectorstores/chroma.py +++ b/langchain/vectorstores/chroma.py @@ -470,7 +470,7 @@ class Chroma(VectorStore): client=client, ) - def delete(self, ids: List[str]) -> None: + def delete(self, ids: Optional[List[str]] = None, **kwargs: Any) -> None: """Delete by vector IDs. Args: diff --git a/langchain/vectorstores/deeplake.py b/langchain/vectorstores/deeplake.py index 952100592..5200898cb 100644 --- a/langchain/vectorstores/deeplake.py +++ b/langchain/vectorstores/deeplake.py @@ -744,30 +744,23 @@ class DeepLake(VectorStore): ) return deeplake_dataset - def delete( - self, - ids: Any[List[str], None] = None, - filter: Any[Dict[str, str], None] = None, - delete_all: Any[bool, None] = None, - ) -> bool: + def delete(self, ids: Optional[List[str]] = None, **kwargs: Any) -> bool: """Delete the entities in the dataset. Args: ids (Optional[List[str]], optional): The document_ids to delete. Defaults to None. - filter (Optional[Dict[str, str]], optional): The filter to delete by. - Defaults to None. - delete_all (Optional[bool], optional): Whether to drop the dataset. - Defaults to None. + **kwargs: Other keyword arguments that subclasses might use. + - filter (Optional[Dict[str, str]], optional): The filter to delete by. + - delete_all (Optional[bool], optional): Whether to drop the dataset. Returns: bool: Whether the delete operation was successful. """ - self.vectorstore.delete( - ids=ids, - filter=filter, - delete_all=delete_all, - ) + filter = kwargs.get("filter") + delete_all = kwargs.get("delete_all") + + self.vectorstore.delete(ids=ids, filter=filter, delete_all=delete_all) return True diff --git a/langchain/vectorstores/elastic_vector_search.py b/langchain/vectorstores/elastic_vector_search.py index 8d453538f..ac38d37c2 100644 --- a/langchain/vectorstores/elastic_vector_search.py +++ b/langchain/vectorstores/elastic_vector_search.py @@ -317,13 +317,16 @@ class ElasticVectorSearch(VectorStore, ABC): ) return response - def delete(self, ids: List[str]) -> None: + def delete(self, ids: Optional[List[str]] = None, **kwargs: Any) -> None: """Delete by vector IDs. Args: ids: List of ids to delete. """ + if ids is None: + raise ValueError("No ids provided to delete.") + # TODO: Check if this can be done in bulk for id in ids: self.client.delete(index=self.index_name, id=id) diff --git a/langchain/vectorstores/pinecone.py b/langchain/vectorstores/pinecone.py index 3d4e12c6b..552fa2ef1 100644 --- a/langchain/vectorstores/pinecone.py +++ b/langchain/vectorstores/pinecone.py @@ -354,16 +354,33 @@ class Pinecone(VectorStore): pinecone.Index(index_name), embedding.embed_query, text_key, namespace ) - def delete(self, ids: List[str], namespace: Optional[str] = None) -> None: - """Delete by vector IDs. + def delete( + self, + ids: Optional[List[str]] = None, + delete_all: Optional[bool] = None, + namespace: Optional[str] = None, + filter: Optional[dict] = None, + **kwargs: Any, + ) -> None: + """Delete by vector IDs or filter. Args: ids: List of ids to delete. + filter: Dictionary of conditions to filter vectors to delete. """ - # This is the maximum number of IDs that can be deleted if namespace is None: namespace = self._namespace - chunk_size = 1000 - for i in range(0, len(ids), chunk_size): - chunk = ids[i : i + chunk_size] - self._index.delete(ids=chunk, namespace=namespace) + + if delete_all: + self._index.delete(delete_all=True, namespace=namespace, **kwargs) + elif ids is not None: + chunk_size = 1000 + for i in range(0, len(ids), chunk_size): + chunk = ids[i : i + chunk_size] + self._index.delete(ids=chunk, namespace=namespace, **kwargs) + elif filter is not None: + self._index.delete(filter=filter, namespace=namespace, **kwargs) + else: + raise ValueError("Either ids, delete_all, or filter must be provided.") + + return None diff --git a/langchain/vectorstores/redis.py b/langchain/vectorstores/redis.py index 31c95e1f9..10bda5123 100644 --- a/langchain/vectorstores/redis.py +++ b/langchain/vectorstores/redis.py @@ -469,7 +469,7 @@ class Redis(VectorStore): @staticmethod def delete( - ids: List[str], + ids: Optional[List[str]] = None, **kwargs: Any, ) -> bool: """ diff --git a/langchain/vectorstores/supabase.py b/langchain/vectorstores/supabase.py index 94a2c199c..063bcd376 100644 --- a/langchain/vectorstores/supabase.py +++ b/langchain/vectorstores/supabase.py @@ -346,12 +346,16 @@ class SupabaseVectorStore(VectorStore): ) return docs - def delete(self, ids: List[str]) -> None: + def delete(self, ids: Optional[List[str]] = None, **kwargs: Any) -> None: """Delete by vector IDs. Args: ids: List of ids to delete. """ + + if ids is None: + raise ValueError("No ids provided to delete.") + rows: List[dict[str, Any]] = [ { "id": id, diff --git a/langchain/vectorstores/weaviate.py b/langchain/vectorstores/weaviate.py index 10d14f3d0..f623082e4 100644 --- a/langchain/vectorstores/weaviate.py +++ b/langchain/vectorstores/weaviate.py @@ -470,13 +470,16 @@ class Weaviate(VectorStore): by_text=by_text, ) - def delete(self, ids: List[str]) -> None: + def delete(self, ids: Optional[List[str]] = None, **kwargs: Any) -> None: """Delete by vector IDs. Args: ids: List of ids to delete. """ + if ids is None: + raise ValueError("No ids provided to delete.") + # TODO: Check if this can be done in bulk for id in ids: self._client.data_object.delete(uuid=id)