mirror of
https://github.com/kennethreitz/langchain.git
synced 2026-06-05 23:00:18 +00:00
Harrison/move vectorstore base (#11030)
This commit is contained in:
@@ -36,7 +36,7 @@
|
||||
"from langchain.chains import LLMChain\nfrom langchain.llms import OpenAI\nfrom langchain.prompts import PromptTemplate\n",
|
||||
"from langchain.embeddings import OpenAIEmbeddings\n",
|
||||
"from langchain.llms import BaseLLM\n",
|
||||
"from langchain.vectorstores.base import VectorStore\n",
|
||||
"from langchain.schema.vectorstore import VectorStore\n",
|
||||
"from pydantic import BaseModel, Field\n",
|
||||
"from langchain.chains.base import Chain\n",
|
||||
"from langchain_experimental.autonomous_agents import BabyAGI"
|
||||
|
||||
@@ -32,7 +32,7 @@
|
||||
"from langchain.chains import LLMChain\nfrom langchain.llms import OpenAI\nfrom langchain.prompts import PromptTemplate\n",
|
||||
"from langchain.embeddings import OpenAIEmbeddings\n",
|
||||
"from langchain.llms import BaseLLM\n",
|
||||
"from langchain.vectorstores.base import VectorStore\n",
|
||||
"from langchain.schema.vectorstore import VectorStore\n",
|
||||
"from pydantic import BaseModel, Field\n",
|
||||
"from langchain.chains.base import Chain\n",
|
||||
"from langchain_experimental.autonomous_agents import BabyAGI"
|
||||
|
||||
@@ -10,9 +10,9 @@ from langchain.schema import (
|
||||
Document,
|
||||
)
|
||||
from langchain.schema.messages import AIMessage, HumanMessage, SystemMessage
|
||||
from langchain.schema.vectorstore import VectorStoreRetriever
|
||||
from langchain.tools.base import BaseTool
|
||||
from langchain.tools.human.tool import HumanInputRun
|
||||
from langchain.vectorstores.base import VectorStoreRetriever
|
||||
|
||||
from langchain_experimental.autonomous_agents.autogpt.output_parser import (
|
||||
AutoGPTOutputParser,
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
from typing import Any, Dict, List
|
||||
|
||||
from langchain.memory.chat_memory import BaseChatMemory, get_prompt_input_key
|
||||
from langchain.vectorstores.base import VectorStoreRetriever
|
||||
from langchain.schema.vectorstore import VectorStoreRetriever
|
||||
|
||||
from langchain_experimental.pydantic_v1 import Field
|
||||
|
||||
|
||||
@@ -5,8 +5,8 @@ from langchain.prompts.chat import (
|
||||
BaseChatPromptTemplate,
|
||||
)
|
||||
from langchain.schema.messages import BaseMessage, HumanMessage, SystemMessage
|
||||
from langchain.schema.vectorstore import VectorStoreRetriever
|
||||
from langchain.tools.base import BaseTool
|
||||
from langchain.vectorstores.base import VectorStoreRetriever
|
||||
|
||||
from langchain_experimental.autonomous_agents.autogpt.prompt_generator import get_prompt
|
||||
from langchain_experimental.pydantic_v1 import BaseModel
|
||||
|
||||
@@ -5,7 +5,7 @@ from typing import Any, Dict, List, Optional
|
||||
from langchain.callbacks.manager import CallbackManagerForChainRun
|
||||
from langchain.chains.base import Chain
|
||||
from langchain.schema.language_model import BaseLanguageModel
|
||||
from langchain.vectorstores.base import VectorStore
|
||||
from langchain.schema.vectorstore import VectorStore
|
||||
|
||||
from langchain_experimental.autonomous_agents.baby_agi.task_creation import (
|
||||
TaskCreationChain,
|
||||
|
||||
@@ -5,12 +5,12 @@ from langchain.agents.agent_toolkits.base import BaseToolkit
|
||||
from langchain.llms.openai import OpenAI
|
||||
from langchain.pydantic_v1 import BaseModel, Field
|
||||
from langchain.schema.language_model import BaseLanguageModel
|
||||
from langchain.schema.vectorstore import VectorStore
|
||||
from langchain.tools import BaseTool
|
||||
from langchain.tools.vectorstore.tool import (
|
||||
VectorStoreQATool,
|
||||
VectorStoreQAWithSourcesTool,
|
||||
)
|
||||
from langchain.vectorstores.base import VectorStore
|
||||
|
||||
|
||||
class VectorStoreInfo(BaseModel):
|
||||
|
||||
@@ -22,7 +22,7 @@ from langchain.pydantic_v1 import Extra, Field, root_validator
|
||||
from langchain.schema import BasePromptTemplate, BaseRetriever, Document
|
||||
from langchain.schema.language_model import BaseLanguageModel
|
||||
from langchain.schema.messages import BaseMessage
|
||||
from langchain.vectorstores.base import VectorStore
|
||||
from langchain.schema.vectorstore import VectorStore
|
||||
|
||||
# Depending on the memory type and configuration, the chat history format may differ.
|
||||
# This needs to be consolidated.
|
||||
|
||||
@@ -11,7 +11,7 @@ from langchain.chains.combine_documents.stuff import StuffDocumentsChain
|
||||
from langchain.chains.qa_with_sources.base import BaseQAWithSourcesChain
|
||||
from langchain.docstore.document import Document
|
||||
from langchain.pydantic_v1 import Field, root_validator
|
||||
from langchain.vectorstores.base import VectorStore
|
||||
from langchain.schema.vectorstore import VectorStore
|
||||
|
||||
|
||||
class VectorDBQAWithSourcesChain(BaseQAWithSourcesChain):
|
||||
|
||||
@@ -21,7 +21,7 @@ from langchain.prompts import PromptTemplate
|
||||
from langchain.pydantic_v1 import Extra, Field, root_validator
|
||||
from langchain.schema import BaseRetriever, Document
|
||||
from langchain.schema.language_model import BaseLanguageModel
|
||||
from langchain.vectorstores.base import VectorStore
|
||||
from langchain.schema.vectorstore import VectorStore
|
||||
|
||||
|
||||
class BaseRetrievalQA(Chain):
|
||||
@@ -198,7 +198,7 @@ class RetrievalQA(BaseRetrievalQA):
|
||||
from langchain.llms import OpenAI
|
||||
from langchain.chains import RetrievalQA
|
||||
from langchain.faiss import FAISS
|
||||
from langchain.vectorstores.base import VectorStoreRetriever
|
||||
from langchain.schema.vectorstore import VectorStoreRetriever
|
||||
retriever = VectorStoreRetriever(vectorstore=FAISS(...))
|
||||
retrievalQA = RetrievalQA.from_llm(llm=OpenAI(), retriever=retriever)
|
||||
|
||||
|
||||
@@ -7,7 +7,7 @@ from langchain.chains.router.base import RouterChain
|
||||
from langchain.docstore.document import Document
|
||||
from langchain.pydantic_v1 import Extra
|
||||
from langchain.schema.embeddings import Embeddings
|
||||
from langchain.vectorstores.base import VectorStore
|
||||
from langchain.schema.vectorstore import VectorStore
|
||||
|
||||
|
||||
class EmbeddingRouterChain(RouterChain):
|
||||
|
||||
@@ -25,7 +25,7 @@ from langchain.document_loaders.base import BaseLoader
|
||||
from langchain.indexes.base import NAMESPACE_UUID, RecordManager
|
||||
from langchain.pydantic_v1 import root_validator
|
||||
from langchain.schema import Document
|
||||
from langchain.vectorstores.base import VectorStore
|
||||
from langchain.schema.vectorstore import VectorStore
|
||||
|
||||
T = TypeVar("T")
|
||||
|
||||
|
||||
@@ -9,8 +9,8 @@ from langchain.pydantic_v1 import BaseModel, Extra, Field
|
||||
from langchain.schema import Document
|
||||
from langchain.schema.embeddings import Embeddings
|
||||
from langchain.schema.language_model import BaseLanguageModel
|
||||
from langchain.schema.vectorstore import VectorStore
|
||||
from langchain.text_splitter import RecursiveCharacterTextSplitter, TextSplitter
|
||||
from langchain.vectorstores.base import VectorStore
|
||||
from langchain.vectorstores.chroma import Chroma
|
||||
|
||||
|
||||
|
||||
@@ -6,7 +6,7 @@ from langchain.memory.chat_memory import BaseMemory
|
||||
from langchain.memory.utils import get_prompt_input_key
|
||||
from langchain.pydantic_v1 import Field
|
||||
from langchain.schema import Document
|
||||
from langchain.vectorstores.base import VectorStoreRetriever
|
||||
from langchain.schema.vectorstore import VectorStoreRetriever
|
||||
|
||||
|
||||
class VectorStoreRetrieverMemory(BaseMemory):
|
||||
|
||||
@@ -6,7 +6,7 @@ from typing import Any, Dict, List, Optional, Type
|
||||
from langchain.prompts.example_selector.base import BaseExampleSelector
|
||||
from langchain.pydantic_v1 import BaseModel, Extra
|
||||
from langchain.schema.embeddings import Embeddings
|
||||
from langchain.vectorstores.base import VectorStore
|
||||
from langchain.schema.vectorstore import VectorStore
|
||||
|
||||
|
||||
def sorted_values(values: Dict[str, str]) -> List[Any]:
|
||||
|
||||
@@ -5,7 +5,7 @@ from typing import Any, Dict, List, Optional, Tuple
|
||||
from langchain.callbacks.manager import CallbackManagerForRetrieverRun
|
||||
from langchain.pydantic_v1 import Field
|
||||
from langchain.schema import BaseRetriever, Document
|
||||
from langchain.vectorstores.base import VectorStore
|
||||
from langchain.schema.vectorstore import VectorStore
|
||||
|
||||
|
||||
def _get_hours_passed(time: datetime.datetime, ref_time: datetime.datetime) -> float:
|
||||
|
||||
@@ -16,9 +16,9 @@ from langchain.output_parsers.pydantic import PydanticOutputParser
|
||||
from langchain.prompts import BasePromptTemplate, PromptTemplate
|
||||
from langchain.pydantic_v1 import BaseModel, Field
|
||||
from langchain.schema import BaseRetriever, Document
|
||||
from langchain.schema.vectorstore import VectorStore
|
||||
from langchain.text_splitter import RecursiveCharacterTextSplitter, TextSplitter
|
||||
from langchain.utilities import GoogleSearchAPIWrapper
|
||||
from langchain.vectorstores.base import VectorStore
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@@ -0,0 +1,611 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
import math
|
||||
import warnings
|
||||
from abc import ABC, abstractmethod
|
||||
from functools import partial
|
||||
from typing import (
|
||||
TYPE_CHECKING,
|
||||
Any,
|
||||
Callable,
|
||||
ClassVar,
|
||||
Collection,
|
||||
Dict,
|
||||
Iterable,
|
||||
List,
|
||||
Optional,
|
||||
Tuple,
|
||||
Type,
|
||||
TypeVar,
|
||||
)
|
||||
|
||||
from langchain.pydantic_v1 import Field, root_validator
|
||||
from langchain.schema import BaseRetriever
|
||||
from langchain.schema.document import Document
|
||||
from langchain.schema.embeddings import Embeddings
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from langchain.callbacks.manager import (
|
||||
AsyncCallbackManagerForRetrieverRun,
|
||||
CallbackManagerForRetrieverRun,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
VST = TypeVar("VST", bound="VectorStore")
|
||||
|
||||
|
||||
class VectorStore(ABC):
|
||||
"""Interface for vector store."""
|
||||
|
||||
@abstractmethod
|
||||
def add_texts(
|
||||
self,
|
||||
texts: Iterable[str],
|
||||
metadatas: Optional[List[dict]] = None,
|
||||
**kwargs: Any,
|
||||
) -> List[str]:
|
||||
"""Run more texts through the embeddings and add to the vectorstore.
|
||||
|
||||
Args:
|
||||
texts: Iterable of strings to add to the vectorstore.
|
||||
metadatas: Optional list of metadatas associated with the texts.
|
||||
kwargs: vectorstore specific parameters
|
||||
|
||||
Returns:
|
||||
List of ids from adding the texts into the vectorstore.
|
||||
"""
|
||||
|
||||
@property
|
||||
def embeddings(self) -> Optional[Embeddings]:
|
||||
"""Access the query embedding object if available."""
|
||||
logger.debug(
|
||||
f"{Embeddings.__name__} is not implemented for {self.__class__.__name__}"
|
||||
)
|
||||
return None
|
||||
|
||||
def delete(self, ids: Optional[List[str]] = None, **kwargs: Any) -> Optional[bool]:
|
||||
"""Delete by vector ID or other criteria.
|
||||
|
||||
Args:
|
||||
ids: List of ids to delete.
|
||||
**kwargs: Other keyword arguments that subclasses might use.
|
||||
|
||||
Returns:
|
||||
Optional[bool]: True if deletion is successful,
|
||||
False otherwise, None if not implemented.
|
||||
"""
|
||||
|
||||
raise NotImplementedError("delete method must be implemented by subclass.")
|
||||
|
||||
async def aadd_texts(
|
||||
self,
|
||||
texts: Iterable[str],
|
||||
metadatas: Optional[List[dict]] = None,
|
||||
**kwargs: Any,
|
||||
) -> List[str]:
|
||||
"""Run more texts through the embeddings and add to the vectorstore."""
|
||||
raise NotImplementedError
|
||||
|
||||
def add_documents(self, documents: List[Document], **kwargs: Any) -> List[str]:
|
||||
"""Run more documents through the embeddings and add to the vectorstore.
|
||||
|
||||
Args:
|
||||
documents (List[Document]: Documents to add to the vectorstore.
|
||||
|
||||
Returns:
|
||||
List[str]: List of IDs of the added texts.
|
||||
"""
|
||||
# TODO: Handle the case where the user doesn't provide ids on the Collection
|
||||
texts = [doc.page_content for doc in documents]
|
||||
metadatas = [doc.metadata for doc in documents]
|
||||
return self.add_texts(texts, metadatas, **kwargs)
|
||||
|
||||
async def aadd_documents(
|
||||
self, documents: List[Document], **kwargs: Any
|
||||
) -> List[str]:
|
||||
"""Run more documents through the embeddings and add to the vectorstore.
|
||||
|
||||
Args:
|
||||
documents (List[Document]: Documents to add to the vectorstore.
|
||||
|
||||
Returns:
|
||||
List[str]: List of IDs of the added texts.
|
||||
"""
|
||||
texts = [doc.page_content for doc in documents]
|
||||
metadatas = [doc.metadata for doc in documents]
|
||||
return await self.aadd_texts(texts, metadatas, **kwargs)
|
||||
|
||||
def search(self, query: str, search_type: str, **kwargs: Any) -> List[Document]:
|
||||
"""Return docs most similar to query using specified search type."""
|
||||
if search_type == "similarity":
|
||||
return self.similarity_search(query, **kwargs)
|
||||
elif search_type == "mmr":
|
||||
return self.max_marginal_relevance_search(query, **kwargs)
|
||||
else:
|
||||
raise ValueError(
|
||||
f"search_type of {search_type} not allowed. Expected "
|
||||
"search_type to be 'similarity' or 'mmr'."
|
||||
)
|
||||
|
||||
async def asearch(
|
||||
self, query: str, search_type: str, **kwargs: Any
|
||||
) -> List[Document]:
|
||||
"""Return docs most similar to query using specified search type."""
|
||||
if search_type == "similarity":
|
||||
return await self.asimilarity_search(query, **kwargs)
|
||||
elif search_type == "mmr":
|
||||
return await self.amax_marginal_relevance_search(query, **kwargs)
|
||||
else:
|
||||
raise ValueError(
|
||||
f"search_type of {search_type} not allowed. Expected "
|
||||
"search_type to be 'similarity' or 'mmr'."
|
||||
)
|
||||
|
||||
@abstractmethod
|
||||
def similarity_search(
|
||||
self, query: str, k: int = 4, **kwargs: Any
|
||||
) -> List[Document]:
|
||||
"""Return docs most similar to query."""
|
||||
|
||||
@staticmethod
|
||||
def _euclidean_relevance_score_fn(distance: float) -> float:
|
||||
"""Return a similarity score on a scale [0, 1]."""
|
||||
# The 'correct' relevance function
|
||||
# may differ depending on a few things, including:
|
||||
# - the distance / similarity metric used by the VectorStore
|
||||
# - the scale of your embeddings (OpenAI's are unit normed. Many
|
||||
# others are not!)
|
||||
# - embedding dimensionality
|
||||
# - etc.
|
||||
# This function converts the euclidean norm of normalized embeddings
|
||||
# (0 is most similar, sqrt(2) most dissimilar)
|
||||
# to a similarity function (0 to 1)
|
||||
return 1.0 - distance / math.sqrt(2)
|
||||
|
||||
@staticmethod
|
||||
def _cosine_relevance_score_fn(distance: float) -> float:
|
||||
"""Normalize the distance to a score on a scale [0, 1]."""
|
||||
|
||||
return 1.0 - distance
|
||||
|
||||
@staticmethod
|
||||
def _max_inner_product_relevance_score_fn(distance: float) -> float:
|
||||
"""Normalize the distance to a score on a scale [0, 1]."""
|
||||
if distance > 0:
|
||||
return 1.0 - distance
|
||||
|
||||
return -1.0 * distance
|
||||
|
||||
def _select_relevance_score_fn(self) -> Callable[[float], float]:
|
||||
"""
|
||||
The 'correct' relevance function
|
||||
may differ depending on a few things, including:
|
||||
- the distance / similarity metric used by the VectorStore
|
||||
- the scale of your embeddings (OpenAI's are unit normed. Many others are not!)
|
||||
- embedding dimensionality
|
||||
- etc.
|
||||
|
||||
Vectorstores should define their own selection based method of relevance.
|
||||
"""
|
||||
raise NotImplementedError
|
||||
|
||||
def similarity_search_with_score(
|
||||
self, *args: Any, **kwargs: Any
|
||||
) -> List[Tuple[Document, float]]:
|
||||
"""Run similarity search with distance."""
|
||||
raise NotImplementedError
|
||||
|
||||
def _similarity_search_with_relevance_scores(
|
||||
self,
|
||||
query: str,
|
||||
k: int = 4,
|
||||
**kwargs: Any,
|
||||
) -> List[Tuple[Document, float]]:
|
||||
"""
|
||||
Default similarity search with relevance scores. Modify if necessary
|
||||
in subclass.
|
||||
Return docs and relevance scores in the range [0, 1].
|
||||
|
||||
0 is dissimilar, 1 is most similar.
|
||||
|
||||
Args:
|
||||
query: input text
|
||||
k: Number of Documents to return. Defaults to 4.
|
||||
**kwargs: kwargs to be passed to similarity search. Should include:
|
||||
score_threshold: Optional, a floating point value between 0 to 1 to
|
||||
filter the resulting set of retrieved docs
|
||||
|
||||
Returns:
|
||||
List of Tuples of (doc, similarity_score)
|
||||
"""
|
||||
relevance_score_fn = self._select_relevance_score_fn()
|
||||
docs_and_scores = self.similarity_search_with_score(query, k, **kwargs)
|
||||
return [(doc, relevance_score_fn(score)) for doc, score in docs_and_scores]
|
||||
|
||||
def similarity_search_with_relevance_scores(
|
||||
self,
|
||||
query: str,
|
||||
k: int = 4,
|
||||
**kwargs: Any,
|
||||
) -> List[Tuple[Document, float]]:
|
||||
"""Return docs and relevance scores in the range [0, 1].
|
||||
|
||||
0 is dissimilar, 1 is most similar.
|
||||
|
||||
Args:
|
||||
query: input text
|
||||
k: Number of Documents to return. Defaults to 4.
|
||||
**kwargs: kwargs to be passed to similarity search. Should include:
|
||||
score_threshold: Optional, a floating point value between 0 to 1 to
|
||||
filter the resulting set of retrieved docs
|
||||
|
||||
Returns:
|
||||
List of Tuples of (doc, similarity_score)
|
||||
"""
|
||||
score_threshold = kwargs.pop("score_threshold", None)
|
||||
|
||||
docs_and_similarities = self._similarity_search_with_relevance_scores(
|
||||
query, k=k, **kwargs
|
||||
)
|
||||
if any(
|
||||
similarity < 0.0 or similarity > 1.0
|
||||
for _, similarity in docs_and_similarities
|
||||
):
|
||||
warnings.warn(
|
||||
"Relevance scores must be between"
|
||||
f" 0 and 1, got {docs_and_similarities}"
|
||||
)
|
||||
|
||||
if score_threshold is not None:
|
||||
docs_and_similarities = [
|
||||
(doc, similarity)
|
||||
for doc, similarity in docs_and_similarities
|
||||
if similarity >= score_threshold
|
||||
]
|
||||
if len(docs_and_similarities) == 0:
|
||||
warnings.warn(
|
||||
"No relevant docs were retrieved using the relevance score"
|
||||
f" threshold {score_threshold}"
|
||||
)
|
||||
return docs_and_similarities
|
||||
|
||||
async def asimilarity_search_with_relevance_scores(
|
||||
self, query: str, k: int = 4, **kwargs: Any
|
||||
) -> List[Tuple[Document, float]]:
|
||||
"""Return docs most similar to query."""
|
||||
|
||||
# This is a temporary workaround to make the similarity search
|
||||
# asynchronous. The proper solution is to make the similarity search
|
||||
# asynchronous in the vector store implementations.
|
||||
func = partial(
|
||||
self.similarity_search_with_relevance_scores, query, k=k, **kwargs
|
||||
)
|
||||
return await asyncio.get_event_loop().run_in_executor(None, func)
|
||||
|
||||
async def asimilarity_search(
|
||||
self, query: str, k: int = 4, **kwargs: Any
|
||||
) -> List[Document]:
|
||||
"""Return docs most similar to query."""
|
||||
|
||||
# This is a temporary workaround to make the similarity search
|
||||
# asynchronous. The proper solution is to make the similarity search
|
||||
# asynchronous in the vector store implementations.
|
||||
func = partial(self.similarity_search, query, k=k, **kwargs)
|
||||
return await asyncio.get_event_loop().run_in_executor(None, func)
|
||||
|
||||
def similarity_search_by_vector(
|
||||
self, embedding: List[float], k: int = 4, **kwargs: Any
|
||||
) -> List[Document]:
|
||||
"""Return docs most similar to embedding vector.
|
||||
|
||||
Args:
|
||||
embedding: Embedding to look up documents similar to.
|
||||
k: Number of Documents to return. Defaults to 4.
|
||||
|
||||
Returns:
|
||||
List of Documents most similar to the query vector.
|
||||
"""
|
||||
raise NotImplementedError
|
||||
|
||||
async def asimilarity_search_by_vector(
|
||||
self, embedding: List[float], k: int = 4, **kwargs: Any
|
||||
) -> List[Document]:
|
||||
"""Return docs most similar to embedding vector."""
|
||||
|
||||
# This is a temporary workaround to make the similarity search
|
||||
# asynchronous. The proper solution is to make the similarity search
|
||||
# asynchronous in the vector store implementations.
|
||||
func = partial(self.similarity_search_by_vector, embedding, k=k, **kwargs)
|
||||
return await asyncio.get_event_loop().run_in_executor(None, func)
|
||||
|
||||
def max_marginal_relevance_search(
|
||||
self,
|
||||
query: str,
|
||||
k: int = 4,
|
||||
fetch_k: int = 20,
|
||||
lambda_mult: float = 0.5,
|
||||
**kwargs: Any,
|
||||
) -> List[Document]:
|
||||
"""Return docs selected using the maximal marginal relevance.
|
||||
|
||||
Maximal marginal relevance optimizes for similarity to query AND diversity
|
||||
among selected documents.
|
||||
|
||||
Args:
|
||||
query: Text to look up documents similar to.
|
||||
k: Number of Documents to return. Defaults to 4.
|
||||
fetch_k: Number of Documents to fetch to pass to MMR algorithm.
|
||||
lambda_mult: Number between 0 and 1 that determines the degree
|
||||
of diversity among the results with 0 corresponding
|
||||
to maximum diversity and 1 to minimum diversity.
|
||||
Defaults to 0.5.
|
||||
Returns:
|
||||
List of Documents selected by maximal marginal relevance.
|
||||
"""
|
||||
raise NotImplementedError
|
||||
|
||||
async def amax_marginal_relevance_search(
|
||||
self,
|
||||
query: str,
|
||||
k: int = 4,
|
||||
fetch_k: int = 20,
|
||||
lambda_mult: float = 0.5,
|
||||
**kwargs: Any,
|
||||
) -> List[Document]:
|
||||
"""Return docs selected using the maximal marginal relevance."""
|
||||
|
||||
# This is a temporary workaround to make the similarity search
|
||||
# asynchronous. The proper solution is to make the similarity search
|
||||
# asynchronous in the vector store implementations.
|
||||
func = partial(
|
||||
self.max_marginal_relevance_search,
|
||||
query,
|
||||
k=k,
|
||||
fetch_k=fetch_k,
|
||||
lambda_mult=lambda_mult,
|
||||
**kwargs,
|
||||
)
|
||||
return await asyncio.get_event_loop().run_in_executor(None, func)
|
||||
|
||||
def max_marginal_relevance_search_by_vector(
|
||||
self,
|
||||
embedding: List[float],
|
||||
k: int = 4,
|
||||
fetch_k: int = 20,
|
||||
lambda_mult: float = 0.5,
|
||||
**kwargs: Any,
|
||||
) -> List[Document]:
|
||||
"""Return docs selected using the maximal marginal relevance.
|
||||
|
||||
Maximal marginal relevance optimizes for similarity to query AND diversity
|
||||
among selected documents.
|
||||
|
||||
Args:
|
||||
embedding: Embedding to look up documents similar to.
|
||||
k: Number of Documents to return. Defaults to 4.
|
||||
fetch_k: Number of Documents to fetch to pass to MMR algorithm.
|
||||
lambda_mult: Number between 0 and 1 that determines the degree
|
||||
of diversity among the results with 0 corresponding
|
||||
to maximum diversity and 1 to minimum diversity.
|
||||
Defaults to 0.5.
|
||||
Returns:
|
||||
List of Documents selected by maximal marginal relevance.
|
||||
"""
|
||||
raise NotImplementedError
|
||||
|
||||
async def amax_marginal_relevance_search_by_vector(
|
||||
self,
|
||||
embedding: List[float],
|
||||
k: int = 4,
|
||||
fetch_k: int = 20,
|
||||
lambda_mult: float = 0.5,
|
||||
**kwargs: Any,
|
||||
) -> List[Document]:
|
||||
"""Return docs selected using the maximal marginal relevance."""
|
||||
raise NotImplementedError
|
||||
|
||||
@classmethod
|
||||
def from_documents(
|
||||
cls: Type[VST],
|
||||
documents: List[Document],
|
||||
embedding: Embeddings,
|
||||
**kwargs: Any,
|
||||
) -> VST:
|
||||
"""Return VectorStore initialized from documents and embeddings."""
|
||||
texts = [d.page_content for d in documents]
|
||||
metadatas = [d.metadata for d in documents]
|
||||
return cls.from_texts(texts, embedding, metadatas=metadatas, **kwargs)
|
||||
|
||||
@classmethod
|
||||
async def afrom_documents(
|
||||
cls: Type[VST],
|
||||
documents: List[Document],
|
||||
embedding: Embeddings,
|
||||
**kwargs: Any,
|
||||
) -> VST:
|
||||
"""Return VectorStore initialized from documents and embeddings."""
|
||||
texts = [d.page_content for d in documents]
|
||||
metadatas = [d.metadata for d in documents]
|
||||
return await cls.afrom_texts(texts, embedding, metadatas=metadatas, **kwargs)
|
||||
|
||||
@classmethod
|
||||
@abstractmethod
|
||||
def from_texts(
|
||||
cls: Type[VST],
|
||||
texts: List[str],
|
||||
embedding: Embeddings,
|
||||
metadatas: Optional[List[dict]] = None,
|
||||
**kwargs: Any,
|
||||
) -> VST:
|
||||
"""Return VectorStore initialized from texts and embeddings."""
|
||||
|
||||
@classmethod
|
||||
async def afrom_texts(
|
||||
cls: Type[VST],
|
||||
texts: List[str],
|
||||
embedding: Embeddings,
|
||||
metadatas: Optional[List[dict]] = None,
|
||||
**kwargs: Any,
|
||||
) -> VST:
|
||||
"""Return VectorStore initialized from texts and embeddings."""
|
||||
raise NotImplementedError
|
||||
|
||||
def _get_retriever_tags(self) -> List[str]:
|
||||
"""Get tags for retriever."""
|
||||
tags = [self.__class__.__name__]
|
||||
if self.embeddings:
|
||||
tags.append(self.embeddings.__class__.__name__)
|
||||
return tags
|
||||
|
||||
def as_retriever(self, **kwargs: Any) -> VectorStoreRetriever:
|
||||
"""Return VectorStoreRetriever initialized from this VectorStore.
|
||||
|
||||
Args:
|
||||
search_type (Optional[str]): Defines the type of search that
|
||||
the Retriever should perform.
|
||||
Can be "similarity" (default), "mmr", or
|
||||
"similarity_score_threshold".
|
||||
search_kwargs (Optional[Dict]): Keyword arguments to pass to the
|
||||
search function. Can include things like:
|
||||
k: Amount of documents to return (Default: 4)
|
||||
score_threshold: Minimum relevance threshold
|
||||
for similarity_score_threshold
|
||||
fetch_k: Amount of documents to pass to MMR algorithm (Default: 20)
|
||||
lambda_mult: Diversity of results returned by MMR;
|
||||
1 for minimum diversity and 0 for maximum. (Default: 0.5)
|
||||
filter: Filter by document metadata
|
||||
|
||||
Returns:
|
||||
VectorStoreRetriever: Retriever class for VectorStore.
|
||||
|
||||
Examples:
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
# Retrieve more documents with higher diversity
|
||||
# Useful if your dataset has many similar documents
|
||||
docsearch.as_retriever(
|
||||
search_type="mmr",
|
||||
search_kwargs={'k': 6, 'lambda_mult': 0.25}
|
||||
)
|
||||
|
||||
# Fetch more documents for the MMR algorithm to consider
|
||||
# But only return the top 5
|
||||
docsearch.as_retriever(
|
||||
search_type="mmr",
|
||||
search_kwargs={'k': 5, 'fetch_k': 50}
|
||||
)
|
||||
|
||||
# Only retrieve documents that have a relevance score
|
||||
# Above a certain threshold
|
||||
docsearch.as_retriever(
|
||||
search_type="similarity_score_threshold",
|
||||
search_kwargs={'score_threshold': 0.8}
|
||||
)
|
||||
|
||||
# Only get the single most similar document from the dataset
|
||||
docsearch.as_retriever(search_kwargs={'k': 1})
|
||||
|
||||
# Use a filter to only retrieve documents from a specific paper
|
||||
docsearch.as_retriever(
|
||||
search_kwargs={'filter': {'paper_title':'GPT-4 Technical Report'}}
|
||||
)
|
||||
"""
|
||||
tags = kwargs.pop("tags", None) or []
|
||||
tags.extend(self._get_retriever_tags())
|
||||
|
||||
return VectorStoreRetriever(vectorstore=self, **kwargs, tags=tags)
|
||||
|
||||
|
||||
class VectorStoreRetriever(BaseRetriever):
|
||||
"""Base Retriever class for VectorStore."""
|
||||
|
||||
vectorstore: VectorStore
|
||||
"""VectorStore to use for retrieval."""
|
||||
search_type: str = "similarity"
|
||||
"""Type of search to perform. Defaults to "similarity"."""
|
||||
search_kwargs: dict = Field(default_factory=dict)
|
||||
"""Keyword arguments to pass to the search function."""
|
||||
allowed_search_types: ClassVar[Collection[str]] = (
|
||||
"similarity",
|
||||
"similarity_score_threshold",
|
||||
"mmr",
|
||||
)
|
||||
|
||||
class Config:
|
||||
"""Configuration for this pydantic object."""
|
||||
|
||||
arbitrary_types_allowed = True
|
||||
|
||||
@root_validator()
|
||||
def validate_search_type(cls, values: Dict) -> Dict:
|
||||
"""Validate search type."""
|
||||
search_type = values["search_type"]
|
||||
if search_type not in cls.allowed_search_types:
|
||||
raise ValueError(
|
||||
f"search_type of {search_type} not allowed. Valid values are: "
|
||||
f"{cls.allowed_search_types}"
|
||||
)
|
||||
if search_type == "similarity_score_threshold":
|
||||
score_threshold = values["search_kwargs"].get("score_threshold")
|
||||
if (score_threshold is None) or (not isinstance(score_threshold, float)):
|
||||
raise ValueError(
|
||||
"`score_threshold` is not specified with a float value(0~1) "
|
||||
"in `search_kwargs`."
|
||||
)
|
||||
return values
|
||||
|
||||
def _get_relevant_documents(
|
||||
self, query: str, *, run_manager: CallbackManagerForRetrieverRun
|
||||
) -> List[Document]:
|
||||
if self.search_type == "similarity":
|
||||
docs = self.vectorstore.similarity_search(query, **self.search_kwargs)
|
||||
elif self.search_type == "similarity_score_threshold":
|
||||
docs_and_similarities = (
|
||||
self.vectorstore.similarity_search_with_relevance_scores(
|
||||
query, **self.search_kwargs
|
||||
)
|
||||
)
|
||||
docs = [doc for doc, _ in docs_and_similarities]
|
||||
elif self.search_type == "mmr":
|
||||
docs = self.vectorstore.max_marginal_relevance_search(
|
||||
query, **self.search_kwargs
|
||||
)
|
||||
else:
|
||||
raise ValueError(f"search_type of {self.search_type} not allowed.")
|
||||
return docs
|
||||
|
||||
async def _aget_relevant_documents(
|
||||
self, query: str, *, run_manager: AsyncCallbackManagerForRetrieverRun
|
||||
) -> List[Document]:
|
||||
if self.search_type == "similarity":
|
||||
docs = await self.vectorstore.asimilarity_search(
|
||||
query, **self.search_kwargs
|
||||
)
|
||||
elif self.search_type == "similarity_score_threshold":
|
||||
docs_and_similarities = (
|
||||
await self.vectorstore.asimilarity_search_with_relevance_scores(
|
||||
query, **self.search_kwargs
|
||||
)
|
||||
)
|
||||
docs = [doc for doc, _ in docs_and_similarities]
|
||||
elif self.search_type == "mmr":
|
||||
docs = await self.vectorstore.amax_marginal_relevance_search(
|
||||
query, **self.search_kwargs
|
||||
)
|
||||
else:
|
||||
raise ValueError(f"search_type of {self.search_type} not allowed.")
|
||||
return docs
|
||||
|
||||
def add_documents(self, documents: List[Document], **kwargs: Any) -> List[str]:
|
||||
"""Add documents to vectorstore."""
|
||||
return self.vectorstore.add_documents(documents, **kwargs)
|
||||
|
||||
async def aadd_documents(
|
||||
self, documents: List[Document], **kwargs: Any
|
||||
) -> List[str]:
|
||||
"""Add documents to vectorstore."""
|
||||
return await self.vectorstore.aadd_documents(documents, **kwargs)
|
||||
@@ -8,8 +8,8 @@ from langchain.chains import RetrievalQA, RetrievalQAWithSourcesChain
|
||||
from langchain.llms.openai import OpenAI
|
||||
from langchain.pydantic_v1 import BaseModel, Field
|
||||
from langchain.schema.language_model import BaseLanguageModel
|
||||
from langchain.schema.vectorstore import VectorStore
|
||||
from langchain.tools.base import BaseTool
|
||||
from langchain.vectorstores.base import VectorStore
|
||||
|
||||
|
||||
class BaseVectorStoreTool(BaseModel):
|
||||
|
||||
@@ -18,6 +18,7 @@ and retrieve the data that are 'most similar' to the embedded query.
|
||||
|
||||
Embeddings, Document
|
||||
""" # noqa: E501
|
||||
from langchain.schema.vectorstore import VectorStore
|
||||
from langchain.vectorstores.alibabacloud_opensearch import (
|
||||
AlibabaCloudOpenSearch,
|
||||
AlibabaCloudOpenSearchSettings,
|
||||
@@ -28,7 +29,6 @@ from langchain.vectorstores.atlas import AtlasDB
|
||||
from langchain.vectorstores.awadb import AwaDB
|
||||
from langchain.vectorstores.azuresearch import AzureSearch
|
||||
from langchain.vectorstores.bageldb import Bagel
|
||||
from langchain.vectorstores.base import VectorStore
|
||||
from langchain.vectorstores.cassandra import Cassandra
|
||||
from langchain.vectorstores.chroma import Chroma
|
||||
from langchain.vectorstores.clarifai import Clarifai
|
||||
|
||||
@@ -6,7 +6,7 @@ from typing import Any, Dict, Iterable, List, Optional, Tuple
|
||||
|
||||
from langchain.schema import Document
|
||||
from langchain.schema.embeddings import Embeddings
|
||||
from langchain.vectorstores.base import VectorStore
|
||||
from langchain.schema.vectorstore import VectorStore
|
||||
|
||||
logger = logging.getLogger()
|
||||
|
||||
|
||||
@@ -14,8 +14,8 @@ except ImportError:
|
||||
|
||||
from langchain.docstore.document import Document
|
||||
from langchain.schema.embeddings import Embeddings
|
||||
from langchain.schema.vectorstore import VectorStore
|
||||
from langchain.utils import get_from_dict_or_env
|
||||
from langchain.vectorstores.base import VectorStore
|
||||
|
||||
_LANGCHAIN_DEFAULT_EMBEDDING_DIM = 1536
|
||||
_LANGCHAIN_DEFAULT_COLLECTION_NAME = "langchain_document"
|
||||
|
||||
@@ -13,7 +13,7 @@ from langchain.docstore.base import Docstore
|
||||
from langchain.docstore.document import Document
|
||||
from langchain.docstore.in_memory import InMemoryDocstore
|
||||
from langchain.schema.embeddings import Embeddings
|
||||
from langchain.vectorstores.base import VectorStore
|
||||
from langchain.schema.vectorstore import VectorStore
|
||||
from langchain.vectorstores.utils import maximal_marginal_relevance
|
||||
|
||||
INDEX_METRICS = frozenset(["angular", "euclidean", "manhattan", "hamming", "dot"])
|
||||
|
||||
@@ -8,7 +8,7 @@ import numpy as np
|
||||
|
||||
from langchain.docstore.document import Document
|
||||
from langchain.schema.embeddings import Embeddings
|
||||
from langchain.vectorstores.base import VectorStore
|
||||
from langchain.schema.vectorstore import VectorStore
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@ import numpy as np
|
||||
|
||||
from langchain.docstore.document import Document
|
||||
from langchain.schema.embeddings import Embeddings
|
||||
from langchain.vectorstores.base import VectorStore
|
||||
from langchain.schema.vectorstore import VectorStore
|
||||
from langchain.vectorstores.utils import maximal_marginal_relevance
|
||||
|
||||
if TYPE_CHECKING:
|
||||
|
||||
@@ -26,8 +26,8 @@ from langchain.docstore.document import Document
|
||||
from langchain.pydantic_v1 import root_validator
|
||||
from langchain.schema import BaseRetriever
|
||||
from langchain.schema.embeddings import Embeddings
|
||||
from langchain.schema.vectorstore import VectorStore
|
||||
from langchain.utils import get_from_env
|
||||
from langchain.vectorstores.base import VectorStore
|
||||
|
||||
logger = logging.getLogger()
|
||||
|
||||
|
||||
@@ -20,8 +20,8 @@ if TYPE_CHECKING:
|
||||
|
||||
from langchain.docstore.document import Document
|
||||
from langchain.schema.embeddings import Embeddings
|
||||
from langchain.schema.vectorstore import VectorStore
|
||||
from langchain.utils import xor_args
|
||||
from langchain.vectorstores.base import VectorStore
|
||||
|
||||
DEFAULT_K = 5
|
||||
|
||||
|
||||
@@ -1,608 +1,3 @@
|
||||
from __future__ import annotations
|
||||
from langchain.schema.vectorstore import VectorStore, VectorStoreRetriever
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
import math
|
||||
import warnings
|
||||
from abc import ABC, abstractmethod
|
||||
from functools import partial
|
||||
from typing import (
|
||||
Any,
|
||||
Callable,
|
||||
ClassVar,
|
||||
Collection,
|
||||
Dict,
|
||||
Iterable,
|
||||
List,
|
||||
Optional,
|
||||
Tuple,
|
||||
Type,
|
||||
TypeVar,
|
||||
)
|
||||
|
||||
from langchain.callbacks.manager import (
|
||||
AsyncCallbackManagerForRetrieverRun,
|
||||
CallbackManagerForRetrieverRun,
|
||||
)
|
||||
from langchain.docstore.document import Document
|
||||
from langchain.pydantic_v1 import Field, root_validator
|
||||
from langchain.schema import BaseRetriever
|
||||
from langchain.schema.embeddings import Embeddings
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
VST = TypeVar("VST", bound="VectorStore")
|
||||
|
||||
|
||||
class VectorStore(ABC):
|
||||
"""Interface for vector store."""
|
||||
|
||||
@abstractmethod
|
||||
def add_texts(
|
||||
self,
|
||||
texts: Iterable[str],
|
||||
metadatas: Optional[List[dict]] = None,
|
||||
**kwargs: Any,
|
||||
) -> List[str]:
|
||||
"""Run more texts through the embeddings and add to the vectorstore.
|
||||
|
||||
Args:
|
||||
texts: Iterable of strings to add to the vectorstore.
|
||||
metadatas: Optional list of metadatas associated with the texts.
|
||||
kwargs: vectorstore specific parameters
|
||||
|
||||
Returns:
|
||||
List of ids from adding the texts into the vectorstore.
|
||||
"""
|
||||
|
||||
@property
|
||||
def embeddings(self) -> Optional[Embeddings]:
|
||||
"""Access the query embedding object if available."""
|
||||
logger.debug(
|
||||
f"{Embeddings.__name__} is not implemented for {self.__class__.__name__}"
|
||||
)
|
||||
return None
|
||||
|
||||
def delete(self, ids: Optional[List[str]] = None, **kwargs: Any) -> Optional[bool]:
|
||||
"""Delete by vector ID or other criteria.
|
||||
|
||||
Args:
|
||||
ids: List of ids to delete.
|
||||
**kwargs: Other keyword arguments that subclasses might use.
|
||||
|
||||
Returns:
|
||||
Optional[bool]: True if deletion is successful,
|
||||
False otherwise, None if not implemented.
|
||||
"""
|
||||
|
||||
raise NotImplementedError("delete method must be implemented by subclass.")
|
||||
|
||||
async def aadd_texts(
|
||||
self,
|
||||
texts: Iterable[str],
|
||||
metadatas: Optional[List[dict]] = None,
|
||||
**kwargs: Any,
|
||||
) -> List[str]:
|
||||
"""Run more texts through the embeddings and add to the vectorstore."""
|
||||
raise NotImplementedError
|
||||
|
||||
def add_documents(self, documents: List[Document], **kwargs: Any) -> List[str]:
|
||||
"""Run more documents through the embeddings and add to the vectorstore.
|
||||
|
||||
Args:
|
||||
documents (List[Document]: Documents to add to the vectorstore.
|
||||
|
||||
Returns:
|
||||
List[str]: List of IDs of the added texts.
|
||||
"""
|
||||
# TODO: Handle the case where the user doesn't provide ids on the Collection
|
||||
texts = [doc.page_content for doc in documents]
|
||||
metadatas = [doc.metadata for doc in documents]
|
||||
return self.add_texts(texts, metadatas, **kwargs)
|
||||
|
||||
async def aadd_documents(
|
||||
self, documents: List[Document], **kwargs: Any
|
||||
) -> List[str]:
|
||||
"""Run more documents through the embeddings and add to the vectorstore.
|
||||
|
||||
Args:
|
||||
documents (List[Document]: Documents to add to the vectorstore.
|
||||
|
||||
Returns:
|
||||
List[str]: List of IDs of the added texts.
|
||||
"""
|
||||
texts = [doc.page_content for doc in documents]
|
||||
metadatas = [doc.metadata for doc in documents]
|
||||
return await self.aadd_texts(texts, metadatas, **kwargs)
|
||||
|
||||
def search(self, query: str, search_type: str, **kwargs: Any) -> List[Document]:
|
||||
"""Return docs most similar to query using specified search type."""
|
||||
if search_type == "similarity":
|
||||
return self.similarity_search(query, **kwargs)
|
||||
elif search_type == "mmr":
|
||||
return self.max_marginal_relevance_search(query, **kwargs)
|
||||
else:
|
||||
raise ValueError(
|
||||
f"search_type of {search_type} not allowed. Expected "
|
||||
"search_type to be 'similarity' or 'mmr'."
|
||||
)
|
||||
|
||||
async def asearch(
|
||||
self, query: str, search_type: str, **kwargs: Any
|
||||
) -> List[Document]:
|
||||
"""Return docs most similar to query using specified search type."""
|
||||
if search_type == "similarity":
|
||||
return await self.asimilarity_search(query, **kwargs)
|
||||
elif search_type == "mmr":
|
||||
return await self.amax_marginal_relevance_search(query, **kwargs)
|
||||
else:
|
||||
raise ValueError(
|
||||
f"search_type of {search_type} not allowed. Expected "
|
||||
"search_type to be 'similarity' or 'mmr'."
|
||||
)
|
||||
|
||||
@abstractmethod
|
||||
def similarity_search(
|
||||
self, query: str, k: int = 4, **kwargs: Any
|
||||
) -> List[Document]:
|
||||
"""Return docs most similar to query."""
|
||||
|
||||
@staticmethod
|
||||
def _euclidean_relevance_score_fn(distance: float) -> float:
|
||||
"""Return a similarity score on a scale [0, 1]."""
|
||||
# The 'correct' relevance function
|
||||
# may differ depending on a few things, including:
|
||||
# - the distance / similarity metric used by the VectorStore
|
||||
# - the scale of your embeddings (OpenAI's are unit normed. Many
|
||||
# others are not!)
|
||||
# - embedding dimensionality
|
||||
# - etc.
|
||||
# This function converts the euclidean norm of normalized embeddings
|
||||
# (0 is most similar, sqrt(2) most dissimilar)
|
||||
# to a similarity function (0 to 1)
|
||||
return 1.0 - distance / math.sqrt(2)
|
||||
|
||||
@staticmethod
|
||||
def _cosine_relevance_score_fn(distance: float) -> float:
|
||||
"""Normalize the distance to a score on a scale [0, 1]."""
|
||||
|
||||
return 1.0 - distance
|
||||
|
||||
@staticmethod
|
||||
def _max_inner_product_relevance_score_fn(distance: float) -> float:
|
||||
"""Normalize the distance to a score on a scale [0, 1]."""
|
||||
if distance > 0:
|
||||
return 1.0 - distance
|
||||
|
||||
return -1.0 * distance
|
||||
|
||||
def _select_relevance_score_fn(self) -> Callable[[float], float]:
|
||||
"""
|
||||
The 'correct' relevance function
|
||||
may differ depending on a few things, including:
|
||||
- the distance / similarity metric used by the VectorStore
|
||||
- the scale of your embeddings (OpenAI's are unit normed. Many others are not!)
|
||||
- embedding dimensionality
|
||||
- etc.
|
||||
|
||||
Vectorstores should define their own selection based method of relevance.
|
||||
"""
|
||||
raise NotImplementedError
|
||||
|
||||
def similarity_search_with_score(
|
||||
self, *args: Any, **kwargs: Any
|
||||
) -> List[Tuple[Document, float]]:
|
||||
"""Run similarity search with distance."""
|
||||
raise NotImplementedError
|
||||
|
||||
def _similarity_search_with_relevance_scores(
|
||||
self,
|
||||
query: str,
|
||||
k: int = 4,
|
||||
**kwargs: Any,
|
||||
) -> List[Tuple[Document, float]]:
|
||||
"""
|
||||
Default similarity search with relevance scores. Modify if necessary
|
||||
in subclass.
|
||||
Return docs and relevance scores in the range [0, 1].
|
||||
|
||||
0 is dissimilar, 1 is most similar.
|
||||
|
||||
Args:
|
||||
query: input text
|
||||
k: Number of Documents to return. Defaults to 4.
|
||||
**kwargs: kwargs to be passed to similarity search. Should include:
|
||||
score_threshold: Optional, a floating point value between 0 to 1 to
|
||||
filter the resulting set of retrieved docs
|
||||
|
||||
Returns:
|
||||
List of Tuples of (doc, similarity_score)
|
||||
"""
|
||||
relevance_score_fn = self._select_relevance_score_fn()
|
||||
docs_and_scores = self.similarity_search_with_score(query, k, **kwargs)
|
||||
return [(doc, relevance_score_fn(score)) for doc, score in docs_and_scores]
|
||||
|
||||
def similarity_search_with_relevance_scores(
|
||||
self,
|
||||
query: str,
|
||||
k: int = 4,
|
||||
**kwargs: Any,
|
||||
) -> List[Tuple[Document, float]]:
|
||||
"""Return docs and relevance scores in the range [0, 1].
|
||||
|
||||
0 is dissimilar, 1 is most similar.
|
||||
|
||||
Args:
|
||||
query: input text
|
||||
k: Number of Documents to return. Defaults to 4.
|
||||
**kwargs: kwargs to be passed to similarity search. Should include:
|
||||
score_threshold: Optional, a floating point value between 0 to 1 to
|
||||
filter the resulting set of retrieved docs
|
||||
|
||||
Returns:
|
||||
List of Tuples of (doc, similarity_score)
|
||||
"""
|
||||
score_threshold = kwargs.pop("score_threshold", None)
|
||||
|
||||
docs_and_similarities = self._similarity_search_with_relevance_scores(
|
||||
query, k=k, **kwargs
|
||||
)
|
||||
if any(
|
||||
similarity < 0.0 or similarity > 1.0
|
||||
for _, similarity in docs_and_similarities
|
||||
):
|
||||
warnings.warn(
|
||||
"Relevance scores must be between"
|
||||
f" 0 and 1, got {docs_and_similarities}"
|
||||
)
|
||||
|
||||
if score_threshold is not None:
|
||||
docs_and_similarities = [
|
||||
(doc, similarity)
|
||||
for doc, similarity in docs_and_similarities
|
||||
if similarity >= score_threshold
|
||||
]
|
||||
if len(docs_and_similarities) == 0:
|
||||
warnings.warn(
|
||||
"No relevant docs were retrieved using the relevance score"
|
||||
f" threshold {score_threshold}"
|
||||
)
|
||||
return docs_and_similarities
|
||||
|
||||
async def asimilarity_search_with_relevance_scores(
|
||||
self, query: str, k: int = 4, **kwargs: Any
|
||||
) -> List[Tuple[Document, float]]:
|
||||
"""Return docs most similar to query."""
|
||||
|
||||
# This is a temporary workaround to make the similarity search
|
||||
# asynchronous. The proper solution is to make the similarity search
|
||||
# asynchronous in the vector store implementations.
|
||||
func = partial(
|
||||
self.similarity_search_with_relevance_scores, query, k=k, **kwargs
|
||||
)
|
||||
return await asyncio.get_event_loop().run_in_executor(None, func)
|
||||
|
||||
async def asimilarity_search(
|
||||
self, query: str, k: int = 4, **kwargs: Any
|
||||
) -> List[Document]:
|
||||
"""Return docs most similar to query."""
|
||||
|
||||
# This is a temporary workaround to make the similarity search
|
||||
# asynchronous. The proper solution is to make the similarity search
|
||||
# asynchronous in the vector store implementations.
|
||||
func = partial(self.similarity_search, query, k=k, **kwargs)
|
||||
return await asyncio.get_event_loop().run_in_executor(None, func)
|
||||
|
||||
def similarity_search_by_vector(
|
||||
self, embedding: List[float], k: int = 4, **kwargs: Any
|
||||
) -> List[Document]:
|
||||
"""Return docs most similar to embedding vector.
|
||||
|
||||
Args:
|
||||
embedding: Embedding to look up documents similar to.
|
||||
k: Number of Documents to return. Defaults to 4.
|
||||
|
||||
Returns:
|
||||
List of Documents most similar to the query vector.
|
||||
"""
|
||||
raise NotImplementedError
|
||||
|
||||
async def asimilarity_search_by_vector(
|
||||
self, embedding: List[float], k: int = 4, **kwargs: Any
|
||||
) -> List[Document]:
|
||||
"""Return docs most similar to embedding vector."""
|
||||
|
||||
# This is a temporary workaround to make the similarity search
|
||||
# asynchronous. The proper solution is to make the similarity search
|
||||
# asynchronous in the vector store implementations.
|
||||
func = partial(self.similarity_search_by_vector, embedding, k=k, **kwargs)
|
||||
return await asyncio.get_event_loop().run_in_executor(None, func)
|
||||
|
||||
def max_marginal_relevance_search(
|
||||
self,
|
||||
query: str,
|
||||
k: int = 4,
|
||||
fetch_k: int = 20,
|
||||
lambda_mult: float = 0.5,
|
||||
**kwargs: Any,
|
||||
) -> List[Document]:
|
||||
"""Return docs selected using the maximal marginal relevance.
|
||||
|
||||
Maximal marginal relevance optimizes for similarity to query AND diversity
|
||||
among selected documents.
|
||||
|
||||
Args:
|
||||
query: Text to look up documents similar to.
|
||||
k: Number of Documents to return. Defaults to 4.
|
||||
fetch_k: Number of Documents to fetch to pass to MMR algorithm.
|
||||
lambda_mult: Number between 0 and 1 that determines the degree
|
||||
of diversity among the results with 0 corresponding
|
||||
to maximum diversity and 1 to minimum diversity.
|
||||
Defaults to 0.5.
|
||||
Returns:
|
||||
List of Documents selected by maximal marginal relevance.
|
||||
"""
|
||||
raise NotImplementedError
|
||||
|
||||
async def amax_marginal_relevance_search(
|
||||
self,
|
||||
query: str,
|
||||
k: int = 4,
|
||||
fetch_k: int = 20,
|
||||
lambda_mult: float = 0.5,
|
||||
**kwargs: Any,
|
||||
) -> List[Document]:
|
||||
"""Return docs selected using the maximal marginal relevance."""
|
||||
|
||||
# This is a temporary workaround to make the similarity search
|
||||
# asynchronous. The proper solution is to make the similarity search
|
||||
# asynchronous in the vector store implementations.
|
||||
func = partial(
|
||||
self.max_marginal_relevance_search,
|
||||
query,
|
||||
k=k,
|
||||
fetch_k=fetch_k,
|
||||
lambda_mult=lambda_mult,
|
||||
**kwargs,
|
||||
)
|
||||
return await asyncio.get_event_loop().run_in_executor(None, func)
|
||||
|
||||
def max_marginal_relevance_search_by_vector(
|
||||
self,
|
||||
embedding: List[float],
|
||||
k: int = 4,
|
||||
fetch_k: int = 20,
|
||||
lambda_mult: float = 0.5,
|
||||
**kwargs: Any,
|
||||
) -> List[Document]:
|
||||
"""Return docs selected using the maximal marginal relevance.
|
||||
|
||||
Maximal marginal relevance optimizes for similarity to query AND diversity
|
||||
among selected documents.
|
||||
|
||||
Args:
|
||||
embedding: Embedding to look up documents similar to.
|
||||
k: Number of Documents to return. Defaults to 4.
|
||||
fetch_k: Number of Documents to fetch to pass to MMR algorithm.
|
||||
lambda_mult: Number between 0 and 1 that determines the degree
|
||||
of diversity among the results with 0 corresponding
|
||||
to maximum diversity and 1 to minimum diversity.
|
||||
Defaults to 0.5.
|
||||
Returns:
|
||||
List of Documents selected by maximal marginal relevance.
|
||||
"""
|
||||
raise NotImplementedError
|
||||
|
||||
async def amax_marginal_relevance_search_by_vector(
|
||||
self,
|
||||
embedding: List[float],
|
||||
k: int = 4,
|
||||
fetch_k: int = 20,
|
||||
lambda_mult: float = 0.5,
|
||||
**kwargs: Any,
|
||||
) -> List[Document]:
|
||||
"""Return docs selected using the maximal marginal relevance."""
|
||||
raise NotImplementedError
|
||||
|
||||
@classmethod
|
||||
def from_documents(
|
||||
cls: Type[VST],
|
||||
documents: List[Document],
|
||||
embedding: Embeddings,
|
||||
**kwargs: Any,
|
||||
) -> VST:
|
||||
"""Return VectorStore initialized from documents and embeddings."""
|
||||
texts = [d.page_content for d in documents]
|
||||
metadatas = [d.metadata for d in documents]
|
||||
return cls.from_texts(texts, embedding, metadatas=metadatas, **kwargs)
|
||||
|
||||
@classmethod
|
||||
async def afrom_documents(
|
||||
cls: Type[VST],
|
||||
documents: List[Document],
|
||||
embedding: Embeddings,
|
||||
**kwargs: Any,
|
||||
) -> VST:
|
||||
"""Return VectorStore initialized from documents and embeddings."""
|
||||
texts = [d.page_content for d in documents]
|
||||
metadatas = [d.metadata for d in documents]
|
||||
return await cls.afrom_texts(texts, embedding, metadatas=metadatas, **kwargs)
|
||||
|
||||
@classmethod
|
||||
@abstractmethod
|
||||
def from_texts(
|
||||
cls: Type[VST],
|
||||
texts: List[str],
|
||||
embedding: Embeddings,
|
||||
metadatas: Optional[List[dict]] = None,
|
||||
**kwargs: Any,
|
||||
) -> VST:
|
||||
"""Return VectorStore initialized from texts and embeddings."""
|
||||
|
||||
@classmethod
|
||||
async def afrom_texts(
|
||||
cls: Type[VST],
|
||||
texts: List[str],
|
||||
embedding: Embeddings,
|
||||
metadatas: Optional[List[dict]] = None,
|
||||
**kwargs: Any,
|
||||
) -> VST:
|
||||
"""Return VectorStore initialized from texts and embeddings."""
|
||||
raise NotImplementedError
|
||||
|
||||
def _get_retriever_tags(self) -> List[str]:
|
||||
"""Get tags for retriever."""
|
||||
tags = [self.__class__.__name__]
|
||||
if self.embeddings:
|
||||
tags.append(self.embeddings.__class__.__name__)
|
||||
return tags
|
||||
|
||||
def as_retriever(self, **kwargs: Any) -> VectorStoreRetriever:
|
||||
"""Return VectorStoreRetriever initialized from this VectorStore.
|
||||
|
||||
Args:
|
||||
search_type (Optional[str]): Defines the type of search that
|
||||
the Retriever should perform.
|
||||
Can be "similarity" (default), "mmr", or
|
||||
"similarity_score_threshold".
|
||||
search_kwargs (Optional[Dict]): Keyword arguments to pass to the
|
||||
search function. Can include things like:
|
||||
k: Amount of documents to return (Default: 4)
|
||||
score_threshold: Minimum relevance threshold
|
||||
for similarity_score_threshold
|
||||
fetch_k: Amount of documents to pass to MMR algorithm (Default: 20)
|
||||
lambda_mult: Diversity of results returned by MMR;
|
||||
1 for minimum diversity and 0 for maximum. (Default: 0.5)
|
||||
filter: Filter by document metadata
|
||||
|
||||
Returns:
|
||||
VectorStoreRetriever: Retriever class for VectorStore.
|
||||
|
||||
Examples:
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
# Retrieve more documents with higher diversity
|
||||
# Useful if your dataset has many similar documents
|
||||
docsearch.as_retriever(
|
||||
search_type="mmr",
|
||||
search_kwargs={'k': 6, 'lambda_mult': 0.25}
|
||||
)
|
||||
|
||||
# Fetch more documents for the MMR algorithm to consider
|
||||
# But only return the top 5
|
||||
docsearch.as_retriever(
|
||||
search_type="mmr",
|
||||
search_kwargs={'k': 5, 'fetch_k': 50}
|
||||
)
|
||||
|
||||
# Only retrieve documents that have a relevance score
|
||||
# Above a certain threshold
|
||||
docsearch.as_retriever(
|
||||
search_type="similarity_score_threshold",
|
||||
search_kwargs={'score_threshold': 0.8}
|
||||
)
|
||||
|
||||
# Only get the single most similar document from the dataset
|
||||
docsearch.as_retriever(search_kwargs={'k': 1})
|
||||
|
||||
# Use a filter to only retrieve documents from a specific paper
|
||||
docsearch.as_retriever(
|
||||
search_kwargs={'filter': {'paper_title':'GPT-4 Technical Report'}}
|
||||
)
|
||||
"""
|
||||
tags = kwargs.pop("tags", None) or []
|
||||
tags.extend(self._get_retriever_tags())
|
||||
|
||||
return VectorStoreRetriever(vectorstore=self, **kwargs, tags=tags)
|
||||
|
||||
|
||||
class VectorStoreRetriever(BaseRetriever):
|
||||
"""Base Retriever class for VectorStore."""
|
||||
|
||||
vectorstore: VectorStore
|
||||
"""VectorStore to use for retrieval."""
|
||||
search_type: str = "similarity"
|
||||
"""Type of search to perform. Defaults to "similarity"."""
|
||||
search_kwargs: dict = Field(default_factory=dict)
|
||||
"""Keyword arguments to pass to the search function."""
|
||||
allowed_search_types: ClassVar[Collection[str]] = (
|
||||
"similarity",
|
||||
"similarity_score_threshold",
|
||||
"mmr",
|
||||
)
|
||||
|
||||
class Config:
|
||||
"""Configuration for this pydantic object."""
|
||||
|
||||
arbitrary_types_allowed = True
|
||||
|
||||
@root_validator()
|
||||
def validate_search_type(cls, values: Dict) -> Dict:
|
||||
"""Validate search type."""
|
||||
search_type = values["search_type"]
|
||||
if search_type not in cls.allowed_search_types:
|
||||
raise ValueError(
|
||||
f"search_type of {search_type} not allowed. Valid values are: "
|
||||
f"{cls.allowed_search_types}"
|
||||
)
|
||||
if search_type == "similarity_score_threshold":
|
||||
score_threshold = values["search_kwargs"].get("score_threshold")
|
||||
if (score_threshold is None) or (not isinstance(score_threshold, float)):
|
||||
raise ValueError(
|
||||
"`score_threshold` is not specified with a float value(0~1) "
|
||||
"in `search_kwargs`."
|
||||
)
|
||||
return values
|
||||
|
||||
def _get_relevant_documents(
|
||||
self, query: str, *, run_manager: CallbackManagerForRetrieverRun
|
||||
) -> List[Document]:
|
||||
if self.search_type == "similarity":
|
||||
docs = self.vectorstore.similarity_search(query, **self.search_kwargs)
|
||||
elif self.search_type == "similarity_score_threshold":
|
||||
docs_and_similarities = (
|
||||
self.vectorstore.similarity_search_with_relevance_scores(
|
||||
query, **self.search_kwargs
|
||||
)
|
||||
)
|
||||
docs = [doc for doc, _ in docs_and_similarities]
|
||||
elif self.search_type == "mmr":
|
||||
docs = self.vectorstore.max_marginal_relevance_search(
|
||||
query, **self.search_kwargs
|
||||
)
|
||||
else:
|
||||
raise ValueError(f"search_type of {self.search_type} not allowed.")
|
||||
return docs
|
||||
|
||||
async def _aget_relevant_documents(
|
||||
self, query: str, *, run_manager: AsyncCallbackManagerForRetrieverRun
|
||||
) -> List[Document]:
|
||||
if self.search_type == "similarity":
|
||||
docs = await self.vectorstore.asimilarity_search(
|
||||
query, **self.search_kwargs
|
||||
)
|
||||
elif self.search_type == "similarity_score_threshold":
|
||||
docs_and_similarities = (
|
||||
await self.vectorstore.asimilarity_search_with_relevance_scores(
|
||||
query, **self.search_kwargs
|
||||
)
|
||||
)
|
||||
docs = [doc for doc, _ in docs_and_similarities]
|
||||
elif self.search_type == "mmr":
|
||||
docs = await self.vectorstore.amax_marginal_relevance_search(
|
||||
query, **self.search_kwargs
|
||||
)
|
||||
else:
|
||||
raise ValueError(f"search_type of {self.search_type} not allowed.")
|
||||
return docs
|
||||
|
||||
def add_documents(self, documents: List[Document], **kwargs: Any) -> List[str]:
|
||||
"""Add documents to vectorstore."""
|
||||
return self.vectorstore.add_documents(documents, **kwargs)
|
||||
|
||||
async def aadd_documents(
|
||||
self, documents: List[Document], **kwargs: Any
|
||||
) -> List[str]:
|
||||
"""Add documents to vectorstore."""
|
||||
return await self.vectorstore.aadd_documents(documents, **kwargs)
|
||||
__all__ = ["VectorStore", "VectorStoreRetriever"]
|
||||
|
||||
@@ -22,7 +22,7 @@ if typing.TYPE_CHECKING:
|
||||
|
||||
from langchain.docstore.document import Document
|
||||
from langchain.schema.embeddings import Embeddings
|
||||
from langchain.vectorstores.base import VectorStore
|
||||
from langchain.schema.vectorstore import VectorStore
|
||||
from langchain.vectorstores.utils import maximal_marginal_relevance
|
||||
|
||||
CVST = TypeVar("CVST", bound="Cassandra")
|
||||
|
||||
@@ -18,8 +18,8 @@ import numpy as np
|
||||
|
||||
from langchain.docstore.document import Document
|
||||
from langchain.schema.embeddings import Embeddings
|
||||
from langchain.schema.vectorstore import VectorStore
|
||||
from langchain.utils import xor_args
|
||||
from langchain.vectorstores.base import VectorStore
|
||||
from langchain.vectorstores.utils import maximal_marginal_relevance
|
||||
|
||||
if TYPE_CHECKING:
|
||||
|
||||
@@ -10,7 +10,7 @@ import requests
|
||||
|
||||
from langchain.docstore.document import Document
|
||||
from langchain.schema.embeddings import Embeddings
|
||||
from langchain.vectorstores.base import VectorStore
|
||||
from langchain.schema.vectorstore import VectorStore
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@@ -9,7 +9,7 @@ from typing import Any, Dict, Iterable, List, Optional, Tuple, Union
|
||||
from langchain.docstore.document import Document
|
||||
from langchain.pydantic_v1 import BaseSettings
|
||||
from langchain.schema.embeddings import Embeddings
|
||||
from langchain.vectorstores.base import VectorStore
|
||||
from langchain.schema.vectorstore import VectorStore
|
||||
|
||||
logger = logging.getLogger()
|
||||
|
||||
|
||||
@@ -14,8 +14,8 @@ import numpy as np
|
||||
|
||||
from langchain.docstore.document import Document
|
||||
from langchain.schema.embeddings import Embeddings
|
||||
from langchain.schema.vectorstore import VectorStore
|
||||
from langchain.utils import get_from_env
|
||||
from langchain.vectorstores.base import VectorStore
|
||||
from langchain.vectorstores.utils import maximal_marginal_relevance
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -16,7 +16,7 @@ except ImportError:
|
||||
|
||||
from langchain.docstore.document import Document
|
||||
from langchain.schema.embeddings import Embeddings
|
||||
from langchain.vectorstores.base import VectorStore
|
||||
from langchain.schema.vectorstore import VectorStore
|
||||
from langchain.vectorstores.utils import maximal_marginal_relevance
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -8,7 +8,7 @@ import numpy as np
|
||||
|
||||
from langchain.docstore.document import Document
|
||||
from langchain.schema.embeddings import Embeddings
|
||||
from langchain.vectorstores.base import VectorStore
|
||||
from langchain.schema.vectorstore import VectorStore
|
||||
from langchain.vectorstores.utils import maximal_marginal_relevance
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -18,8 +18,8 @@ from typing import (
|
||||
from langchain._api import deprecated
|
||||
from langchain.docstore.document import Document
|
||||
from langchain.schema.embeddings import Embeddings
|
||||
from langchain.schema.vectorstore import VectorStore
|
||||
from langchain.utils import get_from_dict_or_env
|
||||
from langchain.vectorstores.base import VectorStore
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from elasticsearch import Elasticsearch
|
||||
|
||||
@@ -16,7 +16,7 @@ from typing import (
|
||||
|
||||
from langchain.docstore.document import Document
|
||||
from langchain.schema.embeddings import Embeddings
|
||||
from langchain.vectorstores.base import VectorStore
|
||||
from langchain.schema.vectorstore import VectorStore
|
||||
from langchain.vectorstores.utils import DistanceStrategy
|
||||
|
||||
if TYPE_CHECKING:
|
||||
|
||||
@@ -7,7 +7,7 @@ from typing import TYPE_CHECKING, Any, Iterable, List, Optional, Type
|
||||
|
||||
from langchain.docstore.document import Document
|
||||
from langchain.schema.embeddings import Embeddings
|
||||
from langchain.vectorstores.base import VectorStore
|
||||
from langchain.schema.vectorstore import VectorStore
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from pyepsilla import vectordb
|
||||
|
||||
@@ -23,7 +23,7 @@ from langchain.docstore.base import AddableMixin, Docstore
|
||||
from langchain.docstore.document import Document
|
||||
from langchain.docstore.in_memory import InMemoryDocstore
|
||||
from langchain.schema.embeddings import Embeddings
|
||||
from langchain.vectorstores.base import VectorStore
|
||||
from langchain.schema.vectorstore import VectorStore
|
||||
from langchain.vectorstores.utils import DistanceStrategy, maximal_marginal_relevance
|
||||
|
||||
|
||||
|
||||
@@ -7,8 +7,8 @@ from typing import Any, Dict, Iterable, List, Optional, Tuple, Type
|
||||
|
||||
from langchain.docstore.document import Document
|
||||
from langchain.schema.embeddings import Embeddings
|
||||
from langchain.schema.vectorstore import VectorStore
|
||||
from langchain.utils import get_from_dict_or_env
|
||||
from langchain.vectorstores.base import VectorStore
|
||||
|
||||
ADA_TOKEN_COUNT = 1536
|
||||
_LANGCHAIN_DEFAULT_TABLE_NAME = "langchain_pg_embedding"
|
||||
|
||||
@@ -5,7 +5,7 @@ from typing import Any, Iterable, List, Optional
|
||||
|
||||
from langchain.docstore.document import Document
|
||||
from langchain.schema.embeddings import Embeddings
|
||||
from langchain.vectorstores.base import VectorStore
|
||||
from langchain.schema.vectorstore import VectorStore
|
||||
|
||||
|
||||
class LanceDB(VectorStore):
|
||||
|
||||
@@ -13,7 +13,7 @@ import requests
|
||||
from langchain.pydantic_v1 import Field
|
||||
from langchain.schema import Document
|
||||
from langchain.schema.embeddings import Embeddings
|
||||
from langchain.vectorstores.base import VectorStore, VectorStoreRetriever
|
||||
from langchain.schema.vectorstore import VectorStore, VectorStoreRetriever
|
||||
|
||||
|
||||
class ModelChoices(str, Enum):
|
||||
|
||||
@@ -17,7 +17,7 @@ from typing import (
|
||||
|
||||
from langchain.docstore.document import Document
|
||||
from langchain.schema.embeddings import Embeddings
|
||||
from langchain.vectorstores.base import VectorStore
|
||||
from langchain.schema.vectorstore import VectorStore
|
||||
|
||||
if TYPE_CHECKING:
|
||||
import marqo
|
||||
|
||||
@@ -9,7 +9,7 @@ from typing import TYPE_CHECKING, Any, Iterable, List, Optional, Type
|
||||
from langchain.docstore.document import Document
|
||||
from langchain.embeddings import TensorflowHubEmbeddings
|
||||
from langchain.schema.embeddings import Embeddings
|
||||
from langchain.vectorstores.base import VectorStore
|
||||
from langchain.schema.vectorstore import VectorStore
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from google.cloud import storage
|
||||
|
||||
@@ -5,8 +5,8 @@ from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Optional, Tuple, Ty
|
||||
|
||||
from langchain.docstore.document import Document
|
||||
from langchain.schema.embeddings import Embeddings
|
||||
from langchain.schema.vectorstore import VectorStore
|
||||
from langchain.utils import get_from_env
|
||||
from langchain.vectorstores.base import VectorStore
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from meilisearch import Client
|
||||
|
||||
@@ -8,7 +8,7 @@ import numpy as np
|
||||
|
||||
from langchain.docstore.document import Document
|
||||
from langchain.schema.embeddings import Embeddings
|
||||
from langchain.vectorstores.base import VectorStore
|
||||
from langchain.schema.vectorstore import VectorStore
|
||||
from langchain.vectorstores.utils import maximal_marginal_relevance
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -18,7 +18,7 @@ import numpy as np
|
||||
|
||||
from langchain.docstore.document import Document
|
||||
from langchain.schema.embeddings import Embeddings
|
||||
from langchain.vectorstores.base import VectorStore
|
||||
from langchain.schema.vectorstore import VectorStore
|
||||
from langchain.vectorstores.utils import maximal_marginal_relevance
|
||||
|
||||
if TYPE_CHECKING:
|
||||
|
||||
@@ -9,7 +9,7 @@ from typing import Any, Dict, Iterable, List, Optional, Tuple
|
||||
from langchain.docstore.document import Document
|
||||
from langchain.pydantic_v1 import BaseSettings
|
||||
from langchain.schema.embeddings import Embeddings
|
||||
from langchain.vectorstores.base import VectorStore
|
||||
from langchain.schema.vectorstore import VectorStore
|
||||
|
||||
logger = logging.getLogger()
|
||||
|
||||
|
||||
@@ -16,8 +16,8 @@ from typing import (
|
||||
|
||||
from langchain.docstore.document import Document
|
||||
from langchain.schema.embeddings import Embeddings
|
||||
from langchain.schema.vectorstore import VectorStore
|
||||
from langchain.utils import get_from_env
|
||||
from langchain.vectorstores.base import VectorStore
|
||||
from langchain.vectorstores.utils import DistanceStrategy
|
||||
|
||||
DEFAULT_DISTANCE_STRATEGY = DistanceStrategy.COSINE
|
||||
|
||||
@@ -3,7 +3,7 @@ from typing import Any, Dict, Iterable, List, Optional, Type
|
||||
|
||||
from langchain.schema.document import Document
|
||||
from langchain.schema.embeddings import Embeddings
|
||||
from langchain.vectorstores.base import VST, VectorStore
|
||||
from langchain.schema.vectorstore import VST, VectorStore
|
||||
|
||||
FIELD_TYPES = {
|
||||
"f": "files",
|
||||
|
||||
@@ -8,8 +8,8 @@ import numpy as np
|
||||
|
||||
from langchain.schema import Document
|
||||
from langchain.schema.embeddings import Embeddings
|
||||
from langchain.schema.vectorstore import VectorStore
|
||||
from langchain.utils import get_from_dict_or_env
|
||||
from langchain.vectorstores.base import VectorStore
|
||||
from langchain.vectorstores.utils import maximal_marginal_relevance
|
||||
|
||||
IMPORT_OPENSEARCH_PY_ERROR = (
|
||||
|
||||
@@ -11,8 +11,8 @@ from sqlalchemy.orm import Session, declarative_base, relationship
|
||||
|
||||
from langchain.docstore.document import Document
|
||||
from langchain.schema.embeddings import Embeddings
|
||||
from langchain.schema.vectorstore import VectorStore
|
||||
from langchain.utils import get_from_dict_or_env
|
||||
from langchain.vectorstores.base import VectorStore
|
||||
|
||||
Base = declarative_base() # type: Any
|
||||
|
||||
|
||||
@@ -27,8 +27,8 @@ from sqlalchemy.orm import Session, declarative_base
|
||||
|
||||
from langchain.docstore.document import Document
|
||||
from langchain.schema.embeddings import Embeddings
|
||||
from langchain.schema.vectorstore import VectorStore
|
||||
from langchain.utils import get_from_dict_or_env
|
||||
from langchain.vectorstores.base import VectorStore
|
||||
from langchain.vectorstores.utils import maximal_marginal_relevance
|
||||
|
||||
if TYPE_CHECKING:
|
||||
|
||||
@@ -9,8 +9,8 @@ import numpy as np
|
||||
|
||||
from langchain.docstore.document import Document
|
||||
from langchain.schema.embeddings import Embeddings
|
||||
from langchain.schema.vectorstore import VectorStore
|
||||
from langchain.utils.iter import batch_iterate
|
||||
from langchain.vectorstores.base import VectorStore
|
||||
from langchain.vectorstores.utils import DistanceStrategy, maximal_marginal_relevance
|
||||
|
||||
if TYPE_CHECKING:
|
||||
|
||||
@@ -30,6 +30,7 @@ from langchain.callbacks.manager import (
|
||||
)
|
||||
from langchain.docstore.document import Document
|
||||
from langchain.schema.embeddings import Embeddings
|
||||
from langchain.schema.vectorstore import VectorStore, VectorStoreRetriever
|
||||
from langchain.utilities.redis import (
|
||||
_array_to_buffer,
|
||||
_buffer_to_array,
|
||||
@@ -37,7 +38,6 @@ from langchain.utilities.redis import (
|
||||
get_client,
|
||||
)
|
||||
from langchain.utils import get_from_dict_or_env
|
||||
from langchain.vectorstores.base import VectorStore, VectorStoreRetriever
|
||||
from langchain.vectorstores.redis.constants import (
|
||||
REDIS_REQUIRED_MODULES,
|
||||
REDIS_TAG_SEPARATOR,
|
||||
|
||||
@@ -6,7 +6,7 @@ from typing import Any, Iterable, List, Optional, Tuple
|
||||
|
||||
from langchain.docstore.document import Document
|
||||
from langchain.schema.embeddings import Embeddings
|
||||
from langchain.vectorstores.base import VectorStore
|
||||
from langchain.schema.vectorstore import VectorStore
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@@ -12,7 +12,7 @@ from langchain.docstore.base import AddableMixin, Docstore
|
||||
from langchain.docstore.document import Document
|
||||
from langchain.docstore.in_memory import InMemoryDocstore
|
||||
from langchain.schema.embeddings import Embeddings
|
||||
from langchain.vectorstores.base import VectorStore
|
||||
from langchain.schema.vectorstore import VectorStore
|
||||
from langchain.vectorstores.utils import DistanceStrategy
|
||||
|
||||
|
||||
|
||||
@@ -21,7 +21,7 @@ from langchain.callbacks.manager import (
|
||||
)
|
||||
from langchain.docstore.document import Document
|
||||
from langchain.schema.embeddings import Embeddings
|
||||
from langchain.vectorstores.base import VectorStore, VectorStoreRetriever
|
||||
from langchain.schema.vectorstore import VectorStore, VectorStoreRetriever
|
||||
from langchain.vectorstores.utils import DistanceStrategy
|
||||
|
||||
DEFAULT_DISTANCE_STRATEGY = DistanceStrategy.DOT_PRODUCT
|
||||
|
||||
@@ -12,8 +12,8 @@ from uuid import uuid4
|
||||
|
||||
from langchain.docstore.document import Document
|
||||
from langchain.schema.embeddings import Embeddings
|
||||
from langchain.schema.vectorstore import VectorStore
|
||||
from langchain.utils import guard_import
|
||||
from langchain.vectorstores.base import VectorStore
|
||||
from langchain.vectorstores.utils import maximal_marginal_relevance
|
||||
|
||||
DEFAULT_K = 4 # Number of Documents to return.
|
||||
|
||||
@@ -15,7 +15,7 @@ from typing import (
|
||||
|
||||
from langchain.docstore.document import Document
|
||||
from langchain.schema.embeddings import Embeddings
|
||||
from langchain.vectorstores.base import VectorStore
|
||||
from langchain.schema.vectorstore import VectorStore
|
||||
|
||||
if TYPE_CHECKING:
|
||||
import sqlite3
|
||||
|
||||
@@ -9,7 +9,7 @@ from typing import Any, Dict, Iterable, List, Optional, Tuple
|
||||
from langchain.docstore.document import Document
|
||||
from langchain.pydantic_v1 import BaseSettings
|
||||
from langchain.schema.embeddings import Embeddings
|
||||
from langchain.vectorstores.base import VectorStore
|
||||
from langchain.schema.vectorstore import VectorStore
|
||||
|
||||
logger = logging.getLogger()
|
||||
DEBUG = False
|
||||
|
||||
@@ -18,7 +18,7 @@ import numpy as np
|
||||
|
||||
from langchain.docstore.document import Document
|
||||
from langchain.schema.embeddings import Embeddings
|
||||
from langchain.vectorstores.base import VectorStore
|
||||
from langchain.schema.vectorstore import VectorStore
|
||||
from langchain.vectorstores.utils import maximal_marginal_relevance
|
||||
|
||||
if TYPE_CHECKING:
|
||||
|
||||
@@ -7,8 +7,8 @@ from typing import Any, Iterable, List, Optional, Type
|
||||
|
||||
from langchain.docstore.document import Document
|
||||
from langchain.schema.embeddings import Embeddings
|
||||
from langchain.schema.vectorstore import VectorStore
|
||||
from langchain.utils import get_from_dict_or_env
|
||||
from langchain.vectorstores.base import VectorStore
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@@ -10,8 +10,8 @@ import numpy as np
|
||||
|
||||
from langchain.docstore.document import Document
|
||||
from langchain.schema.embeddings import Embeddings
|
||||
from langchain.schema.vectorstore import VectorStore
|
||||
from langchain.utils import guard_import
|
||||
from langchain.vectorstores.base import VectorStore
|
||||
from langchain.vectorstores.utils import maximal_marginal_relevance
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -20,8 +20,8 @@ from typing import (
|
||||
|
||||
from langchain.docstore.document import Document
|
||||
from langchain.embeddings.base import Embeddings
|
||||
from langchain.schema.vectorstore import VectorStore
|
||||
from langchain.utils import get_from_dict_or_env
|
||||
from langchain.vectorstores.base import VectorStore
|
||||
from langchain.vectorstores.utils import DistanceStrategy
|
||||
|
||||
if TYPE_CHECKING:
|
||||
|
||||
@@ -5,8 +5,8 @@ from typing import TYPE_CHECKING, Any, Iterable, List, Optional, Tuple, Union
|
||||
|
||||
from langchain.docstore.document import Document
|
||||
from langchain.schema.embeddings import Embeddings
|
||||
from langchain.schema.vectorstore import VectorStore
|
||||
from langchain.utils import get_from_env
|
||||
from langchain.vectorstores.base import VectorStore
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from typesense.client import Client
|
||||
|
||||
@@ -8,7 +8,7 @@ from langchain.docstore.base import AddableMixin, Docstore
|
||||
from langchain.docstore.document import Document
|
||||
from langchain.docstore.in_memory import InMemoryDocstore
|
||||
from langchain.schema.embeddings import Embeddings
|
||||
from langchain.vectorstores.base import VectorStore
|
||||
from langchain.schema.vectorstore import VectorStore
|
||||
|
||||
|
||||
def dependable_usearch_import() -> Any:
|
||||
|
||||
@@ -7,7 +7,7 @@ import numpy as np
|
||||
|
||||
from langchain.docstore.document import Document
|
||||
from langchain.schema.embeddings import Embeddings
|
||||
from langchain.vectorstores.base import VectorStore
|
||||
from langchain.schema.vectorstore import VectorStore
|
||||
from langchain.vectorstores.utils import maximal_marginal_relevance
|
||||
|
||||
|
||||
|
||||
@@ -9,7 +9,7 @@ import numpy as np
|
||||
|
||||
from langchain.docstore.document import Document
|
||||
from langchain.schema.embeddings import Embeddings
|
||||
from langchain.vectorstores.base import VectorStore
|
||||
from langchain.schema.vectorstore import VectorStore
|
||||
|
||||
if TYPE_CHECKING:
|
||||
import vearch
|
||||
|
||||
@@ -11,7 +11,7 @@ import requests
|
||||
from langchain.pydantic_v1 import Field
|
||||
from langchain.schema import Document
|
||||
from langchain.schema.embeddings import Embeddings
|
||||
from langchain.vectorstores.base import VectorStore, VectorStoreRetriever
|
||||
from langchain.schema.vectorstore import VectorStore, VectorStoreRetriever
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@@ -18,7 +18,7 @@ import numpy as np
|
||||
|
||||
from langchain.docstore.document import Document
|
||||
from langchain.schema.embeddings import Embeddings
|
||||
from langchain.vectorstores.base import VectorStore
|
||||
from langchain.schema.vectorstore import VectorStore
|
||||
from langchain.vectorstores.utils import maximal_marginal_relevance
|
||||
|
||||
if TYPE_CHECKING:
|
||||
|
||||
@@ -6,7 +6,7 @@ from typing import Any, Dict, Iterable, List, Optional, Tuple, Type
|
||||
|
||||
from langchain.docstore.document import Document
|
||||
from langchain.schema.embeddings import Embeddings
|
||||
from langchain.vectorstores.base import VectorStore
|
||||
from langchain.schema.vectorstore import VectorStore
|
||||
|
||||
|
||||
class XataVectorStore(VectorStore):
|
||||
|
||||
@@ -9,7 +9,7 @@ import numpy as np
|
||||
|
||||
from langchain.docstore.document import Document
|
||||
from langchain.schema.embeddings import Embeddings
|
||||
from langchain.vectorstores.base import VectorStore
|
||||
from langchain.schema.vectorstore import VectorStore
|
||||
from langchain.vectorstores.utils import maximal_marginal_relevance
|
||||
|
||||
if TYPE_CHECKING:
|
||||
|
||||
@@ -9,7 +9,7 @@ from langchain.indexes import index
|
||||
from langchain.indexes._sql_record_manager import SQLRecordManager
|
||||
from langchain.schema import Document
|
||||
from langchain.schema.embeddings import Embeddings
|
||||
from langchain.vectorstores.base import VST, VectorStore
|
||||
from langchain.schema.vectorstore import VST, VectorStore
|
||||
|
||||
|
||||
class ToyLoader(BaseLoader):
|
||||
|
||||
@@ -11,7 +11,7 @@ from langchain.retrievers.time_weighted_retriever import (
|
||||
)
|
||||
from langchain.schema import Document
|
||||
from langchain.schema.embeddings import Embeddings
|
||||
from langchain.vectorstores.base import VectorStore
|
||||
from langchain.schema.vectorstore import VectorStore
|
||||
|
||||
|
||||
def _get_example_memories(k: int = 4) -> List[Document]:
|
||||
|
||||
Reference in New Issue
Block a user