diff --git a/libs/langchain/langchain/document_loaders/airbyte.py b/libs/langchain/langchain/document_loaders/airbyte.py index 05f3ca62c..aec832248 100644 --- a/libs/langchain/langchain/document_loaders/airbyte.py +++ b/libs/langchain/langchain/document_loaders/airbyte.py @@ -62,7 +62,7 @@ class AirbyteCDKLoader(BaseLoader): class AirbyteHubspotLoader(AirbyteCDKLoader): - """Loads records from Hubspot using an Airbyte source connector.""" + """Load from `Hubspot` using an `Airbyte` source connector.""" def __init__( self, @@ -94,7 +94,7 @@ class AirbyteHubspotLoader(AirbyteCDKLoader): class AirbyteStripeLoader(AirbyteCDKLoader): - """Loads records from Stripe using an Airbyte source connector.""" + """Load from `Stripe` using an `Airbyte` source connector.""" def __init__( self, @@ -126,7 +126,7 @@ class AirbyteStripeLoader(AirbyteCDKLoader): class AirbyteTypeformLoader(AirbyteCDKLoader): - """Loads records from Typeform using an Airbyte source connector.""" + """Load from `Typeform` using an `Airbyte` source connector.""" def __init__( self, @@ -158,7 +158,7 @@ class AirbyteTypeformLoader(AirbyteCDKLoader): class AirbyteZendeskSupportLoader(AirbyteCDKLoader): - """Loads records from Zendesk Support using an Airbyte source connector.""" + """Load from `Zendesk Support` using an `Airbyte` source connector.""" def __init__( self, @@ -190,7 +190,7 @@ class AirbyteZendeskSupportLoader(AirbyteCDKLoader): class AirbyteShopifyLoader(AirbyteCDKLoader): - """Loads records from Shopify using an Airbyte source connector.""" + """Load from `Shopify` using an `Airbyte` source connector.""" def __init__( self, @@ -222,7 +222,7 @@ class AirbyteShopifyLoader(AirbyteCDKLoader): class AirbyteSalesforceLoader(AirbyteCDKLoader): - """Loads records from Salesforce using an Airbyte source connector.""" + """Load from `Salesforce` using an `Airbyte` source connector.""" def __init__( self, @@ -254,7 +254,7 @@ class AirbyteSalesforceLoader(AirbyteCDKLoader): class 
AirbyteGongLoader(AirbyteCDKLoader): - """Loads records from Gong using an Airbyte source connector.""" + """Load from `Gong` using an `Airbyte` source connector.""" def __init__( self, diff --git a/libs/langchain/langchain/document_loaders/blob_loaders/file_system.py b/libs/langchain/langchain/document_loaders/blob_loaders/file_system.py index aecc1d62f..0ac6ad0d8 100644 --- a/libs/langchain/langchain/document_loaders/blob_loaders/file_system.py +++ b/libs/langchain/langchain/document_loaders/blob_loaders/file_system.py @@ -37,7 +37,7 @@ def _make_iterator( class FileSystemBlobLoader(BlobLoader): - """Blob loader for the local file system. + """Load blobs in the local file system. Example: @@ -58,7 +58,7 @@ class FileSystemBlobLoader(BlobLoader): suffixes: Optional[Sequence[str]] = None, show_progress: bool = False, ) -> None: - """Initialize with path to directory and how to glob over it. + """Initialize with a path to directory and how to glob over it. Args: path: Path to directory to load from diff --git a/libs/langchain/langchain/document_loaders/blob_loaders/schema.py b/libs/langchain/langchain/document_loaders/blob_loaders/schema.py index 4435075d5..4e1978c44 100644 --- a/libs/langchain/langchain/document_loaders/blob_loaders/schema.py +++ b/libs/langchain/langchain/document_loaders/blob_loaders/schema.py @@ -19,7 +19,7 @@ PathLike = Union[str, PurePath] class Blob(BaseModel): - """A blob is used to represent raw data by either reference or value. + """Blob represents raw data by either reference or value. 
Provides an interface to materialize the blob in different representations, and help to decouple the development of data loaders from the downstream parsing of diff --git a/libs/langchain/langchain/document_loaders/chromium.py b/libs/langchain/langchain/document_loaders/chromium.py index 2c0929469..fa757f2b0 100644 --- a/libs/langchain/langchain/document_loaders/chromium.py +++ b/libs/langchain/langchain/document_loaders/chromium.py @@ -9,8 +9,8 @@ logger = logging.getLogger(__name__) class AsyncChromiumLoader(BaseLoader): - """Scrape HTML content from provided URLs using a - headless instance of the Chromium browser.""" + """Scrape HTML pages from URLs using a + headless instance of the Chromium browser.""" def __init__( self, diff --git a/libs/langchain/langchain/document_loaders/csv_loader.py b/libs/langchain/langchain/document_loaders/csv_loader.py index 45133786f..f2ab7c4ea 100644 --- a/libs/langchain/langchain/document_loaders/csv_loader.py +++ b/libs/langchain/langchain/document_loaders/csv_loader.py @@ -78,7 +78,9 @@ class CSVLoader(BaseLoader): class UnstructuredCSVLoader(UnstructuredFileLoader): - """Loader that uses unstructured to load CSV files. Like other + """Load `CSV` files using `Unstructured`. + + Like other Unstructured loaders, UnstructuredCSVLoader can be used in both "single" and "elements" mode. If you use the loader in "elements" mode, the CSV file will be a single Unstructured Table element. diff --git a/libs/langchain/langchain/document_loaders/email.py b/libs/langchain/langchain/document_loaders/email.py index f23ef88d3..f85f95a9a 100644 --- a/libs/langchain/langchain/document_loaders/email.py +++ b/libs/langchain/langchain/document_loaders/email.py @@ -10,7 +10,7 @@ from langchain.document_loaders.unstructured import ( class UnstructuredEmailLoader(UnstructuredFileLoader): - """Load email files with `unstructured`. + """Load email files using `Unstructured`. Works with both .eml and .msg files.
You can process attachments in addition to the diff --git a/libs/langchain/langchain/document_loaders/embaas.py b/libs/langchain/langchain/document_loaders/embaas.py index 9efa6c4c5..0afe0b719 100644 --- a/libs/langchain/langchain/document_loaders/embaas.py +++ b/libs/langchain/langchain/document_loaders/embaas.py @@ -52,14 +52,14 @@ class EmbaasDocumentExtractionPayload(EmbaasDocumentExtractionParameters): class BaseEmbaasLoader(BaseModel): - """Base loader for embedding a model into an `Embaas` document extraction API.""" + """Base loader for `Embaas` document extraction API.""" embaas_api_key: Optional[str] = None - """The API key for the embaas document extraction API.""" + """The API key for the Embaas document extraction API.""" api_url: str = EMBAAS_DOC_API_URL - """The URL of the embaas document extraction API.""" + """The URL of the Embaas document extraction API.""" params: EmbaasDocumentExtractionParameters = EmbaasDocumentExtractionParameters() - """Additional parameters to pass to the embaas document extraction API.""" + """Additional parameters to pass to the Embaas document extraction API.""" @root_validator(pre=True) def validate_environment(cls, values: Dict) -> Dict: @@ -163,13 +163,13 @@ class EmbaasBlobLoader(BaseEmbaasLoader, BaseBlobParser): except requests.exceptions.RequestException as e: if e.response is None or not e.response.text: raise ValueError( - f"Error raised by embaas document text extraction API: {e}" + f"Error raised by Embaas document text extraction API: {e}" ) parsed_response = e.response.json() if "message" in parsed_response: raise ValueError( - f"Validation Error raised by embaas document text extraction API:" + f"Validation Error raised by Embaas document text extraction API:" f" {parsed_response['message']}" ) raise diff --git a/libs/langchain/langchain/document_loaders/helpers.py b/libs/langchain/langchain/document_loaders/helpers.py index c48d0b8eb..6e0f8b9bf 100644 --- 
a/libs/langchain/langchain/document_loaders/helpers.py +++ b/libs/langchain/langchain/document_loaders/helpers.py @@ -5,7 +5,7 @@ from typing import List, NamedTuple, Optional, cast class FileEncoding(NamedTuple): - """A file encoding as the NamedTuple.""" + """File encoding as the NamedTuple.""" encoding: Optional[str] """The encoding of the file.""" diff --git a/libs/langchain/langchain/document_loaders/notebook.py b/libs/langchain/langchain/document_loaders/notebook.py index e9f84666b..77a01b6c4 100644 --- a/libs/langchain/langchain/document_loaders/notebook.py +++ b/libs/langchain/langchain/document_loaders/notebook.py @@ -56,7 +56,7 @@ def concatenate_cells( def remove_newlines(x: Any) -> Any: - """Recursively removes newlines, no matter the data structure they are stored in.""" + """Recursively remove newlines, no matter the data structure they are stored in.""" import pandas as pd if isinstance(x, str): diff --git a/libs/langchain/langchain/document_loaders/parsers/generic.py b/libs/langchain/langchain/document_loaders/parsers/generic.py index 80545281c..3d4c0a5ee 100644 --- a/libs/langchain/langchain/document_loaders/parsers/generic.py +++ b/libs/langchain/langchain/document_loaders/parsers/generic.py @@ -10,7 +10,7 @@ from langchain.schema import Document class MimeTypeBasedParser(BaseBlobParser): - """A parser that uses mime-types to determine how to parse a blob. + """Parser that uses `mime`-types to parse a blob. This parser is useful for simple pipelines where the mime-type is sufficient to determine how to parse a blob. 
diff --git a/libs/langchain/langchain/document_loaders/parsers/grobid.py b/libs/langchain/langchain/document_loaders/parsers/grobid.py index c19b21e23..ea21ac8a4 100644 --- a/libs/langchain/langchain/document_loaders/parsers/grobid.py +++ b/libs/langchain/langchain/document_loaders/parsers/grobid.py @@ -11,13 +11,13 @@ logger = logging.getLogger(__name__) class ServerUnavailableException(Exception): - """Exception raised when the GROBID server is unavailable.""" + """Exception raised when the Grobid server is unavailable.""" pass class GrobidParser(BaseBlobParser): - """Loader that uses Grobid to load article PDF files.""" + """Load article `PDF` files using `Grobid`.""" def __init__( self, diff --git a/libs/langchain/langchain/document_loaders/parsers/html/bs4.py b/libs/langchain/langchain/document_loaders/parsers/html/bs4.py index e56aec31a..334c1689e 100644 --- a/libs/langchain/langchain/document_loaders/parsers/html/bs4.py +++ b/libs/langchain/langchain/document_loaders/parsers/html/bs4.py @@ -11,7 +11,7 @@ logger = logging.getLogger(__name__) class BS4HTMLParser(BaseBlobParser): - """Parser that uses beautiful soup to parse HTML files.""" + """Parse HTML files using `Beautiful Soup`.""" def __init__( self, diff --git a/libs/langchain/langchain/document_loaders/parsers/language/code_segmenter.py b/libs/langchain/langchain/document_loaders/parsers/language/code_segmenter.py index 100fb78e8..2efb2add4 100644 --- a/libs/langchain/langchain/document_loaders/parsers/language/code_segmenter.py +++ b/libs/langchain/langchain/document_loaders/parsers/language/code_segmenter.py @@ -3,7 +3,7 @@ from typing import List class CodeSegmenter(ABC): - """The abstract class for the code segmenter.""" + """Abstract class for the code segmenter.""" def __init__(self, code: str): self.code = code diff --git a/libs/langchain/langchain/document_loaders/parsers/language/javascript.py b/libs/langchain/langchain/document_loaders/parsers/language/javascript.py index
cb53bfb42..258345f8b 100644 --- a/libs/langchain/langchain/document_loaders/parsers/language/javascript.py +++ b/libs/langchain/langchain/document_loaders/parsers/language/javascript.py @@ -4,7 +4,7 @@ from langchain.document_loaders.parsers.language.code_segmenter import CodeSegme class JavaScriptSegmenter(CodeSegmenter): - """The code segmenter for JavaScript.""" + """Code segmenter for JavaScript.""" def __init__(self, code: str): super().__init__(code) diff --git a/libs/langchain/langchain/document_loaders/parsers/language/language_parser.py b/libs/langchain/langchain/document_loaders/parsers/language/language_parser.py index 12a11380c..97d26a99e 100644 --- a/libs/langchain/langchain/document_loaders/parsers/language/language_parser.py +++ b/libs/langchain/langchain/document_loaders/parsers/language/language_parser.py @@ -19,8 +19,7 @@ LANGUAGE_SEGMENTERS: Dict[str, Any] = { class LanguageParser(BaseBlobParser): - """ - Language parser that split code using the respective language syntax. + """Parse using the respective programming language syntax. Each top-level function and class in the code is loaded into separate documents. 
Furthermore, an extra document is generated, containing the remaining top-level code diff --git a/libs/langchain/langchain/document_loaders/parsers/language/python.py b/libs/langchain/langchain/document_loaders/parsers/language/python.py index 4446b4a21..642f6b32c 100644 --- a/libs/langchain/langchain/document_loaders/parsers/language/python.py +++ b/libs/langchain/langchain/document_loaders/parsers/language/python.py @@ -5,7 +5,7 @@ from langchain.document_loaders.parsers.language.code_segmenter import CodeSegme class PythonSegmenter(CodeSegmenter): - """The code segmenter for Python.""" + """Code segmenter for `Python`.""" def __init__(self, code: str): super().__init__(code) diff --git a/libs/langchain/langchain/document_loaders/parsers/pdf.py b/libs/langchain/langchain/document_loaders/parsers/pdf.py index dde96eb8f..00d8d9ade 100644 --- a/libs/langchain/langchain/document_loaders/parsers/pdf.py +++ b/libs/langchain/langchain/document_loaders/parsers/pdf.py @@ -8,7 +8,7 @@ from langchain.schema import Document class PyPDFParser(BaseBlobParser): - """Loads a PDF with pypdf and chunks at character level.""" + """Load `PDF` using `pypdf` and chunk at character level.""" def __init__(self, password: Optional[Union[str, bytes]] = None): self.password = password @@ -29,7 +29,7 @@ class PyPDFParser(BaseBlobParser): class PDFMinerParser(BaseBlobParser): - """Parse PDFs with PDFMiner.""" + """Parse `PDF` using `PDFMiner`.""" def lazy_parse(self, blob: Blob) -> Iterator[Document]: """Lazily parse the blob.""" @@ -42,7 +42,7 @@ class PDFMinerParser(BaseBlobParser): class PyMuPDFParser(BaseBlobParser): - """Parse PDFs with PyMuPDF.""" + """Parse `PDF` using `PyMuPDF`.""" def __init__(self, text_kwargs: Optional[Mapping[str, Any]] = None) -> None: """Initialize the parser. 
@@ -81,7 +81,7 @@ class PyMuPDFParser(BaseBlobParser): class PyPDFium2Parser(BaseBlobParser): - """Parse PDFs with PyPDFium2.""" + """Parse `PDF` with `PyPDFium2`.""" def __init__(self) -> None: """Initialize the parser.""" @@ -114,7 +114,7 @@ class PyPDFium2Parser(BaseBlobParser): class PDFPlumberParser(BaseBlobParser): - """Parse PDFs with PDFPlumber.""" + """Parse `PDF` with `PDFPlumber`.""" def __init__(self, text_kwargs: Optional[Mapping[str, Any]] = None) -> None: """Initialize the parser. @@ -153,7 +153,7 @@ class PDFPlumberParser(BaseBlobParser): class AmazonTextractPDFParser(BaseBlobParser): - """Sends PDF files to Amazon Textract and parses them to generate Documents. + """Send `PDF` files to `Amazon Textract` and parse them. For parsing multi-page PDFs, they have to reside on S3. """ diff --git a/libs/langchain/langchain/document_loaders/pdf.py b/libs/langchain/langchain/document_loaders/pdf.py index b671d90eb..301af6953 100644 --- a/libs/langchain/langchain/document_loaders/pdf.py +++ b/libs/langchain/langchain/document_loaders/pdf.py @@ -459,7 +459,7 @@ class PDFPlumberLoader(BasePDFLoader): class AmazonTextractPDFLoader(BasePDFLoader): - """ "Load `PDF` files from a local file system, HTTP or S3. + """Load `PDF` files from a local file system, HTTP or S3. 
To authenticate, the AWS client uses the following methods to automatically load credentials: diff --git a/libs/langchain/langchain/document_loaders/telegram.py b/libs/langchain/langchain/document_loaders/telegram.py index 9b4f81f4a..86e857cd3 100644 --- a/libs/langchain/langchain/document_loaders/telegram.py +++ b/libs/langchain/langchain/document_loaders/telegram.py @@ -47,7 +47,7 @@ class TelegramChatFileLoader(BaseLoader): def text_to_docs(text: Union[str, List[str]]) -> List[Document]: - """Converts a string or list of strings to a list of Documents with metadata.""" + """Convert a string or list of strings to a list of Documents with metadata.""" if isinstance(text, str): # Take a single string as one page text = [text] @@ -78,7 +78,7 @@ def text_to_docs(text: Union[str, List[str]]) -> List[Document]: class TelegramChatApiLoader(BaseLoader): - """Loads Telegram chat json directory dump.""" + """Load `Telegram` chat json directory dump.""" def __init__( self, diff --git a/libs/langchain/langchain/document_loaders/unstructured.py b/libs/langchain/langchain/document_loaders/unstructured.py index 748a29d34..8e55b1dd0 100644 --- a/libs/langchain/langchain/document_loaders/unstructured.py +++ b/libs/langchain/langchain/document_loaders/unstructured.py @@ -8,7 +8,7 @@ from langchain.document_loaders.base import BaseLoader def satisfies_min_unstructured_version(min_version: str) -> bool: - """Checks to see if the installed unstructured version exceeds the minimum version + """Check if the installed `Unstructured` version exceeds the minimum version for the feature in question.""" from unstructured.__version__ import __version__ as __unstructured_version__ @@ -25,7 +25,7 @@ def satisfies_min_unstructured_version(min_version: str) -> bool: def validate_unstructured_version(min_unstructured_version: str) -> None: - """Raises an error if the unstructured version does not exceed the + """Raise an error if the `Unstructured` version does not exceed the specified 
minimum.""" if not satisfies_min_unstructured_version(min_unstructured_version): raise ValueError( @@ -34,7 +34,7 @@ def validate_unstructured_version(min_unstructured_version: str) -> None: class UnstructuredBaseLoader(BaseLoader, ABC): - """Loader that uses Unstructured to load files.""" + """Base Loader that uses `Unstructured`.""" def __init__( self, @@ -181,7 +181,7 @@ def get_elements_from_api( api_key: str = "", **unstructured_kwargs: Any, ) -> List: - """Retrieves a list of elements from the Unstructured API.""" + """Retrieve a list of elements from the `Unstructured API`.""" if isinstance(file, collections.abc.Sequence) or isinstance(file_path, list): from unstructured.partition.api import partition_multiple_via_api diff --git a/libs/langchain/langchain/document_loaders/youtube.py b/libs/langchain/langchain/document_loaders/youtube.py index 9724ffda0..9998435a3 100644 --- a/libs/langchain/langchain/document_loaders/youtube.py +++ b/libs/langchain/langchain/document_loaders/youtube.py @@ -19,7 +19,7 @@ SCOPES = ["https://www.googleapis.com/auth/youtube.readonly"] @dataclass class GoogleApiClient: - """A Generic Google Api Client. + """Generic Google API Client. To use, you should have the ``google_auth_oauthlib,youtube_transcript_api,google`` python package installed.