From 5cd244e9b7217824c01e271e30f81ef9b923d79c Mon Sep 17 00:00:00 2001 From: Taqi Jaffri Date: Sat, 19 Aug 2023 13:48:15 -0700 Subject: [PATCH] CR feedback --- .../integrations/document_loaders/docugami.ipynb | 12 ++---------- .../langchain/langchain/document_loaders/docugami.py | 1 + 2 files changed, 3 insertions(+), 10 deletions(-) diff --git a/docs/extras/integrations/document_loaders/docugami.ipynb b/docs/extras/integrations/document_loaders/docugami.ipynb index d3f94a8d1..607cf2b14 100644 --- a/docs/extras/integrations/document_loaders/docugami.ipynb +++ b/docs/extras/integrations/document_loaders/docugami.ipynb @@ -19,18 +19,10 @@ "metadata": { "tags": [] }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Requirement already satisfied: lxml in /root/Source/github/docugami.langchain/libs/langchain/.venv/lib/python3.9/site-packages (4.9.3)\n" - ] - } - ], + "outputs": [], "source": [ "# You need the lxml package to use the DocugamiLoader\n", - "!poetry run pip install lxml" + "!poetry run pip install lxml --quiet" ] }, { diff --git a/libs/langchain/langchain/document_loaders/docugami.py b/libs/langchain/langchain/document_loaders/docugami.py index af2c95f57..cd05a9612 100644 --- a/libs/langchain/langchain/document_loaders/docugami.py +++ b/libs/langchain/langchain/document_loaders/docugami.py @@ -147,6 +147,7 @@ class DocugamiLoader(BaseLoader, BaseModel): metadata = { XPATH_KEY: _xpath_for_chunk(node), DOCUMENT_ID_KEY: document[DOCUMENT_ID_KEY], + DOCUMENT_NAME_KEY: document[DOCUMENT_NAME_KEY], DOCUMENT_SOURCE_KEY: document[DOCUMENT_NAME_KEY], STRUCTURE_KEY: node.attrib.get("structure", ""), TAG_KEY: re.sub(r"\{.*\}", "", node.tag),