From 07c26497530b88fbfe93edcedd65eda70e6c3952 Mon Sep 17 00:00:00 2001
From: Shwu Ku <65639964+EricLiclair@users.noreply.github.com>
Date: Wed, 25 Oct 2023 23:25:13 +0530
Subject: [PATCH] response parser for ArceeRetriever (#12270)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- **Description:** Response parser for the Arcee retriever.
- **Issue:** Follow-up PR on #11578 and this
[discussion](https://github.com/arcee-ai/arcee-python/issues/15#issuecomment-1759874053).
- **Dependencies:** N/A

This PR implements a parser that converts the response from ArceeRetriever
into langchain `Document` objects. This closes the loop of generation and
retrieval for Arcee DALMs in langchain.

The reference for the response parser is
[api-docs:retrieve](https://api.arcee.ai/docs#/v2/retrieve_model).

Attaching screenshot of working implementation:
<img width="1984" alt="Screenshot 2023-10-25 at 7 42 34 PM"
src="https://github.com/langchain-ai/langchain/assets/65639964/026987b9-34b2-4e4b-b87d-69fcd0c6641a">
\*api key deleted

---
Successful tests, lints, etc.
```shell
Re-run pytest with --snapshot-update to delete unused snapshots.
==================================================================================================================== slowest 5 durations =====================================================================================================================
1.56s call     tests/unit_tests/schema/runnable/test_runnable.py::test_retrying
0.63s call     tests/unit_tests/schema/runnable/test_runnable.py::test_map_astream
0.33s call     tests/unit_tests/schema/runnable/test_runnable.py::test_map_stream_iterator_input
0.30s call     tests/unit_tests/schema/runnable/test_runnable.py::test_map_astream_iterator_input
0.20s call     tests/unit_tests/indexes/test_indexing.py::test_cleanup_with_different_batchsize
======================================================================================================= 1265 passed, 270 skipped, 32 warnings in 6.55s =======================================================================================================
[ "." = "" ] || poetry run black .
All done! ✨ 🍰 ✨
1871 files left unchanged.
[ "." = "" ] || poetry run ruff --select I --fix .
./scripts/check_pydantic.sh .
./scripts/check_imports.sh
poetry run ruff .
[ "." = "" ] || poetry run black . --check
All done! ✨ 🍰 ✨
1871 files would be left unchanged.
[ "." = "" ] || poetry run mypy .
Success: no issues found in 1868 source files
poetry run codespell --toml pyproject.toml
poetry run codespell --toml pyproject.toml -w
```

Co-authored-by: Shubham Kushwaha <shwu@Shubhams-MacBook-Pro.local>
---
 libs/langchain/langchain/utilities/arcee.py | 46 +++++++++++++++++++--
 1 file changed, 43 insertions(+), 3 deletions(-)
diff --git a/libs/langchain/langchain/utilities/arcee.py b/libs/langchain/langchain/utilities/arcee.py
index f79da073e..4927acf83 100644
--- a/libs/langchain/langchain/utilities/arcee.py
+++ b/libs/langchain/langchain/utilities/arcee.py
@@ -59,6 +59,43 @@ class DALMFilter(BaseModel):
         return values
 
 
+class ArceeDocumentSource(BaseModel):
+    """Source payload of an Arcee document: its text, name and source id."""
+
+    document: str  # text content; used as `page_content` by ArceeDocumentAdapter
+    name: str  # exposed as metadata["name"] by ArceeDocumentAdapter
+    id: str  # exposed as metadata["source_id"] by ArceeDocumentAdapter
+
+
+class ArceeDocument(BaseModel):
+    """One entry of the retrieve response's `results` list, with its source."""
+
+    index: str  # copied to metadata["index"] by ArceeDocumentAdapter
+    id: str  # copied to metadata["id"] by ArceeDocumentAdapter
+    score: float  # copied to metadata["score"] by ArceeDocumentAdapter
+    source: ArceeDocumentSource  # nested text, name and id of the source
+
+
+class ArceeDocumentAdapter:
+    """Adapter that converts Arcee documents into langchain `Document` objects."""
+
+    @classmethod
+    def adapt(cls, arcee_document: ArceeDocument) -> Document:
+        """Adapt an `ArceeDocument` to a langchain `Document` object."""
+        return Document(
+            page_content=arcee_document.source.document,
+            metadata={
+                # fields taken from the nested source of the Arcee document
+                "name": arcee_document.source.name,
+                "source_id": arcee_document.source.id,
+                # fields taken from the Arcee document itself
+                "index": arcee_document.index,
+                "id": arcee_document.id,
+                "score": arcee_document.score,
+            },
+        )
+
+
 class ArceeWrapper:
     """Wrapper for Arcee API."""
 
@@ -172,7 +209,7 @@ class ArceeWrapper:
 
         response = self._make_request(
             method="post",
-            route=ArceeRoute.generate,
+            route=ArceeRoute.generate.value,
             body=self._make_request_body_for_models(
                 prompt=prompt,
                 **kwargs,
@@ -196,10 +233,13 @@ class ArceeWrapper:
 
         response = self._make_request(
             method="post",
-            route=ArceeRoute.retrieve,
+            route=ArceeRoute.retrieve.value,
             body=self._make_request_body_for_models(
                 prompt=query,
                 **kwargs,
             ),
         )
-        return [Document(**doc) for doc in response["documents"]]
+        return [
+            ArceeDocumentAdapter.adapt(ArceeDocument(**doc))
+            for doc in response["results"]
+        ]