mirror of
https://github.com/kennethreitz/langchain.git
synced 2026-06-05 23:00:18 +00:00
9aef79c2e3
Work in Progress. WIP Not ready... Adds Document Loader support for [Geopandas.GeoDataFrames](https://geopandas.org/) Example: - [x] stub out `GeoDataFrameLoader` class - [x] stub out integration tests - [ ] Experiment with different geometry text representations - [ ] Verify CRS is successfully added in metadata - [ ] Test effectiveness of searches on geometries - [ ] Test with different geometry types (point, line, polygon with multi-variants). - [ ] Add documentation --------- Co-authored-by: Lance Martin <lance@langchain.dev> Co-authored-by: Bagatur <baskaryan@gmail.com> Co-authored-by: Lance Martin <122662504+rlancemartin@users.noreply.github.com>
42 lines
1.2 KiB
Python
42 lines
1.2 KiB
Python
from typing import TYPE_CHECKING
|
|
|
|
import geopandas
|
|
import pytest
|
|
|
|
from langchain.document_loaders import GeoDataFrameLoader
|
|
from langchain.schema import Document
|
|
|
|
# Static type checkers see the real class; at runtime the name is bound to a
# plain string so the annotations below do not need the class object itself.
if not TYPE_CHECKING:
    GeoDataFrame = "geopandas.GeoDataFrame"
else:
    from geopandas import GeoDataFrame
|
|
|
|
|
|
# Registered as a fixture so tests can request it by parameter name. The
# ``requires("geopandas")`` mark belongs on the tests themselves: marks applied
# to fixture functions have no effect.
@pytest.fixture
def sample_gdf() -> GeoDataFrame:
    """Provide a two-row GeoDataFrame built from the bundled "nybb" dataset.

    Two extra columns are added so tests can compare them against the
    metadata produced by the loader:

    * ``area`` -- the per-row value of ``gdf.area``.
    * ``crs`` -- the frame's CRS rendered with ``to_string()``.

    Returns:
        The first two rows of the augmented GeoDataFrame.
    """
    # NOTE(review): ``geopandas.datasets`` is deprecated in newer geopandas
    # releases -- confirm the pinned version still ships "nybb".
    path_to_data = geopandas.datasets.get_path("nybb")
    gdf = geopandas.read_file(path_to_data)
    gdf["area"] = gdf.area
    gdf["crs"] = gdf.crs.to_string()
    return gdf.head(2)
|
|
|
|
|
|
@pytest.mark.requires("geopandas")
def test_load_returns_list_of_documents(sample_gdf: GeoDataFrame) -> None:
    """Check that ``load()`` yields a list of exactly two ``Document`` objects.

    Args:
        sample_gdf: GeoDataFrame handed to ``GeoDataFrameLoader``.
    """
    documents = GeoDataFrameLoader(sample_gdf).load()

    # Container type first, then element types, then the expected count.
    assert isinstance(documents, list)
    assert all(isinstance(document, Document) for document in documents)
    assert len(documents) == 2
|
|
|
|
|
|
@pytest.mark.requires("geopandas")
def test_load_converts_dataframe_columns_to_document_metadata(
    sample_gdf: GeoDataFrame,
) -> None:
    """Check each document's metadata carries its row's ``area`` and ``crs``.

    Args:
        sample_gdf: GeoDataFrame handed to ``GeoDataFrameLoader``; it must
            contain ``area`` and ``crs`` columns.
    """
    loader = GeoDataFrameLoader(sample_gdf)
    docs = loader.load()

    # Guard against a vacuous pass: with no documents the loop below would
    # never execute a single assertion.
    assert len(docs) == len(sample_gdf)

    # Compare by position (``iloc``) rather than by index label (``loc``) so
    # the check does not depend on the frame having a default 0..n-1 index.
    for position, doc in enumerate(docs):
        assert doc.metadata["area"] == sample_gdf["area"].iloc[position]
        assert doc.metadata["crs"] == sample_gdf["crs"].iloc[position]
|