Files
langchain/tests/integration_tests/document_loaders/test_geodataframe.py
T
Brendan Collins 9aef79c2e3 Add Geopandas.GeoDataFrame Document Loader (#3817)
Work in Progress.
WIP
Not ready...

Adds Document Loader support for
[Geopandas.GeoDataFrames](https://geopandas.org/)

Task checklist:
- [x] stub out `GeoDataFrameLoader` class
- [x] stub out integration tests
- [ ] Experiment with different geometry text representations
- [ ] Verify CRS is successfully added in metadata
- [ ] Test effectiveness of searches on geometries
- [ ] Test with different geometry types (point, line, and polygon,
including their multi-part variants).
- [ ] Add documentation

---------

Co-authored-by: Lance Martin <lance@langchain.dev>
Co-authored-by: Bagatur <baskaryan@gmail.com>
Co-authored-by: Lance Martin <122662504+rlancemartin@users.noreply.github.com>
2023-07-19 12:14:41 -07:00

42 lines
1.2 KiB
Python

from typing import TYPE_CHECKING
import geopandas
import pytest
from langchain.document_loaders import GeoDataFrameLoader
from langchain.schema import Document
# Import the real class only for static type checkers; at runtime bind the
# name to a plain string so the annotations below still have a value to use.
if TYPE_CHECKING:
    from geopandas import GeoDataFrame
else:
    GeoDataFrame = "geopandas.GeoDataFrame"
@pytest.fixture
def sample_gdf() -> GeoDataFrame:
    """Provide a two-row GeoDataFrame built from geopandas' "nybb" dataset.

    Registered as a pytest fixture so the tests in this module can request it
    through their ``sample_gdf`` parameter. Without the fixture registration
    pytest cannot resolve that parameter and the tests error out during setup.

    Returns:
        The first two rows of the dataset, extended with two columns:
        ``area`` (the per-row value of ``gdf.area``) and ``crs`` (the string
        form of the frame's CRS, repeated on every row).
    """
    # NOTE(review): `geopandas.datasets` is deprecated in recent geopandas
    # releases -- confirm the pinned version still ships the "nybb" dataset.
    path_to_data = geopandas.datasets.get_path("nybb")
    gdf = geopandas.read_file(path_to_data)
    gdf["area"] = gdf.area
    gdf["crs"] = gdf.crs.to_string()
    return gdf.head(2)
@pytest.mark.requires("geopandas")
def test_load_returns_list_of_documents(sample_gdf: GeoDataFrame) -> None:
    """Check that ``load()`` returns a list of exactly two Document objects."""
    documents = GeoDataFrameLoader(sample_gdf).load()

    assert isinstance(documents, list)
    for document in documents:
        assert isinstance(document, Document)
    assert len(documents) == 2
@pytest.mark.requires("geopandas")
def test_load_converts_dataframe_columns_to_document_metadata(
    sample_gdf: GeoDataFrame,
) -> None:
    """Check that each document's metadata mirrors its source row.

    For every loaded document, the ``area`` and ``crs`` metadata entries must
    equal the values stored in the frame row with the same index.
    """
    documents = GeoDataFrameLoader(sample_gdf).load()

    for row_label, document in enumerate(documents):
        # Same column order as before: "area" is compared first, then "crs".
        for column in ("area", "crs"):
            assert document.metadata[column] == sample_gdf.loc[row_label, column]