Files
langchain/tests/integration_tests/document_loaders/test_url_playwright.py
T
EllieRoseS c087ce74f7 Added matching async load func to PlaywrightURLLoader (#5938)
Fixes # (issue)

The existing PlaywrightURLLoader load() function uses a synchronous
browser, which is not compatible with Jupyter.
This PR adds a sister function, aload(), which can be run inside a
notebook.

---------

Co-authored-by: Harrison Chase <hw.chase.17@gmail.com>
2023-07-13 17:51:38 -04:00

42 lines
1.1 KiB
Python

"""Tests for the Playwright URL loader"""
import pytest
from langchain.document_loaders import PlaywrightURLLoader
def test_playwright_url_loader() -> None:
    """Load several live pages via ``PlaywrightURLLoader.load()``.

    Integration test: builds a loader over a fixed list of public URLs,
    asks it to drop ``header`` and ``footer`` selectors, and asserts that
    the synchronous ``load()`` call returns at least one document.
    """
    target_urls = [
        "https://www.youtube.com/watch?v=dQw4w9WgXcQ",
        "https://goo.gl/maps/NDSHwePEyaHMFGwh8",
        "https://techmeme.com",
        "https://techcrunch.com",
    ]
    stripped_selectors = ["header", "footer"]

    # NOTE(review): continue_on_failure=False presumably makes any single
    # URL failure raise instead of being skipped -- confirm against the
    # loader implementation.
    url_loader = PlaywrightURLLoader(
        urls=target_urls,
        remove_selectors=stripped_selectors,
        continue_on_failure=False,
        headless=True,
    )

    loaded_docs = url_loader.load()
    document_count = len(loaded_docs)
    assert document_count > 0
@pytest.mark.asyncio
async def test_playwright_async_url_loader() -> None:
    """Load several live pages via ``PlaywrightURLLoader.aload()``.

    Integration test: builds a loader over a fixed list of public URLs,
    asks it to drop ``header`` and ``footer`` selectors, and asserts that
    awaiting ``aload()`` returns at least one document.
    """
    target_urls = [
        "https://www.youtube.com/watch?v=dQw4w9WgXcQ",
        "https://goo.gl/maps/NDSHwePEyaHMFGwh8",
        "https://techmeme.com",
        "https://techcrunch.com",
    ]
    stripped_selectors = ["header", "footer"]

    # NOTE(review): continue_on_failure=False presumably makes any single
    # URL failure raise instead of being skipped -- confirm against the
    # loader implementation.
    url_loader = PlaywrightURLLoader(
        urls=target_urls,
        remove_selectors=stripped_selectors,
        continue_on_failure=False,
        headless=True,
    )

    loaded_docs = await url_loader.aload()
    document_count = len(loaded_docs)
    assert document_count > 0