Files
langchain/langchain/document_loaders/html.py
T
2023-02-12 07:36:11 -08:00

14 lines
408 B
Python

"""Loader that loads HTML files."""
from typing import List
from langchain.document_loaders.unstructured import UnstructuredFileLoader
class UnstructuredHTMLLoader(UnstructuredFileLoader):
    """File loader that partitions HTML documents with ``unstructured``."""

    def _get_elements(self) -> List:
        """Return the result of ``partition_html`` for ``self.file_path``."""
        # The import is deferred until call time, so importing this module
        # does not itself import ``unstructured.partition.html``.
        from unstructured.partition.html import partition_html as _partition

        elements = _partition(filename=self.file_path)
        return elements