mirror of
https://github.com/kennethreitz/langchain.git
synced 2026-06-05 23:00:18 +00:00
aa9d5707e0
This PR contributes a `PythonLoader`, which inherits from `TextLoader` but detects and sets the encoding automatically.
15 lines
405 B
Python
15 lines
405 B
Python
import tokenize
|
|
|
|
from langchain.document_loaders.text import TextLoader
|
|
|
|
|
|
class PythonLoader(TextLoader):
    """Text loader for Python source files.

    The file's encoding is detected with ``tokenize.detect_encoding`` and
    passed on to ``TextLoader``, so a source file that declares a
    non-default encoding is read with that encoding.
    """

    def __init__(self, file_path: str):
        """Detect the encoding of ``file_path`` and initialise the loader.

        Args:
            file_path: Path of the Python source file to load.
        """
        # Binary mode is required: detect_encoding consumes raw byte lines
        # through the ``readline`` callable it is given.
        with open(file_path, "rb") as source:
            detection = tokenize.detect_encoding(source.readline)

        # detect_encoding returns (encoding, lines_read); only the encoding
        # name is needed by the parent loader.
        detected_encoding = detection[0]
        super().__init__(file_path=file_path, encoding=detected_encoding)
|