diff --git a/requests_html.py b/requests_html.py
index 4c4a23f..85d5bb6 100644
--- a/requests_html.py
+++ b/requests_html.py
@@ -214,8 +214,17 @@ class HTML(BaseParser):
     """An HTML document, ready for parsing."""
 
     def __init__(self, *, url=DEFAULT_URL, html, default_encoding=DEFAULT_ENCODING) -> None:
+
+        # Convert incoming unicode HTML into bytes, using the same encoding
+        # that is handed to the parent below so the two stay consistent.
+        if isinstance(html, str):
+            html = html.encode(default_encoding)
+
         super(HTML, self).__init__(
-            element=PyQuery(html)('html'),
+            # A fragment without an <html> root selects nothing, so retry with
+            # the markup wrapped in one. Concatenate bytes rather than using
+            # str.format(), which would embed the b'...' repr in the document.
+            element=PyQuery(html)('html') or PyQuery(b'<html>' + html + b'</html>')('html'),
             html=html,
             url=url,
             default_encoding=default_encoding
diff --git a/tests/test_requests_html.py b/tests/test_requests_html.py
index 540a6ee..8c0c912 100644
--- a/tests/test_requests_html.py
+++ b/tests/test_requests_html.py
@@ -1,6 +1,6 @@
 import os
 
-from requests_html import HTMLSession
+from requests_html import HTMLSession, HTML
 from requests_file import FileAdapter
 
 session = HTMLSession()
@@ -59,6 +59,15 @@ def test_xpath():
     html = r.html.xpath('/html', first=True)
     assert 'no-js' in html.attrs['class']
 
 
-if __name__ == '__main__':
-    test_css_selector()
+
+def test_html_loading():
+    """A bare HTML fragment given as ``str`` is parsed and exposes its links."""
+    doc = """<a href='https://httpbin.org'>"""
+    html = HTML(html=doc)
+
+    assert 'https://httpbin.org' in html.links
+
+
+if __name__ == '__main__':
+    test_html_loading()