mirror of
https://github.com/kennethreitz/requests-html.git
synced 2026-06-05 23:00:20 +00:00
+7
-1
@@ -214,8 +214,14 @@ class HTML(BaseParser):
|
||||
"""An HTML document, ready for parsing."""
|
||||
|
||||
def __init__(self, *, url=DEFAULT_URL, html, default_encoding=DEFAULT_ENCODING) -> None:
|
||||
|
||||
# Convert incoming unicode HTML into bytes.
|
||||
if isinstance(html, str):
|
||||
html = html.encode(DEFAULT_ENCODING)
|
||||
|
||||
super(HTML, self).__init__(
|
||||
element=PyQuery(html)('html'),
|
||||
# Convert unicode HTML to bytes.
|
||||
element=PyQuery(html)('html') or PyQuery('<html>{}</html>'.format(html))('html'),
|
||||
html=html,
|
||||
url=url,
|
||||
default_encoding=default_encoding
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
import os
|
||||
|
||||
from requests_html import HTMLSession
|
||||
from requests_html import HTMLSession, HTML
|
||||
from requests_file import FileAdapter
|
||||
|
||||
session = HTMLSession()
|
||||
@@ -59,6 +59,14 @@ def test_xpath():
|
||||
html = r.html.xpath('/html', first=True)
|
||||
assert 'no-js' in html.attrs['class']
|
||||
|
||||
if __name__ == '__main__':
|
||||
test_css_selector()
|
||||
|
||||
def test_html_loading():
|
||||
doc = """<a href='https://httpbin.org'>"""
|
||||
html = HTML(html=doc)
|
||||
|
||||
assert 'https://httpbin.org' in html.links
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
test_html_loading()
|
||||
|
||||
|
||||
Reference in New Issue
Block a user