import os
from functools import partial
import pytest
from pyppeteer.browser import Browser
from pyppeteer.page import Page
from requests_html import HTMLSession, AsyncHTMLSession, HTML
from requests_file import FileAdapter
session = HTMLSession()
session.mount('file://', FileAdapter())
def get():
path = os.path.sep.join((os.path.dirname(os.path.abspath(__file__)), 'python.html'))
url = 'file://{}'.format(path)
return session.get(url)
@pytest.fixture
def async_get(event_loop):
""" AsyncSession cannot be created global since it will create
a different loop from pytest-asyncio. """
async_session = AsyncHTMLSession()
async_session.mount('file://', FileAdapter())
path = os.path.sep.join((os.path.dirname(os.path.abspath(__file__)), 'python.html'))
url = 'file://{}'.format(path)
return partial(async_session.get, url)
@pytest.mark.ok
def test_file_get():
r = get()
assert r.status_code == 200
@pytest.mark.ok
@pytest.mark.asyncio
async def test_async_file_get(async_get):
r = await async_get()
assert r.status_code == 200
@pytest.mark.ok
def test_class_seperation():
r = get()
about = r.html.find('#about', first=True)
assert len(about.attrs['class']) == 2
@pytest.mark.ok
def test_css_selector():
r = get()
about = r.html.find('#about', first=True)
for menu_item in (
'About', 'Applications', 'Quotes', 'Getting Started', 'Help',
'Python Brochure'
):
assert menu_item in about.text.split('\n')
assert menu_item in about.full_text.split('\n')
@pytest.mark.ok
def test_containing():
r = get()
python = r.html.find(containing='python')
assert len(python) == 192
for e in python:
assert 'python' in e.full_text.lower()
@pytest.mark.ok
def test_attrs():
r = get()
about = r.html.find('#about', first=True)
assert 'aria-haspopup' in about.attrs
assert len(about.attrs['class']) == 2
@pytest.mark.ok
def test_links():
r = get()
about = r.html.find('#about', first=True)
assert len(about.links) == 6
assert len(about.absolute_links) == 6
@pytest.mark.ok
@pytest.mark.asyncio
async def test_async_links(async_get):
r = await async_get()
about = r.html.find('#about', first=True)
assert len(about.links) == 6
assert len(about.absolute_links) == 6
@pytest.mark.ok
def test_search():
r = get()
style = r.html.search('Python is a {} language')[0]
assert style == 'programming'
@pytest.mark.ok
def test_xpath():
r = get()
html = r.html.xpath('/html', first=True)
assert 'no-js' in html.attrs['class']
a_hrefs = r.html.xpath('//a/@href')
assert '#site-map' in a_hrefs
@pytest.mark.ok
def test_html_loading():
doc = """"""
html = HTML(html=doc)
assert 'https://httpbin.org' in html.links
assert isinstance(html.raw_html, bytes)
assert isinstance(html.html, str)
@pytest.mark.ok
def test_anchor_links():
r = get()
r.html.skip_anchors = False
assert '#site-map' in r.html.links
@pytest.mark.ok
@pytest.mark.parametrize('url,link,expected', [
('http://example.com/', 'test.html', 'http://example.com/test.html'),
('http://example.com', 'test.html', 'http://example.com/test.html'),
('http://example.com/foo/', 'test.html', 'http://example.com/foo/test.html'),
('http://example.com/foo/bar', 'test.html', 'http://example.com/foo/test.html'),
('http://example.com/foo/', '/test.html', 'http://example.com/test.html'),
('http://example.com/', 'http://xkcd.com/about/', 'http://xkcd.com/about/'),
('http://example.com/', '//xkcd.com/about/', 'http://xkcd.com/about/'),
])
def test_absolute_links(url, link, expected):
head_template = """"""
body_template = """Next"""
# Test without `` tag (url is base)
html = HTML(html=body_template.format(link), url=url)
assert html.absolute_links.pop() == expected
# Test with `` tag (url is other)
html = HTML(
html=head_template.format(url) + body_template.format(link),
url='http://example.com/foobar/')
assert html.absolute_links.pop() == expected
@pytest.mark.parser
def test_parser():
doc = """httpbin.org\n"""
html = HTML(html=doc)
assert html.find('html')
assert html.element('a').text().strip() == 'httpbin.org'
@pytest.mark.render
def test_render():
r = get()
script = """
() => {
return {
width: document.documentElement.clientWidth,
height: document.documentElement.clientHeight,
deviceScaleFactor: window.devicePixelRatio,
}
}
"""
val = r.html.render(script=script)
for value in ('width', 'height', 'deviceScaleFactor'):
assert value in val
about = r.html.find('#about', first=True)
assert len(about.links) == 6
@pytest.mark.render
@pytest.mark.asyncio
async def test_async_render(async_get):
r = await async_get()
script = """
() => {
return {
width: document.documentElement.clientWidth,
height: document.documentElement.clientHeight,
deviceScaleFactor: window.devicePixelRatio,
}
}
"""
val = await r.html.arender(script=script)
for value in ('width', 'height', 'deviceScaleFactor'):
assert value in val
about = r.html.find('#about', first=True)
assert len(about.links) == 6
await r.html.browser.close()
@pytest.mark.render
def test_bare_render():
doc = """"""
html = HTML(html=doc)
script = """
() => {
return {
width: document.documentElement.clientWidth,
height: document.documentElement.clientHeight,
deviceScaleFactor: window.devicePixelRatio,
}
}
"""
val = html.render(script=script, reload=False)
for value in ('width', 'height', 'deviceScaleFactor'):
assert value in val
assert html.find('html')
assert 'https://httpbin.org' in html.links
@pytest.mark.render
@pytest.mark.asyncio
async def test_bare_arender():
doc = """"""
html = HTML(html=doc, async_=True)
script = """
() => {
return {
width: document.documentElement.clientWidth,
height: document.documentElement.clientHeight,
deviceScaleFactor: window.devicePixelRatio,
}
}
"""
val = await html.arender(script=script, reload=False)
for value in ('width', 'height', 'deviceScaleFactor'):
assert value in val
assert html.find('html')
assert 'https://httpbin.org' in html.links
await html.browser.close()
@pytest.mark.render
def test_bare_js_eval():
doc = """
This gets replaced
"""
html = HTML(html=doc)
html.render()
assert html.find('#replace', first=True).text == 'yolo'
@pytest.mark.render
@pytest.mark.asyncio
async def test_bare_js_async_eval():
doc = """
This gets replaced
"""
html = HTML(html=doc, async_=True)
await html.arender()
assert html.find('#replace', first=True).text == 'yolo'
await html.browser.close()
@pytest.mark.ok
def test_browser_session():
""" Test browser instaces is created and properly close when session is closed.
Note: session.close method need to be tested together with browser creation,
since no doing that will left the browser running. """
session = HTMLSession()
assert isinstance(session.browser, Browser)
assert hasattr(session, "loop") == True
session.close()
# assert count_chromium_process() == 0
@pytest.mark.ok
@pytest.mark.asyncio
async def test_browser_session_fail():
""" HTMLSession.browser should not be call within an existing event loop> """
session = HTMLSession()
with pytest.raises(RuntimeError):
session.browser
@pytest.mark.ok
def test_browser_process():
for _ in range(3):
r = get()
r.html.render()
assert r.html.page == None
@pytest.mark.ok
@pytest.mark.asyncio
async def test_async_browser_session():
session = AsyncHTMLSession()
browser = await session.browser
assert isinstance(browser, Browser)
await session.close()
if __name__ == '__main__':
test_containing()