mirror of
https://github.com/kennethreitz/requests-html.git
synced 2026-06-05 23:00:20 +00:00
Add arender method to HTML
This commit is contained in:
+62
-33
@@ -499,6 +499,45 @@ class HTML(BaseParser):
|
||||
def add_next_symbol(self, next_symbol):
|
||||
self.next_symbol.append(next_symbol)
|
||||
|
||||
async def _async_render(self, *, url: str, script: str = None, scrolldown, sleep: int, wait: float, reload, content: Optional[str], timeout: Union[float, int], keep_page: bool):
|
||||
""" Handle page creation and js rendering. Internal use for render/arender methods. """
|
||||
try:
|
||||
page = await self.browser.newPage()
|
||||
|
||||
# Wait before rendering the page, to prevent timeouts.
|
||||
await asyncio.sleep(wait)
|
||||
|
||||
# Load the given page (GET request, obviously.)
|
||||
if reload:
|
||||
await page.goto(url, options={'timeout': int(timeout * 1000)})
|
||||
else:
|
||||
await page.goto(f'data:text/html,{self.html}', options={'timeout': int(timeout * 1000)})
|
||||
|
||||
result = None
|
||||
if script:
|
||||
result = await page.evaluate(script)
|
||||
|
||||
if scrolldown:
|
||||
for _ in range(scrolldown):
|
||||
await page._keyboard.down('PageDown')
|
||||
await asyncio.sleep(sleep)
|
||||
else:
|
||||
await asyncio.sleep(sleep)
|
||||
|
||||
if scrolldown:
|
||||
await page._keyboard.up('PageDown')
|
||||
|
||||
# Return the content of the page, JavaScript evaluated.
|
||||
content = await page.content()
|
||||
if not keep_page:
|
||||
await page.close()
|
||||
page = None
|
||||
return content, result, page
|
||||
except TimeoutError:
|
||||
await page.close()
|
||||
page = None
|
||||
return None
|
||||
|
||||
def render(self, retries: int = 8, script: str = None, wait: float = 0.2, scrolldown=False, sleep: int = 0, reload: bool = True, timeout: Union[float, int] = 8.0, keep_page: bool = False):
|
||||
"""Reloads the response in Chromium, and replaces HTML content
|
||||
with an updated version, with JavaScript executed.
|
||||
@@ -543,45 +582,35 @@ class HTML(BaseParser):
|
||||
Warning: the first time you run this method, it will download
|
||||
Chromium into your home directory (``~/.pyppeteer``).
|
||||
"""
|
||||
async def _async_render(*, url: str, script: str = None, scrolldown, sleep: int, wait: float, reload, content: Optional[str], timeout: Union[float, int], keep_page: bool):
|
||||
try:
|
||||
page = await self.session.browser.newPage()
|
||||
|
||||
# Wait before rendering the page, to prevent timeouts.
|
||||
await asyncio.sleep(wait)
|
||||
self.browser = self.session.browser # Automatycally create a event loop and browser
|
||||
content = None
|
||||
|
||||
# Load the given page (GET request, obviously.)
|
||||
if reload:
|
||||
await page.goto(url, options={'timeout': int(timeout * 1000)})
|
||||
else:
|
||||
await page.goto(f'data:text/html,{self.html}', options={'timeout': int(timeout * 1000)})
|
||||
# Automatically set Reload to False, if example URL is being used.
|
||||
if self.url == DEFAULT_URL:
|
||||
reload = False
|
||||
|
||||
result = None
|
||||
if script:
|
||||
result = await page.evaluate(script)
|
||||
for i in range(retries):
|
||||
if not content:
|
||||
try:
|
||||
|
||||
if scrolldown:
|
||||
for _ in range(scrolldown):
|
||||
await page._keyboard.down('PageDown')
|
||||
await asyncio.sleep(sleep)
|
||||
else:
|
||||
await asyncio.sleep(sleep)
|
||||
content, result, page = self.session.loop.run_until_complete(self._async_render(url=self.url, script=script, sleep=sleep, wait=wait, content=self.html, reload=reload, scrolldown=scrolldown, timeout=timeout, keep_page=keep_page))
|
||||
except TypeError:
|
||||
pass
|
||||
else:
|
||||
break
|
||||
|
||||
if scrolldown:
|
||||
await page._keyboard.up('PageDown')
|
||||
if not content:
|
||||
raise MaxRetries("Unable to render the page. Try increasing timeout")
|
||||
|
||||
# Return the content of the page, JavaScript evaluated.
|
||||
content = await page.content()
|
||||
if not keep_page:
|
||||
await page.close()
|
||||
page = None
|
||||
return content, result, page
|
||||
except TimeoutError:
|
||||
await page.close()
|
||||
page = None
|
||||
return None
|
||||
html = HTML(url=self.url, html=content.encode(DEFAULT_ENCODING), default_encoding=DEFAULT_ENCODING)
|
||||
self.__dict__.update(html.__dict__)
|
||||
self.page = page
|
||||
return result
|
||||
|
||||
self.session.browser # Automatically create a event loop and browser
|
||||
async def arender(self, retries: int = 8, script: str = None, wait: float = 0.2, scrolldown=False, sleep: int = 0, reload: bool = True, timeout: Union[float, int] = 8.0, keep_page: bool = False):
|
||||
""" Async version of render. Takes same parameters. """
|
||||
self.browser = await self.session.browser
|
||||
content = None
|
||||
|
||||
# Automatically set Reload to False, if example URL is being used.
|
||||
@@ -592,7 +621,7 @@ class HTML(BaseParser):
|
||||
if not content:
|
||||
try:
|
||||
|
||||
content, result, page = self.session.loop.run_until_complete(_async_render(url=self.url, script=script, sleep=sleep, wait=wait, content=self.html, reload=reload, scrolldown=scrolldown, timeout=timeout, keep_page=keep_page))
|
||||
content, result, page = await self._async_render(url=self.url, script=script, sleep=sleep, wait=wait, content=self.html, reload=reload, scrolldown=scrolldown, timeout=timeout, keep_page=keep_page)
|
||||
except TypeError:
|
||||
pass
|
||||
else:
|
||||
|
||||
@@ -193,6 +193,28 @@ def test_render():
|
||||
assert len(about.links) == 6
|
||||
|
||||
|
||||
@pytest.mark.render
|
||||
@pytest.mark.asyncio
|
||||
async def test_async_render(async_get):
|
||||
r = await async_get()
|
||||
script = """
|
||||
() => {
|
||||
return {
|
||||
width: document.documentElement.clientWidth,
|
||||
height: document.documentElement.clientHeight,
|
||||
deviceScaleFactor: window.devicePixelRatio,
|
||||
}
|
||||
}
|
||||
"""
|
||||
val = await r.html.arender(script=script)
|
||||
for value in ('width', 'height', 'deviceScaleFactor'):
|
||||
assert value in val
|
||||
|
||||
about = r.html.find('#about', first=True)
|
||||
assert len(about.links) == 6
|
||||
await r.html.browser.close()
|
||||
|
||||
|
||||
@pytest.mark.render
|
||||
def test_bare_render():
|
||||
doc = """<a href='https://httpbin.org'>"""
|
||||
|
||||
Reference in New Issue
Block a user