diff --git a/requests_html.py b/requests_html.py
index e2a66f1..dea4664 100644
--- a/requests_html.py
+++ b/requests_html.py
@@ -499,6 +499,45 @@ class HTML(BaseParser):
def add_next_symbol(self, next_symbol):
self.next_symbol.append(next_symbol)
+    async def _async_render(self, *, url: str, script: str = None, scrolldown, sleep: int, wait: float, reload, content: Optional[str], timeout: Union[float, int], keep_page: bool):
+        """Open a browser page, load the document and evaluate JavaScript.
+
+        Internal helper shared by the ``render`` and ``arender`` methods.
+
+        :param url: Address navigated to when ``reload`` is truthy.
+        :param script: Optional JavaScript passed to ``page.evaluate``.
+        :param scrolldown: Number of PageDown key presses; falsy to skip.
+        :param sleep: Seconds to sleep after each PageDown press, or once
+            when no scrolling is requested.
+        :param wait: Seconds to sleep before the page is loaded.
+        :param reload: If truthy, navigate to ``url``; otherwise load
+            ``self.html`` through a ``data:`` URL.
+        :param content: Accepted for call compatibility; its incoming value
+            is never read here.
+        :param timeout: Navigation timeout in seconds (multiplied by 1000
+            before being handed to ``page.goto``).
+        :param keep_page: If falsy, the page is closed before returning.
+        :return: ``(content, result, page)`` on success, where ``page`` is
+            ``None`` unless ``keep_page`` is truthy; ``None`` when a
+            ``TimeoutError`` is caught.
+        """
+        # Create the page before entering the ``try`` block: if creation
+        # itself fails there is no page to close, and the handler below must
+        # never reference an unbound ``page``.
+        page = await self.browser.newPage()
+        try:
+            # Wait before rendering the page, to prevent timeouts.
+            await asyncio.sleep(wait)
+
+            # Load the given page (GET request, obviously.)
+            goto_options = {'timeout': int(timeout * 1000)}
+            if reload:
+                await page.goto(url, options=goto_options)
+            else:
+                await page.goto(f'data:text/html,{self.html}', options=goto_options)
+
+            result = None
+            if script:
+                result = await page.evaluate(script)
+
+            if scrolldown:
+                for _ in range(scrolldown):
+                    await page._keyboard.down('PageDown')
+                    await asyncio.sleep(sleep)
+                # Release the key once all presses have been issued.
+                await page._keyboard.up('PageDown')
+            else:
+                await asyncio.sleep(sleep)
+
+            # Return the content of the page, JavaScript evaluated.
+            content = await page.content()
+            if not keep_page:
+                await page.close()
+                page = None
+            return content, result, page
+        except TimeoutError:
+            # Callers unpack the return value; ``None`` makes that unpacking
+            # fail, which they treat as a signal to retry.
+            await page.close()
+            return None
+
def render(self, retries: int = 8, script: str = None, wait: float = 0.2, scrolldown=False, sleep: int = 0, reload: bool = True, timeout: Union[float, int] = 8.0, keep_page: bool = False):
"""Reloads the response in Chromium, and replaces HTML content
with an updated version, with JavaScript executed.
@@ -543,45 +582,35 @@ class HTML(BaseParser):
Warning: the first time you run this method, it will download
Chromium into your home directory (``~/.pyppeteer``).
"""
- async def _async_render(*, url: str, script: str = None, scrolldown, sleep: int, wait: float, reload, content: Optional[str], timeout: Union[float, int], keep_page: bool):
- try:
- page = await self.session.browser.newPage()
- # Wait before rendering the page, to prevent timeouts.
- await asyncio.sleep(wait)
+ self.browser = self.session.browser # Automatically create an event loop and browser
+ content = None
- # Load the given page (GET request, obviously.)
- if reload:
- await page.goto(url, options={'timeout': int(timeout * 1000)})
- else:
- await page.goto(f'data:text/html,{self.html}', options={'timeout': int(timeout * 1000)})
+ # Automatically set Reload to False, if example URL is being used.
+ if self.url == DEFAULT_URL:
+ reload = False
- result = None
- if script:
- result = await page.evaluate(script)
+ for i in range(retries):
+ if not content:
+ try:
- if scrolldown:
- for _ in range(scrolldown):
- await page._keyboard.down('PageDown')
- await asyncio.sleep(sleep)
- else:
- await asyncio.sleep(sleep)
+ content, result, page = self.session.loop.run_until_complete(self._async_render(url=self.url, script=script, sleep=sleep, wait=wait, content=self.html, reload=reload, scrolldown=scrolldown, timeout=timeout, keep_page=keep_page))
+ except TypeError:
+ pass
+ else:
+ break
- if scrolldown:
- await page._keyboard.up('PageDown')
+ if not content:
+ raise MaxRetries("Unable to render the page. Try increasing timeout")
- # Return the content of the page, JavaScript evaluated.
- content = await page.content()
- if not keep_page:
- await page.close()
- page = None
- return content, result, page
- except TimeoutError:
- await page.close()
- page = None
- return None
+ html = HTML(url=self.url, html=content.encode(DEFAULT_ENCODING), default_encoding=DEFAULT_ENCODING)
+ self.__dict__.update(html.__dict__)
+ self.page = page
+ return result
- self.session.browser # Automatically create a event loop and browser
+ async def arender(self, retries: int = 8, script: str = None, wait: float = 0.2, scrolldown=False, sleep: int = 0, reload: bool = True, timeout: Union[float, int] = 8.0, keep_page: bool = False):
+ """ Async version of render. Takes same parameters. """
+ self.browser = await self.session.browser
content = None
# Automatically set Reload to False, if example URL is being used.
@@ -592,7 +621,7 @@ class HTML(BaseParser):
if not content:
try:
- content, result, page = self.session.loop.run_until_complete(_async_render(url=self.url, script=script, sleep=sleep, wait=wait, content=self.html, reload=reload, scrolldown=scrolldown, timeout=timeout, keep_page=keep_page))
+ content, result, page = await self._async_render(url=self.url, script=script, sleep=sleep, wait=wait, content=self.html, reload=reload, scrolldown=scrolldown, timeout=timeout, keep_page=keep_page)
except TypeError:
pass
else:
diff --git a/tests/test_requests_html.py b/tests/test_requests_html.py
index 5d583dd..4a40c2d 100644
--- a/tests/test_requests_html.py
+++ b/tests/test_requests_html.py
@@ -193,6 +193,28 @@ def test_render():
assert len(about.links) == 6
+@pytest.mark.render
+@pytest.mark.asyncio
+async def test_async_render(async_get):
+    """Render a fetched page with ``arender`` and check the script result."""
+    response = await async_get()
+    # JavaScript evaluated in the page; it reports three viewport metrics.
+    viewport_script = """
+ () => {
+ return {
+ width: document.documentElement.clientWidth,
+ height: document.documentElement.clientHeight,
+ deviceScaleFactor: window.devicePixelRatio,
+ }
+ }
+ """
+    viewport = await response.html.arender(script=viewport_script)
+    expected_keys = ('width', 'height', 'deviceScaleFactor')
+    for key in expected_keys:
+        assert key in viewport
+
+    # After rendering, the parsed document is still queryable.
+    about_section = response.html.find('#about', first=True)
+    assert len(about_section.links) == 6
+    await response.html.browser.close()
+
+
@pytest.mark.render
def test_bare_render():
doc = """"""