diff --git a/requests_html.py b/requests_html.py
index f6ab3f5..0807881 100644
--- a/requests_html.py
+++ b/requests_html.py
@@ -480,7 +480,7 @@ class HTML(BaseParser):
def add_next_symbol(self, next_symbol):
self.next_symbol.append(next_symbol)
- def render(self, retries: int = 8, script: str = None, wait: float = 0.2, scrolldown=False, sleep: int = 0, reload: bool = True, timeout: Union[float, int] = 8.0):
+ def render(self, retries: int = 8, script: str = None, wait: float = 0.2, scrolldown=False, sleep: int = 0, reload: bool = True, timeout: Union[float, int] = 8.0, keep_page: bool = False):
"""Reloads the response in Chromium, and replaces HTML content
with an updated version, with JavaScript executed.
@@ -490,6 +490,7 @@ class HTML(BaseParser):
:param scrolldown: Integer, if provided, of how many times to page down.
:param sleep: Integer, if provided, of how many long to sleep after initial render.
:param reload: If ``False``, content will not be loaded from the browser, but will be provided from memory.
+ :param keep_page: If ``True`` will allow you to interact with the browser page through ``r.html.page``.
If ``scrolldown`` is specified, the page will scrolldown the specified
number of times, after sleeping the specified amount of time
@@ -520,13 +521,15 @@ class HTML(BaseParser):
>>> r.html.render(script=script)
{'width': 800, 'height': 600, 'deviceScaleFactor': 1}
+ Warning: If you use keep_page, you're responsible for closing each page, since
+ opening too many at scale may crash the browser.
+
Warning: the first time you run this method, it will download
Chromium into your home directory (``~/.pyppeteer``).
"""
- async def _async_render(*, url: str, script: str = None, scrolldown, sleep: int, wait: float, reload, content: Optional[str], timeout: Union[float, int]):
+ async def _async_render(*, url: str, script: str = None, scrolldown, sleep: int, wait: float, reload, content: Optional[str], timeout: Union[float, int], keep_page: bool):
try:
- browser = await pyppeteer.launch(headless=True, args=['--no-sandbox'])
- page = await browser.newPage()
+ page = await self.session.browser.newPage()
# Wait before rendering the page, to prevent timeouts.
await asyncio.sleep(wait)
@@ -553,11 +556,14 @@ class HTML(BaseParser):
# Return the content of the page, JavaScript evaluated.
content = await page.content()
+ if not keep_page:
+ await page.close()
+ page = None
return content, result, page
except TimeoutError:
return None
- loop = asyncio.get_event_loop()
+ self.session.browser # Automatically create an event loop and browser
content = None
# Automatically set Reload to False, if example URL is being used.
@@ -568,7 +574,7 @@ class HTML(BaseParser):
if not content:
try:
- content, result, page = loop.run_until_complete(_async_render(url=self.url, script=script, sleep=sleep, wait=wait, content=self.html, reload=reload, scrolldown=scrolldown, timeout=timeout))
+ content, result, page = self.session.loop.run_until_complete(_async_render(url=self.url, script=script, sleep=sleep, wait=wait, content=self.html, reload=reload, scrolldown=scrolldown, timeout=timeout, keep_page=keep_page))
except TypeError:
pass
else:
diff --git a/tests/test_requests_html.py b/tests/test_requests_html.py
index 54d2ff9..bf0070d 100644
--- a/tests/test_requests_html.py
+++ b/tests/test_requests_html.py
@@ -4,6 +4,7 @@ from functools import partial
import pytest
import psutil
from pyppeteer.browser import Browser
+from pyppeteer.page import Page
from requests_html import HTMLSession, AsyncHTMLSession, HTML
from requests_file import FileAdapter
@@ -247,5 +248,27 @@ def test_browser_session():
assert count_chromium_process() == 0
+@pytest.mark.ok
+def test_browser_process():
+    for _ in range(3):
+        r = get()
+        r.html.render()
+
+        assert r.html.page is None  # without keep_page, no page handle is expected to remain
+
+    assert count_chromium_process() == 2
+
+
+@pytest.mark.ok
+def test_browser_pages():
+    for _ in range(3):
+        r = get()
+        r.html.render(keep_page=True)
+
+        assert isinstance(r.html.page, Page)  # keep_page=True should expose a pyppeteer Page
+
+    assert count_chromium_process() == 5  # 2 processes for Chromium and 1 for each page
+
+
if __name__ == '__main__':
test_containing()