mirror of
https://github.com/kennethreitz/requests-html.git
synced 2026-06-05 23:00:20 +00:00
Update HTML.render to use session.browser and close pages automatically
This commit is contained in:
+12
-6
@@ -480,7 +480,7 @@ class HTML(BaseParser):
|
||||
def add_next_symbol(self, next_symbol):
|
||||
self.next_symbol.append(next_symbol)
|
||||
|
||||
def render(self, retries: int = 8, script: str = None, wait: float = 0.2, scrolldown=False, sleep: int = 0, reload: bool = True, timeout: Union[float, int] = 8.0):
|
||||
def render(self, retries: int = 8, script: str = None, wait: float = 0.2, scrolldown=False, sleep: int = 0, reload: bool = True, timeout: Union[float, int] = 8.0, keep_page: bool = False):
|
||||
"""Reloads the response in Chromium, and replaces HTML content
|
||||
with an updated version, with JavaScript executed.
|
||||
|
||||
@@ -490,6 +490,7 @@ class HTML(BaseParser):
|
||||
:param scrolldown: Integer, if provided, of how many times to page down.
|
||||
:param sleep: Integer, if provided, of how long to sleep after initial render.
|
||||
:param reload: If ``False``, content will not be loaded from the browser, but will be provided from memory.
|
||||
:param keep_page: If ``True`` will allow you to interact with the browser page through ``r.html.page``.
|
||||
|
||||
If ``scrolldown`` is specified, the page will scrolldown the specified
|
||||
number of times, after sleeping the specified amount of time
|
||||
@@ -520,13 +521,15 @@ class HTML(BaseParser):
|
||||
>>> r.html.render(script=script)
|
||||
{'width': 800, 'height': 600, 'deviceScaleFactor': 1}
|
||||
|
||||
Warning: If you use keep_page, you're responsible for closing each page, since
|
||||
opening too many at scale may crash the browser.
|
||||
|
||||
Warning: the first time you run this method, it will download
|
||||
Chromium into your home directory (``~/.pyppeteer``).
|
||||
"""
|
||||
async def _async_render(*, url: str, script: str = None, scrolldown, sleep: int, wait: float, reload, content: Optional[str], timeout: Union[float, int]):
|
||||
async def _async_render(*, url: str, script: str = None, scrolldown, sleep: int, wait: float, reload, content: Optional[str], timeout: Union[float, int], keep_page: bool):
|
||||
try:
|
||||
browser = await pyppeteer.launch(headless=True, args=['--no-sandbox'])
|
||||
page = await browser.newPage()
|
||||
page = await self.session.browser.newPage()
|
||||
|
||||
# Wait before rendering the page, to prevent timeouts.
|
||||
await asyncio.sleep(wait)
|
||||
@@ -553,11 +556,14 @@ class HTML(BaseParser):
|
||||
|
||||
# Return the content of the page, JavaScript evaluated.
|
||||
content = await page.content()
|
||||
if not keep_page:
|
||||
await page.close()
|
||||
page = None
|
||||
return content, result, page
|
||||
except TimeoutError:
|
||||
return None
|
||||
|
||||
loop = asyncio.get_event_loop()
|
||||
self.session.browser # Automatically create an event loop and browser
|
||||
content = None
|
||||
|
||||
# Automatically set Reload to False, if example URL is being used.
|
||||
@@ -568,7 +574,7 @@ class HTML(BaseParser):
|
||||
if not content:
|
||||
try:
|
||||
|
||||
content, result, page = loop.run_until_complete(_async_render(url=self.url, script=script, sleep=sleep, wait=wait, content=self.html, reload=reload, scrolldown=scrolldown, timeout=timeout))
|
||||
content, result, page = self.session.loop.run_until_complete(_async_render(url=self.url, script=script, sleep=sleep, wait=wait, content=self.html, reload=reload, scrolldown=scrolldown, timeout=timeout, keep_page=keep_page))
|
||||
except TypeError:
|
||||
pass
|
||||
else:
|
||||
|
||||
@@ -4,6 +4,7 @@ from functools import partial
|
||||
import pytest
|
||||
import psutil
|
||||
from pyppeteer.browser import Browser
|
||||
from pyppeteer.page import Page
|
||||
from requests_html import HTMLSession, AsyncHTMLSession, HTML
|
||||
from requests_file import FileAdapter
|
||||
|
||||
@@ -247,5 +248,27 @@ def test_browser_session():
|
||||
assert count_chromium_process() == 0
|
||||
|
||||
|
||||
@pytest.mark.ok
|
||||
def test_browser_process():
|
||||
for _ in range(3):
|
||||
r = get()
|
||||
r.html.render()
|
||||
|
||||
assert r.html.page == None
|
||||
|
||||
assert count_chromium_process() == 2
|
||||
|
||||
|
||||
@pytest.mark.ok
|
||||
def test_browser_pages():
|
||||
for _ in range(3):
|
||||
r = get()
|
||||
r.html.render(keep_page=True)
|
||||
|
||||
assert isinstance(r.html.page, Page)
|
||||
|
||||
assert count_chromium_process() == 5 # 2 processes for chromium and 1 for each page
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
test_containing()
|
||||
|
||||
Reference in New Issue
Block a user