From a2cc6bfa55e931f68525256ba91fe8930e188241 Mon Sep 17 00:00:00 2001
From: Ordanis Sanchez <ordanisanchez@gmail.com>
Date: Tue, 20 Mar 2018 19:50:04 -0400
Subject: [PATCH] Update HTML.render to use session.browser and close pages
 automatically

---
 requests_html.py            | 18 ++++++++++++------
 tests/test_requests_html.py | 23 +++++++++++++++++++++++
 2 files changed, 35 insertions(+), 6 deletions(-)

diff --git a/requests_html.py b/requests_html.py
index f6ab3f5..0807881 100644
--- a/requests_html.py
+++ b/requests_html.py
@@ -480,7 +480,7 @@ class HTML(BaseParser):
     def add_next_symbol(self, next_symbol):
         self.next_symbol.append(next_symbol)
 
-    def render(self, retries: int = 8, script: str = None, wait: float = 0.2, scrolldown=False, sleep: int = 0, reload: bool = True, timeout: Union[float, int] = 8.0):
+    def render(self, retries: int = 8, script: str = None, wait: float = 0.2, scrolldown=False, sleep: int = 0, reload: bool = True, timeout: Union[float, int] = 8.0, keep_page: bool = False):
         """Reloads the response in Chromium, and replaces HTML content
         with an updated version, with JavaScript executed.
 
@@ -490,6 +490,7 @@ class HTML(BaseParser):
         :param scrolldown: Integer, if provided, of how many times to page down.
         :param sleep: Integer, if provided, of how many long to sleep after initial render.
         :param reload: If ``False``, content will not be loaded from the browser, but will be provided from memory.
+        :param keep_page: If ``True`` will allow you to interact with the browser page through ``r.html.page``.
 
         If ``scrolldown`` is specified, the page will scrolldown the specified
         number of times, after sleeping the specified amount of time
@@ -520,13 +521,15 @@ class HTML(BaseParser):
             >>> r.html.render(script=script)
             {'width': 800, 'height': 600, 'deviceScaleFactor': 1}
 
+        Warning: If you use keep_page, you're responsible for closing each page, since
+        opening too many at scale may crash the browser.
+
         Warning: the first time you run this method, it will download
         Chromium into your home directory (``~/.pyppeteer``).
         """
-        async def _async_render(*, url: str, script: str = None, scrolldown, sleep: int, wait: float, reload, content: Optional[str], timeout: Union[float, int]):
+        async def _async_render(*, url: str, script: str = None, scrolldown, sleep: int, wait: float, reload, content: Optional[str], timeout: Union[float, int], keep_page: bool):
             try:
-                browser = await pyppeteer.launch(headless=True, args=['--no-sandbox'])
-                page = await browser.newPage()
+                page = await self.session.browser.newPage()
 
                 # Wait before rendering the page, to prevent timeouts.
                 await asyncio.sleep(wait)
@@ -553,11 +556,14 @@ class HTML(BaseParser):
 
                 # Return the content of the page, JavaScript evaluated.
                 content = await page.content()
+                if not keep_page:
+                    await page.close()
+                    page = None
                 return content, result, page
             except TimeoutError:
                 return None
 
-        loop = asyncio.get_event_loop()
+        self.session.browser  # Automatically create an event loop and browser
         content = None
 
         # Automatically set Reload to False, if example URL is being used.
@@ -568,7 +574,7 @@ class HTML(BaseParser):
             if not content:
                 try:
 
-                    content, result, page = loop.run_until_complete(_async_render(url=self.url, script=script, sleep=sleep, wait=wait, content=self.html, reload=reload, scrolldown=scrolldown, timeout=timeout))
+                    content, result, page = self.session.loop.run_until_complete(_async_render(url=self.url, script=script, sleep=sleep, wait=wait, content=self.html, reload=reload, scrolldown=scrolldown, timeout=timeout, keep_page=keep_page))
                 except TypeError:
                     pass
             else:
diff --git a/tests/test_requests_html.py b/tests/test_requests_html.py
index 54d2ff9..bf0070d 100644
--- a/tests/test_requests_html.py
+++ b/tests/test_requests_html.py
@@ -4,6 +4,7 @@ from functools import partial
 import pytest
 import psutil
 from pyppeteer.browser import Browser
+from pyppeteer.page import Page
 from requests_html import HTMLSession, AsyncHTMLSession, HTML
 from requests_file import FileAdapter
 
@@ -247,5 +248,27 @@ def test_browser_session():
     assert count_chromium_process() == 0
 
 
+@pytest.mark.ok
+def test_browser_process():
+    for _ in range(3):  # render several responses without keep_page
+        r = get()
+        r.html.render()
+
+        assert r.html.page is None  # identity check against the None singleton (PEP 8)
+
+    assert count_chromium_process() == 2  # process count expected once no page is kept
+
+
+@pytest.mark.ok
+def test_browser_pages():
+    for _ in range(3):  # three renders, each asked to keep its page
+        r = get()
+        r.html.render(keep_page=True)  # keep_page=True: page stays reachable as r.html.page
+
+        assert isinstance(r.html.page, Page)
+
+    assert count_chromium_process() == 5  # 2 processes for Chromium and 1 for each kept page
+
+
 if __name__ == '__main__':
     test_containing()