diff --git a/README.rst b/README.rst index 5cda54a..4f2faba 100644 --- a/README.rst +++ b/README.rst @@ -126,10 +126,6 @@ JavaScript Support Let's grab some text that's rendered by JavaScript: -.. code-block:: shell - - $ pipenv install requests-html[browser] - .. code-block:: pycon >>> from requests_html import BrowserHTMLSession diff --git a/requests_html.py b/requests_html.py index 2f4290a..16376fa 100644 --- a/requests_html.py +++ b/requests_html.py @@ -1,6 +1,6 @@ import asyncio from urllib.parse import urlparse, urlunparse - +from concurrent.futures._base import TimeoutError import pyppeteer import requests from pyquery import PyQuery @@ -299,21 +299,34 @@ class BrowserHTMLSession(HTMLSession): return r @staticmethod - def render(source_url): - """Fully render HTML, JavaScript and all.""" + def render(source_url, retries=8): + """Fully render HTML, JavaScript and all. + Will attempt 8 times by default. + """ async def _async_render(url): - browser = pyppeteer.launch() - page = await browser.newPage() + try: + browser = pyppeteer.launch() + page = await browser.newPage() - # Load the given page (GET request, obviously.) - await page.goto(url) + # Load the given page (GET request, obviously.) + await page.goto(url) + + # Return the content of the page, JavaScript evaluated. + return await page.content() + except TimeoutError: + return None - # Return the content of the page, JavaScript evaluated. - return await page.content() loop = asyncio.get_event_loop() - content = loop.run_until_complete(_async_render(source_url)) + content = None + + for i in range(retries): + if not content: + try: + content = loop.run_until_complete(_async_render(source_url)) + except TimeoutError: + pass return content