From 19a204c9f77fa0cd63c8cfe1e95fbd5e85105973 Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Tue, 27 Feb 2018 10:14:47 -0500 Subject: [PATCH] fixes Signed-off-by: Kenneth Reitz --- README.rst | 4 ---- requests_html.py | 33 +++++++++++++++++++++++---------- 2 files changed, 23 insertions(+), 14 deletions(-) diff --git a/README.rst b/README.rst index 5cda54a..4f2faba 100644 --- a/README.rst +++ b/README.rst @@ -126,10 +126,6 @@ JavaScript Support Let's grab some text that's rendered by JavaScript: -.. code-block:: shell - - $ pipenv install requests-html[browser] - .. code-block:: pycon >>> from requests_html import BrowserHTMLSession diff --git a/requests_html.py b/requests_html.py index 2f4290a..16376fa 100644 --- a/requests_html.py +++ b/requests_html.py @@ -1,6 +1,6 @@ import asyncio from urllib.parse import urlparse, urlunparse - +from concurrent.futures._base import TimeoutError import pyppeteer import requests from pyquery import PyQuery @@ -299,21 +299,34 @@ class BrowserHTMLSession(HTMLSession): return r @staticmethod - def render(source_url): - """Fully render HTML, JavaScript and all.""" + def render(source_url, retries=8): + """Fully render HTML, JavaScript and all. + Will attempt 8 times by default. + """ async def _async_render(url): - browser = pyppeteer.launch() - page = await browser.newPage() + try: + browser = pyppeteer.launch() + page = await browser.newPage() - # Load the given page (GET request, obviously.) - await page.goto(url) + # Load the given page (GET request, obviously.) + await page.goto(url) + + # Return the content of the page, JavaScript evaluated. + return await page.content() + except TimeoutError: + return None - # Return the content of the page, JavaScript evaluated. - return await page.content() loop = asyncio.get_event_loop() - content = loop.run_until_complete(_async_render(source_url)) + content = None + + for i in range(retries): + if not content: + try: + content = loop.run_until_complete(_async_render(source_url)) + except TimeoutError: + pass return content