Signed-off-by: Kenneth Reitz <me@kennethreitz.org>
This commit is contained in:
2018-02-27 10:14:47 -05:00
parent a1186f3326
commit 19a204c9f7
2 changed files with 23 additions and 14 deletions
-4
View File
@@ -126,10 +126,6 @@ JavaScript Support
Let's grab some text that's rendered by JavaScript:
.. code-block:: shell
$ pipenv install requests-html[browser]
.. code-block:: pycon
>>> from requests_html import BrowserHTMLSession
+23 -10
View File
@@ -1,6 +1,6 @@
import asyncio
from urllib.parse import urlparse, urlunparse
from concurrent.futures._base import TimeoutError
import pyppeteer
import requests
from pyquery import PyQuery
@@ -299,21 +299,34 @@ class BrowserHTMLSession(HTMLSession):
return r
@staticmethod
def render(source_url, retries=8):
    """Fully render HTML, JavaScript and all.

    Launches a browser through pyppeteer, opens a new page, navigates to
    ``source_url`` with a GET request and reads back the page content
    after JavaScript has been evaluated.

    :param source_url: URL of the page to load.
    :param retries: maximum number of render attempts (8 by default). An
        attempt is repeated when it times out or yields empty content.
    :return: the rendered page content, or ``None`` when no attempt
        produced any content (including when ``retries`` is 0).
    """

    async def _settle(value):
        # NOTE(review): pyppeteer appears to expose launch()/close() as
        # plain calls in some releases and as coroutines in others --
        # confirm against the pinned version. Awaiting only when needed
        # keeps this block working in either case.
        if asyncio.iscoroutine(value):
            return await value
        return value

    async def _async_render(url):
        browser = None
        try:
            browser = await _settle(pyppeteer.launch())
            page = await browser.newPage()

            # Load the given page (GET request, obviously.)
            await page.goto(url)

            # Return the content of the page, JavaScript evaluated.
            return await page.content()
        finally:
            # Always shut the browser down; otherwise every attempt
            # (successful or not) leaves a browser instance behind.
            if browser is not None:
                await _settle(browser.close())

    loop = asyncio.get_event_loop()
    content = None

    for _ in range(retries):
        try:
            content = loop.run_until_complete(_async_render(source_url))
        except TimeoutError:
            # A timed-out attempt counts as "no content"; try again.
            content = None

        # Stop as soon as an attempt produced something.
        if content:
            break

    return content