mirror of
https://github.com/kennethreitz/requests-html.git
synced 2026-06-05 23:00:20 +00:00
@@ -14,6 +14,7 @@ parse = "*"
|
||||
"bs4" = "*"
|
||||
"pyqt5" = "*"
|
||||
"w3lib" = "*"
|
||||
pyppeteer = "*"
|
||||
|
||||
|
||||
[dev-packages]
|
||||
|
||||
Generated
+35
-1
@@ -1,7 +1,7 @@
|
||||
{
|
||||
"_meta": {
|
||||
"hash": {
|
||||
"sha256": "f6a1a62089049c03d073d0255f1547245bfb4277c62c8df273afc3fcc4f5e127"
|
||||
"sha256": "9af93ac7145d6f8f0f24b28c59064699450344bdd45f18d3c4383647a1a08f03"
|
||||
},
|
||||
"host-environment-markers": {
|
||||
"implementation_name": "cpython",
|
||||
@@ -114,6 +114,19 @@
|
||||
],
|
||||
"version": "==1.8.2"
|
||||
},
|
||||
"pyee": {
|
||||
"hashes": [
|
||||
"sha256:47f8fa96d6dee61c82001831e1fbba55f3f808003a322d0e6653aa01c59f6b9e",
|
||||
"sha256:4ec22817297b7024f89721cc34f790ee2767c5b5ca44284c565ee643abafbe32"
|
||||
],
|
||||
"version": "==5.0.0"
|
||||
},
|
||||
"pyppeteer": {
|
||||
"hashes": [
|
||||
"sha256:596929fb7d052048679081d3dc2a998cf065e936a752c7ba2392445d6e0e9706"
|
||||
],
|
||||
"version": "==0.0.10"
|
||||
},
|
||||
"pyqt5": {
|
||||
"hashes": [
|
||||
"sha256:128285176240e990fce9c50293105ffd0d2884d8910bb338118f867b171ec6e8",
|
||||
@@ -174,6 +187,27 @@
|
||||
"sha256:55994787e93b411c2d659068b51b9998d9d0c05e0df188e6daf8f45836e1ea38"
|
||||
],
|
||||
"version": "==1.19.0"
|
||||
},
|
||||
"websockets": {
|
||||
"hashes": [
|
||||
"sha256:f5192da704535a7cbf76d6e99c1ec4af7e8d1288252bf5a2385d414509ded0cf",
|
||||
"sha256:0c31bc832d529dc7583d324eb6c836a4f362032a1902723c112cf57883488d8c",
|
||||
"sha256:da7610a017f5343fdf765f4e0eb6fd0dfd08264ca1565212b110836d9367fc9c",
|
||||
"sha256:fd81af8cf3e69f9a97f3a6c0623a0527de0f922c2df725f00cd7646d478af632",
|
||||
"sha256:3d425ae081fb4ba1eef9ecf30472ffd79f8e868297ccc7a47993c96dbf2a819c",
|
||||
"sha256:ebdd4f18fe7e3bea9bd3bf446b0f4117739478caa2c76e4f0fb72cc45b03cbd7",
|
||||
"sha256:3859ca16c229ddb0fa21c5090e4efcb037c08ce69b0c1dfed6122c3f98cd0c22",
|
||||
"sha256:d1a0572b6edb22c9208e3e5381064e09d287d2a915f90233fef994ee7a14a935",
|
||||
"sha256:80188abdadd23edaaea05ce761dc9a2e1df31a74a0533967f0dcd9560c85add0",
|
||||
"sha256:fecf51c13195c416c22422353b306dddb9c752e4b80b21e0fa1fccbe38246677",
|
||||
"sha256:367ff945bc0950ad9634591e2afe50bf2222bc4fad1088a386c4bb700888026e",
|
||||
"sha256:6df87698022aef2596bffdfecc96d656db59c8d719708c8a471daa815ee61656",
|
||||
"sha256:341824d8c9ad53fc43cca3fa9407f294125fa258592f7676640396501448e57e",
|
||||
"sha256:64896a6b3368c959b8096b655e46f03dfa65b96745249f374bd6a35705cc3489",
|
||||
"sha256:1f3e5a52cab6daa3d432c7b0de0a14109be39d2bfaad033ee5de4a3d3e11dcdf",
|
||||
"sha256:da4d4fbe059b0453e726d6d993760065d69b823a27efc3040402a6fcfe6a1ed9"
|
||||
],
|
||||
"version": "==4.0.1"
|
||||
}
|
||||
},
|
||||
"develop": {
|
||||
|
||||
+16
-29
@@ -1,5 +1,7 @@
|
||||
import asyncio
|
||||
from urllib.parse import urlparse, urlunparse
|
||||
|
||||
import pyppeteer
|
||||
import requests
|
||||
from pyquery import PyQuery
|
||||
|
||||
@@ -10,11 +12,7 @@ from parse import search as parse_search
|
||||
from parse import findall
|
||||
from w3lib.encoding import html_to_unicode
|
||||
|
||||
try:
|
||||
from PyQt5.QtWidgets import QApplication
|
||||
from PyQt5.QtWebEngineWidgets import QWebEngineView
|
||||
except ImportError:
|
||||
pass
|
||||
|
||||
|
||||
|
||||
DEFAULT_ENCODING = 'utf-8'
|
||||
@@ -154,7 +152,8 @@ class BaseParser:
|
||||
try:
|
||||
href = link.attrs['href'].strip()
|
||||
if not href.startswith('#') and self.skip_anchors and href not in ['javascript:;']:
|
||||
yield href
|
||||
if href:
|
||||
yield href
|
||||
except KeyError:
|
||||
pass
|
||||
|
||||
@@ -285,7 +284,7 @@ class HTMLSession(requests.Session):
|
||||
|
||||
class BrowserHTMLSession(HTMLSession):
|
||||
"""A web-browser interpreted session (for JavaScript), powered by
|
||||
PyQt5's QWebEngineView."""
|
||||
`PyPpeteer <https://pypi.python.org/pypi/pyppeteer>`_."""
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
super(BrowserHTMLSession, self).__init__(*args, **kwargs)
|
||||
@@ -294,7 +293,7 @@ class BrowserHTMLSession(HTMLSession):
|
||||
# Convert Request object into HTTPRequest object.
|
||||
r = super(BrowserHTMLSession, self).request(*args, **kwargs)
|
||||
|
||||
r._content = self.render(r.text).encode(DEFAULT_ENCODING)
|
||||
r._content = self.render(r.url).encode(DEFAULT_ENCODING)
|
||||
r.encoding = DEFAULT_ENCODING
|
||||
|
||||
return r
|
||||
@@ -303,30 +302,18 @@ class BrowserHTMLSession(HTMLSession):
|
||||
def render(source_url):
|
||||
"""Fully render HTML, JavaScript and all."""
|
||||
|
||||
if 'QApplication' not in globals():
|
||||
raise RuntimeError('PyQt5 must be installed.')
|
||||
async def _async_render(url):
|
||||
browser = pyppeteer.launch()
|
||||
page = await browser.newPage()
|
||||
await page.goto(url)
|
||||
|
||||
class Render(QWebEngineView):
|
||||
def __init__(self, html):
|
||||
self.html = None
|
||||
self.app = QApplication([])
|
||||
QWebEngineView.__init__(self)
|
||||
self.loadFinished.connect(self._loadFinished)
|
||||
self.setHtml(html)
|
||||
# self.load(QUrl(url))
|
||||
self.app.exec_()
|
||||
content = await page.content()
|
||||
return content
|
||||
|
||||
def _loadFinished(self, result):
|
||||
# This is an async call, you need to wait for this
|
||||
# to be called before closing the app
|
||||
self.page().toHtml(self._callable)
|
||||
loop = asyncio.get_event_loop()
|
||||
content = loop.run_until_complete(_async_render(source_url))
|
||||
|
||||
def _callable(self, data):
|
||||
self.html = data
|
||||
# Data has been stored, it's safe to quit the app
|
||||
self.app.quit()
|
||||
|
||||
return Render(source_url).html
|
||||
return content
|
||||
|
||||
|
||||
# Backwards compatiblity.
|
||||
|
||||
@@ -21,7 +21,7 @@ VERSION = '0.3.5'
|
||||
|
||||
# What packages are required for this module to be executed?
|
||||
REQUIRED = [
|
||||
'requests', 'pyquery', 'fake-useragent', 'parse', 'bs4', 'w3lib'
|
||||
'requests', 'pyquery', 'fake-useragent', 'parse', 'bs4', 'w3lib', 'pyppeteer'
|
||||
]
|
||||
|
||||
# The rest you shouldn't have to touch too much :)
|
||||
@@ -79,9 +79,6 @@ setup(
|
||||
author_email=EMAIL,
|
||||
url=URL,
|
||||
python_requires='>=3.5.0',
|
||||
extras_require={
|
||||
'browser': ['PyQt5'],
|
||||
},
|
||||
# If your package is a single module, use this instead of 'packages':
|
||||
py_modules=['requests_html'],
|
||||
|
||||
|
||||
Reference in New Issue
Block a user