mirror of
https://github.com/kennethreitz/requests-html.git
synced 2026-06-05 23:00:20 +00:00
@@ -24,6 +24,7 @@ requests-file = "*"
|
||||
pytest = "*"
|
||||
"e1839a8" = {path = ".", editable = true}
|
||||
sphinx = "*"
|
||||
mypy = "*"
|
||||
|
||||
|
||||
[scripts]
|
||||
|
||||
Generated
+79
-1
@@ -1,7 +1,7 @@
|
||||
{
|
||||
"_meta": {
|
||||
"hash": {
|
||||
"sha256": "9af93ac7145d6f8f0f24b28c59064699450344bdd45f18d3c4383647a1a08f03"
|
||||
"sha256": "ef6f9504ed9751cf2f4c5aef06e59838981c79d84fa1d36fb5ce258d8dba189f"
|
||||
},
|
||||
"host-environment-markers": {
|
||||
"implementation_name": "cpython",
|
||||
@@ -345,6 +345,13 @@
|
||||
],
|
||||
"version": "==1.0"
|
||||
},
|
||||
"mypy": {
|
||||
"hashes": [
|
||||
"sha256:aa668809ae0dbec5e9feb8929f4b5e1f9318a0a397447fa2f38c382a2ed6a036",
|
||||
"sha256:bd0c9a2fcf0c4f7a54a2b625f466fcc000d415f371298d96fa5d2acc69074aca"
|
||||
],
|
||||
"version": "==0.560"
|
||||
},
|
||||
"packaging": {
|
||||
"hashes": [
|
||||
"sha256:99276dc6e3a7851f32027a68f1095cd3f77c148091b092ea867a351811cfe388",
|
||||
@@ -371,6 +378,20 @@
|
||||
],
|
||||
"version": "==0.6.0"
|
||||
},
|
||||
"psutil": {
|
||||
"hashes": [
|
||||
"sha256:82a06785db8eeb637b349006cc28a92e40cd190fefae9875246d18d0de7ccac8",
|
||||
"sha256:4152ae231709e3e8b80e26b6da20dc965a1a589959c48af1ed024eca6473f60d",
|
||||
"sha256:230eeb3aeb077814f3a2cd036ddb6e0f571960d327298cc914c02385c3e02a63",
|
||||
"sha256:a3286556d4d2f341108db65d8e20d0cd3fcb9a91741cb5eb496832d7daf2a97c",
|
||||
"sha256:94d4e63189f2593960e73acaaf96be235dd8a455fe2bcb37d8ad6f0e87f61556",
|
||||
"sha256:c91eee73eea00df5e62c741b380b7e5b6fdd553891bee5669817a3a38d036f13",
|
||||
"sha256:779ec7e7621758ca11a8d99a1064996454b3570154277cc21342a01148a49c28",
|
||||
"sha256:8a15d773203a1277e57b1d11a7ccdf70804744ef4a9518a87ab8436995c31a4b",
|
||||
"sha256:e2467e9312c2fa191687b89ff4bc2ad8843be4af6fb4dc95a7cc5f7d7a327b18"
|
||||
],
|
||||
"version": "==5.4.3"
|
||||
},
|
||||
"py": {
|
||||
"hashes": [
|
||||
"sha256:8cca5c229d225f8c1e3085be4fcf306090b00850fefad892f9d96c7b6e2f310f",
|
||||
@@ -378,6 +399,13 @@
|
||||
],
|
||||
"version": "==1.5.2"
|
||||
},
|
||||
"pyee": {
|
||||
"hashes": [
|
||||
"sha256:47f8fa96d6dee61c82001831e1fbba55f3f808003a322d0e6653aa01c59f6b9e",
|
||||
"sha256:4ec22817297b7024f89721cc34f790ee2767c5b5ca44284c565ee643abafbe32"
|
||||
],
|
||||
"version": "==5.0.0"
|
||||
},
|
||||
"pygments": {
|
||||
"hashes": [
|
||||
"sha256:78f3f434bcc5d6ee09020f92ba487f95ba50f1e3ef83ae96b9d5ffa1bab25c5d",
|
||||
@@ -397,6 +425,12 @@
|
||||
],
|
||||
"version": "==2.2.0"
|
||||
},
|
||||
"pyppeteer": {
|
||||
"hashes": [
|
||||
"sha256:596929fb7d052048679081d3dc2a998cf065e936a752c7ba2392445d6e0e9706"
|
||||
],
|
||||
"version": "==0.0.10"
|
||||
},
|
||||
"pyquery": {
|
||||
"hashes": [
|
||||
"sha256:07987c2ed2aed5cba29ff18af95e56e9eb04a2249f42ce47bddfb37f487229a3",
|
||||
@@ -488,6 +522,29 @@
|
||||
],
|
||||
"version": "==1.9.1"
|
||||
},
|
||||
"typed-ast": {
|
||||
"hashes": [
|
||||
"sha256:0948004fa228ae071054f5208840a1e88747a357ec1101c17217bfe99b299d58",
|
||||
"sha256:25d8feefe27eb0303b73545416b13d108c6067b846b543738a25ff304824ed9a",
|
||||
"sha256:c05b41bc1deade9f90ddc5d988fe506208019ebba9f2578c622516fd201f5863",
|
||||
"sha256:519425deca5c2b2bdac49f77b2c5625781abbaf9a809d727d3a5596b30bb4ded",
|
||||
"sha256:6de012d2b166fe7a4cdf505eee3aaa12192f7ba365beeefaca4ec10e31241a85",
|
||||
"sha256:79b91ebe5a28d349b6d0d323023350133e927b4de5b651a8aa2db69c761420c6",
|
||||
"sha256:a8034021801bc0440f2e027c354b4eafd95891b573e12ff0418dec385c76785c",
|
||||
"sha256:f19f2a4f547505fe9072e15f6f4ae714af51b5a681a97f187971f50c283193b6",
|
||||
"sha256:c9b060bd1e5a26ab6e8267fd46fc9e02b54eb15fffb16d112d4c7b1c12987559",
|
||||
"sha256:2e214b72168ea0275efd6c884b114ab42e316de3ffa125b267e732ed2abda892",
|
||||
"sha256:bc978ac17468fe868ee589c795d06777f75496b1ed576d308002c8a5756fb9ea",
|
||||
"sha256:edb04bdd45bfd76c8292c4d9654568efaedf76fe78eb246dde69bdb13b2dad87",
|
||||
"sha256:668d0cec391d9aed1c6a388b0d5b97cd22e6073eaa5fbaa6d2946603b4871efe",
|
||||
"sha256:29464a177d56e4e055b5f7b629935af7f49c196be47528cc94e0a7bf83fbc2b9",
|
||||
"sha256:8550177fa5d4c1f09b5e5f524411c44633c80ec69b24e0e98906dd761941ca46",
|
||||
"sha256:3e0d5e48e3a23e9a4d1a9f698e32a542a4a288c871d33ed8df1b092a40f3a0f9",
|
||||
"sha256:68ba70684990f59497680ff90d18e756a47bf4863c604098f10de9716b2c0bdd",
|
||||
"sha256:57fe287f0cdd9ceaf69e7b71a2e94a24b5d268b35df251a88fef5cc241bf73aa"
|
||||
],
|
||||
"version": "==1.1.0"
|
||||
},
|
||||
"urllib3": {
|
||||
"hashes": [
|
||||
"sha256:06330f386d6e4b195fbfc736b297f58c5a892e4440e54d294d7004e3a9bbea1b",
|
||||
@@ -501,6 +558,27 @@
|
||||
"sha256:55994787e93b411c2d659068b51b9998d9d0c05e0df188e6daf8f45836e1ea38"
|
||||
],
|
||||
"version": "==1.19.0"
|
||||
},
|
||||
"websockets": {
|
||||
"hashes": [
|
||||
"sha256:f5192da704535a7cbf76d6e99c1ec4af7e8d1288252bf5a2385d414509ded0cf",
|
||||
"sha256:0c31bc832d529dc7583d324eb6c836a4f362032a1902723c112cf57883488d8c",
|
||||
"sha256:da7610a017f5343fdf765f4e0eb6fd0dfd08264ca1565212b110836d9367fc9c",
|
||||
"sha256:fd81af8cf3e69f9a97f3a6c0623a0527de0f922c2df725f00cd7646d478af632",
|
||||
"sha256:3d425ae081fb4ba1eef9ecf30472ffd79f8e868297ccc7a47993c96dbf2a819c",
|
||||
"sha256:ebdd4f18fe7e3bea9bd3bf446b0f4117739478caa2c76e4f0fb72cc45b03cbd7",
|
||||
"sha256:3859ca16c229ddb0fa21c5090e4efcb037c08ce69b0c1dfed6122c3f98cd0c22",
|
||||
"sha256:d1a0572b6edb22c9208e3e5381064e09d287d2a915f90233fef994ee7a14a935",
|
||||
"sha256:80188abdadd23edaaea05ce761dc9a2e1df31a74a0533967f0dcd9560c85add0",
|
||||
"sha256:fecf51c13195c416c22422353b306dddb9c752e4b80b21e0fa1fccbe38246677",
|
||||
"sha256:367ff945bc0950ad9634591e2afe50bf2222bc4fad1088a386c4bb700888026e",
|
||||
"sha256:6df87698022aef2596bffdfecc96d656db59c8d719708c8a471daa815ee61656",
|
||||
"sha256:341824d8c9ad53fc43cca3fa9407f294125fa258592f7676640396501448e57e",
|
||||
"sha256:64896a6b3368c959b8096b655e46f03dfa65b96745249f374bd6a35705cc3489",
|
||||
"sha256:1f3e5a52cab6daa3d432c7b0de0a14109be39d2bfaad033ee5de4a3d3e11dcdf",
|
||||
"sha256:da4d4fbe059b0453e726d6d993760065d69b823a27efc3040402a6fcfe6a1ed9"
|
||||
],
|
||||
"version": "==4.0.1"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
+31
-33
@@ -1,7 +1,7 @@
|
||||
import asyncio
|
||||
from urllib.parse import urlparse, urlunparse
|
||||
from concurrent.futures._base import TimeoutError
|
||||
from typing import List
|
||||
from typing import Set
|
||||
|
||||
import pyppeteer
|
||||
import requests
|
||||
@@ -16,43 +16,16 @@ from parse import findall
|
||||
from w3lib.encoding import html_to_unicode
|
||||
|
||||
|
||||
|
||||
|
||||
DEFAULT_ENCODING = 'utf-8'
|
||||
|
||||
useragent = UserAgent()
|
||||
|
||||
class HTMLResponse(requests.Response):
|
||||
"""An HTML-enabled :class:`Response <Response>` object.
|
||||
Same as Requests class:`Response <Response>` object, but with an
|
||||
intelligent ``.html`` property added.
|
||||
"""
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
super(HTMLResponse, self).__init__(*args, **kwargs)
|
||||
self._html = None
|
||||
|
||||
@property
|
||||
def html(self) -> str:
|
||||
if self._html:
|
||||
return self._html
|
||||
|
||||
self._html = HTML(url=self.url, html=self.text, default_encoding=self.encoding)
|
||||
return self._html
|
||||
|
||||
@classmethod
|
||||
def _from_response(cls, response):
|
||||
html_r = cls()
|
||||
html_r.__dict__.update(response.__dict__)
|
||||
return html_r
|
||||
|
||||
|
||||
|
||||
|
||||
class BaseParser:
|
||||
"""A basic HTML/Element Parser, for Humans."""
|
||||
|
||||
def __init__(self, *, element, default_encoding: str = None, html: str = None, url: str):
|
||||
def __init__(self, *, element, default_encoding: str = None, html: str = None, url: str) -> None:
|
||||
self.element = element
|
||||
self.url = url
|
||||
self.skip_anchors = True
|
||||
@@ -69,7 +42,7 @@ class BaseParser:
|
||||
return etree.tostring(self.element, encoding='unicode').strip()
|
||||
|
||||
@html.setter
|
||||
def set_html(self, html):
|
||||
def set_html(self, html: str) -> None:
|
||||
"""Property setter for self.html."""
|
||||
self._html = html
|
||||
|
||||
@@ -148,7 +121,7 @@ class BaseParser:
|
||||
return [r for r in findall(template, self.html)]
|
||||
|
||||
@property
|
||||
def links(self) -> List[str]:
|
||||
def links(self) -> Set[str]:
|
||||
"""All found links on page, in as–is form."""
|
||||
def gen():
|
||||
for link in self.find('a'):
|
||||
@@ -164,7 +137,7 @@ class BaseParser:
|
||||
return set(g for g in gen())
|
||||
|
||||
@property
|
||||
def absolute_links(self) -> List[str]:
|
||||
def absolute_links(self) -> Set[str]:
|
||||
"""All found links on page, in absolute form."""
|
||||
def gen():
|
||||
for link in self.links:
|
||||
@@ -275,6 +248,31 @@ class HTML(BaseParser):
|
||||
return self
|
||||
|
||||
|
||||
class HTMLResponse(requests.Response):
|
||||
"""An HTML-enabled :class:`Response <Response>` object.
|
||||
Same as Requests class:`Response <Response>` object, but with an
|
||||
intelligent ``.html`` property added.
|
||||
"""
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
super(HTMLResponse, self).__init__(*args, **kwargs)
|
||||
self._html = None
|
||||
|
||||
@property
|
||||
def html(self) -> HTML:
|
||||
if self._html:
|
||||
return self._html
|
||||
|
||||
self._html = HTML(url=self.url, html=self.text, default_encoding=self.encoding)
|
||||
return self._html
|
||||
|
||||
@classmethod
|
||||
def _from_response(cls, response):
|
||||
html_r = cls()
|
||||
html_r.__dict__.update(response.__dict__)
|
||||
return html_r
|
||||
|
||||
|
||||
def user_agent(style='chrome') -> str:
|
||||
"""Returns a random user-agent, if not requested one of a specific
|
||||
style. Defaults to a Chrome-style User-Agent.
|
||||
@@ -301,7 +299,7 @@ class HTMLSession(requests.Session):
|
||||
self.hooks = {'response': self._handle_response}
|
||||
|
||||
@staticmethod
|
||||
def _handle_response(response, **kwargs) -> requests.Response:
|
||||
def _handle_response(response, **kwargs) -> HTMLResponse:
|
||||
"""Requests HTTP Response handler. Attaches .html property to Response
|
||||
objects.
|
||||
"""
|
||||
|
||||
Reference in New Issue
Block a user