mirror of
https://github.com/kennethreitz/requests-html.git
synced 2026-06-05 06:46:14 +00:00
@@ -18,3 +18,10 @@ parse = "*"
|
||||
[dev-packages]
|
||||
|
||||
twine = "*"
|
||||
requests-file = "*"
|
||||
pytest = "*"
|
||||
"e1839a8" = {path = ".", editable = true}
|
||||
|
||||
[scripts]
|
||||
|
||||
tests = "pytest"
|
||||
Generated
+126
-1
@@ -1,7 +1,7 @@
|
||||
{
|
||||
"_meta": {
|
||||
"hash": {
|
||||
"sha256": "75c60031e7b2733e07a2cf09fc24895c85b0c5032161ebbca6b7fc68f6c933a3"
|
||||
"sha256": "0472672d45b7404f1329bdf8d01dc6429fa5668ecff3367094b1048fc4be65a9"
|
||||
},
|
||||
"host-environment-markers": {
|
||||
"implementation_name": "cpython",
|
||||
@@ -144,6 +144,27 @@
|
||||
}
|
||||
},
|
||||
"develop": {
|
||||
"attrs": {
|
||||
"hashes": [
|
||||
"sha256:a17a9573a6f475c99b551c0e0a812707ddda1ec9653bed04c13841404ed6f450",
|
||||
"sha256:1c7960ccfd6a005cd9f7ba884e6316b5e430a3f1a6c37c5f87d8b43f83b54ec9"
|
||||
],
|
||||
"version": "==17.4.0"
|
||||
},
|
||||
"beautifulsoup4": {
|
||||
"hashes": [
|
||||
"sha256:7015e76bf32f1f574636c4288399a6de66ce08fb7b2457f628a8d70c0fbabb11",
|
||||
"sha256:11a9a27b7d3bddc6d86f59fb76afb70e921a25ac2d6cc55b40d072bd68435a76",
|
||||
"sha256:808b6ac932dccb0a4126558f7dfdcf41710dd44a4ef497a0bb59a77f9f078e89"
|
||||
],
|
||||
"version": "==4.6.0"
|
||||
},
|
||||
"bs4": {
|
||||
"hashes": [
|
||||
"sha256:36ecea1fd7cc5c0c6e4a1ff075df26d50da647b75376626cc186e2212886dd3a"
|
||||
],
|
||||
"version": "==0.0.1"
|
||||
},
|
||||
"certifi": {
|
||||
"hashes": [
|
||||
"sha256:14131608ad2fd56836d33a71ee60fa1c82bc9d2c8d98b7bdbc631fe1b3cd1296",
|
||||
@@ -158,6 +179,30 @@
|
||||
],
|
||||
"version": "==3.0.4"
|
||||
},
|
||||
"cssselect": {
|
||||
"hashes": [
|
||||
"sha256:3b5103e8789da9e936a68d993b70df732d06b8bb9a337a05ed4eb52c17ef7206",
|
||||
"sha256:066d8bc5229af09617e24b3ca4d52f1f9092d9e061931f4184cd572885c23204"
|
||||
],
|
||||
"version": "==1.0.3"
|
||||
},
|
||||
"e1839a8": {
|
||||
"editable": true,
|
||||
"path": "."
|
||||
},
|
||||
"fake-useragent": {
|
||||
"hashes": [
|
||||
"sha256:cc9b9ddcebc708b3deac846f5fccb16e37c02ee47435a4ec7132271dd96aec8c"
|
||||
],
|
||||
"version": "==0.1.10"
|
||||
},
|
||||
"html2text": {
|
||||
"hashes": [
|
||||
"sha256:490db40fe5b2cd79c461cf56be4d39eb8ca68191ae41ba3ba79f6cb05b7dd662",
|
||||
"sha256:627514fb30e7566b37be6900df26c2c78a030cc9e6211bda604d8181233bcdd4"
|
||||
],
|
||||
"version": "==2018.1.9"
|
||||
},
|
||||
"idna": {
|
||||
"hashes": [
|
||||
"sha256:8c7309c718f94b3a625cb648ace320157ad16ff131ae0af362c9f21b80ef6ec4",
|
||||
@@ -165,6 +210,45 @@
|
||||
],
|
||||
"version": "==2.6"
|
||||
},
|
||||
"lxml": {
|
||||
"hashes": [
|
||||
"sha256:41f59cbdab232f11680d5d4dec9f2e6782fd24d78e37ee833447702e34e675f4",
|
||||
"sha256:e7e41d383f19bab9d57f5f3b18d158655bcd682e7e723f441b9e183e1e35a6b5",
|
||||
"sha256:155521c337acecf8202091cff85bb9f709f238130ebadf04280fb1db11f5ad8b",
|
||||
"sha256:d2c985d2460b81c6ca5feb8b86f1bc594ad59405d0bdf68626b85852b701553c",
|
||||
"sha256:950e63387514aa1b881eba5ac6cb2ec51a118b3dafe99dd80ca19d8fb0142f30",
|
||||
"sha256:470d7ce41e8047208ba1a376560bad17f1468df1f3097bc83902b26cfafdbb0c",
|
||||
"sha256:e608839a5ee2180164424ccf279c8e2d9bbe8816d002c58fd97d6b621ba4aa94",
|
||||
"sha256:87a66bcadac270fc010cb029022a93fc722bf1204a8b03e782d4c790f0edf7ca",
|
||||
"sha256:2dedfeeecc2d5a939cf622602f5a1ce443ca82407f386880f739f1a9f08053ad",
|
||||
"sha256:ba05732e4bcf59e948f61588851dcf620fd60d5bbd9d704203e5f59bbaa60219",
|
||||
"sha256:2190266059fec3c5a55f9d6c30532c64c6d414d3228909c0af573fe4907e78d1",
|
||||
"sha256:dd291debfaa535d9cb6cee8d7aca2328775e037d02d13f1634e57f49bc302cc4",
|
||||
"sha256:29a36e354c39b2e24bc4ee103de53417ebb80f976a6ab9e8d093d559e2ac03e1",
|
||||
"sha256:e37427d5a27eefbcfc48847e0b37f348113fac7280bc857421db39ffc6372570",
|
||||
"sha256:b106d4d2383382399ad82108fd187e92f40b1c90f55c2d36bbcb1c44bcf940fc",
|
||||
"sha256:0ee07da52d240f1dc3c83eef5cd5f1b7f018226c1121f2a54d446645779a6d17",
|
||||
"sha256:3b33549fb8f91b38a7500078242b03cca513f3412a2cdae722e89bf83f95971d",
|
||||
"sha256:4c12e90886d9c53ab434c8d0cebea122321cce19614c3c6b6d1a7700d7cc6212",
|
||||
"sha256:79322000279cda10b53c374d53ca632ead3bc51c6aebf8e62c8fa93a4d08b750",
|
||||
"sha256:6cba398eb37e0631e60e0e080c101cfe91769b2c8267105b64b4625e2581ea21",
|
||||
"sha256:49a655956f8de69e1258bc0fcfc43eb3bd1e038655784d77d1869b4b81444e37",
|
||||
"sha256:af8a5373241d09b8fc53e0490e1719ce5dc90a21b19db89b6596c1adcdd52270",
|
||||
"sha256:e6b6698415c7e8d227a47a3b1038e1b37c2b438a1b48c2db7ad9e74ddbcd1149",
|
||||
"sha256:155c916cf2645b4a8f2bd5d09065e92d1b67b8d464bdc001e0b524af84bedf6f",
|
||||
"sha256:fa7320679ced5e25b20203d157280680fc84eb783b6cc650cb0c98e1858b7dd3",
|
||||
"sha256:4187c4b0cefc3353181db048c51f42c489d9ac51e40b86c4851dc0671372971d",
|
||||
"sha256:d5d29663e979e83b3fc361e97200f959cddb3a14797391d15273d84a5a8ae44b",
|
||||
"sha256:940caef1ec7c78e0c34b0f6b94fe42d0f2022915ffc78643d28538a5cfd0f40e"
|
||||
],
|
||||
"version": "==4.1.1"
|
||||
},
|
||||
"parse": {
|
||||
"hashes": [
|
||||
"sha256:8048dde3f5ca07ad7ac7350460952d83b63eaacecdac1b37f45fd74870d849d2"
|
||||
],
|
||||
"version": "==1.8.2"
|
||||
},
|
||||
"pkginfo": {
|
||||
"hashes": [
|
||||
"sha256:31a49103180ae1518b65d3f4ce09c784e2bc54e338197668b4fb7dc539521024",
|
||||
@@ -172,6 +256,33 @@
|
||||
],
|
||||
"version": "==1.4.1"
|
||||
},
|
||||
"pluggy": {
|
||||
"hashes": [
|
||||
"sha256:7f8ae7f5bdf75671a718d2daf0a64b7885f74510bcd98b1a0bb420eb9a9d0cff"
|
||||
],
|
||||
"version": "==0.6.0"
|
||||
},
|
||||
"py": {
|
||||
"hashes": [
|
||||
"sha256:8cca5c229d225f8c1e3085be4fcf306090b00850fefad892f9d96c7b6e2f310f",
|
||||
"sha256:ca18943e28235417756316bfada6cd96b23ce60dd532642690dcfdaba988a76d"
|
||||
],
|
||||
"version": "==1.5.2"
|
||||
},
|
||||
"pyquery": {
|
||||
"hashes": [
|
||||
"sha256:07987c2ed2aed5cba29ff18af95e56e9eb04a2249f42ce47bddfb37f487229a3",
|
||||
"sha256:4771db76bd14352eba006463656aef990a0147a0eeaf094725097acfa90442bf"
|
||||
],
|
||||
"version": "==1.4.0"
|
||||
},
|
||||
"pytest": {
|
||||
"hashes": [
|
||||
"sha256:8970e25181e15ab14ae895599a0a0e0ade7d1f1c4c8ca1072ce16f25526a184d",
|
||||
"sha256:9ddcb879c8cc859d2540204b5399011f842e5e8823674bf429f70ada281b3cc6"
|
||||
],
|
||||
"version": "==3.4.1"
|
||||
},
|
||||
"requests": {
|
||||
"hashes": [
|
||||
"sha256:6a1b267aa90cac58ac3a765d067950e7dbbf75b1da07e895d1f594193a40a38b",
|
||||
@@ -179,6 +290,13 @@
|
||||
],
|
||||
"version": "==2.18.4"
|
||||
},
|
||||
"requests-file": {
|
||||
"hashes": [
|
||||
"sha256:75c175eed739270aec3c5279ffd74e6527dada275c5c0d76b5817e9c86bb7dea",
|
||||
"sha256:8f04aa6201bacda0567e7ac7f677f1499b0fc76b22140c54bc06edf1ba92e2fa"
|
||||
],
|
||||
"version": "==1.4.3"
|
||||
},
|
||||
"requests-toolbelt": {
|
||||
"hashes": [
|
||||
"sha256:42c9c170abc2cacb78b8ab23ac957945c7716249206f90874651971a4acff237",
|
||||
@@ -186,6 +304,13 @@
|
||||
],
|
||||
"version": "==0.8.0"
|
||||
},
|
||||
"six": {
|
||||
"hashes": [
|
||||
"sha256:832dc0e10feb1aa2c68dcc57dbb658f1c7e65b9b61af69048abc87a2db00a0eb",
|
||||
"sha256:70e8a77beed4562e7f14fe23a786b54f6296e34344c23bc42f07b15018ff98e9"
|
||||
],
|
||||
"version": "==1.11.0"
|
||||
},
|
||||
"tqdm": {
|
||||
"hashes": [
|
||||
"sha256:4c041f8019f7be65b8028ddde9a836f7ccc51c4637f1ff2ba9b5813d38d19d5a",
|
||||
|
||||
+28
-16
@@ -62,9 +62,16 @@ class BaseParser:
|
||||
else:
|
||||
return c
|
||||
|
||||
def xpath(self, selector):
|
||||
def xpath(self, selector, first=False):
|
||||
"""Given an XPath selector, returns a list of element objects."""
|
||||
return [Element(element=e, url=self.url) for e in self.lxml.xpath(selector)]
|
||||
c = [Element(element=e, url=self.url) for e in self.lxml.xpath(selector)]
|
||||
if first:
|
||||
try:
|
||||
return c[0]
|
||||
except IndexError:
|
||||
return None
|
||||
else:
|
||||
return c
|
||||
|
||||
def search(self, template):
|
||||
"""Searches the element for the given parse template."""
|
||||
@@ -146,7 +153,7 @@ class HTML(BaseParser):
|
||||
"""An HTML document."""
|
||||
def __init__(self, *, response):
|
||||
super(HTML, self).__init__(
|
||||
element=fromstring(self.html),
|
||||
element=fromstring(response.text),
|
||||
html=response.text,
|
||||
url=response.url
|
||||
)
|
||||
@@ -174,21 +181,26 @@ def user_agent(style=None):
|
||||
else:
|
||||
return useragent[style]
|
||||
|
||||
def get_session(mock_browser=True):
|
||||
"""Returns a consumable session, for cookie persistence and connection
|
||||
pooling, amongst other things.
|
||||
"""
|
||||
|
||||
# Requests Session.
|
||||
session = requests.Session()
|
||||
class Session(requests.Session):
|
||||
"""A consumable session, for cookie persistence and connection pooling,
|
||||
amongst other things."""
|
||||
def __init__(self, mock_browser=True, *args, **kwargs):
|
||||
super(Session, self).__init__(*args, **kwargs)
|
||||
|
||||
# Mock a web browser's user agent.
|
||||
if mock_browser:
|
||||
session.headers['User-Agent'] = user_agent()
|
||||
# Mock a web browser's user agent.
|
||||
if mock_browser:
|
||||
self.headers['User-Agent'] = user_agent()
|
||||
|
||||
# Hook into Requests.
|
||||
session.hooks = {'response': _handle_response}
|
||||
self.hooks = {'response': self._handle_response}
|
||||
|
||||
return session
|
||||
@staticmethod
|
||||
def _handle_response(response, **kwargs):
|
||||
"""Requests HTTP Response handler. Attaches .html property to Response
|
||||
objects.
|
||||
"""
|
||||
|
||||
session = get_session()
|
||||
response.html = HTML(response=response)
|
||||
return response
|
||||
|
||||
session = Session()
|
||||
|
||||
+1053
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,59 @@
|
||||
import os
|
||||
|
||||
from requests_html import Session
|
||||
from requests_file import FileAdapter
|
||||
|
||||
session = Session()
|
||||
session.mount('file://', FileAdapter())
|
||||
|
||||
|
||||
def get():
    """Fetch the ``python.html`` fixture that sits next to this test module.

    The file is requested through the module-level ``session`` using a
    ``file://`` URL, and the resulting response object is returned.
    """
    here = os.path.dirname(os.path.abspath(__file__))
    fixture = os.path.sep.join((here, 'python.html'))

    return session.get('file://{}'.format(fixture))
|
||||
|
||||
|
||||
def test_file_get():
    """The local fixture is served with a 200 status code."""
    response = get()
    assert response.status_code == 200
|
||||
|
||||
|
||||
def test_css_selector():
    """Each expected menu entry is one of the lines of the ``#about`` text."""
    response = get()
    about = response.html.find('#about', first=True)

    expected_entries = (
        'About', 'Applications', 'Quotes', 'Getting Started', 'Help',
        'Python Brochure',
    )
    for entry in expected_entries:
        assert entry in about.text.split('\n')
|
||||
|
||||
|
||||
def test_attrs():
    """The ``#about`` element exposes its HTML attributes via ``.attrs``."""
    page = get().html
    about = page.find('#about', first=True)

    assert 'aria-haspopup' in about.attrs
    assert len(about.attrs['class']) == 2
|
||||
|
||||
|
||||
def test_links():
    """The ``#about`` element reports six links, relative and absolute."""
    r = get()
    about = r.html.find('#about', first=True)

    # These comparisons were previously bare expressions whose results were
    # discarded, so the test could never fail. They must be asserted.
    assert len(about.links) == 6
    assert len(about.absolute_links) == 6
|
||||
|
||||
|
||||
def test_search():
    """A parse template pulls the captured word out of the page."""
    match = get().html.search('Python is a {} language')
    assert match[0] == 'programming'
|
||||
|
||||
|
||||
def test_xpath():
    """An XPath query with ``first=True`` yields the single root element."""
    response = get()
    root = response.html.xpath('/html', first=True)
    assert 'no-js' in root.attrs['class']
|
||||
|
||||
Reference in New Issue
Block a user