This commit is contained in:
2018-03-03 10:16:00 -05:00
commit 746181fe87
19 changed files with 52535 additions and 0 deletions
+106
View File
@@ -0,0 +1,106 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
*.DS_Store
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
.pytest_cache/
# Translations
*.mo
*.pot
# Django stuff:
*.log
.static_storage/
.media/
local_settings.py
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# Jupyter Notebook
.ipynb_checkpoints
# pyenv
.python-version
# celery beat schedule file
celerybeat-schedule
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
+19
View File
@@ -0,0 +1,19 @@
language: python
python:
- "3.6"
# command to install dependencies
install:
- "pip install pipenv --upgrade-strategy=only-if-needed"
- "pipenv install --dev"
# command to run the dependencies
script:
- "pipenv run tests"
# command to run tests
# jobs:
# include:
# - stage: "✨ Flake8 NitPicking ✨"
# python: "3.6"
# script: "pipenv run flake8"
+21
View File
@@ -0,0 +1,21 @@
The MIT License (MIT)
Copyright 2018 Kenneth Reitz
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
+31
View File
@@ -0,0 +1,31 @@
[[source]]
url = "https://pypi.python.org/simple"
verify_ssl = true
name = "pypi"
[packages]
requests = "*"
pyquery = "*"
fake-useragent = "*"
parse = "*"
"bs4" = "*"
"w3lib" = "*"
pyppeteer = "*"
[dev-packages]
twine = "*"
requests-file = "*"
pytest = "*"
"e1839a8" = {path = ".", editable = true}
sphinx = "*"
mypy = "*"
[scripts]
tests = "pytest -v -m ok"
Generated
+558
View File
@@ -0,0 +1,558 @@
{
"_meta": {
"hash": {
"sha256": "cf67076e9c185c3bc951910b2a44b8b548ce954e0e3ff2a5bef1942d13275e8e"
},
"host-environment-markers": {
"implementation_name": "cpython",
"implementation_version": "3.6.4",
"os_name": "posix",
"platform_machine": "x86_64",
"platform_python_implementation": "CPython",
"platform_release": "17.4.0",
"platform_system": "Darwin",
"platform_version": "Darwin Kernel Version 17.4.0: Sun Dec 17 09:19:54 PST 2017; root:xnu-4570.41.2~1/RELEASE_X86_64",
"python_full_version": "3.6.4",
"python_version": "3.6",
"sys_platform": "darwin"
},
"pipfile-spec": 6,
"requires": {},
"sources": [
{
"name": "pypi",
"url": "https://pypi.python.org/simple",
"verify_ssl": true
}
]
},
"default": {
"beautifulsoup4": {
"hashes": [
"sha256:7015e76bf32f1f574636c4288399a6de66ce08fb7b2457f628a8d70c0fbabb11",
"sha256:11a9a27b7d3bddc6d86f59fb76afb70e921a25ac2d6cc55b40d072bd68435a76",
"sha256:808b6ac932dccb0a4126558f7dfdcf41710dd44a4ef497a0bb59a77f9f078e89"
],
"version": "==4.6.0"
},
"bs4": {
"hashes": [
"sha256:36ecea1fd7cc5c0c6e4a1ff075df26d50da647b75376626cc186e2212886dd3a"
],
"version": "==0.0.1"
},
"certifi": {
"hashes": [
"sha256:14131608ad2fd56836d33a71ee60fa1c82bc9d2c8d98b7bdbc631fe1b3cd1296",
"sha256:edbc3f203427eef571f79a7692bb160a2b0f7ccaa31953e99bd17e307cf63f7d"
],
"version": "==2018.1.18"
},
"chardet": {
"hashes": [
"sha256:fc323ffcaeaed0e0a02bf4d117757b98aed530d9ed4531e3e15460124c106691",
"sha256:84ab92ed1c4d4f16916e05906b6b75a6c0fb5db821cc65e70cbd64a3e2a5eaae"
],
"version": "==3.0.4"
},
"cssselect": {
"hashes": [
"sha256:3b5103e8789da9e936a68d993b70df732d06b8bb9a337a05ed4eb52c17ef7206",
"sha256:066d8bc5229af09617e24b3ca4d52f1f9092d9e061931f4184cd572885c23204"
],
"version": "==1.0.3"
},
"fake-useragent": {
"hashes": [
"sha256:cc9b9ddcebc708b3deac846f5fccb16e37c02ee47435a4ec7132271dd96aec8c"
],
"version": "==0.1.10"
},
"idna": {
"hashes": [
"sha256:8c7309c718f94b3a625cb648ace320157ad16ff131ae0af362c9f21b80ef6ec4",
"sha256:2c6a5de3089009e3da7c5dde64a141dbc8551d5b7f6cf4ed7c2568d0cc520a8f"
],
"version": "==2.6"
},
"lxml": {
"hashes": [
"sha256:41f59cbdab232f11680d5d4dec9f2e6782fd24d78e37ee833447702e34e675f4",
"sha256:e7e41d383f19bab9d57f5f3b18d158655bcd682e7e723f441b9e183e1e35a6b5",
"sha256:155521c337acecf8202091cff85bb9f709f238130ebadf04280fb1db11f5ad8b",
"sha256:d2c985d2460b81c6ca5feb8b86f1bc594ad59405d0bdf68626b85852b701553c",
"sha256:950e63387514aa1b881eba5ac6cb2ec51a118b3dafe99dd80ca19d8fb0142f30",
"sha256:470d7ce41e8047208ba1a376560bad17f1468df1f3097bc83902b26cfafdbb0c",
"sha256:e608839a5ee2180164424ccf279c8e2d9bbe8816d002c58fd97d6b621ba4aa94",
"sha256:87a66bcadac270fc010cb029022a93fc722bf1204a8b03e782d4c790f0edf7ca",
"sha256:2dedfeeecc2d5a939cf622602f5a1ce443ca82407f386880f739f1a9f08053ad",
"sha256:ba05732e4bcf59e948f61588851dcf620fd60d5bbd9d704203e5f59bbaa60219",
"sha256:2190266059fec3c5a55f9d6c30532c64c6d414d3228909c0af573fe4907e78d1",
"sha256:dd291debfaa535d9cb6cee8d7aca2328775e037d02d13f1634e57f49bc302cc4",
"sha256:29a36e354c39b2e24bc4ee103de53417ebb80f976a6ab9e8d093d559e2ac03e1",
"sha256:e37427d5a27eefbcfc48847e0b37f348113fac7280bc857421db39ffc6372570",
"sha256:b106d4d2383382399ad82108fd187e92f40b1c90f55c2d36bbcb1c44bcf940fc",
"sha256:0ee07da52d240f1dc3c83eef5cd5f1b7f018226c1121f2a54d446645779a6d17",
"sha256:3b33549fb8f91b38a7500078242b03cca513f3412a2cdae722e89bf83f95971d",
"sha256:4c12e90886d9c53ab434c8d0cebea122321cce19614c3c6b6d1a7700d7cc6212",
"sha256:79322000279cda10b53c374d53ca632ead3bc51c6aebf8e62c8fa93a4d08b750",
"sha256:6cba398eb37e0631e60e0e080c101cfe91769b2c8267105b64b4625e2581ea21",
"sha256:49a655956f8de69e1258bc0fcfc43eb3bd1e038655784d77d1869b4b81444e37",
"sha256:af8a5373241d09b8fc53e0490e1719ce5dc90a21b19db89b6596c1adcdd52270",
"sha256:e6b6698415c7e8d227a47a3b1038e1b37c2b438a1b48c2db7ad9e74ddbcd1149",
"sha256:155c916cf2645b4a8f2bd5d09065e92d1b67b8d464bdc001e0b524af84bedf6f",
"sha256:fa7320679ced5e25b20203d157280680fc84eb783b6cc650cb0c98e1858b7dd3",
"sha256:4187c4b0cefc3353181db048c51f42c489d9ac51e40b86c4851dc0671372971d",
"sha256:d5d29663e979e83b3fc361e97200f959cddb3a14797391d15273d84a5a8ae44b",
"sha256:940caef1ec7c78e0c34b0f6b94fe42d0f2022915ffc78643d28538a5cfd0f40e"
],
"version": "==4.1.1"
},
"parse": {
"hashes": [
"sha256:8048dde3f5ca07ad7ac7350460952d83b63eaacecdac1b37f45fd74870d849d2"
],
"version": "==1.8.2"
},
"pyee": {
"hashes": [
"sha256:47f8fa96d6dee61c82001831e1fbba55f3f808003a322d0e6653aa01c59f6b9e",
"sha256:4ec22817297b7024f89721cc34f790ee2767c5b5ca44284c565ee643abafbe32"
],
"version": "==5.0.0"
},
"pyppeteer": {
"hashes": [
"sha256:596929fb7d052048679081d3dc2a998cf065e936a752c7ba2392445d6e0e9706"
],
"version": "==0.0.10"
},
"pyquery": {
"hashes": [
"sha256:07987c2ed2aed5cba29ff18af95e56e9eb04a2249f42ce47bddfb37f487229a3",
"sha256:4771db76bd14352eba006463656aef990a0147a0eeaf094725097acfa90442bf"
],
"version": "==1.4.0"
},
"requests": {
"hashes": [
"sha256:6a1b267aa90cac58ac3a765d067950e7dbbf75b1da07e895d1f594193a40a38b",
"sha256:9c443e7324ba5b85070c4a818ade28bfabedf16ea10206da1132edaa6dda237e"
],
"version": "==2.18.4"
},
"six": {
"hashes": [
"sha256:832dc0e10feb1aa2c68dcc57dbb658f1c7e65b9b61af69048abc87a2db00a0eb",
"sha256:70e8a77beed4562e7f14fe23a786b54f6296e34344c23bc42f07b15018ff98e9"
],
"version": "==1.11.0"
},
"urllib3": {
"hashes": [
"sha256:06330f386d6e4b195fbfc736b297f58c5a892e4440e54d294d7004e3a9bbea1b",
"sha256:cc44da8e1145637334317feebd728bd869a35285b93cbb4cca2577da7e62db4f"
],
"version": "==1.22"
},
"w3lib": {
"hashes": [
"sha256:aaf7362464532b1036ab0092e2eee78e8fd7b56787baa9ed4967457b083d011b",
"sha256:55994787e93b411c2d659068b51b9998d9d0c05e0df188e6daf8f45836e1ea38"
],
"version": "==1.19.0"
},
"websockets": {
"hashes": [
"sha256:f5192da704535a7cbf76d6e99c1ec4af7e8d1288252bf5a2385d414509ded0cf",
"sha256:0c31bc832d529dc7583d324eb6c836a4f362032a1902723c112cf57883488d8c",
"sha256:da7610a017f5343fdf765f4e0eb6fd0dfd08264ca1565212b110836d9367fc9c",
"sha256:fd81af8cf3e69f9a97f3a6c0623a0527de0f922c2df725f00cd7646d478af632",
"sha256:3d425ae081fb4ba1eef9ecf30472ffd79f8e868297ccc7a47993c96dbf2a819c",
"sha256:ebdd4f18fe7e3bea9bd3bf446b0f4117739478caa2c76e4f0fb72cc45b03cbd7",
"sha256:3859ca16c229ddb0fa21c5090e4efcb037c08ce69b0c1dfed6122c3f98cd0c22",
"sha256:d1a0572b6edb22c9208e3e5381064e09d287d2a915f90233fef994ee7a14a935",
"sha256:80188abdadd23edaaea05ce761dc9a2e1df31a74a0533967f0dcd9560c85add0",
"sha256:fecf51c13195c416c22422353b306dddb9c752e4b80b21e0fa1fccbe38246677",
"sha256:367ff945bc0950ad9634591e2afe50bf2222bc4fad1088a386c4bb700888026e",
"sha256:6df87698022aef2596bffdfecc96d656db59c8d719708c8a471daa815ee61656",
"sha256:341824d8c9ad53fc43cca3fa9407f294125fa258592f7676640396501448e57e",
"sha256:64896a6b3368c959b8096b655e46f03dfa65b96745249f374bd6a35705cc3489",
"sha256:1f3e5a52cab6daa3d432c7b0de0a14109be39d2bfaad033ee5de4a3d3e11dcdf",
"sha256:da4d4fbe059b0453e726d6d993760065d69b823a27efc3040402a6fcfe6a1ed9"
],
"version": "==4.0.1"
}
},
"develop": {
"alabaster": {
"hashes": [
"sha256:2eef172f44e8d301d25aff8068fddd65f767a3f04b5f15b0f4922f113aa1c732",
"sha256:37cdcb9e9954ed60912ebc1ca12a9d12178c26637abdf124e3cde2341c257fe0"
],
"version": "==0.7.10"
},
"attrs": {
"hashes": [
"sha256:a17a9573a6f475c99b551c0e0a812707ddda1ec9653bed04c13841404ed6f450",
"sha256:1c7960ccfd6a005cd9f7ba884e6316b5e430a3f1a6c37c5f87d8b43f83b54ec9"
],
"version": "==17.4.0"
},
"babel": {
"hashes": [
"sha256:ad209a68d7162c4cff4b29cdebe3dec4cef75492df501b0049a9433c96ce6f80",
"sha256:8ce4cb6fdd4393edd323227cba3a077bceb2a6ce5201c902c65e730046f41f14"
],
"version": "==2.5.3"
},
"beautifulsoup4": {
"hashes": [
"sha256:7015e76bf32f1f574636c4288399a6de66ce08fb7b2457f628a8d70c0fbabb11",
"sha256:11a9a27b7d3bddc6d86f59fb76afb70e921a25ac2d6cc55b40d072bd68435a76",
"sha256:808b6ac932dccb0a4126558f7dfdcf41710dd44a4ef497a0bb59a77f9f078e89"
],
"version": "==4.6.0"
},
"bs4": {
"hashes": [
"sha256:36ecea1fd7cc5c0c6e4a1ff075df26d50da647b75376626cc186e2212886dd3a"
],
"version": "==0.0.1"
},
"certifi": {
"hashes": [
"sha256:14131608ad2fd56836d33a71ee60fa1c82bc9d2c8d98b7bdbc631fe1b3cd1296",
"sha256:edbc3f203427eef571f79a7692bb160a2b0f7ccaa31953e99bd17e307cf63f7d"
],
"version": "==2018.1.18"
},
"chardet": {
"hashes": [
"sha256:fc323ffcaeaed0e0a02bf4d117757b98aed530d9ed4531e3e15460124c106691",
"sha256:84ab92ed1c4d4f16916e05906b6b75a6c0fb5db821cc65e70cbd64a3e2a5eaae"
],
"version": "==3.0.4"
},
"cssselect": {
"hashes": [
"sha256:3b5103e8789da9e936a68d993b70df732d06b8bb9a337a05ed4eb52c17ef7206",
"sha256:066d8bc5229af09617e24b3ca4d52f1f9092d9e061931f4184cd572885c23204"
],
"version": "==1.0.3"
},
"docutils": {
"hashes": [
"sha256:7a4bd47eaf6596e1295ecb11361139febe29b084a87bf005bf899f9a42edc3c6",
"sha256:02aec4bd92ab067f6ff27a38a38a41173bf01bed8f89157768c1573f53e474a6",
"sha256:51e64ef2ebfb29cae1faa133b3710143496eca21c530f3f71424d77687764274"
],
"version": "==0.14"
},
"e1839a8": {
"editable": true,
"path": "."
},
"fake-useragent": {
"hashes": [
"sha256:cc9b9ddcebc708b3deac846f5fccb16e37c02ee47435a4ec7132271dd96aec8c"
],
"version": "==0.1.10"
},
"idna": {
"hashes": [
"sha256:8c7309c718f94b3a625cb648ace320157ad16ff131ae0af362c9f21b80ef6ec4",
"sha256:2c6a5de3089009e3da7c5dde64a141dbc8551d5b7f6cf4ed7c2568d0cc520a8f"
],
"version": "==2.6"
},
"imagesize": {
"hashes": [
"sha256:3620cc0cadba3f7475f9940d22431fc4d407269f1be59ec9b8edcca26440cf18",
"sha256:5b326e4678b6925158ccc66a9fa3122b6106d7c876ee32d7de6ce59385b96315"
],
"version": "==1.0.0"
},
"jinja2": {
"hashes": [
"sha256:74c935a1b8bb9a3947c50a54766a969d4846290e1e788ea44c1392163723c3bd",
"sha256:f84be1bb0040caca4cea721fcbbbbd61f9be9464ca236387158b0feea01914a4"
],
"version": "==2.10"
},
"lxml": {
"hashes": [
"sha256:41f59cbdab232f11680d5d4dec9f2e6782fd24d78e37ee833447702e34e675f4",
"sha256:e7e41d383f19bab9d57f5f3b18d158655bcd682e7e723f441b9e183e1e35a6b5",
"sha256:155521c337acecf8202091cff85bb9f709f238130ebadf04280fb1db11f5ad8b",
"sha256:d2c985d2460b81c6ca5feb8b86f1bc594ad59405d0bdf68626b85852b701553c",
"sha256:950e63387514aa1b881eba5ac6cb2ec51a118b3dafe99dd80ca19d8fb0142f30",
"sha256:470d7ce41e8047208ba1a376560bad17f1468df1f3097bc83902b26cfafdbb0c",
"sha256:e608839a5ee2180164424ccf279c8e2d9bbe8816d002c58fd97d6b621ba4aa94",
"sha256:87a66bcadac270fc010cb029022a93fc722bf1204a8b03e782d4c790f0edf7ca",
"sha256:2dedfeeecc2d5a939cf622602f5a1ce443ca82407f386880f739f1a9f08053ad",
"sha256:ba05732e4bcf59e948f61588851dcf620fd60d5bbd9d704203e5f59bbaa60219",
"sha256:2190266059fec3c5a55f9d6c30532c64c6d414d3228909c0af573fe4907e78d1",
"sha256:dd291debfaa535d9cb6cee8d7aca2328775e037d02d13f1634e57f49bc302cc4",
"sha256:29a36e354c39b2e24bc4ee103de53417ebb80f976a6ab9e8d093d559e2ac03e1",
"sha256:e37427d5a27eefbcfc48847e0b37f348113fac7280bc857421db39ffc6372570",
"sha256:b106d4d2383382399ad82108fd187e92f40b1c90f55c2d36bbcb1c44bcf940fc",
"sha256:0ee07da52d240f1dc3c83eef5cd5f1b7f018226c1121f2a54d446645779a6d17",
"sha256:3b33549fb8f91b38a7500078242b03cca513f3412a2cdae722e89bf83f95971d",
"sha256:4c12e90886d9c53ab434c8d0cebea122321cce19614c3c6b6d1a7700d7cc6212",
"sha256:79322000279cda10b53c374d53ca632ead3bc51c6aebf8e62c8fa93a4d08b750",
"sha256:6cba398eb37e0631e60e0e080c101cfe91769b2c8267105b64b4625e2581ea21",
"sha256:49a655956f8de69e1258bc0fcfc43eb3bd1e038655784d77d1869b4b81444e37",
"sha256:af8a5373241d09b8fc53e0490e1719ce5dc90a21b19db89b6596c1adcdd52270",
"sha256:e6b6698415c7e8d227a47a3b1038e1b37c2b438a1b48c2db7ad9e74ddbcd1149",
"sha256:155c916cf2645b4a8f2bd5d09065e92d1b67b8d464bdc001e0b524af84bedf6f",
"sha256:fa7320679ced5e25b20203d157280680fc84eb783b6cc650cb0c98e1858b7dd3",
"sha256:4187c4b0cefc3353181db048c51f42c489d9ac51e40b86c4851dc0671372971d",
"sha256:d5d29663e979e83b3fc361e97200f959cddb3a14797391d15273d84a5a8ae44b",
"sha256:940caef1ec7c78e0c34b0f6b94fe42d0f2022915ffc78643d28538a5cfd0f40e"
],
"version": "==4.1.1"
},
"markupsafe": {
"hashes": [
"sha256:a6be69091dac236ea9c6bc7d012beab42010fa914c459791d627dad4910eb665"
],
"version": "==1.0"
},
"mypy": {
"hashes": [
"sha256:aa668809ae0dbec5e9feb8929f4b5e1f9318a0a397447fa2f38c382a2ed6a036",
"sha256:bd0c9a2fcf0c4f7a54a2b625f466fcc000d415f371298d96fa5d2acc69074aca"
],
"version": "==0.560"
},
"packaging": {
"hashes": [
"sha256:99276dc6e3a7851f32027a68f1095cd3f77c148091b092ea867a351811cfe388",
"sha256:5d50835fdf0a7edf0b55e311b7c887786504efea1177abd7e69329a8e5ea619e"
],
"version": "==16.8"
},
"parse": {
"hashes": [
"sha256:8048dde3f5ca07ad7ac7350460952d83b63eaacecdac1b37f45fd74870d849d2"
],
"version": "==1.8.2"
},
"pkginfo": {
"hashes": [
"sha256:31a49103180ae1518b65d3f4ce09c784e2bc54e338197668b4fb7dc539521024",
"sha256:bb1a6aeabfc898f5df124e7e00303a5b3ec9a489535f346bfbddb081af93f89e"
],
"version": "==1.4.1"
},
"pluggy": {
"hashes": [
"sha256:7f8ae7f5bdf75671a718d2daf0a64b7885f74510bcd98b1a0bb420eb9a9d0cff"
],
"version": "==0.6.0"
},
"psutil": {
"hashes": [
"sha256:82a06785db8eeb637b349006cc28a92e40cd190fefae9875246d18d0de7ccac8",
"sha256:4152ae231709e3e8b80e26b6da20dc965a1a589959c48af1ed024eca6473f60d",
"sha256:230eeb3aeb077814f3a2cd036ddb6e0f571960d327298cc914c02385c3e02a63",
"sha256:a3286556d4d2f341108db65d8e20d0cd3fcb9a91741cb5eb496832d7daf2a97c",
"sha256:94d4e63189f2593960e73acaaf96be235dd8a455fe2bcb37d8ad6f0e87f61556",
"sha256:c91eee73eea00df5e62c741b380b7e5b6fdd553891bee5669817a3a38d036f13",
"sha256:779ec7e7621758ca11a8d99a1064996454b3570154277cc21342a01148a49c28",
"sha256:8a15d773203a1277e57b1d11a7ccdf70804744ef4a9518a87ab8436995c31a4b",
"sha256:e2467e9312c2fa191687b89ff4bc2ad8843be4af6fb4dc95a7cc5f7d7a327b18"
],
"version": "==5.4.3"
},
"py": {
"hashes": [
"sha256:8cca5c229d225f8c1e3085be4fcf306090b00850fefad892f9d96c7b6e2f310f",
"sha256:ca18943e28235417756316bfada6cd96b23ce60dd532642690dcfdaba988a76d"
],
"version": "==1.5.2"
},
"pyee": {
"hashes": [
"sha256:47f8fa96d6dee61c82001831e1fbba55f3f808003a322d0e6653aa01c59f6b9e",
"sha256:4ec22817297b7024f89721cc34f790ee2767c5b5ca44284c565ee643abafbe32"
],
"version": "==5.0.0"
},
"pygments": {
"hashes": [
"sha256:78f3f434bcc5d6ee09020f92ba487f95ba50f1e3ef83ae96b9d5ffa1bab25c5d",
"sha256:dbae1046def0efb574852fab9e90209b23f556367b5a320c0bcb871c77c3e8cc"
],
"version": "==2.2.0"
},
"pyparsing": {
"hashes": [
"sha256:fee43f17a9c4087e7ed1605bd6df994c6173c1e977d7ade7b651292fab2bd010",
"sha256:0832bcf47acd283788593e7a0f542407bd9550a55a8a8435214a1960e04bcb04",
"sha256:9e8143a3e15c13713506886badd96ca4b579a87fbdf49e550dbfc057d6cb218e",
"sha256:281683241b25fe9b80ec9d66017485f6deff1af5cde372469134b56ca8447a07",
"sha256:b8b3117ed9bdf45e14dcc89345ce638ec7e0e29b2b579fa1ecf32ce45ebac8a5",
"sha256:8f1e18d3fd36c6795bb7e02a39fd05c611ffc2596c1e0d995d34d67630426c18",
"sha256:e4d45427c6e20a59bf4f88c639dcc03ce30d193112047f94012102f235853a58"
],
"version": "==2.2.0"
},
"pyppeteer": {
"hashes": [
"sha256:596929fb7d052048679081d3dc2a998cf065e936a752c7ba2392445d6e0e9706"
],
"version": "==0.0.10"
},
"pyquery": {
"hashes": [
"sha256:07987c2ed2aed5cba29ff18af95e56e9eb04a2249f42ce47bddfb37f487229a3",
"sha256:4771db76bd14352eba006463656aef990a0147a0eeaf094725097acfa90442bf"
],
"version": "==1.4.0"
},
"pytest": {
"hashes": [
"sha256:8970e25181e15ab14ae895599a0a0e0ade7d1f1c4c8ca1072ce16f25526a184d",
"sha256:9ddcb879c8cc859d2540204b5399011f842e5e8823674bf429f70ada281b3cc6"
],
"version": "==3.4.1"
},
"pytz": {
"hashes": [
"sha256:ed6509d9af298b7995d69a440e2822288f2eca1681b8cce37673dbb10091e5fe",
"sha256:f93ddcdd6342f94cea379c73cddb5724e0d6d0a1c91c9bdef364dc0368ba4fda",
"sha256:61242a9abc626379574a166dc0e96a66cd7c3b27fc10868003fa210be4bff1c9",
"sha256:ba18e6a243b3625513d85239b3e49055a2f0318466e0b8a92b8fb8ca7ccdf55f",
"sha256:07edfc3d4d2705a20a6e99d97f0c4b61c800b8232dc1c04d87e8554f130148dd",
"sha256:3a47ff71597f821cd84a162e71593004286e5be07a340fd462f0d33a760782b5",
"sha256:5bd55c744e6feaa4d599a6cbd8228b4f8f9ba96de2c38d56f08e534b3c9edf0d",
"sha256:887ab5e5b32e4d0c86efddd3d055c1f363cbaa583beb8da5e22d2fa2f64d51ef",
"sha256:410bcd1d6409026fbaa65d9ed33bf6dd8b1e94a499e32168acfc7b332e4095c0"
],
"version": "==2018.3"
},
"requests": {
"hashes": [
"sha256:6a1b267aa90cac58ac3a765d067950e7dbbf75b1da07e895d1f594193a40a38b",
"sha256:9c443e7324ba5b85070c4a818ade28bfabedf16ea10206da1132edaa6dda237e"
],
"version": "==2.18.4"
},
"requests-file": {
"hashes": [
"sha256:75c175eed739270aec3c5279ffd74e6527dada275c5c0d76b5817e9c86bb7dea",
"sha256:8f04aa6201bacda0567e7ac7f677f1499b0fc76b22140c54bc06edf1ba92e2fa"
],
"version": "==1.4.3"
},
"requests-toolbelt": {
"hashes": [
"sha256:42c9c170abc2cacb78b8ab23ac957945c7716249206f90874651971a4acff237",
"sha256:f6a531936c6fa4c6cfce1b9c10d5c4f498d16528d2a54a22ca00011205a187b5"
],
"version": "==0.8.0"
},
"six": {
"hashes": [
"sha256:832dc0e10feb1aa2c68dcc57dbb658f1c7e65b9b61af69048abc87a2db00a0eb",
"sha256:70e8a77beed4562e7f14fe23a786b54f6296e34344c23bc42f07b15018ff98e9"
],
"version": "==1.11.0"
},
"snowballstemmer": {
"hashes": [
"sha256:9f3bcd3c401c3e862ec0ebe6d2c069ebc012ce142cce209c098ccb5b09136e89",
"sha256:919f26a68b2c17a7634da993d91339e288964f93c274f1343e3bbbe2096e1128"
],
"version": "==1.2.1"
},
"sphinx": {
"hashes": [
"sha256:41ae26acc6130ccf6ed47e5cca73742b80d55a134f0ab897c479bba8d3640b8e",
"sha256:da987de5fcca21a4acc7f67a86a363039e67ac3e8827161e61b91deb131c0ee8"
],
"version": "==1.7.1"
},
"sphinxcontrib-websupport": {
"hashes": [
"sha256:f4932e95869599b89bf4f80fc3989132d83c9faa5bf633e7b5e0c25dffb75da2",
"sha256:7a85961326aa3a400cd4ad3c816d70ed6f7c740acd7ce5d78cd0a67825072eb9"
],
"version": "==1.0.1"
},
"tqdm": {
"hashes": [
"sha256:f66468c14ccd011a627734c9b3fd72f20ce16f8faecc47384eb2507af5924fb9",
"sha256:5ec0d4442358e55cdb4a0471d04c6c831518fd8837f259db5537d90feab380df"
],
"version": "==4.19.6"
},
"twine": {
"hashes": [
"sha256:d3ce5c480c22ccfb761cd358526e862b32546d2fe4bc93d46b5cf04ea3cc46ca",
"sha256:caa45b7987fc96321258cd7668e3be2ff34064f5c66d2d975b641adca659c1ab"
],
"version": "==1.9.1"
},
"typed-ast": {
"hashes": [
"sha256:0948004fa228ae071054f5208840a1e88747a357ec1101c17217bfe99b299d58",
"sha256:25d8feefe27eb0303b73545416b13d108c6067b846b543738a25ff304824ed9a",
"sha256:c05b41bc1deade9f90ddc5d988fe506208019ebba9f2578c622516fd201f5863",
"sha256:519425deca5c2b2bdac49f77b2c5625781abbaf9a809d727d3a5596b30bb4ded",
"sha256:6de012d2b166fe7a4cdf505eee3aaa12192f7ba365beeefaca4ec10e31241a85",
"sha256:79b91ebe5a28d349b6d0d323023350133e927b4de5b651a8aa2db69c761420c6",
"sha256:a8034021801bc0440f2e027c354b4eafd95891b573e12ff0418dec385c76785c",
"sha256:f19f2a4f547505fe9072e15f6f4ae714af51b5a681a97f187971f50c283193b6",
"sha256:c9b060bd1e5a26ab6e8267fd46fc9e02b54eb15fffb16d112d4c7b1c12987559",
"sha256:2e214b72168ea0275efd6c884b114ab42e316de3ffa125b267e732ed2abda892",
"sha256:bc978ac17468fe868ee589c795d06777f75496b1ed576d308002c8a5756fb9ea",
"sha256:edb04bdd45bfd76c8292c4d9654568efaedf76fe78eb246dde69bdb13b2dad87",
"sha256:668d0cec391d9aed1c6a388b0d5b97cd22e6073eaa5fbaa6d2946603b4871efe",
"sha256:29464a177d56e4e055b5f7b629935af7f49c196be47528cc94e0a7bf83fbc2b9",
"sha256:8550177fa5d4c1f09b5e5f524411c44633c80ec69b24e0e98906dd761941ca46",
"sha256:3e0d5e48e3a23e9a4d1a9f698e32a542a4a288c871d33ed8df1b092a40f3a0f9",
"sha256:68ba70684990f59497680ff90d18e756a47bf4863c604098f10de9716b2c0bdd",
"sha256:57fe287f0cdd9ceaf69e7b71a2e94a24b5d268b35df251a88fef5cc241bf73aa"
],
"version": "==1.1.0"
},
"urllib3": {
"hashes": [
"sha256:06330f386d6e4b195fbfc736b297f58c5a892e4440e54d294d7004e3a9bbea1b",
"sha256:cc44da8e1145637334317feebd728bd869a35285b93cbb4cca2577da7e62db4f"
],
"version": "==1.22"
},
"w3lib": {
"hashes": [
"sha256:aaf7362464532b1036ab0092e2eee78e8fd7b56787baa9ed4967457b083d011b",
"sha256:55994787e93b411c2d659068b51b9998d9d0c05e0df188e6daf8f45836e1ea38"
],
"version": "==1.19.0"
},
"websockets": {
"hashes": [
"sha256:f5192da704535a7cbf76d6e99c1ec4af7e8d1288252bf5a2385d414509ded0cf",
"sha256:0c31bc832d529dc7583d324eb6c836a4f362032a1902723c112cf57883488d8c",
"sha256:da7610a017f5343fdf765f4e0eb6fd0dfd08264ca1565212b110836d9367fc9c",
"sha256:fd81af8cf3e69f9a97f3a6c0623a0527de0f922c2df725f00cd7646d478af632",
"sha256:3d425ae081fb4ba1eef9ecf30472ffd79f8e868297ccc7a47993c96dbf2a819c",
"sha256:ebdd4f18fe7e3bea9bd3bf446b0f4117739478caa2c76e4f0fb72cc45b03cbd7",
"sha256:3859ca16c229ddb0fa21c5090e4efcb037c08ce69b0c1dfed6122c3f98cd0c22",
"sha256:d1a0572b6edb22c9208e3e5381064e09d287d2a915f90233fef994ee7a14a935",
"sha256:80188abdadd23edaaea05ce761dc9a2e1df31a74a0533967f0dcd9560c85add0",
"sha256:fecf51c13195c416c22422353b306dddb9c752e4b80b21e0fa1fccbe38246677",
"sha256:367ff945bc0950ad9634591e2afe50bf2222bc4fad1088a386c4bb700888026e",
"sha256:6df87698022aef2596bffdfecc96d656db59c8d719708c8a471daa815ee61656",
"sha256:341824d8c9ad53fc43cca3fa9407f294125fa258592f7676640396501448e57e",
"sha256:64896a6b3368c959b8096b655e46f03dfa65b96745249f374bd6a35705cc3489",
"sha256:1f3e5a52cab6daa3d432c7b0de0a14109be39d2bfaad033ee5de4a3d3e11dcdf",
"sha256:da4d4fbe059b0453e726d6d993760065d69b823a27efc3040402a6fcfe6a1ed9"
],
"version": "==4.0.1"
}
}
}
+167
View File
File diff suppressed because one or more lines are too long
+20
View File
@@ -0,0 +1,20 @@
# Minimal makefile for Sphinx documentation
#
# You can set these variables from the command line.
SPHINXOPTS =
SPHINXBUILD = sphinx-build
SPHINXPROJ = requests-html
SOURCEDIR = source
BUILDDIR = build
# Put it first so that "make" without argument is like "make help".
help:
@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
.PHONY: help Makefile
# Catch-all target: route all unknown targets to Sphinx using the new
# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
%: Makefile
@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
+36
View File
@@ -0,0 +1,36 @@
@ECHO OFF
pushd %~dp0
REM Command file for Sphinx documentation
if "%SPHINXBUILD%" == "" (
set SPHINXBUILD=sphinx-build
)
set SOURCEDIR=source
set BUILDDIR=build
set SPHINXPROJ=requests-html
if "%1" == "" goto help
%SPHINXBUILD% >NUL 2>NUL
if errorlevel 9009 (
echo.
echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
echo.installed, then set the SPHINXBUILD environment variable to point
echo.to the full path of the 'sphinx-build' executable. Alternatively you
echo.may add the Sphinx directory to PATH.
echo.
echo.If you don't have Sphinx installed, grab it from
echo.http://sphinx-doc.org/
exit /b 1
)
%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS%
goto end
:help
%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS%
:end
popd
Binary file not shown.

After

Width:  |  Height:  |  Size: 281 KiB

+48
View File
@@ -0,0 +1,48 @@
<!-- Alabaster (krTheme++) Hacks -->
<!-- CSS Adjustments (I'm very picky.) -->
<style type="text/css">
/* Rezzy requires precise alignment. */
img.logo {margin-left: -20px!important;}
/* "Quick Search" should be capitalized. */
div#searchbox h3 {text-transform: capitalize;}
/* Make the document a little wider, less code is cut-off. */
div.document {width: 1008px;}
/* Much-improved spacing around code blocks. */
div.highlight pre {padding: 11px 14px;}
/* Remain Responsive! */
@media screen and (max-width: 1008px) {
div.sphinxsidebar {display: none;}
div.document {width: 100%!important;}
/* Have code blocks escape the document right-margin. */
div.highlight pre {margin-right: -30px;}
}
</style>
<!-- Analytics tracking for Kenneth. -->
<script type="text/javascript">
var _gauges = _gauges || [];
(function() {
var t = document.createElement('script');
t.type = 'text/javascript';
t.async = true;
t.id = 'gauges-tracker';
t.setAttribute('data-site-id', '5a956183ba4ae36e18000033');
t.setAttribute('data-track-path', 'https://track.gaug.es/track.gif');
t.src = 'https://d2fuc4clr7gvcn.cloudfront.net/track.js';
var s = document.getElementsByTagName('script')[0];
s.parentNode.insertBefore(t, s);
})();
</script>
<!-- That was not a hack. That was art. -->
+41
View File
@@ -0,0 +1,41 @@
<p class="logo">
<a href="{{ pathto(master_doc) }}">
<img class="logo" src="{{ pathto('_static/requests-html-logo.png', 1) }}" title="https://kennethreitz.org/tattoos"/>
</a>
</p>
<p>
<iframe src="https://ghbtns.com/github-btn.html?user=kennethreitz&repo=requests-html&type=watch&count=true&size=large"
allowtransparency="true" frameborder="0" scrolling="0" width="200px" height="35px"></iframe>
</p>
<p>
<strong>Requests-HTML</strong> intends to make parsing HTML (e.g. scraping the web) as
simple and intuitive as possible.
</p>
<h3>Stay Informed</h3>
<p>Receive updates on new releases and upcoming projects.</p>
<p><iframe src="https://ghbtns.com/github-btn.html?user=kennethreitz&type=follow&count=false"
allowtransparency="true" frameborder="0" scrolling="0" width="200" height="20"></iframe></p>
<p><a href="https://twitter.com/kennethreitz" class="twitter-follow-button" data-show-count="false">Follow @kennethreitz</a> <script>!function(d,s,id){var js,fjs=d.getElementsByTagName(s)[0],p=/^http:/.test(d.location)?'http':'https';if(!d.getElementById(id)){js=d.createElement(s);js.id=id;js.src=p+'://platform.twitter.com/widgets.js';fjs.parentNode.insertBefore(js,fjs);}}(document, 'script', 'twitter-wjs');</script></p>
<p><a href="https://saythanks.io/to/kennethreitz">Say Thanks!</a></p>
<p><a href="http://tinyletter.com/kennethreitz">Join Mailing List</a>.</p>
<h3>Other Projects</h3>
<p>More <a href="http://kennethreitz.org/">Kenneth Reitz</a> projects:</p>
<ul>
<li><a href="https://python-requests.org/">python-requests.org</a></li>
<li><a href="http://howtopython.org/">howtopython.org</a></li>
<li><a href="http://pipenv.org/">pipenv</a></li>
<li><a href="http://pep8.org/">pep8.org</a></li>
<li><a href="http://httpbin.org/">httpbin.org</a></li>
<li><a href="http://python-guide.org">The Python Guide</a></li>
<li><a href="https://github.com/kennethreitz/maya">Maya: Datetimes for Humans</a></li>
<li><a href="https://github.com/kennethreitz/records">Records: SQL for Humans</a></li>
<li><a href="http://www.git-legit.org">Legit: Git for Humans</a></li>
<li><a href="http://docs.python-tablib.org/en/latest/">Tablib: Tabular Datasets</a></li>
</ul>
+41
View File
@@ -0,0 +1,41 @@
<p class="logo">
<a href="{{ pathto(master_doc) }}">
<img class="logo" src="{{ pathto('_static/requests-html-logo.png', 1) }}" title="https://kennethreitz.org/tattoos"/>
</a>
</p>
<p>
<iframe src="https://ghbtns.com/github-btn.html?user=kennethreitz&repo=requests-html&type=watch&count=true&size=large"
allowtransparency="true" frameborder="0" scrolling="0" width="200px" height="35px"></iframe>
</p>
<p>
<strong>Requests-HTML</strong> intends to make parsing HTML (e.g. scraping the web) as
simple and intuitive as possible.
</p>
<h3>Stay Informed</h3>
<p>Receive updates on new releases and upcoming projects.</p>
<p><iframe src="https://ghbtns.com/github-btn.html?user=kennethreitz&type=follow&count=false"
allowtransparency="true" frameborder="0" scrolling="0" width="200" height="20"></iframe></p>
<p><a href="https://twitter.com/kennethreitz" class="twitter-follow-button" data-show-count="false">Follow @kennethreitz</a> <script>!function(d,s,id){var js,fjs=d.getElementsByTagName(s)[0],p=/^http:/.test(d.location)?'http':'https';if(!d.getElementById(id)){js=d.createElement(s);js.id=id;js.src=p+'://platform.twitter.com/widgets.js';fjs.parentNode.insertBefore(js,fjs);}}(document, 'script', 'twitter-wjs');</script></p>
<p><a href="https://saythanks.io/to/kennethreitz">Say Thanks!</a></p>
<p><a href="http://tinyletter.com/kennethreitz">Join Mailing List</a>.</p>
<h3>Other Projects</h3>
<p>More <a href="http://kennethreitz.org/">Kenneth Reitz</a> projects:</p>
<ul>
<li><a href="https://python-requests.org/">python-requests.org</a></li>
<li><a href="http://howtopython.org/">howtopython.org</a></li>
<li><a href="http://pipenv.org/">pipenv</a></li>
<li><a href="http://pep8.org/">pep8.org</a></li>
<li><a href="http://httpbin.org/">httpbin.org</a></li>
<li><a href="http://python-guide.org">The Python Guide</a></li>
<li><a href="https://github.com/kennethreitz/maya">Maya: Datetimes for Humans</a></li>
<li><a href="https://github.com/kennethreitz/records">Records: SQL for Humans</a></li>
<li><a href="http://www.git-legit.org">Legit: Git for Humans</a></li>
<li><a href="http://docs.python-tablib.org/en/latest/">Tablib: Tabular Datasets</a></li>
</ul>
+211
View File
@@ -0,0 +1,211 @@
# -*- coding: utf-8 -*-
#
# Configuration file for the Sphinx documentation builder.
#
# This file does only contain a selection of the most common options. For a
# full list see the documentation:
# http://www.sphinx-doc.org/en/stable/config
# -- Path setup --------------------------------------------------------------
# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
#
# import os
# import sys
# sys.path.insert(0, os.path.abspath('.'))
# -- Project information -----------------------------------------------------
import requests_html
project = 'requests-HTML'
copyright = u'MMXVIII. A <a href="http://kennethreitz.com/pages/open-projects.html">Kenneth Reitz</a> Project'
author = 'Kenneth Reitz'
# The short X.Y version
version = ''
# The full version, including alpha/beta/rc tags
release = 'v0.3.4'
# -- General configuration ---------------------------------------------------
# If your documentation needs a minimal Sphinx version, state it here.
#
# needs_sphinx = '1.0'
# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = [
'sphinx.ext.autodoc',
'sphinx.ext.doctest',
'sphinx.ext.intersphinx',
'sphinx.ext.todo',
'sphinx.ext.coverage',
'sphinx.ext.viewcode',
'sphinx.ext.githubpages',
]
# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']
# The suffix(es) of source filenames.
# You can specify multiple suffix as a list of string:
#
# source_suffix = ['.rst', '.md']
source_suffix = '.rst'
# The master toctree document.
master_doc = 'index'
# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
#
# This is also used if you do content translation via gettext catalogs.
# Usually you set "language" from the command line for these cases.
language = None
# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# This pattern also affects html_static_path and html_extra_path .
exclude_patterns = []
# The name of the Pygments (syntax highlighting) style to use.
pygments_style = 'sphinx'
# -- Options for HTML output -------------------------------------------------
# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
#
html_theme = 'alabaster'
# Theme options are theme-specific and customize the look and feel of a theme
# further. For a list of options available for each theme, see the
# documentation.
#
html_theme_options = {
'show_powered_by': False,
'github_user': 'kennethreitz',
'github_repo': 'requests-html',
'github_banner': True,
'show_related': False,
'note_bg': '#FFF59C'
}
# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ['_static']
# Custom sidebar templates, must be a dictionary that maps document names
# to template names.
#
# The default sidebars (for documents that don't match any pattern) are
# defined by theme itself. Builtin themes are using these templates by
# default: ``['localtoc.html', 'relations.html', 'sourcelink.html',
# 'searchbox.html']``.
#
html_sidebars = {
'index': ['sidebarintro.html', 'sourcelink.html', 'searchbox.html',
'hacks.html'],
'**': ['sidebarlogo.html', 'localtoc.html', 'relations.html',
'sourcelink.html', 'searchbox.html', 'hacks.html']
}
html_show_sphinx = False
html_show_sourcelink = False
# -- Options for HTMLHelp output ---------------------------------------------
# Output file base name for HTML help builder.
htmlhelp_basename = 'requests-htmldoc'
# -- Options for LaTeX output ------------------------------------------------
latex_elements = {
# The paper size ('letterpaper' or 'a4paper').
#
# 'papersize': 'letterpaper',
# The font size ('10pt', '11pt' or '12pt').
#
# 'pointsize': '10pt',
# Additional stuff for the LaTeX preamble.
#
# 'preamble': '',
# Latex figure (float) alignment
#
# 'figure_align': 'htbp',
}
# Grouping the document tree into LaTeX files. List of tuples
# (source start file, target name, title,
# author, documentclass [howto, manual, or own class]).
latex_documents = [
(master_doc, 'requests-html.tex', 'requests-html Documentation',
'Kenneth Reitz', 'manual'),
]
# -- Options for manual page output ------------------------------------------
# One entry per manual page. List of tuples
# (source start file, name, description, authors, manual section).
man_pages = [
(master_doc, 'requests-html', 'requests-html Documentation',
[author], 1)
]
# -- Options for Texinfo output ----------------------------------------------
# Grouping the document tree into Texinfo files. List of tuples
# (source start file, target name, title, author,
# dir menu entry, description, category)
texinfo_documents = [
(master_doc, 'requests-html', 'requests-html Documentation',
author, 'requests-html', 'One line description of project.',
'Miscellaneous'),
]
# -- Options for Epub output -------------------------------------------------
# Bibliographic Dublin Core info.
epub_title = project
epub_author = author
epub_publisher = author
epub_copyright = copyright
# The unique identifier of the text. This can be a ISBN number
# or the project homepage.
#
# epub_identifier = ''
# A unique identification for the text.
#
# epub_uid = ''
# A list of files that should not be packed into the epub file.
epub_exclude_files = ['search.html']
# -- Extension configuration -------------------------------------------------
# -- Options for intersphinx extension ---------------------------------------
# Example configuration for intersphinx: refer to the Python standard library.
intersphinx_mapping = {'https://docs.python.org/': None}
# -- Options for todo extension ----------------------------------------------
# If true, `todo` and `todoList` produce output, else they produce nothing.
todo_include_todos = True
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
+594
View File
@@ -0,0 +1,594 @@
import sys
import asyncio
from urllib.parse import urlparse, urlunparse
from concurrent.futures._base import TimeoutError
from typing import Set, Union, List, MutableMapping, Optional
import pyppeteer
import requests
from pyquery import PyQuery
from fake_useragent import UserAgent
import lxml
from lxml import etree
from lxml.html import HtmlElement
from lxml.html.soupparser import fromstring as soup_parse
from parse import search as parse_search
from parse import findall, Result
from w3lib.encoding import html_to_unicode
DEFAULT_ENCODING = 'utf-8'
DEFAULT_URL = 'https://example.org/'
DEFAULT_USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/603.3.8 (KHTML, like Gecko) Version/10.1.2 Safari/603.3.8'
useragent = None
# Typing.
_Find = Union[List['Element'], 'Element']
_XPath = Union[List[str], List['Element'], str, 'Element']
_Result = Union[List['Result'], 'Result']
_HTML = Union[str, bytes]
_BaseHTML = str
_UserAgent = str
_DefaultEncoding = str
_URL = str
_RawHTML = bytes
_Encoding = str
_LXML = HtmlElement
_Text = str
_Search = Result
_Containing = Union[str, List[str]]
_Links = Set[str]
_Attrs = MutableMapping
_Next = Union['HTML', List[str]]
# Sanity checking.
try:
assert sys.version_info.major == 3
assert sys.version_info.minor > 5
except AssertionError:
raise RuntimeError('Requests-HTML requires Python 3.6+!')
class BaseParser:
"""A basic HTML/Element Parser, for Humans.
:param element: The element from which to base the parsing upon.
:param default_encoding: Which encoding to default to.
:param html: HTML from which to base the parsing upon (optional).
:param url: The URL from which the HTML originated, used for ``absolute_links``.
"""
def __init__(self, *, element, session: 'HTTPSession' = None, default_encoding: _DefaultEncoding = None, html: _HTML = None, url: _URL) -> None:
self.element = element
self.session = session or HTMLSession()
self.url = url
self.skip_anchors = True
self.default_encoding = default_encoding
self._encoding = None
self._html = html.encode(DEFAULT_ENCODING) if isinstance(html, str) else html
self._lxml = None
self._pq = None
@property
def raw_html(self) -> _RawHTML:
"""Bytes representation of the HTML content.
(`learn more <http://www.diveintopython3.net/strings.html>`_).
"""
if self._html:
return self._html
else:
return etree.tostring(self.element, encoding='unicode').strip().encode(self.encoding)
@property
def html(self) -> _BaseHTML:
"""Unicode representation of the HTML content
(`learn more <http://www.diveintopython3.net/strings.html>`_).
"""
if self._html:
return self.raw_html.decode(self.encoding)
else:
return etree.tostring(self.element, encoding='unicode').strip()
@html.setter
def html(self, html: bytes) -> None:
"""Property setter for self.html."""
self._html = html
@property
def encoding(self) -> _Encoding:
"""The encoding string to be used, extracted from the HTML and
:class:`HTMLResponse <HTMLResponse>` headers.
"""
if self._encoding:
return self._encoding
# Scan meta tags for charset.
if self._html:
self._encoding = html_to_unicode(self.default_encoding, self._html)[0]
return self._encoding if self._encoding else self.default_encoding
@encoding.setter
def encoding(self, enc: str) -> None:
"""Property setter for self.encoding."""
self._encoding = enc
@property
def pq(self) -> PyQuery:
"""`PyQuery <https://pythonhosted.org/pyquery/>`_ representation
of the :class:`Element <Element>` or :class:`HTML <HTML>`.
"""
if self._pq is None:
self._pq = PyQuery(self.html)
return self._pq
@property
def lxml(self) -> HtmlElement:
"""`lxml <http://lxml.de>`_ representation of the
:class:`Element <Element>` or :class:`HTML <HTML>`.
"""
if self._lxml is None:
try:
self._lxml = soup_parse(self.html, features='html.parser')
except ValueError:
self._lxml = lxml.html.fromstring(self.html)
return self._lxml
@property
def text(self) -> _Text:
"""The text content of the
:class:`Element <Element>` or :class:`HTML <HTML>`.
"""
return self.pq.text()
@property
def full_text(self) -> _Text:
"""The full text content (including links) of the
:class:`Element <Element>` or :class:`HTML <HTML>`.
"""
return self.lxml.text_content()
def next(self, fetch: bool = False) -> _Next:
"""Attempts to find the next page, if there is one. If ``fetch``
is ``True`` (default), returns :class:`HTML <HTML>` object of
next page. If ``fetch`` is ``False``, simply returns the next URL.
"""
def get_next():
candidates = self.find('a', containing=('next', 'more', 'older'))
for candidate in candidates:
if candidate.attrs.get('href'):
# Support 'next' rel (e.g. reddit).
if 'next' in candidate.attrs.get('rel', []):
return candidate.attrs['href']
# Support 'next' in classnames.
for _class in candidate.attrs.get('class', []):
if 'next' in _class:
return candidate.attrs['href']
if 'page' in candidate.attrs['href']:
return candidate.attrs['href']
try:
# Resort to the last candidate.
return candidates[-1].attrs['href']
except IndexError:
return None
next = get_next()
if next:
url = self._make_absolute(next)
else:
return None
if fetch:
return self.session.get(url)
else:
return url
def find(self, selector: str = "*", containing: _Containing = None, first: bool = False, _encoding: str = None) -> _Find:
"""Given a CSS Selector, returns a list of
:class:`Element <Element>` objects or a single one.
:param selector: CSS Selector to use.
:param containing: If specified, only return elements that contain the provided text.
:param first: Whether or not to return just the first result.
:param _encoding: The encoding format.
Example CSS Selectors:
- ``a``
- ``a.someClass``
- ``a#someID``
- ``a[target=_blank]``
See W3School's `CSS Selectors Reference
<https://www.w3schools.com/cssref/css_selectors.asp>`_
for more details.
If ``first`` is ``True``, only returns the first
:class:`Element <Element>` found.
"""
# Convert a single containing into a list.
if isinstance(containing, str):
containing = [containing]
encoding = _encoding or self.encoding
elements = [
Element(element=found, url=self.url, default_encoding=encoding)
for found in self.pq(selector)
]
if containing:
elements_copy = elements.copy()
elements = []
for element in elements_copy:
if any([c.lower() in element.full_text.lower() for c in containing]):
elements.append(element)
elements.reverse()
return _get_first_or_list(elements, first)
def xpath(self, selector: str, first: bool = False, _encoding: str = None) -> _XPath:
"""Given an XPath selector, returns a list of
:class:`Element <Element>` objects or a single one.
:param selector: XPath Selector to use.
:param first: Whether or not to return just the first result.
:param _encoding: The encoding format.
If a sub-selector is specified (e.g. ``//a/@href``), a simple
list of results is returned.
See W3School's `XPath Examples
<https://www.w3schools.com/xml/xpath_examples.asp>`_
for more details.
If ``first`` is ``True``, only returns the first
:class:`Element <Element>` found.
"""
selected = self.lxml.xpath(selector)
elements = [
Element(element=selection, url=self.url, default_encoding=_encoding or self.encoding)
if not isinstance(selection, etree._ElementUnicodeResult) else str(selection)
for selection in selected
]
return _get_first_or_list(elements, first)
def search(self, template: str) -> Result:
"""Search the :class:`Element <Element>` for the given Parse template.
:param template: The Parse template to use.
"""
return parse_search(template, self.html)
def search_all(self, template: str) -> _Result:
"""Search the :class:`Element <Element>` (multiple times) for the given parse
template.
:param template: The Parse template to use.
"""
return [r for r in findall(template, self.html)]
@property
def links(self) -> _Links:
"""All found links on page, in asis form."""
def gen():
for link in self.find('a'):
try:
href = link.attrs['href'].strip()
if href and not (href.startswith('#') and self.skip_anchors) and not href.startswith('javascript:'):
yield href
except KeyError:
pass
return set(gen())
def _make_absolute(self, link):
"""Makes a given link absolute."""
# Parse the link with stdlib.
parsed = urlparse(link)._asdict()
# Appears to be a relative link:
if not parsed['netloc']:
parsed['netloc'] = urlparse(self.base_url).netloc
if not parsed['scheme']:
parsed['scheme'] = urlparse(self.base_url).scheme
# Re-construct URL, with new data.
parsed = (v for v in parsed.values())
return urlunparse(parsed)
@property
def absolute_links(self) -> _Links:
"""All found links on page, in absolute form
(`learn more <https://www.navegabem.com/absolute-or-relative-links.html>`_).
"""
def gen():
for link in self.links:
yield self._make_absolute(link)
return set(gen())
@property
def base_url(self) -> _URL:
"""The base URL for the page. Supports the ``<base>`` tag
(`learn more <https://www.w3schools.com/tags/tag_base.asp>`_)."""
# Support for <base> tag.
base = self.find('base', first=True)
if base:
result = base.attrs['href'].strip()
if result:
return result
url = '/'.join(self.url.split('/')[:-1])
if url.endswith('/'):
url = url[:-1]
return url
class Element(BaseParser):
"""An element of HTML.
:param element: The element from which to base the parsing upon.
:param url: The URL from which the HTML originated, used for ``absolute_links``.
:param default_encoding: Which encoding to default to.
"""
__slots__ = [
'element', 'url', 'skip_anchors', 'default_encoding', '_encoding',
'_encoding', '_html', '_lxml', '_pq', 'session'
]
def __init__(self, *, element, url: _URL, default_encoding: _DefaultEncoding = None) -> None:
super(Element, self).__init__(element=element, url=url, default_encoding=default_encoding)
self.element = element
def __repr__(self) -> str:
attrs = ['{}={}'.format(attr, repr(self.attrs[attr])) for attr in self.attrs]
return "<Element {} {}>".format(repr(self.element.tag), ' '.join(attrs))
@property
def attrs(self) -> _Attrs:
"""Returns a dictionary of the attributes of the :class:`Element <Element>`
(`learn more <https://www.w3schools.com/tags/ref_attributes.asp>`_).
"""
attrs = {k: v for k, v in self.element.items()}
# Split class and rel up, as there are ussually many of them:
for attr in ['class', 'rel']:
if attr in attrs:
attrs[attr] = tuple(attrs[attr].split())
return attrs
class HTML(BaseParser):
"""An HTML document, ready for parsing.
:param url: The URL from which the HTML originated, used for ``absolute_links``.
:param html: HTML from which to base the parsing upon (optional).
:param default_encoding: Which encoding to default to.
"""
def __init__(self, *, url: str = DEFAULT_URL, html: _HTML, default_encoding: str = DEFAULT_ENCODING) -> None:
# Convert incoming unicode HTML into bytes.
if isinstance(html, str):
html = html.encode(DEFAULT_ENCODING)
super(HTML, self).__init__(
# Convert unicode HTML to bytes.
element=PyQuery(html)('html') or PyQuery(f'<html>{html}</html>')('html'),
html=html,
url=url,
default_encoding=default_encoding
)
def __repr__(self) -> str:
return f"<HTML url={self.url!r}>"
def __iter__(self):
next = self
while True:
yield next
try:
next = next.next(fetch=True).html
except AttributeError:
break
def render(self, retries: int = 8, script: str = None, wait: float = 0.2, scrolldown=False, sleep: int = 0, reload: bool = True, timeout: Union[float, int] = 8.0):
"""Reloads the response in Chromium, and replaces HTML content
with an updated version, with JavaScript executed.
:param retries: The number of times to retry loading the page in Chromium.
:param script: JavaScript to execute upon page load (optional).
:param wait: The number of seconds to wait before loading the page, preventing timeouts (optional).
:param scrolldown: Integer, if provided, of how many times to page down.
:param sleep: Integer, if provided, of how many long to sleep after initial render.
:param reload: If ``False``, content will not be loaded from the browser, but will be provided from memory.
If ``scrolldown`` is specified, the page will scrolldown the specified
number of times, after sleeping the specified amount of time
(e.g. ``scrolldown=10, sleep=1``).
If just ``sleep`` is provided, the rendering will wait *n* seconds, before
returning.
If ``script`` is specified, it will execute the provided JavaScript at
runtime. Example:
.. code-block:: python
script = \"\"\"
() => {
return {
width: document.documentElement.clientWidth,
height: document.documentElement.clientHeight,
deviceScaleFactor: window.devicePixelRatio,
}
}
\"\"\"
Returns the return value of the executed ``script``, if any is provided:
.. code-block:: python
>>> r.html.render(script=script)
{'width': 800, 'height': 600, 'deviceScaleFactor': 1}
Warning: the first time you run this method, it will download
Chromium into your home directory (``~/.pyppeteer``).
"""
async def _async_render(*, url: str, script: str = None, scrolldown, sleep: int, wait: float, reload, content: Optional[str], timeout: Union[float, int]):
try:
browser = pyppeteer.launch(headless=True)
page = await browser.newPage()
# Wait before rendering the page, to prevent timeouts.
await asyncio.sleep(wait)
# Load the given page (GET request, obviously.)
if reload:
await page.goto(url, options={'timeout': int(timeout * 1000)})
else:
await page.goto(f'data:text/html,{self.html}', options={'timeout': int(timeout * 1000)})
result = None
if script:
result = await page.evaluate(script)
if scrolldown:
for _ in range(scrolldown):
await page._keyboard.down('PageDown')
await asyncio.sleep(sleep)
else:
await asyncio.sleep(sleep)
if scrolldown:
await page._keyboard.up('PageDown')
# Return the content of the page, JavaScript evaluated.
content = await page.content()
return content, result
except TimeoutError:
return None
loop = asyncio.get_event_loop()
content = None
# Automatically set Reload to False, if example URL is being used.
if self.url == DEFAULT_URL:
reload = False
for i in range(retries):
if not content:
try:
content, result = loop.run_until_complete(_async_render(url=self.url, script=script, sleep=sleep, wait=wait, content=self.html, reload=reload, scrolldown=scrolldown, timeout=timeout))
except TimeoutError:
pass
html = HTML(url=self.url, html=content.encode(DEFAULT_ENCODING), default_encoding=DEFAULT_ENCODING)
self.__dict__.update(html.__dict__)
return result
class HTMLResponse(requests.Response):
"""An HTML-enabled :class:`requests.Response <requests.Response>` object.
Effectively the same, but with an intelligent ``.html`` property added.
"""
def __init__(self) -> None:
super(HTMLResponse, self).__init__()
self._html = None # type: HTML
@property
def html(self) -> HTML:
if not self._html:
self._html = HTML(url=self.url, html=self.content, default_encoding=self.encoding)
return self._html
@classmethod
def _from_response(cls, response):
html_r = cls()
html_r.__dict__.update(response.__dict__)
return html_r
def user_agent(style=None) -> _UserAgent:
"""Returns an apparently legit user-agent, if not requested one of a specific
style. Defaults to a Chrome-style User-Agent.
"""
global useragent
if (not useragent) and style:
useragent = UserAgent()
return useragent[style] if style else DEFAULT_USER_AGENT
def _get_first_or_list(l, first=False):
if first:
try:
return l[0]
except IndexError:
return None
else:
return l
class HTMLSession(requests.Session):
"""A consumable session, for cookie persistence and connection pooling,
amongst other things.
"""
def __init__(self, mock_browser=True):
super(HTMLSession, self).__init__()
# Mock a web browser's user agent.
if mock_browser:
self.headers['User-Agent'] = user_agent()
self.hooks = {'response': self._handle_response}
@staticmethod
def _handle_response(response, **kwargs) -> HTMLResponse:
"""Requests HTTP Response handler. Attaches .html property to
class:`requests.Response <requests.Response>` objects.
"""
if not response.encoding:
response.encoding = DEFAULT_ENCODING
return response
def request(self, *args, **kwargs) -> HTMLResponse:
"""Makes an HTTP Request, with mocked UserAgent headers.
Returns a class:`HTTPResponse <HTTPResponse>`.
"""
# Convert Request object into HTTPRequest object.
r = super(HTMLSession, self).request(*args, **kwargs)
return HTMLResponse._from_response(r)
+108
View File
@@ -0,0 +1,108 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Note: To use the 'upload' functionality of this file, you must:
# $ pip install twine
import io
import os
import sys
from shutil import rmtree
from setuptools import setup, Command
# Package meta-data.
NAME = 'requests-html'
DESCRIPTION = 'HTML Parsing for Humans.'
URL = 'https://github.com/kennethreitz/requests-html'
EMAIL = 'me@kennethreitz.org'
AUTHOR = 'Kenneth Reitz'
VERSION = '0.8.0'
# What packages are required for this module to be executed?
REQUIRED = [
'requests', 'pyquery', 'fake-useragent', 'parse', 'bs4', 'w3lib', 'pyppeteer'
]
# The rest you shouldn't have to touch too much :)
# ------------------------------------------------
# Except, perhaps the License and Trove Classifiers!
# If you do change the License, remember to change the Trove Classifier for that!
here = os.path.abspath(os.path.dirname(__file__))
# Import the README and use it as the long-description.
# Note: this will only work if 'README.rst' is present in your MANIFEST.in file!
with io.open(os.path.join(here, 'README.rst'), encoding='utf-8') as f:
long_description = '\n' + f.read()
class UploadCommand(Command):
"""Support setup.py upload."""
description = 'Build and publish the package.'
user_options = []
@staticmethod
def status(s):
"""Prints things in bold."""
print('\033[1m{0}\033[0m'.format(s))
def initialize_options(self):
pass
def finalize_options(self):
pass
def run(self):
try:
self.status('Removing previous builds…')
rmtree(os.path.join(here, 'dist'))
except OSError:
pass
self.status('Building Source and Wheel (universal) distribution…')
os.system('{0} setup.py sdist bdist_wheel --universal'.format(sys.executable))
self.status('Uploading the package to PyPi via Twine…')
os.system('twine upload dist/*')
self.status('Publishing git tags…')
os.system('git tag v{0}'.format(VERSION))
os.system('git push --tags')
sys.exit()
# Where the magic happens:
setup(
name=NAME,
version=VERSION,
description=DESCRIPTION,
long_description=long_description,
author=AUTHOR,
author_email=EMAIL,
url=URL,
python_requires='>=3.6.0',
# If your package is a single module, use this instead of 'packages':
py_modules=['requests_html'],
# entry_points={
# 'console_scripts': ['mycli=mymodule:cli'],
# },
install_requires=REQUIRED,
include_package_data=True,
license='MIT',
classifiers=[
# Trove classifiers
# Full list: https://pypi.python.org/pypi?%3Aaction=list_classifiers
'License :: OSI Approved :: MIT License',
'Programming Language :: Python',
'Programming Language :: Python :: 3.6',
'Programming Language :: Python :: Implementation :: CPython',
'Programming Language :: Python :: Implementation :: PyPy'
],
# $ setup.py publish support.
cmdclass={
'upload': UploadCommand,
},
)
+1053
View File
File diff suppressed because it is too large Load Diff
+172
View File
@@ -0,0 +1,172 @@
import os
import pytest
from requests_html import HTMLSession, HTML
from requests_file import FileAdapter
session = HTMLSession()
session.mount('file://', FileAdapter())
def get():
path = os.path.sep.join((os.path.dirname(os.path.abspath(__file__)), 'python.html'))
url = 'file://{}'.format(path)
return session.get(url)
@pytest.mark.ok
def test_file_get():
r = get()
assert r.status_code == 200
@pytest.mark.ok
def test_class_seperation():
r = get()
about = r.html.find('#about', first=True)
assert len(about.attrs['class']) == 2
@pytest.mark.ok
def test_css_selector():
r = get()
about = r.html.find('#about', first=True)
for menu_item in (
'About', 'Applications', 'Quotes', 'Getting Started', 'Help',
'Python Brochure'
):
assert menu_item in about.text.split('\n')
assert menu_item in about.full_text.split('\n')
@pytest.mark.ok
def test_containing():
r = get()
python = r.html.find(containing='python')
assert len(python) == 191
for e in python:
assert 'python' in e.full_text.lower()
@pytest.mark.ok
def test_attrs():
r = get()
about = r.html.find('#about', first=True)
assert 'aria-haspopup' in about.attrs
assert len(about.attrs['class']) == 2
@pytest.mark.ok
def test_links():
r = get()
about = r.html.find('#about', first=True)
assert len(about.links) == 6
assert len(about.absolute_links) == 6
@pytest.mark.ok
def test_search():
r = get()
style = r.html.search('Python is a {} language')[0]
assert style == 'programming'
@pytest.mark.ok
def test_xpath():
r = get()
html = r.html.xpath('/html', first=True)
assert 'no-js' in html.attrs['class']
a_hrefs = r.html.xpath('//a/@href')
assert '#site-map' in a_hrefs
@pytest.mark.ok
def test_html_loading():
doc = """<a href='https://httpbin.org'>"""
html = HTML(html=doc)
assert 'https://httpbin.org' in html.links
assert isinstance(html.raw_html, bytes)
assert isinstance(html.html, str)
@pytest.mark.ok
def test_anchor_links():
r = get()
r.html.skip_anchors = False
assert '#site-map' in r.html.links
@pytest.mark.render
def test_render():
r = get()
script = """
() => {
return {
width: document.documentElement.clientWidth,
height: document.documentElement.clientHeight,
deviceScaleFactor: window.devicePixelRatio,
}
}
"""
val = r.html.render(script=script)
for value in ('width', 'height', 'deviceScaleFactor'):
assert value in val
about = r.html.find('#about', first=True)
assert len(about.links) == 6
@pytest.mark.render
def test_bare_render():
doc = """<a href='https://httpbin.org'>"""
html = HTML(html=doc)
script = """
() => {
return {
width: document.documentElement.clientWidth,
height: document.documentElement.clientHeight,
deviceScaleFactor: window.devicePixelRatio,
}
}
"""
val = html.render(script=script, reload=False)
for value in ('width', 'height', 'deviceScaleFactor'):
assert value in val
assert html.find('html')
assert 'https://httpbin.org' in html.links
@pytest.mark.render
def test_bare_js_eval():
doc = """
<!DOCTYPE html>
<html>
<body>
<div id="replace">This gets replaced</div>
<script type="text/javascript">
document.getElementById("replace").innerHTML = "yolo";
</script>
</body>
</html>
"""
html = HTML(html=doc)
html.render()
assert html.find('#replace', first=True).text == 'yolo'
if __name__ == '__main__':
test_containing()