Signed-off-by: Kenneth Reitz <me@kennethreitz.org>
This commit is contained in:
2018-02-27 08:27:27 -05:00
parent 6d6ddea08d
commit 9fe8c24344
7 changed files with 640 additions and 16 deletions
+1
View File
@@ -22,6 +22,7 @@ twine = "*"
requests-file = "*"
pytest = "*"
"e1839a8" = {path = ".", editable = true}
sphinx = "*"
[scripts]
Generated
+114 -4
View File
@@ -1,7 +1,7 @@
{
"_meta": {
"hash": {
"sha256": "c88fa0d21e44545a4b9ce13ff463f2da284a846029b315dbb8757057b70166a4"
"sha256": "f6a1a62089049c03d073d0255f1547245bfb4277c62c8df273afc3fcc4f5e127"
},
"host-environment-markers": {
"implementation_name": "cpython",
@@ -177,6 +177,13 @@
}
},
"develop": {
"alabaster": {
"hashes": [
"sha256:2eef172f44e8d301d25aff8068fddd65f767a3f04b5f15b0f4922f113aa1c732",
"sha256:37cdcb9e9954ed60912ebc1ca12a9d12178c26637abdf124e3cde2341c257fe0"
],
"version": "==0.7.10"
},
"attrs": {
"hashes": [
"sha256:a17a9573a6f475c99b551c0e0a812707ddda1ec9653bed04c13841404ed6f450",
@@ -184,6 +191,13 @@
],
"version": "==17.4.0"
},
"babel": {
"hashes": [
"sha256:ad209a68d7162c4cff4b29cdebe3dec4cef75492df501b0049a9433c96ce6f80",
"sha256:8ce4cb6fdd4393edd323227cba3a077bceb2a6ce5201c902c65e730046f41f14"
],
"version": "==2.5.3"
},
"beautifulsoup4": {
"hashes": [
"sha256:7015e76bf32f1f574636c4288399a6de66ce08fb7b2457f628a8d70c0fbabb11",
@@ -219,6 +233,14 @@
],
"version": "==1.0.3"
},
"docutils": {
"hashes": [
"sha256:7a4bd47eaf6596e1295ecb11361139febe29b084a87bf005bf899f9a42edc3c6",
"sha256:02aec4bd92ab067f6ff27a38a38a41173bf01bed8f89157768c1573f53e474a6",
"sha256:51e64ef2ebfb29cae1faa133b3710143496eca21c530f3f71424d77687764274"
],
"version": "==0.14"
},
"e1839a8": {
"editable": true,
"path": "."
@@ -236,6 +258,20 @@
],
"version": "==2.6"
},
"imagesize": {
"hashes": [
"sha256:3620cc0cadba3f7475f9940d22431fc4d407269f1be59ec9b8edcca26440cf18",
"sha256:5b326e4678b6925158ccc66a9fa3122b6106d7c876ee32d7de6ce59385b96315"
],
"version": "==1.0.0"
},
"jinja2": {
"hashes": [
"sha256:74c935a1b8bb9a3947c50a54766a969d4846290e1e788ea44c1392163723c3bd",
"sha256:f84be1bb0040caca4cea721fcbbbbd61f9be9464ca236387158b0feea01914a4"
],
"version": "==2.10"
},
"lxml": {
"hashes": [
"sha256:41f59cbdab232f11680d5d4dec9f2e6782fd24d78e37ee833447702e34e675f4",
@@ -269,6 +305,19 @@
],
"version": "==4.1.1"
},
"markupsafe": {
"hashes": [
"sha256:a6be69091dac236ea9c6bc7d012beab42010fa914c459791d627dad4910eb665"
],
"version": "==1.0"
},
"packaging": {
"hashes": [
"sha256:99276dc6e3a7851f32027a68f1095cd3f77c148091b092ea867a351811cfe388",
"sha256:5d50835fdf0a7edf0b55e311b7c887786504efea1177abd7e69329a8e5ea619e"
],
"version": "==16.8"
},
"parse": {
"hashes": [
"sha256:8048dde3f5ca07ad7ac7350460952d83b63eaacecdac1b37f45fd74870d849d2"
@@ -295,6 +344,25 @@
],
"version": "==1.5.2"
},
"pygments": {
"hashes": [
"sha256:78f3f434bcc5d6ee09020f92ba487f95ba50f1e3ef83ae96b9d5ffa1bab25c5d",
"sha256:dbae1046def0efb574852fab9e90209b23f556367b5a320c0bcb871c77c3e8cc"
],
"version": "==2.2.0"
},
"pyparsing": {
"hashes": [
"sha256:fee43f17a9c4087e7ed1605bd6df994c6173c1e977d7ade7b651292fab2bd010",
"sha256:0832bcf47acd283788593e7a0f542407bd9550a55a8a8435214a1960e04bcb04",
"sha256:9e8143a3e15c13713506886badd96ca4b579a87fbdf49e550dbfc057d6cb218e",
"sha256:281683241b25fe9b80ec9d66017485f6deff1af5cde372469134b56ca8447a07",
"sha256:b8b3117ed9bdf45e14dcc89345ce638ec7e0e29b2b579fa1ecf32ce45ebac8a5",
"sha256:8f1e18d3fd36c6795bb7e02a39fd05c611ffc2596c1e0d995d34d67630426c18",
"sha256:e4d45427c6e20a59bf4f88c639dcc03ce30d193112047f94012102f235853a58"
],
"version": "==2.2.0"
},
"pyquery": {
"hashes": [
"sha256:07987c2ed2aed5cba29ff18af95e56e9eb04a2249f42ce47bddfb37f487229a3",
@@ -309,6 +377,20 @@
],
"version": "==3.4.1"
},
"pytz": {
"hashes": [
"sha256:ed6509d9af298b7995d69a440e2822288f2eca1681b8cce37673dbb10091e5fe",
"sha256:f93ddcdd6342f94cea379c73cddb5724e0d6d0a1c91c9bdef364dc0368ba4fda",
"sha256:61242a9abc626379574a166dc0e96a66cd7c3b27fc10868003fa210be4bff1c9",
"sha256:ba18e6a243b3625513d85239b3e49055a2f0318466e0b8a92b8fb8ca7ccdf55f",
"sha256:07edfc3d4d2705a20a6e99d97f0c4b61c800b8232dc1c04d87e8554f130148dd",
"sha256:3a47ff71597f821cd84a162e71593004286e5be07a340fd462f0d33a760782b5",
"sha256:5bd55c744e6feaa4d599a6cbd8228b4f8f9ba96de2c38d56f08e534b3c9edf0d",
"sha256:887ab5e5b32e4d0c86efddd3d055c1f363cbaa583beb8da5e22d2fa2f64d51ef",
"sha256:410bcd1d6409026fbaa65d9ed33bf6dd8b1e94a499e32168acfc7b332e4095c0"
],
"version": "==2018.3"
},
"requests": {
"hashes": [
"sha256:6a1b267aa90cac58ac3a765d067950e7dbbf75b1da07e895d1f594193a40a38b",
@@ -337,12 +419,33 @@
],
"version": "==1.11.0"
},
"snowballstemmer": {
"hashes": [
"sha256:9f3bcd3c401c3e862ec0ebe6d2c069ebc012ce142cce209c098ccb5b09136e89",
"sha256:919f26a68b2c17a7634da993d91339e288964f93c274f1343e3bbbe2096e1128"
],
"version": "==1.2.1"
},
"sphinx": {
"hashes": [
"sha256:41ae26acc6130ccf6ed47e5cca73742b80d55a134f0ab897c479bba8d3640b8e",
"sha256:da987de5fcca21a4acc7f67a86a363039e67ac3e8827161e61b91deb131c0ee8"
],
"version": "==1.7.1"
},
"sphinxcontrib-websupport": {
"hashes": [
"sha256:f4932e95869599b89bf4f80fc3989132d83c9faa5bf633e7b5e0c25dffb75da2",
"sha256:7a85961326aa3a400cd4ad3c816d70ed6f7c740acd7ce5d78cd0a67825072eb9"
],
"version": "==1.0.1"
},
"tqdm": {
"hashes": [
"sha256:4c041f8019f7be65b8028ddde9a836f7ccc51c4637f1ff2ba9b5813d38d19d5a",
"sha256:df32e6f127dc0ccbc675eadb33f749abbcb8f174c5cb9ec49c0cdb73aa737377"
"sha256:f66468c14ccd011a627734c9b3fd72f20ce16f8faecc47384eb2507af5924fb9",
"sha256:5ec0d4442358e55cdb4a0471d04c6c831518fd8837f259db5537d90feab380df"
],
"version": "==4.19.5"
"version": "==4.19.6"
},
"twine": {
"hashes": [
@@ -357,6 +460,13 @@
"sha256:cc44da8e1145637334317feebd728bd869a35285b93cbb4cca2577da7e62db4f"
],
"version": "==1.22"
},
"w3lib": {
"hashes": [
"sha256:aaf7362464532b1036ab0092e2eee78e8fd7b56787baa9ed4967457b083d011b",
"sha256:55994787e93b411c2d659068b51b9998d9d0c05e0df188e6daf8f45836e1ea38"
],
"version": "==1.19.0"
}
}
}
+20
View File
@@ -0,0 +1,20 @@
# Minimal makefile for Sphinx documentation
#
# You can set these variables from the command line.
SPHINXOPTS =
SPHINXBUILD = sphinx-build
SPHINXPROJ = requests-html
SOURCEDIR = source
BUILDDIR = build
# Put it first so that "make" without argument is like "make help".
help:
@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
.PHONY: help Makefile
# Catch-all target: route all unknown targets to Sphinx using the new
# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
%: Makefile
@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
+36
View File
@@ -0,0 +1,36 @@
@ECHO OFF
pushd %~dp0
REM Command file for Sphinx documentation
if "%SPHINXBUILD%" == "" (
set SPHINXBUILD=sphinx-build
)
set SOURCEDIR=source
set BUILDDIR=build
set SPHINXPROJ=requests-html
if "%1" == "" goto help
%SPHINXBUILD% >NUL 2>NUL
if errorlevel 9009 (
echo.
echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
echo.installed, then set the SPHINXBUILD environment variable to point
echo.to the full path of the 'sphinx-build' executable. Alternatively you
echo.may add the Sphinx directory to PATH.
echo.
echo.If you don't have Sphinx installed, grab it from
echo.http://sphinx-doc.org/
exit /b 1
)
%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS%
goto end
:help
%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS%
:end
popd
+197
View File
@@ -0,0 +1,197 @@
# -*- coding: utf-8 -*-
#
# Configuration file for the Sphinx documentation builder.
#
# This file does only contain a selection of the most common options. For a
# full list see the documentation:
# http://www.sphinx-doc.org/en/stable/config
# -- Path setup --------------------------------------------------------------
# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
#
# import os
# import sys
# sys.path.insert(0, os.path.abspath('.'))
# -- Project information -----------------------------------------------------
import requests_html
project = 'requests-html'
copyright = '2018, Kenneth Reitz'
author = 'Kenneth Reitz'
# The short X.Y version
version = ''
# The full version, including alpha/beta/rc tags
release = 'v0.3.4'
# -- General configuration ---------------------------------------------------
# If your documentation needs a minimal Sphinx version, state it here.
#
# needs_sphinx = '1.0'
# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = [
'sphinx.ext.autodoc',
'sphinx.ext.doctest',
'sphinx.ext.intersphinx',
'sphinx.ext.todo',
'sphinx.ext.coverage',
'sphinx.ext.viewcode',
'sphinx.ext.githubpages',
]
# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']
# The suffix(es) of source filenames.
# You can specify multiple suffix as a list of string:
#
# source_suffix = ['.rst', '.md']
source_suffix = '.rst'
# The master toctree document.
master_doc = 'index'
# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
#
# This is also used if you do content translation via gettext catalogs.
# Usually you set "language" from the command line for these cases.
language = None
# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# This pattern also affects html_static_path and html_extra_path .
exclude_patterns = []
# The name of the Pygments (syntax highlighting) style to use.
pygments_style = 'sphinx'
# -- Options for HTML output -------------------------------------------------
# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
#
html_theme = 'alabaster'
# Theme options are theme-specific and customize the look and feel of a theme
# further. For a list of options available for each theme, see the
# documentation.
#
# html_theme_options = {}
# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ['_static']
# Custom sidebar templates, must be a dictionary that maps document names
# to template names.
#
# The default sidebars (for documents that don't match any pattern) are
# defined by theme itself. Builtin themes are using these templates by
# default: ``['localtoc.html', 'relations.html', 'sourcelink.html',
# 'searchbox.html']``.
#
# html_sidebars = {}
# -- Options for HTMLHelp output ---------------------------------------------
# Output file base name for HTML help builder.
htmlhelp_basename = 'requests-htmldoc'
# -- Options for LaTeX output ------------------------------------------------
latex_elements = {
# The paper size ('letterpaper' or 'a4paper').
#
# 'papersize': 'letterpaper',
# The font size ('10pt', '11pt' or '12pt').
#
# 'pointsize': '10pt',
# Additional stuff for the LaTeX preamble.
#
# 'preamble': '',
# Latex figure (float) alignment
#
# 'figure_align': 'htbp',
}
# Grouping the document tree into LaTeX files. List of tuples
# (source start file, target name, title,
# author, documentclass [howto, manual, or own class]).
latex_documents = [
(master_doc, 'requests-html.tex', 'requests-html Documentation',
'Kenneth Reitz', 'manual'),
]
# -- Options for manual page output ------------------------------------------
# One entry per manual page. List of tuples
# (source start file, name, description, authors, manual section).
man_pages = [
(master_doc, 'requests-html', 'requests-html Documentation',
[author], 1)
]
# -- Options for Texinfo output ----------------------------------------------
# Grouping the document tree into Texinfo files. List of tuples
# (source start file, target name, title, author,
# dir menu entry, description, category)
texinfo_documents = [
(master_doc, 'requests-html', 'requests-html Documentation',
author, 'requests-html', 'One line description of project.',
'Miscellaneous'),
]
# -- Options for Epub output -------------------------------------------------
# Bibliographic Dublin Core info.
epub_title = project
epub_author = author
epub_publisher = author
epub_copyright = copyright
# The unique identifier of the text. This can be a ISBN number
# or the project homepage.
#
# epub_identifier = ''
# A unique identification for the text.
#
# epub_uid = ''
# A list of files that should not be packed into the epub file.
epub_exclude_files = ['search.html']
# -- Extension configuration -------------------------------------------------
# -- Options for intersphinx extension ---------------------------------------
# Example configuration for intersphinx: refer to the Python standard library.
intersphinx_mapping = {'https://docs.python.org/': None}
# -- Options for todo extension ----------------------------------------------
# If true, `todo` and `todoList` produce output, else they produce nothing.
todo_include_todos = True
File diff suppressed because one or more lines are too long
+21 -12
View File
@@ -22,7 +22,10 @@ DEFAULT_ENCODING = 'utf-8'
useragent = UserAgent()
class HTMLResponse(requests.Response):
"""docstring for Response"""
"""An HTML-enabled :class:`Response <Response>` object.
Same as Requests class:`Response <Response>` object, but with an
intelligent ``.html`` property added.
"""
def __init__(self, *args, **kwargs):
super(HTMLResponse, self).__init__(*args, **kwargs)
@@ -57,6 +60,7 @@ class BaseParser:
@property
def html(self):
"""Unicode representation of the HTML content."""
if self._html:
return self._html
else:
@@ -64,10 +68,14 @@ class BaseParser:
@html.setter
def set_html(self, html):
"""Property setter for self.html."""
self._html = html
@property
def encoding(self):
"""The encoding string to be used, extracted from the HTML and
:class:`HTMLResponse <HTMLResponse>` headers.
"""
if self._encoding:
return self._encoding
@@ -79,7 +87,7 @@ class BaseParser:
@property
def pq(self):
"""PyQuery representation of the element."""
"""PyQuery representation of the :class:`Element <Element>` or :class:`HTML <HTML>`."""
return PyQuery(self.element)
@property
@@ -88,16 +96,16 @@ class BaseParser:
@property
def text(self):
"""The text content of the element."""
"""The text content of the :class:`Element <Element>` or :class:`HTML <HTML>`.."""
return self.pq.text()
@property
def full_text(self):
"""The full text content (including links) of the element."""
"""The full text content (including links) of the :class:`Element <Element>` or :class:`HTML <HTML>`.."""
return self.lxml.text_content()
def find(self, selector, first=False, _encoding=None):
"""Given a jQuery selector, returns a list of element objects."""
"""Given a jQuery selector, returns a list of :class:`Element <Element>` objects."""
def gen():
for found in self.pq(selector):
yield Element(element=found, url=self.url, default_encoding=_encoding or self.encoding)
@@ -113,7 +121,7 @@ class BaseParser:
return c
def xpath(self, selector, first=False, _encoding=None):
"""Given an XPath selector, returns a list of element objects."""
"""Given an XPath selector, returns a list of :class:`Element <Element>` objects."""
c = [Element(element=e, url=self.url, default_encoding=_encoding or self.encoding) for e in self.lxml.xpath(selector)]
if first:
try:
@@ -124,11 +132,11 @@ class BaseParser:
return c
def search(self, template):
"""Searches the element for the given parse template."""
"""Searches the :class:`Element <Element>` for the given parse template."""
return parse_search(template, self.html)
def search_all(self, template):
"""Searches the element (multiple times) for the given parse
"""Searches the :class:`Element <Element>` (multiple times) for the given parse
template.
"""
return [r for r in findall(template, self.html)]
@@ -172,7 +180,7 @@ class BaseParser:
@property
def base_url(self):
"""The base URL for the page."""
"""The base URL for the page. Supports the <base> tag."""
# Support for <base> tag.
base = self.find('base', first=True)
@@ -203,7 +211,7 @@ class Element(BaseParser):
@property
def attrs(self):
"""Returns a dictionary of the attributes of the element."""
"""Returns a dictionary of the attributes of the class:`Element <Element>`."""
attrs = {k: self.pq.attr[k].strip() for k in self.element.keys()}
# Split class up, as there are ussually many of them:
@@ -214,7 +222,7 @@ class Element(BaseParser):
class HTML(BaseParser):
"""An HTML document."""
"""An HTML document, ready for parsing."""
def __init__(self, *, url, html, default_encoding=DEFAULT_ENCODING):
super(HTML, self).__init__(
@@ -272,7 +280,8 @@ class HTMLSession(requests.Session):
class BrowserHTMLSession(HTMLSession):
"""A web-browser interpreted session (for JavaScript)."""
"""A web-browser interpreted session (for JavaScript), powered by
PyQt5's QWebEngineView."""
def __init__(self, *args, **kwargs):
super(BrowserHTMLSession, self).__init__(*args, **kwargs)