mirror of
https://github.com/kennethreitz/requests-html.git
synced 2026-06-05 23:00:20 +00:00
884 lines
102 KiB
HTML
884 lines
102 KiB
HTML
|
||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
  "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">

<html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en">
<head>
  <!-- Metadata and static assets for the requests_html source-listing page.
       Only elements may appear between the head's children: stray character
       data here makes an HTML parser end the head early and push the
       remaining link/meta elements into the body. -->
  <meta http-equiv="X-UA-Compatible" content="IE=Edge" />
  <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
  <title>requests_html — requests-HTML v0.3.4 documentation</title>

  <!-- Theme and syntax-highlighting styles. -->
  <link rel="stylesheet" href="../_static/alabaster.css" type="text/css" />
  <link rel="stylesheet" href="../_static/pygments.css" type="text/css" />

  <!-- Scripts stay synchronous and in this order; do not reorder. -->
  <script type="text/javascript" src="../_static/documentation_options.js"></script>
  <script type="text/javascript" src="../_static/jquery.js"></script>
  <script type="text/javascript" src="../_static/underscore.js"></script>
  <script type="text/javascript" src="../_static/doctools.js"></script>

  <link rel="index" title="Index" href="../genindex.html" />
  <link rel="search" title="Search" href="../search.html" />

  <!-- Loaded after the theme stylesheets so its rules win on equal specificity. -->
  <link rel="stylesheet" href="../_static/custom.css" type="text/css" />

  <!-- No maximum-scale: capping the scale prevents users from zooming in. -->
  <meta name="viewport" content="width=device-width, initial-scale=0.9" />
</head>
<body>
|
||
|
||
|
||
<div class="document">
|
||
<div class="documentwrapper">
|
||
<div class="bodywrapper">
|
||
<div class="body" role="main">
|
||
|
||
<h1>Source code for requests_html</h1><div class="highlight"><pre>
|
||
<span></span><span class="kn">import</span> <span class="nn">sys</span>
|
||
<span class="kn">import</span> <span class="nn">asyncio</span>
|
||
<span class="kn">from</span> <span class="nn">urllib.parse</span> <span class="k">import</span> <span class="n">urlparse</span><span class="p">,</span> <span class="n">urlunparse</span><span class="p">,</span> <span class="n">urljoin</span>
|
||
<span class="kn">from</span> <span class="nn">concurrent.futures</span> <span class="k">import</span> <span class="n">ThreadPoolExecutor</span>
|
||
<span class="kn">from</span> <span class="nn">concurrent.futures._base</span> <span class="k">import</span> <span class="ne">TimeoutError</span>
|
||
<span class="kn">from</span> <span class="nn">functools</span> <span class="k">import</span> <span class="n">partial</span>
|
||
<span class="kn">from</span> <span class="nn">typing</span> <span class="k">import</span> <span class="n">Set</span><span class="p">,</span> <span class="n">Union</span><span class="p">,</span> <span class="n">List</span><span class="p">,</span> <span class="n">MutableMapping</span><span class="p">,</span> <span class="n">Optional</span>
|
||
|
||
<span class="kn">import</span> <span class="nn">pyppeteer</span>
|
||
<span class="kn">import</span> <span class="nn">requests</span>
|
||
<span class="kn">from</span> <span class="nn">pyquery</span> <span class="k">import</span> <span class="n">PyQuery</span>
|
||
|
||
<span class="kn">from</span> <span class="nn">fake_useragent</span> <span class="k">import</span> <span class="n">UserAgent</span>
|
||
<span class="kn">from</span> <span class="nn">lxml.html.clean</span> <span class="k">import</span> <span class="n">Cleaner</span>
|
||
<span class="kn">import</span> <span class="nn">lxml</span>
|
||
<span class="kn">from</span> <span class="nn">lxml</span> <span class="k">import</span> <span class="n">etree</span>
|
||
<span class="kn">from</span> <span class="nn">lxml.html</span> <span class="k">import</span> <span class="n">HtmlElement</span>
|
||
<span class="kn">from</span> <span class="nn">lxml.html</span> <span class="k">import</span> <span class="n">tostring</span> <span class="k">as</span> <span class="n">lxml_html_tostring</span>
|
||
<span class="kn">from</span> <span class="nn">lxml.html.soupparser</span> <span class="k">import</span> <span class="n">fromstring</span> <span class="k">as</span> <span class="n">soup_parse</span>
|
||
<span class="kn">from</span> <span class="nn">parse</span> <span class="k">import</span> <span class="n">search</span> <span class="k">as</span> <span class="n">parse_search</span>
|
||
<span class="kn">from</span> <span class="nn">parse</span> <span class="k">import</span> <span class="n">findall</span><span class="p">,</span> <span class="n">Result</span>
|
||
<span class="kn">from</span> <span class="nn">w3lib.encoding</span> <span class="k">import</span> <span class="n">html_to_unicode</span>
|
||
|
||
<span class="n">DEFAULT_ENCODING</span> <span class="o">=</span> <span class="s1">'utf-8'</span>
|
||
<span class="n">DEFAULT_URL</span> <span class="o">=</span> <span class="s1">'https://example.org/'</span>
|
||
<span class="n">DEFAULT_USER_AGENT</span> <span class="o">=</span> <span class="s1">'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/603.3.8 (KHTML, like Gecko) Version/10.1.2 Safari/603.3.8'</span>
|
||
<span class="n">DEFAULT_NEXT_SYMBOL</span> <span class="o">=</span> <span class="p">[</span><span class="s1">'next'</span><span class="p">,</span> <span class="s1">'more'</span><span class="p">,</span> <span class="s1">'older'</span><span class="p">]</span>
|
||
|
||
<span class="n">cleaner</span> <span class="o">=</span> <span class="n">Cleaner</span><span class="p">()</span>
|
||
<span class="n">cleaner</span><span class="o">.</span><span class="n">javascript</span> <span class="o">=</span> <span class="kc">True</span>
|
||
<span class="n">cleaner</span><span class="o">.</span><span class="n">style</span> <span class="o">=</span> <span class="kc">True</span>
|
||
|
||
<span class="n">useragent</span> <span class="o">=</span> <span class="kc">None</span>
|
||
|
||
<span class="c1"># Typing.</span>
|
||
<span class="n">_Find</span> <span class="o">=</span> <span class="n">Union</span><span class="p">[</span><span class="n">List</span><span class="p">[</span><span class="s1">'Element'</span><span class="p">],</span> <span class="s1">'Element'</span><span class="p">]</span>
|
||
<span class="n">_XPath</span> <span class="o">=</span> <span class="n">Union</span><span class="p">[</span><span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">],</span> <span class="n">List</span><span class="p">[</span><span class="s1">'Element'</span><span class="p">],</span> <span class="nb">str</span><span class="p">,</span> <span class="s1">'Element'</span><span class="p">]</span>
|
||
<span class="n">_Result</span> <span class="o">=</span> <span class="n">Union</span><span class="p">[</span><span class="n">List</span><span class="p">[</span><span class="s1">'Result'</span><span class="p">],</span> <span class="s1">'Result'</span><span class="p">]</span>
|
||
<span class="n">_HTML</span> <span class="o">=</span> <span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="nb">bytes</span><span class="p">]</span>
|
||
<span class="n">_BaseHTML</span> <span class="o">=</span> <span class="nb">str</span>
|
||
<span class="n">_UserAgent</span> <span class="o">=</span> <span class="nb">str</span>
|
||
<span class="n">_DefaultEncoding</span> <span class="o">=</span> <span class="nb">str</span>
|
||
<span class="n">_URL</span> <span class="o">=</span> <span class="nb">str</span>
|
||
<span class="n">_RawHTML</span> <span class="o">=</span> <span class="nb">bytes</span>
|
||
<span class="n">_Encoding</span> <span class="o">=</span> <span class="nb">str</span>
|
||
<span class="n">_LXML</span> <span class="o">=</span> <span class="n">HtmlElement</span>
|
||
<span class="n">_Text</span> <span class="o">=</span> <span class="nb">str</span>
|
||
<span class="n">_Search</span> <span class="o">=</span> <span class="n">Result</span>
|
||
<span class="n">_Containing</span> <span class="o">=</span> <span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]]</span>
|
||
<span class="n">_Links</span> <span class="o">=</span> <span class="n">Set</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span>
|
||
<span class="n">_Attrs</span> <span class="o">=</span> <span class="n">MutableMapping</span>
|
||
<span class="n">_Next</span> <span class="o">=</span> <span class="n">Union</span><span class="p">[</span><span class="s1">'HTML'</span><span class="p">,</span> <span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]]</span>
|
||
<span class="n">_NextSymbol</span> <span class="o">=</span> <span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span>
|
||
|
||
<span class="c1"># Sanity checking.</span>
|
||
<span class="k">try</span><span class="p">:</span>
|
||
<span class="k">assert</span> <span class="n">sys</span><span class="o">.</span><span class="n">version_info</span><span class="o">.</span><span class="n">major</span> <span class="o">==</span> <span class="mi">3</span>
|
||
<span class="k">assert</span> <span class="n">sys</span><span class="o">.</span><span class="n">version_info</span><span class="o">.</span><span class="n">minor</span> <span class="o">></span> <span class="mi">5</span>
|
||
<span class="k">except</span> <span class="ne">AssertionError</span><span class="p">:</span>
|
||
<span class="k">raise</span> <span class="ne">RuntimeError</span><span class="p">(</span><span class="s1">'Requests-HTML requires Python 3.6+!'</span><span class="p">)</span>
|
||
|
||
|
||
<span class="k">class</span> <span class="nc">MaxRetries</span><span class="p">(</span><span class="ne">Exception</span><span class="p">):</span>
|
||
|
||
<span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">message</span><span class="p">):</span>
|
||
<span class="bp">self</span><span class="o">.</span><span class="n">message</span> <span class="o">=</span> <span class="n">message</span>
|
||
|
||
|
||
<span class="k">class</span> <span class="nc">BaseParser</span><span class="p">:</span>
|
||
<span class="sd">"""A basic HTML/Element Parser, for Humans.</span>
|
||
|
||
<span class="sd"> :param element: The element from which to base the parsing upon.</span>
|
||
<span class="sd"> :param default_encoding: Which encoding to default to.</span>
|
||
<span class="sd"> :param html: HTML from which to base the parsing upon (optional).</span>
|
||
<span class="sd"> :param url: The URL from which the HTML originated, used for ``absolute_links``.</span>
|
||
|
||
<span class="sd"> """</span>
|
||
|
||
<span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">*</span><span class="p">,</span> <span class="n">element</span><span class="p">,</span> <span class="n">default_encoding</span><span class="p">:</span> <span class="n">_DefaultEncoding</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> <span class="n">html</span><span class="p">:</span> <span class="n">_HTML</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> <span class="n">url</span><span class="p">:</span> <span class="n">_URL</span><span class="p">)</span> <span class="o">-></span> <span class="kc">None</span><span class="p">:</span>
|
||
<span class="bp">self</span><span class="o">.</span><span class="n">element</span> <span class="o">=</span> <span class="n">element</span>
|
||
<span class="bp">self</span><span class="o">.</span><span class="n">url</span> <span class="o">=</span> <span class="n">url</span>
|
||
<span class="bp">self</span><span class="o">.</span><span class="n">skip_anchors</span> <span class="o">=</span> <span class="kc">True</span>
|
||
<span class="bp">self</span><span class="o">.</span><span class="n">default_encoding</span> <span class="o">=</span> <span class="n">default_encoding</span>
|
||
<span class="bp">self</span><span class="o">.</span><span class="n">_encoding</span> <span class="o">=</span> <span class="kc">None</span>
|
||
<span class="bp">self</span><span class="o">.</span><span class="n">_html</span> <span class="o">=</span> <span class="n">html</span><span class="o">.</span><span class="n">encode</span><span class="p">(</span><span class="n">DEFAULT_ENCODING</span><span class="p">)</span> <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">html</span><span class="p">,</span> <span class="nb">str</span><span class="p">)</span> <span class="k">else</span> <span class="n">html</span>
|
||
<span class="bp">self</span><span class="o">.</span><span class="n">_lxml</span> <span class="o">=</span> <span class="kc">None</span>
|
||
<span class="bp">self</span><span class="o">.</span><span class="n">_pq</span> <span class="o">=</span> <span class="kc">None</span>
|
||
|
||
<span class="nd">@property</span>
|
||
<span class="k">def</span> <span class="nf">raw_html</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="n">_RawHTML</span><span class="p">:</span>
|
||
<span class="sd">"""Bytes representation of the HTML content.</span>
|
||
<span class="sd"> (`learn more &lt;http://www.diveintopython3.net/strings.html&gt;`_).</span>
|
||
<span class="sd"> """</span>
|
||
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">_html</span><span class="p">:</span>
|
||
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_html</span>
|
||
<span class="k">else</span><span class="p">:</span>
|
||
<span class="k">return</span> <span class="n">etree</span><span class="o">.</span><span class="n">tostring</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">element</span><span class="p">,</span> <span class="n">encoding</span><span class="o">=</span><span class="s1">'unicode'</span><span class="p">)</span><span class="o">.</span><span class="n">strip</span><span class="p">()</span><span class="o">.</span><span class="n">encode</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">encoding</span><span class="p">)</span>
|
||
|
||
<span class="nd">@property</span>
|
||
<span class="k">def</span> <span class="nf">html</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="n">_BaseHTML</span><span class="p">:</span>
|
||
<span class="sd">"""Unicode representation of the HTML content</span>
|
||
<span class="sd"> (`learn more &lt;http://www.diveintopython3.net/strings.html&gt;`_).</span>
|
||
<span class="sd"> """</span>
|
||
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">_html</span><span class="p">:</span>
|
||
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">raw_html</span><span class="o">.</span><span class="n">decode</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">encoding</span><span class="p">)</span>
|
||
<span class="k">else</span><span class="p">:</span>
|
||
<span class="k">return</span> <span class="n">etree</span><span class="o">.</span><span class="n">tostring</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">element</span><span class="p">,</span> <span class="n">encoding</span><span class="o">=</span><span class="s1">'unicode'</span><span class="p">)</span><span class="o">.</span><span class="n">strip</span><span class="p">()</span>
|
||
|
||
<span class="nd">@html</span><span class="o">.</span><span class="n">setter</span>
|
||
<span class="k">def</span> <span class="nf">html</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">html</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="kc">None</span><span class="p">:</span>
|
||
<span class="bp">self</span><span class="o">.</span><span class="n">_html</span> <span class="o">=</span> <span class="n">html</span><span class="o">.</span><span class="n">encode</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">encoding</span><span class="p">)</span>
|
||
|
||
<span class="nd">@raw_html</span><span class="o">.</span><span class="n">setter</span>
|
||
<span class="k">def</span> <span class="nf">raw_html</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">html</span><span class="p">:</span> <span class="nb">bytes</span><span class="p">)</span> <span class="o">-></span> <span class="kc">None</span><span class="p">:</span>
|
||
<span class="sd">"""Property setter for self.html."""</span>
|
||
<span class="bp">self</span><span class="o">.</span><span class="n">_html</span> <span class="o">=</span> <span class="n">html</span>
|
||
|
||
<span class="nd">@property</span>
|
||
<span class="k">def</span> <span class="nf">encoding</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="n">_Encoding</span><span class="p">:</span>
|
||
<span class="sd">"""The encoding string to be used, extracted from the HTML and</span>
|
||
<span class="sd"> :class:`HTMLResponse &lt;HTMLResponse&gt;` headers.</span>
|
||
<span class="sd"> """</span>
|
||
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">_encoding</span><span class="p">:</span>
|
||
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_encoding</span>
|
||
|
||
<span class="c1"># Scan meta tags for charset.</span>
|
||
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">_html</span><span class="p">:</span>
|
||
<span class="bp">self</span><span class="o">.</span><span class="n">_encoding</span> <span class="o">=</span> <span class="n">html_to_unicode</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">default_encoding</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">_html</span><span class="p">)[</span><span class="mi">0</span><span class="p">]</span>
|
||
<span class="c1"># Fall back to requests' detected encoding if decode fails.</span>
|
||
<span class="k">try</span><span class="p">:</span>
|
||
<span class="bp">self</span><span class="o">.</span><span class="n">raw_html</span><span class="o">.</span><span class="n">decode</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">encoding</span><span class="p">)</span>
|
||
<span class="k">except</span> <span class="ne">UnicodeDecodeError</span><span class="p">:</span>
|
||
<span class="bp">self</span><span class="o">.</span><span class="n">_encoding</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">default_encoding</span>
|
||
|
||
|
||
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_encoding</span> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">_encoding</span> <span class="k">else</span> <span class="bp">self</span><span class="o">.</span><span class="n">default_encoding</span>
|
||
|
||
<span class="nd">@encoding</span><span class="o">.</span><span class="n">setter</span>
|
||
<span class="k">def</span> <span class="nf">encoding</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">enc</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="kc">None</span><span class="p">:</span>
|
||
<span class="sd">"""Property setter for self.encoding."""</span>
|
||
<span class="bp">self</span><span class="o">.</span><span class="n">_encoding</span> <span class="o">=</span> <span class="n">enc</span>
|
||
|
||
<span class="nd">@property</span>
|
||
<span class="k">def</span> <span class="nf">pq</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="n">PyQuery</span><span class="p">:</span>
|
||
<span class="sd">"""`PyQuery &lt;https://pythonhosted.org/pyquery/&gt;`_ representation</span>
|
||
<span class="sd"> of the :class:`Element &lt;Element&gt;` or :class:`HTML &lt;HTML&gt;`.</span>
|
||
<span class="sd"> """</span>
|
||
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">_pq</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
|
||
<span class="bp">self</span><span class="o">.</span><span class="n">_pq</span> <span class="o">=</span> <span class="n">PyQuery</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">html</span><span class="p">)</span>
|
||
|
||
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_pq</span>
|
||
|
||
<span class="nd">@property</span>
|
||
<span class="k">def</span> <span class="nf">lxml</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="n">HtmlElement</span><span class="p">:</span>
|
||
<span class="sd">"""`lxml &lt;http://lxml.de&gt;`_ representation of the</span>
|
||
<span class="sd"> :class:`Element &lt;Element&gt;` or :class:`HTML &lt;HTML&gt;`.</span>
|
||
<span class="sd"> """</span>
|
||
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">_lxml</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
|
||
<span class="k">try</span><span class="p">:</span>
|
||
<span class="bp">self</span><span class="o">.</span><span class="n">_lxml</span> <span class="o">=</span> <span class="n">soup_parse</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">html</span><span class="p">,</span> <span class="n">features</span><span class="o">=</span><span class="s1">'html.parser'</span><span class="p">)</span>
|
||
<span class="k">except</span> <span class="ne">ValueError</span><span class="p">:</span>
|
||
<span class="bp">self</span><span class="o">.</span><span class="n">_lxml</span> <span class="o">=</span> <span class="n">lxml</span><span class="o">.</span><span class="n">html</span><span class="o">.</span><span class="n">fromstring</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">html</span><span class="p">)</span>
|
||
|
||
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_lxml</span>
|
||
|
||
<span class="nd">@property</span>
|
||
<span class="k">def</span> <span class="nf">text</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="n">_Text</span><span class="p">:</span>
|
||
<span class="sd">"""The text content of the</span>
|
||
<span class="sd"> :class:`Element &lt;Element&gt;` or :class:`HTML &lt;HTML&gt;`.</span>
|
||
<span class="sd"> """</span>
|
||
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">pq</span><span class="o">.</span><span class="n">text</span><span class="p">()</span>
|
||
|
||
<span class="nd">@property</span>
|
||
<span class="k">def</span> <span class="nf">full_text</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="n">_Text</span><span class="p">:</span>
|
||
<span class="sd">"""The full text content (including links) of the</span>
|
||
<span class="sd"> :class:`Element &lt;Element&gt;` or :class:`HTML &lt;HTML&gt;`.</span>
|
||
<span class="sd"> """</span>
|
||
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">lxml</span><span class="o">.</span><span class="n">text_content</span><span class="p">()</span>
|
||
|
||
<span class="k">def</span> <span class="nf">find</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">selector</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"*"</span><span class="p">,</span> <span class="o">*</span><span class="p">,</span> <span class="n">containing</span><span class="p">:</span> <span class="n">_Containing</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> <span class="n">clean</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">,</span> <span class="n">first</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">,</span> <span class="n">_encoding</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="kc">None</span><span class="p">)</span> <span class="o">-></span> <span class="n">_Find</span><span class="p">:</span>
|
||
<span class="sd">"""Given a CSS Selector, returns a list of</span>
|
||
<span class="sd"> :class:`Element &lt;Element&gt;` objects or a single one.</span>
|
||
|
||
<span class="sd"> :param selector: CSS Selector to use.</span>
|
||
<span class="sd"> :param clean: Whether or not to sanitize the found HTML of ``&lt;script&gt;`` and ``&lt;style&gt;`` tags.</span>
|
||
<span class="sd"> :param containing: If specified, only return elements that contain the provided text.</span>
|
||
<span class="sd"> :param first: Whether or not to return just the first result.</span>
|
||
<span class="sd"> :param _encoding: The encoding format.</span>
|
||
|
||
<span class="sd"> Example CSS Selectors:</span>
|
||
|
||
<span class="sd"> - ``a``</span>
|
||
<span class="sd"> - ``a.someClass``</span>
|
||
<span class="sd"> - ``a#someID``</span>
|
||
<span class="sd"> - ``a[target=_blank]``</span>
|
||
|
||
<span class="sd"> See W3School's `CSS Selectors Reference</span>
|
||
<span class="sd"> &lt;https://www.w3schools.com/cssref/css_selectors.asp&gt;`_</span>
|
||
<span class="sd"> for more details.</span>
|
||
|
||
<span class="sd"> If ``first`` is ``True``, only returns the first</span>
|
||
<span class="sd"> :class:`Element &lt;Element&gt;` found.</span>
|
||
<span class="sd"> """</span>
|
||
|
||
<span class="c1"># Convert a single containing into a list.</span>
|
||
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">containing</span><span class="p">,</span> <span class="nb">str</span><span class="p">):</span>
|
||
<span class="n">containing</span> <span class="o">=</span> <span class="p">[</span><span class="n">containing</span><span class="p">]</span>
|
||
|
||
<span class="n">encoding</span> <span class="o">=</span> <span class="n">_encoding</span> <span class="ow">or</span> <span class="bp">self</span><span class="o">.</span><span class="n">encoding</span>
|
||
<span class="n">elements</span> <span class="o">=</span> <span class="p">[</span>
|
||
<span class="n">Element</span><span class="p">(</span><span class="n">element</span><span class="o">=</span><span class="n">found</span><span class="p">,</span> <span class="n">url</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">url</span><span class="p">,</span> <span class="n">default_encoding</span><span class="o">=</span><span class="n">encoding</span><span class="p">)</span>
|
||
<span class="k">for</span> <span class="n">found</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">pq</span><span class="p">(</span><span class="n">selector</span><span class="p">)</span>
|
||
<span class="p">]</span>
|
||
|
||
<span class="k">if</span> <span class="n">containing</span><span class="p">:</span>
|
||
<span class="n">elements_copy</span> <span class="o">=</span> <span class="n">elements</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span>
|
||
<span class="n">elements</span> <span class="o">=</span> <span class="p">[]</span>
|
||
|
||
<span class="k">for</span> <span class="n">element</span> <span class="ow">in</span> <span class="n">elements_copy</span><span class="p">:</span>
|
||
<span class="k">if</span> <span class="nb">any</span><span class="p">([</span><span class="n">c</span><span class="o">.</span><span class="n">lower</span><span class="p">()</span> <span class="ow">in</span> <span class="n">element</span><span class="o">.</span><span class="n">full_text</span><span class="o">.</span><span class="n">lower</span><span class="p">()</span> <span class="k">for</span> <span class="n">c</span> <span class="ow">in</span> <span class="n">containing</span><span class="p">]):</span>
|
||
<span class="n">elements</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">element</span><span class="p">)</span>
|
||
|
||
<span class="n">elements</span><span class="o">.</span><span class="n">reverse</span><span class="p">()</span>
|
||
|
||
<span class="c1"># Sanitize the found HTML.</span>
|
||
<span class="k">if</span> <span class="n">clean</span><span class="p">:</span>
|
||
<span class="n">elements_copy</span> <span class="o">=</span> <span class="n">elements</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span>
|
||
<span class="n">elements</span> <span class="o">=</span> <span class="p">[]</span>
|
||
|
||
<span class="k">for</span> <span class="n">element</span> <span class="ow">in</span> <span class="n">elements_copy</span><span class="p">:</span>
|
||
<span class="n">element</span><span class="o">.</span><span class="n">raw_html</span> <span class="o">=</span> <span class="n">lxml_html_tostring</span><span class="p">(</span><span class="n">cleaner</span><span class="o">.</span><span class="n">clean_html</span><span class="p">(</span><span class="n">element</span><span class="o">.</span><span class="n">lxml</span><span class="p">))</span>
|
||
<span class="n">elements</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">element</span><span class="p">)</span>
|
||
|
||
<span class="k">return</span> <span class="n">_get_first_or_list</span><span class="p">(</span><span class="n">elements</span><span class="p">,</span> <span class="n">first</span><span class="p">)</span>
|
||
|
||
<span class="k">def</span> <span class="nf">xpath</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">selector</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="o">*</span><span class="p">,</span> <span class="n">clean</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">,</span> <span class="n">first</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">,</span> <span class="n">_encoding</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="kc">None</span><span class="p">)</span> <span class="o">-></span> <span class="n">_XPath</span><span class="p">:</span>
|
||
<span class="sd">"""Given an XPath selector, returns a list of</span>
|
||
<span class="sd"> :class:`Element &lt;Element&gt;` objects or a single one.</span>
|
||
|
||
<span class="sd"> :param selector: XPath Selector to use.</span>
|
||
<span class="sd"> :param clean: Whether or not to sanitize the found HTML of ``&lt;script&gt;`` and ``&lt;style&gt;`` tags.</span>
|
||
<span class="sd"> :param first: Whether or not to return just the first result.</span>
|
||
<span class="sd"> :param _encoding: The encoding format.</span>
|
||
|
||
<span class="sd"> If a sub-selector is specified (e.g. ``//a/@href``), a simple</span>
|
||
<span class="sd"> list of results is returned.</span>
|
||
|
||
<span class="sd"> See W3Schools' `XPath Examples</span>
|
||
<span class="sd"> <https://www.w3schools.com/xml/xpath_examples.asp>`_</span>
|
||
<span class="sd"> for more details.</span>
|
||
|
||
<span class="sd"> If ``first`` is ``True``, only returns the first</span>
|
||
<span class="sd"> :class:`Element <Element>` found.</span>
|
||
<span class="sd"> """</span>
|
||
<span class="n">selected</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">lxml</span><span class="o">.</span><span class="n">xpath</span><span class="p">(</span><span class="n">selector</span><span class="p">)</span>
|
||
|
||
<span class="n">elements</span> <span class="o">=</span> <span class="p">[</span>
|
||
<span class="n">Element</span><span class="p">(</span><span class="n">element</span><span class="o">=</span><span class="n">selection</span><span class="p">,</span> <span class="n">url</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">url</span><span class="p">,</span> <span class="n">default_encoding</span><span class="o">=</span><span class="n">_encoding</span> <span class="ow">or</span> <span class="bp">self</span><span class="o">.</span><span class="n">encoding</span><span class="p">)</span>
|
||
<span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">selection</span><span class="p">,</span> <span class="n">etree</span><span class="o">.</span><span class="n">_ElementUnicodeResult</span><span class="p">)</span> <span class="k">else</span> <span class="nb">str</span><span class="p">(</span><span class="n">selection</span><span class="p">)</span>
|
||
<span class="k">for</span> <span class="n">selection</span> <span class="ow">in</span> <span class="n">selected</span>
|
||
<span class="p">]</span>
|
||
|
||
<span class="c1"># Sanitize the found HTML.</span>
|
||
<span class="k">if</span> <span class="n">clean</span><span class="p">:</span>
|
||
<span class="n">elements_copy</span> <span class="o">=</span> <span class="n">elements</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span>
|
||
<span class="n">elements</span> <span class="o">=</span> <span class="p">[]</span>
|
||
|
||
<span class="k">for</span> <span class="n">element</span> <span class="ow">in</span> <span class="n">elements_copy</span><span class="p">:</span>
|
||
<span class="n">element</span><span class="o">.</span><span class="n">raw_html</span> <span class="o">=</span> <span class="n">lxml_html_tostring</span><span class="p">(</span><span class="n">cleaner</span><span class="o">.</span><span class="n">clean_html</span><span class="p">(</span><span class="n">element</span><span class="o">.</span><span class="n">lxml</span><span class="p">))</span>
|
||
<span class="n">elements</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">element</span><span class="p">)</span>
|
||
|
||
<span class="k">return</span> <span class="n">_get_first_or_list</span><span class="p">(</span><span class="n">elements</span><span class="p">,</span> <span class="n">first</span><span class="p">)</span>
|
||
|
||
<span class="k">def</span> <span class="nf">search</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">template</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="n">Result</span><span class="p">:</span>
|
||
<span class="sd">"""Search the :class:`Element <Element>` for the given Parse template.</span>
|
||
|
||
<span class="sd"> :param template: The Parse template to use.</span>
|
||
<span class="sd"> """</span>
|
||
|
||
<span class="k">return</span> <span class="n">parse_search</span><span class="p">(</span><span class="n">template</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">html</span><span class="p">)</span>
|
||
|
||
<span class="k">def</span> <span class="nf">search_all</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">template</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="n">_Result</span><span class="p">:</span>
|
||
<span class="sd">"""Search the :class:`Element <Element>` (multiple times) for the given parse</span>
|
||
<span class="sd"> template.</span>
|
||
|
||
<span class="sd"> :param template: The Parse template to use.</span>
|
||
<span class="sd"> """</span>
|
||
<span class="k">return</span> <span class="p">[</span><span class="n">r</span> <span class="k">for</span> <span class="n">r</span> <span class="ow">in</span> <span class="n">findall</span><span class="p">(</span><span class="n">template</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">html</span><span class="p">)]</span>
|
||
|
||
<span class="nd">@property</span>
|
||
<span class="k">def</span> <span class="nf">links</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="n">_Links</span><span class="p">:</span>
|
||
<span class="sd">"""All found links on page, in as-is form."""</span>
|
||
|
||
<span class="k">def</span> <span class="nf">gen</span><span class="p">():</span>
|
||
<span class="k">for</span> <span class="n">link</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">find</span><span class="p">(</span><span class="s1">'a'</span><span class="p">):</span>
|
||
|
||
<span class="k">try</span><span class="p">:</span>
|
||
<span class="n">href</span> <span class="o">=</span> <span class="n">link</span><span class="o">.</span><span class="n">attrs</span><span class="p">[</span><span class="s1">'href'</span><span class="p">]</span><span class="o">.</span><span class="n">strip</span><span class="p">()</span>
|
||
<span class="k">if</span> <span class="n">href</span> <span class="ow">and</span> <span class="ow">not</span> <span class="p">(</span><span class="n">href</span><span class="o">.</span><span class="n">startswith</span><span class="p">(</span><span class="s1">'#'</span><span class="p">)</span> <span class="ow">and</span> <span class="bp">self</span><span class="o">.</span><span class="n">skip_anchors</span><span class="p">)</span> <span class="ow">and</span> <span class="ow">not</span> <span class="n">href</span><span class="o">.</span><span class="n">startswith</span><span class="p">((</span><span class="s1">'javascript:'</span><span class="p">,</span> <span class="s1">'mailto:'</span><span class="p">)):</span>
|
||
<span class="k">yield</span> <span class="n">href</span>
|
||
<span class="k">except</span> <span class="ne">KeyError</span><span class="p">:</span>
|
||
<span class="k">pass</span>
|
||
|
||
<span class="k">return</span> <span class="nb">set</span><span class="p">(</span><span class="n">gen</span><span class="p">())</span>
|
||
|
||
<span class="k">def</span> <span class="nf">_make_absolute</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">link</span><span class="p">):</span>
|
||
<span class="sd">"""Makes a given link absolute."""</span>
|
||
|
||
<span class="c1"># Parse the link with stdlib.</span>
|
||
<span class="n">parsed</span> <span class="o">=</span> <span class="n">urlparse</span><span class="p">(</span><span class="n">link</span><span class="p">)</span><span class="o">.</span><span class="n">_asdict</span><span class="p">()</span>
|
||
|
||
<span class="c1"># If link is relative, then join it with base_url.</span>
|
||
<span class="k">if</span> <span class="ow">not</span> <span class="n">parsed</span><span class="p">[</span><span class="s1">'netloc'</span><span class="p">]:</span>
|
||
<span class="k">return</span> <span class="n">urljoin</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">base_url</span><span class="p">,</span> <span class="n">link</span><span class="p">)</span>
|
||
|
||
<span class="c1"># Link is absolute; if it lacks a scheme, add one from base_url.</span>
|
||
<span class="k">if</span> <span class="ow">not</span> <span class="n">parsed</span><span class="p">[</span><span class="s1">'scheme'</span><span class="p">]:</span>
|
||
<span class="n">parsed</span><span class="p">[</span><span class="s1">'scheme'</span><span class="p">]</span> <span class="o">=</span> <span class="n">urlparse</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">base_url</span><span class="p">)</span><span class="o">.</span><span class="n">scheme</span>
|
||
|
||
<span class="c1"># Reconstruct the URL to incorporate the new scheme.</span>
|
||
<span class="n">parsed</span> <span class="o">=</span> <span class="p">(</span><span class="n">v</span> <span class="k">for</span> <span class="n">v</span> <span class="ow">in</span> <span class="n">parsed</span><span class="o">.</span><span class="n">values</span><span class="p">())</span>
|
||
<span class="k">return</span> <span class="n">urlunparse</span><span class="p">(</span><span class="n">parsed</span><span class="p">)</span>
|
||
|
||
<span class="c1"># Link is absolute and complete with scheme; nothing to be done here.</span>
|
||
<span class="k">return</span> <span class="n">link</span>
|
||
|
||
|
||
<span class="nd">@property</span>
|
||
<span class="k">def</span> <span class="nf">absolute_links</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="n">_Links</span><span class="p">:</span>
|
||
<span class="sd">"""All found links on page, in absolute form</span>
|
||
<span class="sd"> (`learn more <https://www.navegabem.com/absolute-or-relative-links.html>`_).</span>
|
||
<span class="sd"> """</span>
|
||
|
||
<span class="k">def</span> <span class="nf">gen</span><span class="p">():</span>
|
||
<span class="k">for</span> <span class="n">link</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">links</span><span class="p">:</span>
|
||
<span class="k">yield</span> <span class="bp">self</span><span class="o">.</span><span class="n">_make_absolute</span><span class="p">(</span><span class="n">link</span><span class="p">)</span>
|
||
|
||
<span class="k">return</span> <span class="nb">set</span><span class="p">(</span><span class="n">gen</span><span class="p">())</span>
|
||
|
||
<span class="nd">@property</span>
|
||
<span class="k">def</span> <span class="nf">base_url</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="n">_URL</span><span class="p">:</span>
|
||
<span class="sd">"""The base URL for the page. Supports the ``<base>`` tag</span>
|
||
<span class="sd"> (`learn more <https://www.w3schools.com/tags/tag_base.asp>`_)."""</span>
|
||
|
||
<span class="c1"># Support for <base> tag.</span>
|
||
<span class="n">base</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">find</span><span class="p">(</span><span class="s1">'base'</span><span class="p">,</span> <span class="n">first</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
|
||
<span class="k">if</span> <span class="n">base</span><span class="p">:</span>
|
||
<span class="n">result</span> <span class="o">=</span> <span class="n">base</span><span class="o">.</span><span class="n">attrs</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'href'</span><span class="p">,</span> <span class="s1">''</span><span class="p">)</span><span class="o">.</span><span class="n">strip</span><span class="p">()</span>
|
||
<span class="k">if</span> <span class="n">result</span><span class="p">:</span>
|
||
<span class="k">return</span> <span class="n">result</span>
|
||
|
||
<span class="c1"># Parse the url to separate out the path</span>
|
||
<span class="n">parsed</span> <span class="o">=</span> <span class="n">urlparse</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">url</span><span class="p">)</span><span class="o">.</span><span class="n">_asdict</span><span class="p">()</span>
|
||
|
||
<span class="c1"># Remove any part of the path after the last '/'</span>
|
||
<span class="n">parsed</span><span class="p">[</span><span class="s1">'path'</span><span class="p">]</span> <span class="o">=</span> <span class="s1">'/'</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">parsed</span><span class="p">[</span><span class="s1">'path'</span><span class="p">]</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s1">'/'</span><span class="p">)[:</span><span class="o">-</span><span class="mi">1</span><span class="p">])</span> <span class="o">+</span> <span class="s1">'/'</span>
|
||
|
||
<span class="c1"># Reconstruct the url with the modified path</span>
|
||
<span class="n">parsed</span> <span class="o">=</span> <span class="p">(</span><span class="n">v</span> <span class="k">for</span> <span class="n">v</span> <span class="ow">in</span> <span class="n">parsed</span><span class="o">.</span><span class="n">values</span><span class="p">())</span>
|
||
<span class="n">url</span> <span class="o">=</span> <span class="n">urlunparse</span><span class="p">(</span><span class="n">parsed</span><span class="p">)</span>
|
||
|
||
<span class="k">return</span> <span class="n">url</span>
|
||
|
||
|
||
<div class="viewcode-block" id="Element"><a class="viewcode-back" href="../index.html#requests_html.Element">[docs]</a><span class="k">class</span> <span class="nc">Element</span><span class="p">(</span><span class="n">BaseParser</span><span class="p">):</span>
|
||
<span class="sd">"""An element of HTML.</span>
|
||
|
||
<span class="sd"> :param element: The element from which to base the parsing upon.</span>
|
||
<span class="sd"> :param url: The URL from which the HTML originated, used for ``absolute_links``.</span>
|
||
<span class="sd"> :param default_encoding: Which encoding to default to.</span>
|
||
<span class="sd"> """</span>
|
||
|
||
<span class="vm">__slots__</span> <span class="o">=</span> <span class="p">[</span>
|
||
<span class="s1">'element'</span><span class="p">,</span> <span class="s1">'url'</span><span class="p">,</span> <span class="s1">'skip_anchors'</span><span class="p">,</span> <span class="s1">'default_encoding'</span><span class="p">,</span> <span class="s1">'_encoding'</span><span class="p">,</span>
|
||
<span class="s1">'_html'</span><span class="p">,</span> <span class="s1">'_lxml'</span><span class="p">,</span> <span class="s1">'_pq'</span><span class="p">,</span> <span class="s1">'_attrs'</span><span class="p">,</span> <span class="s1">'session'</span>
|
||
<span class="p">]</span>
|
||
|
||
<span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">*</span><span class="p">,</span> <span class="n">element</span><span class="p">,</span> <span class="n">url</span><span class="p">:</span> <span class="n">_URL</span><span class="p">,</span> <span class="n">default_encoding</span><span class="p">:</span> <span class="n">_DefaultEncoding</span> <span class="o">=</span> <span class="kc">None</span><span class="p">)</span> <span class="o">-></span> <span class="kc">None</span><span class="p">:</span>
|
||
<span class="nb">super</span><span class="p">(</span><span class="n">Element</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="n">element</span><span class="o">=</span><span class="n">element</span><span class="p">,</span> <span class="n">url</span><span class="o">=</span><span class="n">url</span><span class="p">,</span> <span class="n">default_encoding</span><span class="o">=</span><span class="n">default_encoding</span><span class="p">)</span>
|
||
<span class="bp">self</span><span class="o">.</span><span class="n">element</span> <span class="o">=</span> <span class="n">element</span>
|
||
<span class="bp">self</span><span class="o">.</span><span class="n">_attrs</span> <span class="o">=</span> <span class="kc">None</span>
|
||
|
||
<span class="k">def</span> <span class="nf">__repr__</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">str</span><span class="p">:</span>
|
||
<span class="n">attrs</span> <span class="o">=</span> <span class="p">[</span><span class="s1">'</span><span class="si">{}</span><span class="s1">=</span><span class="si">{}</span><span class="s1">'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">attr</span><span class="p">,</span> <span class="nb">repr</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">attrs</span><span class="p">[</span><span class="n">attr</span><span class="p">]))</span> <span class="k">for</span> <span class="n">attr</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">attrs</span><span class="p">]</span>
|
||
<span class="k">return</span> <span class="s2">"<Element </span><span class="si">{}</span><span class="s2"> </span><span class="si">{}</span><span class="s2">>"</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="nb">repr</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">element</span><span class="o">.</span><span class="n">tag</span><span class="p">),</span> <span class="s1">' '</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">attrs</span><span class="p">))</span>
|
||
|
||
<span class="nd">@property</span>
|
||
<span class="k">def</span> <span class="nf">attrs</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="n">_Attrs</span><span class="p">:</span>
|
||
<span class="sd">"""Returns a dictionary of the attributes of the :class:`Element <Element>`</span>
|
||
<span class="sd"> (`learn more <https://www.w3schools.com/tags/ref_attributes.asp>`_).</span>
|
||
<span class="sd"> """</span>
|
||
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">_attrs</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
|
||
<span class="bp">self</span><span class="o">.</span><span class="n">_attrs</span> <span class="o">=</span> <span class="p">{</span><span class="n">k</span><span class="p">:</span> <span class="n">v</span> <span class="k">for</span> <span class="n">k</span><span class="p">,</span> <span class="n">v</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">element</span><span class="o">.</span><span class="n">items</span><span class="p">()}</span>
|
||
|
||
<span class="c1"># Split class and rel up, as there are usually many of them:</span>
|
||
<span class="k">for</span> <span class="n">attr</span> <span class="ow">in</span> <span class="p">[</span><span class="s1">'class'</span><span class="p">,</span> <span class="s1">'rel'</span><span class="p">]:</span>
|
||
<span class="k">if</span> <span class="n">attr</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">_attrs</span><span class="p">:</span>
|
||
<span class="bp">self</span><span class="o">.</span><span class="n">_attrs</span><span class="p">[</span><span class="n">attr</span><span class="p">]</span> <span class="o">=</span> <span class="nb">tuple</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_attrs</span><span class="p">[</span><span class="n">attr</span><span class="p">]</span><span class="o">.</span><span class="n">split</span><span class="p">())</span>
|
||
</div>
|
||
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_attrs</span>
|
||
|
||
|
||
<div class="viewcode-block" id="HTML"><a class="viewcode-back" href="../index.html#requests_html.HTML">[docs]</a><span class="k">class</span> <span class="nc">HTML</span><span class="p">(</span><span class="n">BaseParser</span><span class="p">):</span>
|
||
<span class="sd">"""An HTML document, ready for parsing.</span>
|
||
|
||
<span class="sd"> :param url: The URL from which the HTML originated, used for ``absolute_links``.</span>
|
||
<span class="sd"> :param html: HTML from which to base the parsing upon (optional).</span>
|
||
<span class="sd"> :param default_encoding: Which encoding to default to.</span>
|
||
<span class="sd"> """</span>
|
||
|
||
<span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">*</span><span class="p">,</span> <span class="n">session</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="s1">'HTTPSession'</span><span class="p">,</span> <span class="s1">'AsyncHTMLSession'</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> <span class="n">url</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="n">DEFAULT_URL</span><span class="p">,</span> <span class="n">html</span><span class="p">:</span> <span class="n">_HTML</span><span class="p">,</span> <span class="n">default_encoding</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="n">DEFAULT_ENCODING</span><span class="p">)</span> <span class="o">-></span> <span class="kc">None</span><span class="p">:</span>
|
||
|
||
<span class="c1"># Convert incoming unicode HTML into bytes.</span>
|
||
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">html</span><span class="p">,</span> <span class="nb">str</span><span class="p">):</span>
|
||
<span class="n">html</span> <span class="o">=</span> <span class="n">html</span><span class="o">.</span><span class="n">encode</span><span class="p">(</span><span class="n">DEFAULT_ENCODING</span><span class="p">)</span>
|
||
|
||
<span class="nb">super</span><span class="p">(</span><span class="n">HTML</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span>
|
||
<span class="c1"># Convert unicode HTML to bytes.</span>
|
||
<span class="n">element</span><span class="o">=</span><span class="n">PyQuery</span><span class="p">(</span><span class="n">html</span><span class="p">)(</span><span class="s1">'html'</span><span class="p">)</span> <span class="ow">or</span> <span class="n">PyQuery</span><span class="p">(</span><span class="n">f</span><span class="s1">'<html></span><span class="si">{html}</span><span class="s1"></html>'</span><span class="p">)(</span><span class="s1">'html'</span><span class="p">),</span>
|
||
<span class="n">html</span><span class="o">=</span><span class="n">html</span><span class="p">,</span>
|
||
<span class="n">url</span><span class="o">=</span><span class="n">url</span><span class="p">,</span>
|
||
<span class="n">default_encoding</span><span class="o">=</span><span class="n">default_encoding</span>
|
||
<span class="p">)</span>
|
||
<span class="bp">self</span><span class="o">.</span><span class="n">session</span> <span class="o">=</span> <span class="n">session</span> <span class="ow">or</span> <span class="n">HTMLSession</span><span class="p">()</span>
|
||
<span class="bp">self</span><span class="o">.</span><span class="n">page</span> <span class="o">=</span> <span class="kc">None</span>
|
||
<span class="bp">self</span><span class="o">.</span><span class="n">next_symbol</span> <span class="o">=</span> <span class="n">DEFAULT_NEXT_SYMBOL</span>
|
||
|
||
<span class="k">def</span> <span class="nf">__repr__</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">str</span><span class="p">:</span>
|
||
<span class="k">return</span> <span class="n">f</span><span class="s2">"<HTML url=</span><span class="si">{self.url!r}</span><span class="s2">>"</span>
|
||
|
||
<span class="k">def</span> <span class="nf">_next</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">fetch</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">,</span> <span class="n">next_symbol</span><span class="p">:</span> <span class="n">_NextSymbol</span> <span class="o">=</span> <span class="n">DEFAULT_NEXT_SYMBOL</span><span class="p">)</span> <span class="o">-></span> <span class="n">_Next</span><span class="p">:</span>
|
||
<span class="sd">"""Attempts to find the next page, if there is one. If ``fetch``</span>
|
||
<span class="sd"> is ``True``, returns :class:`HTML <HTML>` object of</span>
|
||
<span class="sd"> next page. If ``fetch`` is ``False`` (default), simply returns the next URL.</span>
|
||
|
||
<span class="sd"> """</span>
|
||
|
||
<span class="k">def</span> <span class="nf">get_next</span><span class="p">():</span>
|
||
<span class="n">candidates</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">find</span><span class="p">(</span><span class="s1">'a'</span><span class="p">,</span> <span class="n">containing</span><span class="o">=</span><span class="n">next_symbol</span><span class="p">)</span>
|
||
|
||
<span class="k">for</span> <span class="n">candidate</span> <span class="ow">in</span> <span class="n">candidates</span><span class="p">:</span>
|
||
<span class="k">if</span> <span class="n">candidate</span><span class="o">.</span><span class="n">attrs</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'href'</span><span class="p">):</span>
|
||
<span class="c1"># Support 'next' rel (e.g. reddit).</span>
|
||
<span class="k">if</span> <span class="s1">'next'</span> <span class="ow">in</span> <span class="n">candidate</span><span class="o">.</span><span class="n">attrs</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'rel'</span><span class="p">,</span> <span class="p">[]):</span>
|
||
<span class="k">return</span> <span class="n">candidate</span><span class="o">.</span><span class="n">attrs</span><span class="p">[</span><span class="s1">'href'</span><span class="p">]</span>
|
||
|
||
<span class="c1"># Support 'next' in classnames.</span>
|
||
<span class="k">for</span> <span class="n">_class</span> <span class="ow">in</span> <span class="n">candidate</span><span class="o">.</span><span class="n">attrs</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'class'</span><span class="p">,</span> <span class="p">[]):</span>
|
||
<span class="k">if</span> <span class="s1">'next'</span> <span class="ow">in</span> <span class="n">_class</span><span class="p">:</span>
|
||
<span class="k">return</span> <span class="n">candidate</span><span class="o">.</span><span class="n">attrs</span><span class="p">[</span><span class="s1">'href'</span><span class="p">]</span>
|
||
|
||
<span class="k">if</span> <span class="s1">'page'</span> <span class="ow">in</span> <span class="n">candidate</span><span class="o">.</span><span class="n">attrs</span><span class="p">[</span><span class="s1">'href'</span><span class="p">]:</span>
|
||
<span class="k">return</span> <span class="n">candidate</span><span class="o">.</span><span class="n">attrs</span><span class="p">[</span><span class="s1">'href'</span><span class="p">]</span>
|
||
|
||
<span class="k">try</span><span class="p">:</span>
|
||
<span class="c1"># Resort to the last candidate.</span>
|
||
<span class="k">return</span> <span class="n">candidates</span><span class="p">[</span><span class="o">-</span><span class="mi">1</span><span class="p">]</span><span class="o">.</span><span class="n">attrs</span><span class="p">[</span><span class="s1">'href'</span><span class="p">]</span>
|
||
<span class="k">except</span> <span class="ne">IndexError</span><span class="p">:</span>
|
||
<span class="k">return</span> <span class="kc">None</span>
|
||
|
||
<span class="n">__next</span> <span class="o">=</span> <span class="n">get_next</span><span class="p">()</span>
|
||
<span class="k">if</span> <span class="n">__next</span><span class="p">:</span>
|
||
<span class="n">url</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_make_absolute</span><span class="p">(</span><span class="n">__next</span><span class="p">)</span>
|
||
<span class="k">else</span><span class="p">:</span>
|
||
<span class="k">return</span> <span class="kc">None</span>
|
||
|
||
<span class="k">if</span> <span class="n">fetch</span><span class="p">:</span>
|
||
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">session</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">url</span><span class="p">)</span>
|
||
<span class="k">else</span><span class="p">:</span>
|
||
<span class="k">return</span> <span class="n">url</span>
|
||
|
||
<span class="k">def</span> <span class="nf">__iter__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||
|
||
<span class="nb">next</span> <span class="o">=</span> <span class="bp">self</span>
|
||
|
||
<span class="k">while</span> <span class="kc">True</span><span class="p">:</span>
|
||
<span class="k">yield</span> <span class="nb">next</span>
|
||
<span class="k">try</span><span class="p">:</span>
|
||
<span class="nb">next</span> <span class="o">=</span> <span class="nb">next</span><span class="o">.</span><span class="n">_next</span><span class="p">(</span><span class="n">fetch</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span> <span class="n">next_symbol</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">next_symbol</span><span class="p">)</span><span class="o">.</span><span class="n">html</span>
|
||
<span class="k">except</span> <span class="ne">AttributeError</span><span class="p">:</span>
|
||
<span class="k">break</span>
|
||
|
||
<span class="k">def</span> <span class="nf">__next__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_next</span><span class="p">(</span><span class="n">fetch</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span> <span class="n">next_symbol</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">next_symbol</span><span class="p">)</span><span class="o">.</span><span class="n">html</span>
|
||
|
||
<span class="k">def</span> <span class="nf">add_next_symbol</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">next_symbol</span><span class="p">):</span>
|
||
<span class="bp">self</span><span class="o">.</span><span class="n">next_symbol</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">next_symbol</span><span class="p">)</span>
|
||
|
||
<div class="viewcode-block" id="HTML.render"><a class="viewcode-back" href="../index.html#requests_html.HTML.render">[docs]</a> <span class="k">def</span> <span class="nf">render</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">retries</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">8</span><span class="p">,</span> <span class="n">script</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> <span class="n">wait</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">0.2</span><span class="p">,</span> <span class="n">scrolldown</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span> <span class="n">sleep</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">0</span><span class="p">,</span> <span class="n">reload</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">,</span> <span class="n">timeout</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="nb">float</span><span class="p">,</span> <span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="mf">8.0</span><span class="p">,</span> <span class="n">keep_page</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">):</span>
|
||
<span class="sd">"""Reloads the response in Chromium, and replaces HTML content</span>
|
||
<span class="sd"> with an updated version, with JavaScript executed.</span>
|
||
|
||
<span class="sd"> :param retries: The number of times to retry loading the page in Chromium.</span>
|
||
<span class="sd"> :param script: JavaScript to execute upon page load (optional).</span>
|
||
<span class="sd"> :param wait: The number of seconds to wait before loading the page, preventing timeouts (optional).</span>
|
||
<span class="sd"> :param scrolldown: Integer, if provided, of how many times to page down.</span>
|
||
<span class="sd">        :param sleep: Integer, if provided, of how many seconds to sleep after initial render.</span>
|
||
<span class="sd"> :param reload: If ``False``, content will not be loaded from the browser, but will be provided from memory.</span>
|
||
<span class="sd"> :param keep_page: If ``True`` will allow you to interact with the browser page through ``r.html.page``.</span>
|
||
|
||
<span class="sd"> If ``scrolldown`` is specified, the page will scrolldown the specified</span>
|
||
<span class="sd"> number of times, after sleeping the specified amount of time</span>
|
||
<span class="sd"> (e.g. ``scrolldown=10, sleep=1``).</span>
|
||
|
||
<span class="sd"> If just ``sleep`` is provided, the rendering will wait *n* seconds, before</span>
|
||
<span class="sd"> returning.</span>
|
||
|
||
<span class="sd"> If ``script`` is specified, it will execute the provided JavaScript at</span>
|
||
<span class="sd"> runtime. Example:</span>
|
||
|
||
<span class="sd"> .. code-block:: python</span>
|
||
|
||
<span class="sd"> script = \"\"\"</span>
|
||
<span class="sd"> () => {</span>
|
||
<span class="sd"> return {</span>
|
||
<span class="sd"> width: document.documentElement.clientWidth,</span>
|
||
<span class="sd"> height: document.documentElement.clientHeight,</span>
|
||
<span class="sd"> deviceScaleFactor: window.devicePixelRatio,</span>
|
||
<span class="sd"> }</span>
|
||
<span class="sd"> }</span>
|
||
<span class="sd"> \"\"\"</span>
|
||
|
||
<span class="sd"> Returns the return value of the executed ``script``, if any is provided:</span>
|
||
|
||
<span class="sd"> .. code-block:: python</span>
|
||
|
||
<span class="sd"> >>> r.html.render(script=script)</span>
|
||
<span class="sd"> {'width': 800, 'height': 600, 'deviceScaleFactor': 1}</span>
|
||
|
||
<span class="sd">        Warning: If you use keep_page, you're responsible for closing each page, since</span>
|
||
<span class="sd">        opening too many at scale may crash the browser.</span>
|
||
|
||
<span class="sd"> Warning: the first time you run this method, it will download</span>
|
||
<span class="sd"> Chromium into your home directory (``~/.pyppeteer``).</span>
|
||
<span class="sd"> """</span>
|
||
<span class="k">async</span> <span class="k">def</span> <span class="nf">_async_render</span><span class="p">(</span><span class="o">*</span><span class="p">,</span> <span class="n">url</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="n">script</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> <span class="n">scrolldown</span><span class="p">,</span> <span class="n">sleep</span><span class="p">:</span> <span class="nb">int</span><span class="p">,</span> <span class="n">wait</span><span class="p">:</span> <span class="nb">float</span><span class="p">,</span> <span class="n">reload</span><span class="p">,</span> <span class="n">content</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">],</span> <span class="n">timeout</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="nb">float</span><span class="p">,</span> <span class="nb">int</span><span class="p">],</span> <span class="n">keep_page</span><span class="p">:</span> <span class="nb">bool</span><span class="p">):</span>
|
||
<span class="k">try</span><span class="p">:</span>
|
||
<span class="n">page</span> <span class="o">=</span> <span class="k">await</span> <span class="bp">self</span><span class="o">.</span><span class="n">session</span><span class="o">.</span><span class="n">browser</span><span class="o">.</span><span class="n">newPage</span><span class="p">()</span>
|
||
|
||
<span class="c1"># Wait before rendering the page, to prevent timeouts.</span>
|
||
<span class="k">await</span> <span class="n">asyncio</span><span class="o">.</span><span class="n">sleep</span><span class="p">(</span><span class="n">wait</span><span class="p">)</span>
|
||
|
||
<span class="c1"># Load the given page (GET request, obviously.)</span>
|
||
<span class="k">if</span> <span class="n">reload</span><span class="p">:</span>
|
||
<span class="k">await</span> <span class="n">page</span><span class="o">.</span><span class="n">goto</span><span class="p">(</span><span class="n">url</span><span class="p">,</span> <span class="n">options</span><span class="o">=</span><span class="p">{</span><span class="s1">'timeout'</span><span class="p">:</span> <span class="nb">int</span><span class="p">(</span><span class="n">timeout</span> <span class="o">*</span> <span class="mi">1000</span><span class="p">)})</span>
|
||
<span class="k">else</span><span class="p">:</span>
|
||
<span class="k">await</span> <span class="n">page</span><span class="o">.</span><span class="n">goto</span><span class="p">(</span><span class="n">f</span><span class="s1">'data:text/html,</span><span class="si">{self.html}</span><span class="s1">'</span><span class="p">,</span> <span class="n">options</span><span class="o">=</span><span class="p">{</span><span class="s1">'timeout'</span><span class="p">:</span> <span class="nb">int</span><span class="p">(</span><span class="n">timeout</span> <span class="o">*</span> <span class="mi">1000</span><span class="p">)})</span>
|
||
|
||
<span class="n">result</span> <span class="o">=</span> <span class="kc">None</span>
|
||
<span class="k">if</span> <span class="n">script</span><span class="p">:</span>
|
||
<span class="n">result</span> <span class="o">=</span> <span class="k">await</span> <span class="n">page</span><span class="o">.</span><span class="n">evaluate</span><span class="p">(</span><span class="n">script</span><span class="p">)</span>
|
||
|
||
<span class="k">if</span> <span class="n">scrolldown</span><span class="p">:</span>
|
||
<span class="k">for</span> <span class="n">_</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">scrolldown</span><span class="p">):</span>
|
||
<span class="k">await</span> <span class="n">page</span><span class="o">.</span><span class="n">_keyboard</span><span class="o">.</span><span class="n">down</span><span class="p">(</span><span class="s1">'PageDown'</span><span class="p">)</span>
|
||
<span class="k">await</span> <span class="n">asyncio</span><span class="o">.</span><span class="n">sleep</span><span class="p">(</span><span class="n">sleep</span><span class="p">)</span>
|
||
<span class="k">else</span><span class="p">:</span>
|
||
<span class="k">await</span> <span class="n">asyncio</span><span class="o">.</span><span class="n">sleep</span><span class="p">(</span><span class="n">sleep</span><span class="p">)</span>
|
||
|
||
<span class="k">if</span> <span class="n">scrolldown</span><span class="p">:</span>
|
||
<span class="k">await</span> <span class="n">page</span><span class="o">.</span><span class="n">_keyboard</span><span class="o">.</span><span class="n">up</span><span class="p">(</span><span class="s1">'PageDown'</span><span class="p">)</span>
|
||
|
||
<span class="c1"># Return the content of the page, JavaScript evaluated.</span>
|
||
<span class="n">content</span> <span class="o">=</span> <span class="k">await</span> <span class="n">page</span><span class="o">.</span><span class="n">content</span><span class="p">()</span>
|
||
<span class="k">if</span> <span class="ow">not</span> <span class="n">keep_page</span><span class="p">:</span>
|
||
<span class="k">await</span> <span class="n">page</span><span class="o">.</span><span class="n">close</span><span class="p">()</span>
|
||
<span class="n">page</span> <span class="o">=</span> <span class="kc">None</span>
|
||
<span class="k">return</span> <span class="n">content</span><span class="p">,</span> <span class="n">result</span><span class="p">,</span> <span class="n">page</span>
|
||
<span class="k">except</span> <span class="ne">TimeoutError</span><span class="p">:</span>
|
||
<span class="k">return</span> <span class="kc">None</span>
|
||
|
||
<span class="bp">self</span><span class="o">.</span><span class="n">session</span><span class="o">.</span><span class="n">browser</span>  <span class="c1"># Automatically create an event loop and browser</span>
|
||
<span class="n">content</span> <span class="o">=</span> <span class="kc">None</span>
|
||
|
||
<span class="c1"># Automatically set Reload to False, if example URL is being used.</span>
|
||
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">url</span> <span class="o">==</span> <span class="n">DEFAULT_URL</span><span class="p">:</span>
|
||
<span class="n">reload</span> <span class="o">=</span> <span class="kc">False</span>
|
||
|
||
<span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">retries</span><span class="p">):</span>
|
||
<span class="k">if</span> <span class="ow">not</span> <span class="n">content</span><span class="p">:</span>
|
||
<span class="k">try</span><span class="p">:</span>
|
||
|
||
<span class="n">content</span><span class="p">,</span> <span class="n">result</span><span class="p">,</span> <span class="n">page</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">session</span><span class="o">.</span><span class="n">loop</span><span class="o">.</span><span class="n">run_until_complete</span><span class="p">(</span><span class="n">_async_render</span><span class="p">(</span><span class="n">url</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">url</span><span class="p">,</span> <span class="n">script</span><span class="o">=</span><span class="n">script</span><span class="p">,</span> <span class="n">sleep</span><span class="o">=</span><span class="n">sleep</span><span class="p">,</span> <span class="n">wait</span><span class="o">=</span><span class="n">wait</span><span class="p">,</span> <span class="n">content</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">html</span><span class="p">,</span> <span class="n">reload</span><span class="o">=</span><span class="n">reload</span><span class="p">,</span> <span class="n">scrolldown</span><span class="o">=</span><span class="n">scrolldown</span><span class="p">,</span> <span class="n">timeout</span><span class="o">=</span><span class="n">timeout</span><span class="p">,</span> <span class="n">keep_page</span><span class="o">=</span><span class="n">keep_page</span><span class="p">))</span>
|
||
<span class="k">except</span> <span class="ne">TypeError</span><span class="p">:</span>
|
||
<span class="k">pass</span>
|
||
<span class="k">else</span><span class="p">:</span>
|
||
<span class="k">break</span>
|
||
|
||
<span class="k">if</span> <span class="ow">not</span> <span class="n">content</span><span class="p">:</span>
|
||
<span class="k">raise</span> <span class="n">MaxRetries</span><span class="p">(</span><span class="s2">"Unable to render the page. Try increasing timeout"</span><span class="p">)</span>
|
||
|
||
<span class="n">html</span> <span class="o">=</span> <span class="n">HTML</span><span class="p">(</span><span class="n">url</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">url</span><span class="p">,</span> <span class="n">html</span><span class="o">=</span><span class="n">content</span><span class="o">.</span><span class="n">encode</span><span class="p">(</span><span class="n">DEFAULT_ENCODING</span><span class="p">),</span> <span class="n">default_encoding</span><span class="o">=</span><span class="n">DEFAULT_ENCODING</span><span class="p">)</span>
|
||
<span class="bp">self</span><span class="o">.</span><span class="vm">__dict__</span><span class="o">.</span><span class="n">update</span><span class="p">(</span><span class="n">html</span><span class="o">.</span><span class="vm">__dict__</span><span class="p">)</span>
|
||
<span class="bp">self</span><span class="o">.</span><span class="n">page</span> <span class="o">=</span> <span class="n">page</span></div></div>
|
||
<span class="k">return</span> <span class="n">result</span>
|
||
|
||
|
||
<span class="k">class</span> <span class="nc">HTMLResponse</span><span class="p">(</span><span class="n">requests</span><span class="o">.</span><span class="n">Response</span><span class="p">):</span>
|
||
<span class="sd">"""An HTML-enabled :class:`requests.Response &lt;requests.Response&gt;` object.</span>
|
||
<span class="sd"> Effectively the same, but with an intelligent ``.html`` property added.</span>
|
||
<span class="sd"> """</span>
|
||
|
||
<span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">session</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="s1">'HTMLSession'</span><span class="p">,</span> <span class="s1">'AsyncHTMLSession'</span><span class="p">])</span> <span class="o">-></span> <span class="kc">None</span><span class="p">:</span>
|
||
<span class="nb">super</span><span class="p">(</span><span class="n">HTMLResponse</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">()</span>
|
||
<span class="bp">self</span><span class="o">.</span><span class="n">_html</span> <span class="o">=</span> <span class="kc">None</span> <span class="c1"># type: HTML</span>
|
||
<span class="bp">self</span><span class="o">.</span><span class="n">session</span> <span class="o">=</span> <span class="n">session</span>
|
||
|
||
<span class="nd">@property</span>
|
||
<span class="k">def</span> <span class="nf">html</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="n">HTML</span><span class="p">:</span>
|
||
<span class="k">if</span> <span class="ow">not</span> <span class="bp">self</span><span class="o">.</span><span class="n">_html</span><span class="p">:</span>
|
||
<span class="bp">self</span><span class="o">.</span><span class="n">_html</span> <span class="o">=</span> <span class="n">HTML</span><span class="p">(</span><span class="n">session</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">session</span><span class="p">,</span> <span class="n">url</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">url</span><span class="p">,</span> <span class="n">html</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">content</span><span class="p">,</span> <span class="n">default_encoding</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">encoding</span><span class="p">)</span>
|
||
|
||
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_html</span>
|
||
|
||
<span class="nd">@classmethod</span>
|
||
<span class="k">def</span> <span class="nf">_from_response</span><span class="p">(</span><span class="bp">cls</span><span class="p">,</span> <span class="n">response</span><span class="p">,</span> <span class="n">session</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="s1">'HTMLSession'</span><span class="p">,</span> <span class="s1">'AsyncHTMLSession'</span><span class="p">]):</span>
|
||
<span class="n">html_r</span> <span class="o">=</span> <span class="bp">cls</span><span class="p">(</span><span class="n">session</span><span class="o">=</span><span class="n">session</span><span class="p">)</span>
|
||
<span class="n">html_r</span><span class="o">.</span><span class="vm">__dict__</span><span class="o">.</span><span class="n">update</span><span class="p">(</span><span class="n">response</span><span class="o">.</span><span class="vm">__dict__</span><span class="p">)</span>
|
||
<span class="k">return</span> <span class="n">html_r</span>
|
||
|
||
|
||
<div class="viewcode-block" id="user_agent"><a class="viewcode-back" href="../index.html#requests_html.user_agent">[docs]</a><span class="k">def</span> <span class="nf">user_agent</span><span class="p">(</span><span class="n">style</span><span class="o">=</span><span class="kc">None</span><span class="p">)</span> <span class="o">-></span> <span class="n">_UserAgent</span><span class="p">:</span>
|
||
<span class="sd">"""Returns an apparently legit user-agent, if not requested one of a specific</span>
|
||
<span class="sd"> style. Defaults to a Chrome-style User-Agent.</span>
|
||
<span class="sd"> """</span>
|
||
<span class="k">global</span> <span class="n">useragent</span>
|
||
<span class="k">if</span> <span class="p">(</span><span class="ow">not</span> <span class="n">useragent</span><span class="p">)</span> <span class="ow">and</span> <span class="n">style</span><span class="p">:</span>
|
||
<span class="n">useragent</span> <span class="o">=</span> <span class="n">UserAgent</span><span class="p">()</span>
|
||
</div>
|
||
<span class="k">return</span> <span class="n">useragent</span><span class="p">[</span><span class="n">style</span><span class="p">]</span> <span class="k">if</span> <span class="n">style</span> <span class="k">else</span> <span class="n">DEFAULT_USER_AGENT</span>
|
||
|
||
|
||
<span class="k">def</span> <span class="nf">_get_first_or_list</span><span class="p">(</span><span class="n">l</span><span class="p">,</span> <span class="n">first</span><span class="o">=</span><span class="kc">False</span><span class="p">):</span>
|
||
<span class="k">if</span> <span class="n">first</span><span class="p">:</span>
|
||
<span class="k">try</span><span class="p">:</span>
|
||
<span class="k">return</span> <span class="n">l</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span>
|
||
<span class="k">except</span> <span class="ne">IndexError</span><span class="p">:</span>
|
||
<span class="k">return</span> <span class="kc">None</span>
|
||
<span class="k">else</span><span class="p">:</span>
|
||
<span class="k">return</span> <span class="n">l</span>
|
||
|
||
|
||
<div class="viewcode-block" id="HTMLSession"><a class="viewcode-back" href="../index.html#requests_html.HTMLSession">[docs]</a><span class="k">class</span> <span class="nc">HTMLSession</span><span class="p">(</span><span class="n">requests</span><span class="o">.</span><span class="n">Session</span><span class="p">):</span>
|
||
<span class="sd">"""A consumable session, for cookie persistence and connection pooling,</span>
|
||
<span class="sd"> amongst other things.</span>
|
||
<span class="sd"> """</span>
|
||
|
||
<span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">mock_browser</span><span class="o">=</span><span class="kc">True</span><span class="p">):</span>
|
||
<span class="nb">super</span><span class="p">(</span><span class="n">HTMLSession</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">()</span>
|
||
|
||
<span class="c1"># Mock a web browser's user agent.</span>
|
||
<span class="k">if</span> <span class="n">mock_browser</span><span class="p">:</span>
|
||
<span class="bp">self</span><span class="o">.</span><span class="n">headers</span><span class="p">[</span><span class="s1">'User-Agent'</span><span class="p">]</span> <span class="o">=</span> <span class="n">user_agent</span><span class="p">()</span>
|
||
|
||
<span class="bp">self</span><span class="o">.</span><span class="n">hooks</span> <span class="o">=</span> <span class="p">{</span><span class="s1">'response'</span><span class="p">:</span> <span class="bp">self</span><span class="o">.</span><span class="n">_handle_response</span><span class="p">}</span>
|
||
|
||
<span class="nd">@staticmethod</span>
|
||
<span class="k">def</span> <span class="nf">_handle_response</span><span class="p">(</span><span class="n">response</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span> <span class="o">-></span> <span class="n">HTMLResponse</span><span class="p">:</span>
|
||
<span class="sd">"""Requests HTTP Response handler. Attaches .html property to</span>
|
||
<span class="sd">        :class:`requests.Response &lt;requests.Response&gt;` objects.</span>
|
||
<span class="sd"> """</span>
|
||
<span class="k">if</span> <span class="ow">not</span> <span class="n">response</span><span class="o">.</span><span class="n">encoding</span><span class="p">:</span>
|
||
<span class="n">response</span><span class="o">.</span><span class="n">encoding</span> <span class="o">=</span> <span class="n">DEFAULT_ENCODING</span>
|
||
|
||
<span class="k">return</span> <span class="n">response</span>
|
||
|
||
<div class="viewcode-block" id="HTMLSession.request"><a class="viewcode-back" href="../index.html#requests_html.HTMLSession.request">[docs]</a> <span class="k">def</span> <span class="nf">request</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">*</span><span class="n">args</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span> <span class="o">-></span> <span class="n">HTMLResponse</span><span class="p">:</span>
|
||
<span class="sd">"""Makes an HTTP Request, with mocked User-Agent headers.</span>
|
||
<span class="sd">        Returns a :class:`HTMLResponse &lt;HTMLResponse&gt;`.</span>
|
||
<span class="sd"> """</span>
|
||
<span class="c1"># Send the request, then convert the Response object into an HTMLResponse object.</span>
|
||
<span class="n">r</span> <span class="o">=</span> <span class="nb">super</span><span class="p">(</span><span class="n">HTMLSession</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="n">request</span><span class="p">(</span><span class="o">*</span><span class="n">args</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span>
|
||
</div>
|
||
<span class="k">return</span> <span class="n">HTMLResponse</span><span class="o">.</span><span class="n">_from_response</span><span class="p">(</span><span class="n">r</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span>
|
||
|
||
<span class="nd">@property</span>
|
||
<span class="k">def</span> <span class="nf">browser</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||
<span class="k">if</span> <span class="ow">not</span> <span class="nb">hasattr</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="s2">"_browser"</span><span class="p">):</span>
|
||
<span class="bp">self</span><span class="o">.</span><span class="n">loop</span> <span class="o">=</span> <span class="n">asyncio</span><span class="o">.</span><span class="n">get_event_loop</span><span class="p">()</span>
|
||
<span class="bp">self</span><span class="o">.</span><span class="n">_browser</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">loop</span><span class="o">.</span><span class="n">run_until_complete</span><span class="p">(</span><span class="n">pyppeteer</span><span class="o">.</span><span class="n">launch</span><span class="p">(</span><span class="n">headless</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span> <span class="n">args</span><span class="o">=</span><span class="p">[</span><span class="s1">'--no-sandbox'</span><span class="p">]))</span>
|
||
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_browser</span>
|
||
|
||
<div class="viewcode-block" id="HTMLSession.close"><a class="viewcode-back" href="../index.html#requests_html.HTMLSession.close">[docs]</a> <span class="k">def</span> <span class="nf">close</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||
<span class="sd">""" If a browser was created close it first. """</span>
|
||
<span class="k">if</span> <span class="nb">hasattr</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="s2">"_browser"</span><span class="p">):</span>
|
||
<span class="bp">self</span><span class="o">.</span><span class="n">loop</span><span class="o">.</span><span class="n">run_until_complete</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_browser</span><span class="o">.</span><span class="n">close</span><span class="p">())</span></div></div>
|
||
<span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="n">close</span><span class="p">()</span>
|
||
|
||
|
||
<span class="k">class</span> <span class="nc">AsyncHTMLSession</span><span class="p">(</span><span class="n">requests</span><span class="o">.</span><span class="n">Session</span><span class="p">):</span>
|
||
<span class="sd">""" An async consumable session. """</span>
|
||
|
||
<span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">loop</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">workers</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span>
|
||
<span class="n">mock_browser</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">,</span> <span class="o">*</span><span class="n">args</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">):</span>
|
||
<span class="sd">""" Set or create an event loop and a thread pool.</span>
|
||
|
||
<span class="sd">        :param loop: Asyncio loop to use.</span>
|
||
<span class="sd"> :param workers: Amount of threads to use for executing async calls.</span>
|
||
<span class="sd">            If not passed it will default to the number of processors on the</span>
|
||
<span class="sd"> machine, multiplied by 5. """</span>
|
||
<span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="o">*</span><span class="n">args</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span>
|
||
|
||
<span class="c1"># Mock a web browser's user agent.</span>
|
||
<span class="k">if</span> <span class="n">mock_browser</span><span class="p">:</span>
|
||
<span class="bp">self</span><span class="o">.</span><span class="n">headers</span><span class="p">[</span><span class="s1">'User-Agent'</span><span class="p">]</span> <span class="o">=</span> <span class="n">user_agent</span><span class="p">()</span>
|
||
|
||
<span class="bp">self</span><span class="o">.</span><span class="n">hooks</span><span class="p">[</span><span class="s1">'response'</span><span class="p">]</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">response_hook</span><span class="p">)</span>
|
||
|
||
<span class="bp">self</span><span class="o">.</span><span class="n">loop</span> <span class="o">=</span> <span class="n">loop</span> <span class="ow">or</span> <span class="n">asyncio</span><span class="o">.</span><span class="n">get_event_loop</span><span class="p">()</span>
|
||
<span class="bp">self</span><span class="o">.</span><span class="n">thread_pool</span> <span class="o">=</span> <span class="n">ThreadPoolExecutor</span><span class="p">(</span><span class="n">max_workers</span><span class="o">=</span><span class="n">workers</span><span class="p">)</span>
|
||
|
||
<span class="k">def</span> <span class="nf">response_hook</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">response</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span> <span class="o">-></span> <span class="n">HTMLResponse</span><span class="p">:</span>
|
||
<span class="sd">""" Change response encoding and replace it with an HTMLResponse. """</span>
|
||
<span class="n">response</span><span class="o">.</span><span class="n">encoding</span> <span class="o">=</span> <span class="n">DEFAULT_ENCODING</span>
|
||
<span class="k">return</span> <span class="n">HTMLResponse</span><span class="o">.</span><span class="n">_from_response</span><span class="p">(</span><span class="n">response</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span>
|
||
|
||
<span class="k">def</span> <span class="nf">request</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">*</span><span class="n">args</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">):</span>
|
||
<span class="sd">""" Partial original request func and run it in a thread. """</span>
|
||
<span class="n">func</span> <span class="o">=</span> <span class="n">partial</span><span class="p">(</span><span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="n">request</span><span class="p">,</span> <span class="o">*</span><span class="n">args</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span>
|
||
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">loop</span><span class="o">.</span><span class="n">run_in_executor</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">thread_pool</span><span class="p">,</span> <span class="n">func</span><span class="p">)</span>
|
||
</pre></div>
|
||
|
||
</div>
|
||
</div>
|
||
</div>
|
||
<div class="sphinxsidebar" role="navigation" aria-label="main navigation">
|
||
<div class="sphinxsidebarwrapper"><p class="logo">
|
||
<a href="../index.html">
|
||
<!-- The logo is the only content of the home link, so its alt text names the link target. -->
<img class="logo" src="../_static/requests-html-logo.png" alt="Requests-HTML documentation home" title="https://kennethreitz.org/tattoos"/>
|
||
</a>
|
||
</p>
|
||
|
||
<p>
|
||
<!-- GitHub "watch" button for the requests-html repository (served by ghbtns.com).
     The title gives the frame an accessible name; ampersands in the URL are
     escaped so the attribute value is valid XHTML. -->
<iframe src="https://ghbtns.com/github-btn.html?user=kennethreitz&amp;repo=requests-html&amp;type=watch&amp;count=true&amp;size=large"
title="Watch requests-html on GitHub"
allowtransparency="true" frameborder="0" scrolling="0" width="200" height="35"></iframe>
|
||
</p>
|
||
|
||
<p>
|
||
<strong>Requests-HTML</strong> intends to make parsing HTML (e.g. scraping the web) as
|
||
simple and intuitive as possible.
|
||
</p>
|
||
|
||
<h3>Stay Informed</h3>
|
||
<p>Receive updates on new releases and upcoming projects.</p>
|
||
|
||
<!-- GitHub "follow" button for the kennethreitz account (served by ghbtns.com).
     The title gives the frame an accessible name; ampersands in the URL are
     escaped so the attribute value is valid XHTML. -->
<p><iframe src="https://ghbtns.com/github-btn.html?user=kennethreitz&amp;type=follow&amp;count=false"
title="Follow kennethreitz on GitHub"
allowtransparency="true" frameborder="0" scrolling="0" width="200" height="20"></iframe></p>
|
||
|
||
<!-- Twitter follow button. The loader injects widgets.js once (guarded by the
     'twitter-wjs' id) and always fetches it over HTTPS, so an http:// page
     never pulls the third-party script in clear text. -->
<p><a href="https://twitter.com/kennethreitz" class="twitter-follow-button" data-show-count="false">Follow @kennethreitz</a> <script>!function(d,s,id){var js,fjs=d.getElementsByTagName(s)[0];if(!d.getElementById(id)){js=d.createElement(s);js.id=id;js.src='https://platform.twitter.com/widgets.js';fjs.parentNode.insertBefore(js,fjs);}}(document, 'script', 'twitter-wjs');</script></p>
|
||
<p><a href="https://saythanks.io/to/kennethreitz">Say Thanks!</a></p>
|
||
<p><a href="http://tinyletter.com/kennethreitz">Join Mailing List</a>.</p>
|
||
|
||
<h3>Other Projects</h3>
|
||
|
||
<p>More <a href="http://kennethreitz.org/">Kenneth Reitz</a> projects:</p>
|
||
<ul>
|
||
<li><a href="https://python-requests.org/">python-requests.org</a></li>
|
||
<li><a href="http://howtopython.org/">howtopython.org</a></li>
|
||
<li><a href="http://pipenv.org/">pipenv</a></li>
|
||
<li><a href="http://pep8.org/">pep8.org</a></li>
|
||
<li><a href="http://httpbin.org/">httpbin.org</a></li>
|
||
<li><a href="http://python-guide.org">The Python Guide</a></li>
|
||
<li><a href="https://github.com/kennethreitz/maya">Maya: Datetimes for Humans</a></li>
|
||
<li><a href="https://github.com/kennethreitz/records">Records: SQL for Humans</a></li>
|
||
<li><a href="http://www.git-legit.org">Legit: Git for Humans</a></li>
|
||
<li><a href="http://docs.python-tablib.org/en/latest/">Tablib: Tabular Datasets</a></li>
|
||
</ul><div class="relations">
|
||
<h3>Related Topics</h3>
|
||
<!-- Navigation trail: documentation overview, then the module code index.
     The innermost list had no items, so it is omitted (an empty ul is invalid). -->
<ul>
<li><a href="../index.html">Documentation overview</a><ul>
<li><a href="index.html">Module code</a></li>
</ul></li>
</ul>
|
||
</div>
|
||
<div id="searchbox" style="display: none" role="search">
|
||
<h3>Quick search</h3>
|
||
<div class="searchformwrapper">
|
||
<form class="search" action="../search.html" method="get">
|
||
<!-- The query box has no label element, so aria-label supplies its accessible name. -->
<input type="text" name="q" aria-label="Quick search" />
|
||
<input type="submit" value="Go" />
|
||
<input type="hidden" name="check_keywords" value="yes" />
|
||
<input type="hidden" name="area" value="default" />
|
||
</form>
|
||
</div>
|
||
</div>
|
||
<script type="text/javascript">$('#searchbox').show(0);</script><!-- Alabaster (krTheme++) Hacks -->
|
||
|
||
|
||
<!-- CSS Adjustments (I'm very picky.) -->
|
||
<style type="text/css">
|
||
|
||
/* Nudge the logo image 20px to the left. */
img.logo {
    margin-left: -20px !important;
}

/* Render the search box heading with each word capitalized. */
div#searchbox h3 {
    text-transform: capitalize;
}

/* Fixed document width, so less code is cut off. */
div.document {
    width: 1008px;
}

/* Padding inside highlighted code blocks. */
div.highlight pre {
    padding: 11px 14px;
}

/* Viewports up to 1008px wide: hide the sidebar and let the document fill the width. */
@media screen and (max-width: 1008px) {
    div.sphinxsidebar {
        display: none;
    }

    div.document {
        width: 100% !important;
    }

    /* Let code blocks extend 30px past the document's right margin. */
    div.highlight pre {
        margin-right: -30px;
    }
}
|
||
|
||
</style>
|
||
|
||
|
||
<!-- Analytics tracking for Kenneth. -->
|
||
<script type="text/javascript">
|
||
// Gauges analytics bootstrap: keep any existing global queue, then build the
// tracker <script> element and insert it ahead of the first script on the page.
var _gauges = _gauges || [];
(function (doc) {
  var firstScript = doc.getElementsByTagName('script')[0];
  var tracker = doc.createElement('script');

  tracker.type = 'text/javascript';
  tracker.async = true;
  tracker.id = 'gauges-tracker';
  tracker.setAttribute('data-site-id', '5a956183ba4ae36e18000033');
  tracker.setAttribute('data-track-path', 'https://track.gaug.es/track.gif');
  tracker.src = 'https://d2fuc4clr7gvcn.cloudfront.net/track.js';

  firstScript.parentNode.insertBefore(tracker, firstScript);
}(document));
|
||
</script>
|
||
|
||
|
||
<!-- That was not a hack. That was art. -->
|
||
</div>
|
||
</div>
|
||
<div class="clearer"></div>
|
||
</div>
|
||
<div class="footer">
|
||
©MMXVIII. A <a href="http://kennethreitz.com/pages/open-projects.html">Kenneth Reitz</a> Project.
|
||
|
||
</div>
|
||
|
||
|
||
<a href="https://github.com/kennethreitz/requests-html" class="github">
|
||
<img style="position: absolute; top: 0; right: 0; border: 0;" src="https://s3.amazonaws.com/github/ribbons/forkme_right_darkblue_121621.png" alt="Fork me on GitHub" class="github"/>
|
||
</a>
|
||
|
||
|
||
|
||
</body>
|
||
</html> |