mirror of
https://github.com/kennethreitz/requests-html.git
synced 2026-06-05 23:00:20 +00:00
updates
This commit is contained in:
+108
-66
@@ -56,6 +56,7 @@
|
||||
<span class="n">DEFAULT_ENCODING</span> <span class="o">=</span> <span class="s1">'utf-8'</span>
|
||||
<span class="n">DEFAULT_URL</span> <span class="o">=</span> <span class="s1">'https://example.org/'</span>
|
||||
<span class="n">DEFAULT_USER_AGENT</span> <span class="o">=</span> <span class="s1">'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/603.3.8 (KHTML, like Gecko) Version/10.1.2 Safari/603.3.8'</span>
|
||||
<span class="n">DEFAULT_NEXT_SYMBOL</span> <span class="o">=</span> <span class="p">[</span><span class="s1">'next'</span><span class="p">,</span> <span class="s1">'more'</span><span class="p">,</span> <span class="s1">'older'</span><span class="p">]</span>
|
||||
|
||||
<span class="n">cleaner</span> <span class="o">=</span> <span class="n">Cleaner</span><span class="p">()</span>
|
||||
<span class="n">cleaner</span><span class="o">.</span><span class="n">javascript</span> <span class="o">=</span> <span class="kc">True</span>
|
||||
@@ -81,6 +82,7 @@
|
||||
<span class="n">_Links</span> <span class="o">=</span> <span class="n">Set</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span>
|
||||
<span class="n">_Attrs</span> <span class="o">=</span> <span class="n">MutableMapping</span>
|
||||
<span class="n">_Next</span> <span class="o">=</span> <span class="n">Union</span><span class="p">[</span><span class="s1">'HTML'</span><span class="p">,</span> <span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]]</span>
|
||||
<span class="n">_NextSymbol</span> <span class="o">=</span> <span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span>
|
||||
|
||||
<span class="c1"># Sanity checking.</span>
|
||||
<span class="k">try</span><span class="p">:</span>
|
||||
@@ -90,6 +92,12 @@
|
||||
<span class="k">raise</span> <span class="ne">RuntimeError</span><span class="p">(</span><span class="s1">'Requests-HTML requires Python 3.6+!'</span><span class="p">)</span>
|
||||
|
||||
|
||||
<span class="k">class</span> <span class="nc">MaxRetries</span><span class="p">(</span><span class="ne">Exception</span><span class="p">):</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">message</span><span class="p">):</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">message</span> <span class="o">=</span> <span class="n">message</span>
|
||||
|
||||
|
||||
<span class="k">class</span> <span class="nc">BaseParser</span><span class="p">:</span>
|
||||
<span class="sd">"""A basic HTML/Element Parser, for Humans.</span>
|
||||
|
||||
@@ -100,9 +108,8 @@
|
||||
|
||||
<span class="sd"> """</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">*</span><span class="p">,</span> <span class="n">element</span><span class="p">,</span> <span class="n">session</span><span class="p">:</span> <span class="s1">'HTTPSession'</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> <span class="n">default_encoding</span><span class="p">:</span> <span class="n">_DefaultEncoding</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> <span class="n">html</span><span class="p">:</span> <span class="n">_HTML</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> <span class="n">url</span><span class="p">:</span> <span class="n">_URL</span><span class="p">)</span> <span class="o">-></span> <span class="kc">None</span><span class="p">:</span>
|
||||
<span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">*</span><span class="p">,</span> <span class="n">element</span><span class="p">,</span> <span class="n">default_encoding</span><span class="p">:</span> <span class="n">_DefaultEncoding</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> <span class="n">html</span><span class="p">:</span> <span class="n">_HTML</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> <span class="n">url</span><span class="p">:</span> <span class="n">_URL</span><span class="p">)</span> <span class="o">-></span> <span class="kc">None</span><span class="p">:</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">element</span> <span class="o">=</span> <span class="n">element</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">session</span> <span class="o">=</span> <span class="n">session</span> <span class="ow">or</span> <span class="n">HTMLSession</span><span class="p">()</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">url</span> <span class="o">=</span> <span class="n">url</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">skip_anchors</span> <span class="o">=</span> <span class="kc">True</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">default_encoding</span> <span class="o">=</span> <span class="n">default_encoding</span>
|
||||
@@ -151,6 +158,12 @@
|
||||
<span class="c1"># Scan meta tags for charset.</span>
|
||||
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">_html</span><span class="p">:</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">_encoding</span> <span class="o">=</span> <span class="n">html_to_unicode</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">default_encoding</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">_html</span><span class="p">)[</span><span class="mi">0</span><span class="p">]</span>
|
||||
<span class="c1"># Fall back to requests' detected encoding if decode fails.</span>
|
||||
<span class="k">try</span><span class="p">:</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">raw_html</span><span class="o">.</span><span class="n">decode</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">encoding</span><span class="p">)</span>
|
||||
<span class="k">except</span> <span class="ne">UnicodeDecodeError</span><span class="p">:</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">_encoding</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">default_encoding</span>
|
||||
|
||||
|
||||
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_encoding</span> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">_encoding</span> <span class="k">else</span> <span class="bp">self</span><span class="o">.</span><span class="n">default_encoding</span>
|
||||
|
||||
@@ -196,47 +209,6 @@
|
||||
<span class="sd"> """</span>
|
||||
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">lxml</span><span class="o">.</span><span class="n">text_content</span><span class="p">()</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">next</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">fetch</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">)</span> <span class="o">-></span> <span class="n">_Next</span><span class="p">:</span>
|
||||
<span class="sd">"""Attempts to find the next page, if there is one. If ``fetch``</span>
|
||||
<span class="sd"> is ``True`` (default), returns :class:`HTML <HTML>` object of</span>
|
||||
<span class="sd"> next page. If ``fetch`` is ``False``, simply returns the next URL.</span>
|
||||
|
||||
<span class="sd"> """</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">get_next</span><span class="p">():</span>
|
||||
<span class="n">candidates</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">find</span><span class="p">(</span><span class="s1">'a'</span><span class="p">,</span> <span class="n">containing</span><span class="o">=</span><span class="p">(</span><span class="s1">'next'</span><span class="p">,</span> <span class="s1">'more'</span><span class="p">,</span> <span class="s1">'older'</span><span class="p">))</span>
|
||||
|
||||
<span class="k">for</span> <span class="n">candidate</span> <span class="ow">in</span> <span class="n">candidates</span><span class="p">:</span>
|
||||
<span class="k">if</span> <span class="n">candidate</span><span class="o">.</span><span class="n">attrs</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'href'</span><span class="p">):</span>
|
||||
<span class="c1"># Support 'next' rel (e.g. reddit).</span>
|
||||
<span class="k">if</span> <span class="s1">'next'</span> <span class="ow">in</span> <span class="n">candidate</span><span class="o">.</span><span class="n">attrs</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'rel'</span><span class="p">,</span> <span class="p">[]):</span>
|
||||
<span class="k">return</span> <span class="n">candidate</span><span class="o">.</span><span class="n">attrs</span><span class="p">[</span><span class="s1">'href'</span><span class="p">]</span>
|
||||
|
||||
<span class="c1"># Support 'next' in classnames.</span>
|
||||
<span class="k">for</span> <span class="n">_class</span> <span class="ow">in</span> <span class="n">candidate</span><span class="o">.</span><span class="n">attrs</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'class'</span><span class="p">,</span> <span class="p">[]):</span>
|
||||
<span class="k">if</span> <span class="s1">'next'</span> <span class="ow">in</span> <span class="n">_class</span><span class="p">:</span>
|
||||
<span class="k">return</span> <span class="n">candidate</span><span class="o">.</span><span class="n">attrs</span><span class="p">[</span><span class="s1">'href'</span><span class="p">]</span>
|
||||
|
||||
<span class="k">if</span> <span class="s1">'page'</span> <span class="ow">in</span> <span class="n">candidate</span><span class="o">.</span><span class="n">attrs</span><span class="p">[</span><span class="s1">'href'</span><span class="p">]:</span>
|
||||
<span class="k">return</span> <span class="n">candidate</span><span class="o">.</span><span class="n">attrs</span><span class="p">[</span><span class="s1">'href'</span><span class="p">]</span>
|
||||
|
||||
<span class="k">try</span><span class="p">:</span>
|
||||
<span class="c1"># Resort to the last candidate.</span>
|
||||
<span class="k">return</span> <span class="n">candidates</span><span class="p">[</span><span class="o">-</span><span class="mi">1</span><span class="p">]</span><span class="o">.</span><span class="n">attrs</span><span class="p">[</span><span class="s1">'href'</span><span class="p">]</span>
|
||||
<span class="k">except</span> <span class="ne">IndexError</span><span class="p">:</span>
|
||||
<span class="k">return</span> <span class="kc">None</span>
|
||||
|
||||
<span class="nb">next</span> <span class="o">=</span> <span class="n">get_next</span><span class="p">()</span>
|
||||
<span class="k">if</span> <span class="nb">next</span><span class="p">:</span>
|
||||
<span class="n">url</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_make_absolute</span><span class="p">(</span><span class="nb">next</span><span class="p">)</span>
|
||||
<span class="k">else</span><span class="p">:</span>
|
||||
<span class="k">return</span> <span class="kc">None</span>
|
||||
|
||||
<span class="k">if</span> <span class="n">fetch</span><span class="p">:</span>
|
||||
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">session</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">url</span><span class="p">)</span>
|
||||
<span class="k">else</span><span class="p">:</span>
|
||||
<span class="k">return</span> <span class="n">url</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">find</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">selector</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"*"</span><span class="p">,</span> <span class="o">*</span><span class="p">,</span> <span class="n">containing</span><span class="p">:</span> <span class="n">_Containing</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> <span class="n">clean</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">,</span> <span class="n">first</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">,</span> <span class="n">_encoding</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="kc">None</span><span class="p">)</span> <span class="o">-></span> <span class="n">_Find</span><span class="p">:</span>
|
||||
<span class="sd">"""Given a CSS Selector, returns a list of</span>
|
||||
<span class="sd"> :class:`Element <Element>` objects or a single one.</span>
|
||||
@@ -356,7 +328,7 @@
|
||||
|
||||
<span class="k">try</span><span class="p">:</span>
|
||||
<span class="n">href</span> <span class="o">=</span> <span class="n">link</span><span class="o">.</span><span class="n">attrs</span><span class="p">[</span><span class="s1">'href'</span><span class="p">]</span><span class="o">.</span><span class="n">strip</span><span class="p">()</span>
|
||||
<span class="k">if</span> <span class="n">href</span> <span class="ow">and</span> <span class="ow">not</span> <span class="p">(</span><span class="n">href</span><span class="o">.</span><span class="n">startswith</span><span class="p">(</span><span class="s1">'#'</span><span class="p">)</span> <span class="ow">and</span> <span class="bp">self</span><span class="o">.</span><span class="n">skip_anchors</span><span class="p">)</span> <span class="ow">and</span> <span class="ow">not</span> <span class="n">href</span><span class="o">.</span><span class="n">startswith</span><span class="p">(</span><span class="s1">'javascript:'</span><span class="p">):</span>
|
||||
<span class="k">if</span> <span class="n">href</span> <span class="ow">and</span> <span class="ow">not</span> <span class="p">(</span><span class="n">href</span><span class="o">.</span><span class="n">startswith</span><span class="p">(</span><span class="s1">'#'</span><span class="p">)</span> <span class="ow">and</span> <span class="bp">self</span><span class="o">.</span><span class="n">skip_anchors</span><span class="p">)</span> <span class="ow">and</span> <span class="ow">not</span> <span class="n">href</span><span class="o">.</span><span class="n">startswith</span><span class="p">((</span><span class="s1">'javascript:'</span><span class="p">,</span> <span class="s1">'mailto:'</span><span class="p">)):</span>
|
||||
<span class="k">yield</span> <span class="n">href</span>
|
||||
<span class="k">except</span> <span class="ne">KeyError</span><span class="p">:</span>
|
||||
<span class="k">pass</span>
|
||||
@@ -405,7 +377,7 @@
|
||||
<span class="c1"># Support for <base> tag.</span>
|
||||
<span class="n">base</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">find</span><span class="p">(</span><span class="s1">'base'</span><span class="p">,</span> <span class="n">first</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="n">base</span><span class="p">:</span>
|
||||
<span class="n">result</span> <span class="o">=</span> <span class="n">base</span><span class="o">.</span><span class="n">attrs</span><span class="p">[</span><span class="s1">'href'</span><span class="p">]</span><span class="o">.</span><span class="n">strip</span><span class="p">()</span>
|
||||
<span class="n">result</span> <span class="o">=</span> <span class="n">base</span><span class="o">.</span><span class="n">attrs</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'href'</span><span class="p">,</span> <span class="s1">''</span><span class="p">)</span><span class="o">.</span><span class="n">strip</span><span class="p">()</span>
|
||||
<span class="k">if</span> <span class="n">result</span><span class="p">:</span>
|
||||
<span class="k">return</span> <span class="n">result</span>
|
||||
|
||||
@@ -413,7 +385,7 @@
|
||||
<span class="n">parsed</span> <span class="o">=</span> <span class="n">urlparse</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">url</span><span class="p">)</span><span class="o">.</span><span class="n">_asdict</span><span class="p">()</span>
|
||||
|
||||
<span class="c1"># Remove any part of the path after the last '/'</span>
|
||||
<span class="n">path</span> <span class="o">=</span> <span class="s1">'/'</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">parsed</span><span class="p">[</span><span class="s1">'path'</span><span class="p">]</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s1">'/'</span><span class="p">)[:</span><span class="o">-</span><span class="mi">1</span><span class="p">])</span>
|
||||
<span class="n">parsed</span><span class="p">[</span><span class="s1">'path'</span><span class="p">]</span> <span class="o">=</span> <span class="s1">'/'</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">parsed</span><span class="p">[</span><span class="s1">'path'</span><span class="p">]</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s1">'/'</span><span class="p">)[:</span><span class="o">-</span><span class="mi">1</span><span class="p">])</span> <span class="o">+</span> <span class="s1">'/'</span>
|
||||
|
||||
<span class="c1"># Reconstruct the url with the modified path</span>
|
||||
<span class="n">parsed</span> <span class="o">=</span> <span class="p">(</span><span class="n">v</span> <span class="k">for</span> <span class="n">v</span> <span class="ow">in</span> <span class="n">parsed</span><span class="o">.</span><span class="n">values</span><span class="p">())</span>
|
||||
@@ -468,7 +440,7 @@
|
||||
<span class="sd"> :param default_encoding: Which encoding to default to.</span>
|
||||
<span class="sd"> """</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">*</span><span class="p">,</span> <span class="n">url</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="n">DEFAULT_URL</span><span class="p">,</span> <span class="n">html</span><span class="p">:</span> <span class="n">_HTML</span><span class="p">,</span> <span class="n">default_encoding</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="n">DEFAULT_ENCODING</span><span class="p">)</span> <span class="o">-></span> <span class="kc">None</span><span class="p">:</span>
|
||||
<span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">*</span><span class="p">,</span> <span class="n">session</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="s1">'HTTPSession'</span><span class="p">,</span> <span class="s1">'AsyncHTMLSession'</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> <span class="n">url</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="n">DEFAULT_URL</span><span class="p">,</span> <span class="n">html</span><span class="p">:</span> <span class="n">_HTML</span><span class="p">,</span> <span class="n">default_encoding</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="n">DEFAULT_ENCODING</span><span class="p">)</span> <span class="o">-></span> <span class="kc">None</span><span class="p">:</span>
|
||||
|
||||
<span class="c1"># Convert incoming unicode HTML into bytes.</span>
|
||||
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">html</span><span class="p">,</span> <span class="nb">str</span><span class="p">):</span>
|
||||
@@ -481,11 +453,54 @@
|
||||
<span class="n">url</span><span class="o">=</span><span class="n">url</span><span class="p">,</span>
|
||||
<span class="n">default_encoding</span><span class="o">=</span><span class="n">default_encoding</span>
|
||||
<span class="p">)</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">session</span> <span class="o">=</span> <span class="n">session</span> <span class="ow">or</span> <span class="n">HTMLSession</span><span class="p">()</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">page</span> <span class="o">=</span> <span class="kc">None</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">next_symbol</span> <span class="o">=</span> <span class="n">DEFAULT_NEXT_SYMBOL</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">__repr__</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">str</span><span class="p">:</span>
|
||||
<span class="k">return</span> <span class="n">f</span><span class="s2">"<HTML url=</span><span class="si">{self.url!r}</span><span class="s2">>"</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">_next</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">fetch</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">,</span> <span class="n">next_symbol</span><span class="p">:</span> <span class="n">_NextSymbol</span> <span class="o">=</span> <span class="n">DEFAULT_NEXT_SYMBOL</span><span class="p">)</span> <span class="o">-></span> <span class="n">_Next</span><span class="p">:</span>
|
||||
<span class="sd">"""Attempts to find the next page, if there is one. If ``fetch``</span>
|
||||
<span class="sd"> is ``True``, returns :class:`HTML <HTML>` object of</span>
|
||||
<span class="sd"> next page. If ``fetch`` is ``False`` (default), simply returns the next URL.</span>
|
||||
|
||||
<span class="sd"> """</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">get_next</span><span class="p">():</span>
|
||||
<span class="n">candidates</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">find</span><span class="p">(</span><span class="s1">'a'</span><span class="p">,</span> <span class="n">containing</span><span class="o">=</span><span class="n">next_symbol</span><span class="p">)</span>
|
||||
|
||||
<span class="k">for</span> <span class="n">candidate</span> <span class="ow">in</span> <span class="n">candidates</span><span class="p">:</span>
|
||||
<span class="k">if</span> <span class="n">candidate</span><span class="o">.</span><span class="n">attrs</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'href'</span><span class="p">):</span>
|
||||
<span class="c1"># Support 'next' rel (e.g. reddit).</span>
|
||||
<span class="k">if</span> <span class="s1">'next'</span> <span class="ow">in</span> <span class="n">candidate</span><span class="o">.</span><span class="n">attrs</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'rel'</span><span class="p">,</span> <span class="p">[]):</span>
|
||||
<span class="k">return</span> <span class="n">candidate</span><span class="o">.</span><span class="n">attrs</span><span class="p">[</span><span class="s1">'href'</span><span class="p">]</span>
|
||||
|
||||
<span class="c1"># Support 'next' in classnames.</span>
|
||||
<span class="k">for</span> <span class="n">_class</span> <span class="ow">in</span> <span class="n">candidate</span><span class="o">.</span><span class="n">attrs</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'class'</span><span class="p">,</span> <span class="p">[]):</span>
|
||||
<span class="k">if</span> <span class="s1">'next'</span> <span class="ow">in</span> <span class="n">_class</span><span class="p">:</span>
|
||||
<span class="k">return</span> <span class="n">candidate</span><span class="o">.</span><span class="n">attrs</span><span class="p">[</span><span class="s1">'href'</span><span class="p">]</span>
|
||||
|
||||
<span class="k">if</span> <span class="s1">'page'</span> <span class="ow">in</span> <span class="n">candidate</span><span class="o">.</span><span class="n">attrs</span><span class="p">[</span><span class="s1">'href'</span><span class="p">]:</span>
|
||||
<span class="k">return</span> <span class="n">candidate</span><span class="o">.</span><span class="n">attrs</span><span class="p">[</span><span class="s1">'href'</span><span class="p">]</span>
|
||||
|
||||
<span class="k">try</span><span class="p">:</span>
|
||||
<span class="c1"># Resort to the last candidate.</span>
|
||||
<span class="k">return</span> <span class="n">candidates</span><span class="p">[</span><span class="o">-</span><span class="mi">1</span><span class="p">]</span><span class="o">.</span><span class="n">attrs</span><span class="p">[</span><span class="s1">'href'</span><span class="p">]</span>
|
||||
<span class="k">except</span> <span class="ne">IndexError</span><span class="p">:</span>
|
||||
<span class="k">return</span> <span class="kc">None</span>
|
||||
|
||||
<span class="n">__next</span> <span class="o">=</span> <span class="n">get_next</span><span class="p">()</span>
|
||||
<span class="k">if</span> <span class="n">__next</span><span class="p">:</span>
|
||||
<span class="n">url</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_make_absolute</span><span class="p">(</span><span class="n">__next</span><span class="p">)</span>
|
||||
<span class="k">else</span><span class="p">:</span>
|
||||
<span class="k">return</span> <span class="kc">None</span>
|
||||
|
||||
<span class="k">if</span> <span class="n">fetch</span><span class="p">:</span>
|
||||
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">session</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">url</span><span class="p">)</span>
|
||||
<span class="k">else</span><span class="p">:</span>
|
||||
<span class="k">return</span> <span class="n">url</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">__iter__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||||
|
||||
<span class="nb">next</span> <span class="o">=</span> <span class="bp">self</span>
|
||||
@@ -493,14 +508,17 @@
|
||||
<span class="k">while</span> <span class="kc">True</span><span class="p">:</span>
|
||||
<span class="k">yield</span> <span class="nb">next</span>
|
||||
<span class="k">try</span><span class="p">:</span>
|
||||
<span class="nb">next</span> <span class="o">=</span> <span class="nb">next</span><span class="o">.</span><span class="n">next</span><span class="p">(</span><span class="n">fetch</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span><span class="o">.</span><span class="n">html</span>
|
||||
<span class="nb">next</span> <span class="o">=</span> <span class="nb">next</span><span class="o">.</span><span class="n">_next</span><span class="p">(</span><span class="n">fetch</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span> <span class="n">next_symbol</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">next_symbol</span><span class="p">)</span><span class="o">.</span><span class="n">html</span>
|
||||
<span class="k">except</span> <span class="ne">AttributeError</span><span class="p">:</span>
|
||||
<span class="k">break</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">__next__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||||
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">next</span><span class="p">(</span><span class="n">fetch</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span><span class="o">.</span><span class="n">html</span>
|
||||
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_next</span><span class="p">(</span><span class="n">fetch</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span> <span class="n">next_symbol</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">next_symbol</span><span class="p">)</span><span class="o">.</span><span class="n">html</span>
|
||||
|
||||
<div class="viewcode-block" id="HTML.render"><a class="viewcode-back" href="../index.html#requests_html.HTML.render">[docs]</a> <span class="k">def</span> <span class="nf">render</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">retries</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">8</span><span class="p">,</span> <span class="n">script</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> <span class="n">wait</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">0.2</span><span class="p">,</span> <span class="n">scrolldown</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span> <span class="n">sleep</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">0</span><span class="p">,</span> <span class="n">reload</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">,</span> <span class="n">timeout</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="nb">float</span><span class="p">,</span> <span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="mf">8.0</span><span class="p">):</span>
|
||||
<span class="k">def</span> <span class="nf">add_next_symbol</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">next_symbol</span><span class="p">):</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">next_symbol</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">next_symbol</span><span class="p">)</span>
|
||||
|
||||
<div class="viewcode-block" id="HTML.render"><a class="viewcode-back" href="../index.html#requests_html.HTML.render">[docs]</a> <span class="k">def</span> <span class="nf">render</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">retries</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">8</span><span class="p">,</span> <span class="n">script</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> <span class="n">wait</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">0.2</span><span class="p">,</span> <span class="n">scrolldown</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span> <span class="n">sleep</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">0</span><span class="p">,</span> <span class="n">reload</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">,</span> <span class="n">timeout</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="nb">float</span><span class="p">,</span> <span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="mf">8.0</span><span class="p">,</span> <span class="n">keep_page</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">):</span>
|
||||
<span class="sd">"""Reloads the response in Chromium, and replaces HTML content</span>
|
||||
<span class="sd"> with an updated version, with JavaScript executed.</span>
|
||||
|
||||
@@ -510,6 +528,7 @@
|
||||
<span class="sd"> :param scrolldown: Integer, if provided, of how many times to page down.</span>
|
||||
<span class="sd">        :param sleep: Integer, if provided, of how long to sleep after initial render.</span>
|
||||
<span class="sd"> :param reload: If ``False``, content will not be loaded from the browser, but will be provided from memory.</span>
|
||||
<span class="sd"> :param keep_page: If ``True`` will allow you to interact with the browser page through ``r.html.page``.</span>
|
||||
|
||||
<span class="sd"> If ``scrolldown`` is specified, the page will scrolldown the specified</span>
|
||||
<span class="sd"> number of times, after sleeping the specified amount of time</span>
|
||||
@@ -540,13 +559,15 @@
|
||||
<span class="sd"> >>> r.html.render(script=script)</span>
|
||||
<span class="sd"> {'width': 800, 'height': 600, 'deviceScaleFactor': 1}</span>
|
||||
|
||||
<span class="sd">        Warning: If you use keep_page, you're responsible for closing each page, since</span>
|
||||
<span class="sd">        opening too many at scale may crash the browser.</span>
|
||||
|
||||
<span class="sd"> Warning: the first time you run this method, it will download</span>
|
||||
<span class="sd"> Chromium into your home directory (``~/.pyppeteer``).</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="k">async</span> <span class="k">def</span> <span class="nf">_async_render</span><span class="p">(</span><span class="o">*</span><span class="p">,</span> <span class="n">url</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="n">script</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> <span class="n">scrolldown</span><span class="p">,</span> <span class="n">sleep</span><span class="p">:</span> <span class="nb">int</span><span class="p">,</span> <span class="n">wait</span><span class="p">:</span> <span class="nb">float</span><span class="p">,</span> <span class="n">reload</span><span class="p">,</span> <span class="n">content</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">],</span> <span class="n">timeout</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="nb">float</span><span class="p">,</span> <span class="nb">int</span><span class="p">]):</span>
|
||||
<span class="k">async</span> <span class="k">def</span> <span class="nf">_async_render</span><span class="p">(</span><span class="o">*</span><span class="p">,</span> <span class="n">url</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="n">script</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> <span class="n">scrolldown</span><span class="p">,</span> <span class="n">sleep</span><span class="p">:</span> <span class="nb">int</span><span class="p">,</span> <span class="n">wait</span><span class="p">:</span> <span class="nb">float</span><span class="p">,</span> <span class="n">reload</span><span class="p">,</span> <span class="n">content</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">],</span> <span class="n">timeout</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="nb">float</span><span class="p">,</span> <span class="nb">int</span><span class="p">],</span> <span class="n">keep_page</span><span class="p">:</span> <span class="nb">bool</span><span class="p">):</span>
|
||||
<span class="k">try</span><span class="p">:</span>
|
||||
<span class="n">browser</span> <span class="o">=</span> <span class="n">pyppeteer</span><span class="o">.</span><span class="n">launch</span><span class="p">(</span><span class="n">headless</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span> <span class="n">args</span><span class="o">=</span><span class="p">[</span><span class="s1">'--no-sandbox'</span><span class="p">])</span>
|
||||
<span class="n">page</span> <span class="o">=</span> <span class="k">await</span> <span class="n">browser</span><span class="o">.</span><span class="n">newPage</span><span class="p">()</span>
|
||||
<span class="n">page</span> <span class="o">=</span> <span class="k">await</span> <span class="bp">self</span><span class="o">.</span><span class="n">session</span><span class="o">.</span><span class="n">browser</span><span class="o">.</span><span class="n">newPage</span><span class="p">()</span>
|
||||
|
||||
<span class="c1"># Wait before rendering the page, to prevent timeouts.</span>
|
||||
<span class="k">await</span> <span class="n">asyncio</span><span class="o">.</span><span class="n">sleep</span><span class="p">(</span><span class="n">wait</span><span class="p">)</span>
|
||||
@@ -573,11 +594,14 @@
|
||||
|
||||
<span class="c1"># Return the content of the page, JavaScript evaluated.</span>
|
||||
<span class="n">content</span> <span class="o">=</span> <span class="k">await</span> <span class="n">page</span><span class="o">.</span><span class="n">content</span><span class="p">()</span>
|
||||
<span class="k">if</span> <span class="ow">not</span> <span class="n">keep_page</span><span class="p">:</span>
|
||||
<span class="k">await</span> <span class="n">page</span><span class="o">.</span><span class="n">close</span><span class="p">()</span>
|
||||
<span class="n">page</span> <span class="o">=</span> <span class="kc">None</span>
|
||||
<span class="k">return</span> <span class="n">content</span><span class="p">,</span> <span class="n">result</span><span class="p">,</span> <span class="n">page</span>
|
||||
<span class="k">except</span> <span class="ne">TimeoutError</span><span class="p">:</span>
|
||||
<span class="k">return</span> <span class="kc">None</span>
|
||||
|
||||
<span class="n">loop</span> <span class="o">=</span> <span class="n">asyncio</span><span class="o">.</span><span class="n">get_event_loop</span><span class="p">()</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">session</span><span class="o">.</span><span class="n">browser</span>  <span class="c1"># Automatically create an event loop and browser</span>
|
||||
<span class="n">content</span> <span class="o">=</span> <span class="kc">None</span>
|
||||
|
||||
<span class="c1"># Automatically set Reload to False, if example URL is being used.</span>
|
||||
@@ -588,9 +612,14 @@
|
||||
<span class="k">if</span> <span class="ow">not</span> <span class="n">content</span><span class="p">:</span>
|
||||
<span class="k">try</span><span class="p">:</span>
|
||||
|
||||
<span class="n">content</span><span class="p">,</span> <span class="n">result</span><span class="p">,</span> <span class="n">page</span> <span class="o">=</span> <span class="n">loop</span><span class="o">.</span><span class="n">run_until_complete</span><span class="p">(</span><span class="n">_async_render</span><span class="p">(</span><span class="n">url</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">url</span><span class="p">,</span> <span class="n">script</span><span class="o">=</span><span class="n">script</span><span class="p">,</span> <span class="n">sleep</span><span class="o">=</span><span class="n">sleep</span><span class="p">,</span> <span class="n">wait</span><span class="o">=</span><span class="n">wait</span><span class="p">,</span> <span class="n">content</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">html</span><span class="p">,</span> <span class="n">reload</span><span class="o">=</span><span class="n">reload</span><span class="p">,</span> <span class="n">scrolldown</span><span class="o">=</span><span class="n">scrolldown</span><span class="p">,</span> <span class="n">timeout</span><span class="o">=</span><span class="n">timeout</span><span class="p">))</span>
|
||||
<span class="k">except</span> <span class="ne">TimeoutError</span><span class="p">:</span>
|
||||
<span class="n">content</span><span class="p">,</span> <span class="n">result</span><span class="p">,</span> <span class="n">page</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">session</span><span class="o">.</span><span class="n">loop</span><span class="o">.</span><span class="n">run_until_complete</span><span class="p">(</span><span class="n">_async_render</span><span class="p">(</span><span class="n">url</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">url</span><span class="p">,</span> <span class="n">script</span><span class="o">=</span><span class="n">script</span><span class="p">,</span> <span class="n">sleep</span><span class="o">=</span><span class="n">sleep</span><span class="p">,</span> <span class="n">wait</span><span class="o">=</span><span class="n">wait</span><span class="p">,</span> <span class="n">content</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">html</span><span class="p">,</span> <span class="n">reload</span><span class="o">=</span><span class="n">reload</span><span class="p">,</span> <span class="n">scrolldown</span><span class="o">=</span><span class="n">scrolldown</span><span class="p">,</span> <span class="n">timeout</span><span class="o">=</span><span class="n">timeout</span><span class="p">,</span> <span class="n">keep_page</span><span class="o">=</span><span class="n">keep_page</span><span class="p">))</span>
|
||||
<span class="k">except</span> <span class="ne">TypeError</span><span class="p">:</span>
|
||||
<span class="k">pass</span>
|
||||
<span class="k">else</span><span class="p">:</span>
|
||||
<span class="k">break</span>
|
||||
|
||||
<span class="k">if</span> <span class="ow">not</span> <span class="n">content</span><span class="p">:</span>
|
||||
<span class="k">raise</span> <span class="n">MaxRetries</span><span class="p">(</span><span class="s2">"Unable to render the page. Try increasing timeout"</span><span class="p">)</span>
|
||||
|
||||
<span class="n">html</span> <span class="o">=</span> <span class="n">HTML</span><span class="p">(</span><span class="n">url</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">url</span><span class="p">,</span> <span class="n">html</span><span class="o">=</span><span class="n">content</span><span class="o">.</span><span class="n">encode</span><span class="p">(</span><span class="n">DEFAULT_ENCODING</span><span class="p">),</span> <span class="n">default_encoding</span><span class="o">=</span><span class="n">DEFAULT_ENCODING</span><span class="p">)</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="vm">__dict__</span><span class="o">.</span><span class="n">update</span><span class="p">(</span><span class="n">html</span><span class="o">.</span><span class="vm">__dict__</span><span class="p">)</span>
|
||||
@@ -603,20 +632,21 @@
|
||||
<span class="sd"> Effectively the same, but with an intelligent ``.html`` property added.</span>
|
||||
<span class="sd"> """</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="kc">None</span><span class="p">:</span>
|
||||
<span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">session</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="s1">'HTMLSession'</span><span class="p">,</span> <span class="s1">'AsyncHTMLSession'</span><span class="p">])</span> <span class="o">-></span> <span class="kc">None</span><span class="p">:</span>
|
||||
<span class="nb">super</span><span class="p">(</span><span class="n">HTMLResponse</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">()</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">_html</span> <span class="o">=</span> <span class="kc">None</span> <span class="c1"># type: HTML</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">session</span> <span class="o">=</span> <span class="n">session</span>
|
||||
|
||||
<span class="nd">@property</span>
|
||||
<span class="k">def</span> <span class="nf">html</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="n">HTML</span><span class="p">:</span>
|
||||
<span class="k">if</span> <span class="ow">not</span> <span class="bp">self</span><span class="o">.</span><span class="n">_html</span><span class="p">:</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">_html</span> <span class="o">=</span> <span class="n">HTML</span><span class="p">(</span><span class="n">url</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">url</span><span class="p">,</span> <span class="n">html</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">content</span><span class="p">,</span> <span class="n">default_encoding</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">encoding</span><span class="p">)</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">_html</span> <span class="o">=</span> <span class="n">HTML</span><span class="p">(</span><span class="n">session</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">session</span><span class="p">,</span> <span class="n">url</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">url</span><span class="p">,</span> <span class="n">html</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">content</span><span class="p">,</span> <span class="n">default_encoding</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">encoding</span><span class="p">)</span>
|
||||
|
||||
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_html</span>
|
||||
|
||||
<span class="nd">@classmethod</span>
|
||||
<span class="k">def</span> <span class="nf">_from_response</span><span class="p">(</span><span class="bp">cls</span><span class="p">,</span> <span class="n">response</span><span class="p">):</span>
|
||||
<span class="n">html_r</span> <span class="o">=</span> <span class="bp">cls</span><span class="p">()</span>
|
||||
<span class="k">def</span> <span class="nf">_from_response</span><span class="p">(</span><span class="bp">cls</span><span class="p">,</span> <span class="n">response</span><span class="p">,</span> <span class="n">session</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="s1">'HTMLSession'</span><span class="p">,</span> <span class="s1">'AsyncHTMLSession'</span><span class="p">]):</span>
|
||||
<span class="n">html_r</span> <span class="o">=</span> <span class="bp">cls</span><span class="p">(</span><span class="n">session</span><span class="o">=</span><span class="n">session</span><span class="p">)</span>
|
||||
<span class="n">html_r</span><span class="o">.</span><span class="vm">__dict__</span><span class="o">.</span><span class="n">update</span><span class="p">(</span><span class="n">response</span><span class="o">.</span><span class="vm">__dict__</span><span class="p">)</span>
|
||||
<span class="k">return</span> <span class="n">html_r</span>
|
||||
|
||||
@@ -672,8 +702,21 @@
|
||||
<span class="sd"> """</span>
|
||||
<span class="c1"># Convert Request object into HTTPRequest object.</span>
|
||||
<span class="n">r</span> <span class="o">=</span> <span class="nb">super</span><span class="p">(</span><span class="n">HTMLSession</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="n">request</span><span class="p">(</span><span class="o">*</span><span class="n">args</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span>
|
||||
</div></div>
|
||||
<span class="k">return</span> <span class="n">HTMLResponse</span><span class="o">.</span><span class="n">_from_response</span><span class="p">(</span><span class="n">r</span><span class="p">)</span>
|
||||
</div>
|
||||
<span class="k">return</span> <span class="n">HTMLResponse</span><span class="o">.</span><span class="n">_from_response</span><span class="p">(</span><span class="n">r</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span>
|
||||
|
||||
<span class="nd">@property</span>
|
||||
<span class="k">def</span> <span class="nf">browser</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||||
<span class="k">if</span> <span class="ow">not</span> <span class="nb">hasattr</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="s2">"_browser"</span><span class="p">):</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">loop</span> <span class="o">=</span> <span class="n">asyncio</span><span class="o">.</span><span class="n">get_event_loop</span><span class="p">()</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">_browser</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">loop</span><span class="o">.</span><span class="n">run_until_complete</span><span class="p">(</span><span class="n">pyppeteer</span><span class="o">.</span><span class="n">launch</span><span class="p">(</span><span class="n">headless</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span> <span class="n">args</span><span class="o">=</span><span class="p">[</span><span class="s1">'--no-sandbox'</span><span class="p">]))</span>
|
||||
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_browser</span>
|
||||
|
||||
<div class="viewcode-block" id="HTMLSession.close"><a class="viewcode-back" href="../index.html#requests_html.HTMLSession.close">[docs]</a> <span class="k">def</span> <span class="nf">close</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||||
<span class="sd">""" If a browser was created close it first. """</span>
|
||||
<span class="k">if</span> <span class="nb">hasattr</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="s2">"_browser"</span><span class="p">):</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">loop</span><span class="o">.</span><span class="n">run_until_complete</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_browser</span><span class="o">.</span><span class="n">close</span><span class="p">())</span></div></div>
|
||||
<span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="n">close</span><span class="p">()</span>
|
||||
|
||||
|
||||
<span class="k">class</span> <span class="nc">AsyncHTMLSession</span><span class="p">(</span><span class="n">requests</span><span class="o">.</span><span class="n">Session</span><span class="p">):</span>
|
||||
@@ -693,16 +736,15 @@
|
||||
<span class="k">if</span> <span class="n">mock_browser</span><span class="p">:</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">headers</span><span class="p">[</span><span class="s1">'User-Agent'</span><span class="p">]</span> <span class="o">=</span> <span class="n">user_agent</span><span class="p">()</span>
|
||||
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">hooks</span><span class="p">[</span><span class="s2">"response"</span><span class="p">]</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">response_hook</span><span class="p">)</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">hooks</span><span class="p">[</span><span class="s1">'response'</span><span class="p">]</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">response_hook</span><span class="p">)</span>
|
||||
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">loop</span> <span class="o">=</span> <span class="n">loop</span> <span class="ow">or</span> <span class="n">asyncio</span><span class="o">.</span><span class="n">get_event_loop</span><span class="p">()</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">thread_pool</span> <span class="o">=</span> <span class="n">ThreadPoolExecutor</span><span class="p">(</span><span class="n">max_workers</span><span class="o">=</span><span class="n">workers</span><span class="p">)</span>
|
||||
|
||||
<span class="nd">@staticmethod</span>
|
||||
<span class="k">def</span> <span class="nf">response_hook</span><span class="p">(</span><span class="n">response</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span> <span class="o">-></span> <span class="n">HTMLResponse</span><span class="p">:</span>
|
||||
<span class="k">def</span> <span class="nf">response_hook</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">response</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span> <span class="o">-></span> <span class="n">HTMLResponse</span><span class="p">:</span>
|
||||
<span class="sd">""" Change response encoding and replace it by a HTMLResponse. """</span>
|
||||
<span class="n">response</span><span class="o">.</span><span class="n">encoding</span> <span class="o">=</span> <span class="n">DEFAULT_ENCODING</span>
|
||||
<span class="k">return</span> <span class="n">HTMLResponse</span><span class="o">.</span><span class="n">_from_response</span><span class="p">(</span><span class="n">response</span><span class="p">)</span>
|
||||
<span class="k">return</span> <span class="n">HTMLResponse</span><span class="o">.</span><span class="n">_from_response</span><span class="p">(</span><span class="n">response</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">request</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">*</span><span class="n">args</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">):</span>
|
||||
<span class="sd">""" Partial original request func and run it in a thread. """</span>
|
||||
|
||||
@@ -44,7 +44,6 @@
|
||||
| <a href="#H"><strong>H</strong></a>
|
||||
| <a href="#L"><strong>L</strong></a>
|
||||
| <a href="#M"><strong>M</strong></a>
|
||||
| <a href="#N"><strong>N</strong></a>
|
||||
| <a href="#O"><strong>O</strong></a>
|
||||
| <a href="#P"><strong>P</strong></a>
|
||||
| <a href="#R"><strong>R</strong></a>
|
||||
@@ -200,18 +199,6 @@
|
||||
</ul></td>
|
||||
</tr></table>
|
||||
|
||||
<h2 id="N">N</h2>
|
||||
<table style="width: 100%" class="indextable genindextable"><tr>
|
||||
<td style="width: 33%; vertical-align: top;"><ul>
|
||||
<li><a href="index.html#requests_html.Element.next">next() (requests_html.Element method)</a>
|
||||
|
||||
<ul>
|
||||
<li><a href="index.html#requests_html.HTML.next">(requests_html.HTML method)</a>
|
||||
</li>
|
||||
</ul></li>
|
||||
</ul></td>
|
||||
</tr></table>
|
||||
|
||||
<h2 id="O">O</h2>
|
||||
<table style="width: 100%" class="indextable genindextable"><tr>
|
||||
<td style="width: 33%; vertical-align: top;"><ul>
|
||||
|
||||
+7
-20
@@ -210,7 +210,7 @@ once.</p>
|
||||
<p>These classes are the main interface to <code class="docutils literal notranslate"><span class="pre">requests-html</span></code>:</p>
|
||||
<dl class="class">
|
||||
<dt id="requests_html.HTML">
|
||||
<em class="property">class </em><code class="descclassname">requests_html.</code><code class="descname">HTML</code><span class="sig-paren">(</span><em>*, url: str = 'https://example.org/', html: Union[str, bytes], default_encoding: str = 'utf-8'</em><span class="sig-paren">)</span> → None<a class="reference internal" href="_modules/requests_html.html#HTML"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#requests_html.HTML" title="Permalink to this definition">¶</a></dt>
|
||||
<em class="property">class </em><code class="descclassname">requests_html.</code><code class="descname">HTML</code><span class="sig-paren">(</span><em>*, session: Union[_ForwardRef('HTTPSession'), _ForwardRef('AsyncHTMLSession')] = None, url: str = 'https://example.org/', html: Union[str, bytes], default_encoding: str = 'utf-8'</em><span class="sig-paren">)</span> → None<a class="reference internal" href="_modules/requests_html.html#HTML"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#requests_html.HTML" title="Permalink to this definition">¶</a></dt>
|
||||
<dd><p>An HTML document, ready for parsing.</p>
|
||||
<table class="docutils field-list" frame="void" rules="none">
|
||||
<col class="field-name" />
|
||||
@@ -306,14 +306,6 @@ for more details.</p>
|
||||
<a class="reference internal" href="#requests_html.Element" title="requests_html.Element"><code class="xref py py-class docutils literal notranslate"><span class="pre">Element</span></code></a> or <a class="reference internal" href="#requests_html.HTML" title="requests_html.HTML"><code class="xref py py-class docutils literal notranslate"><span class="pre">HTML</span></code></a>.</p>
|
||||
</dd></dl>
|
||||
|
||||
<dl class="method">
|
||||
<dt id="requests_html.HTML.next">
|
||||
<code class="descname">next</code><span class="sig-paren">(</span><em>fetch: bool = False</em><span class="sig-paren">)</span> → Union[_ForwardRef('HTML'), typing.List[str]]<a class="headerlink" href="#requests_html.HTML.next" title="Permalink to this definition">¶</a></dt>
|
||||
<dd><p>Attempts to find the next page, if there is one. If <code class="docutils literal notranslate"><span class="pre">fetch</span></code>
|
||||
is <code class="docutils literal notranslate"><span class="pre">True</span></code> (default), returns <a class="reference internal" href="#requests_html.HTML" title="requests_html.HTML"><code class="xref py py-class docutils literal notranslate"><span class="pre">HTML</span></code></a> object of
|
||||
next page. If <code class="docutils literal notranslate"><span class="pre">fetch</span></code> is <code class="docutils literal notranslate"><span class="pre">False</span></code>, simply returns the next URL.</p>
|
||||
</dd></dl>
|
||||
|
||||
<dl class="attribute">
|
||||
<dt id="requests_html.HTML.pq">
|
||||
<code class="descname">pq</code><a class="headerlink" href="#requests_html.HTML.pq" title="Permalink to this definition">¶</a></dt>
|
||||
@@ -330,7 +322,7 @@ of the <a class="reference internal" href="#requests_html.Element" title="reques
|
||||
|
||||
<dl class="method">
|
||||
<dt id="requests_html.HTML.render">
|
||||
<code class="descname">render</code><span class="sig-paren">(</span><em>retries: int = 8</em>, <em>script: str = None</em>, <em>wait: float = 0.2</em>, <em>scrolldown=False</em>, <em>sleep: int = 0</em>, <em>reload: bool = True</em>, <em>timeout: Union[float</em>, <em>int] = 8.0</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/requests_html.html#HTML.render"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#requests_html.HTML.render" title="Permalink to this definition">¶</a></dt>
|
||||
<code class="descname">render</code><span class="sig-paren">(</span><em>retries: int = 8</em>, <em>script: str = None</em>, <em>wait: float = 0.2</em>, <em>scrolldown=False</em>, <em>sleep: int = 0</em>, <em>reload: bool = True</em>, <em>timeout: Union[float</em>, <em>int] = 8.0</em>, <em>keep_page: bool = False</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/requests_html.html#HTML.render"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#requests_html.HTML.render" title="Permalink to this definition">¶</a></dt>
|
||||
<dd><p>Reloads the response in Chromium, and replaces HTML content
|
||||
with an updated version, with JavaScript executed.</p>
|
||||
<table class="docutils field-list" frame="void" rules="none">
|
||||
@@ -344,6 +336,7 @@ with an updated version, with JavaScript executed.</p>
|
||||
<li><strong>scrolldown</strong> – Integer, if provided, of how many times to page down.</li>
|
||||
<li><strong>sleep</strong> – Integer, if provided, of how long to sleep after initial render.</li>
|
||||
<li><strong>reload</strong> – If <code class="docutils literal notranslate"><span class="pre">False</span></code>, content will not be loaded from the browser, but will be provided from memory.</li>
|
||||
<li><strong>keep_page</strong> – If <code class="docutils literal notranslate"><span class="pre">True</span></code> will allow you to interact with the browser page through <code class="docutils literal notranslate"><span class="pre">r.html.page</span></code>.</li>
|
||||
</ul>
|
||||
</td>
|
||||
</tr>
|
||||
@@ -372,6 +365,8 @@ runtime. Example:</p>
|
||||
<span class="go">{'width': 800, 'height': 600, 'deviceScaleFactor': 1}</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>Warning: If you use keep_page, you’re responsible for closing each page, since
|
||||
opening too many at scale may crash the browser.</p>
|
||||
<p>Warning: the first time you run this method, it will download
|
||||
Chromium into your home directory (<code class="docutils literal notranslate"><span class="pre">~/.pyppeteer</span></code>).</p>
|
||||
</dd></dl>
|
||||
@@ -546,14 +541,6 @@ for more details.</p>
|
||||
<a class="reference internal" href="#requests_html.Element" title="requests_html.Element"><code class="xref py py-class docutils literal notranslate"><span class="pre">Element</span></code></a> or <a class="reference internal" href="#requests_html.HTML" title="requests_html.HTML"><code class="xref py py-class docutils literal notranslate"><span class="pre">HTML</span></code></a>.</p>
|
||||
</dd></dl>
|
||||
|
||||
<dl class="method">
|
||||
<dt id="requests_html.Element.next">
|
||||
<code class="descname">next</code><span class="sig-paren">(</span><em>fetch: bool = False</em><span class="sig-paren">)</span> → Union[_ForwardRef('HTML'), typing.List[str]]<a class="headerlink" href="#requests_html.Element.next" title="Permalink to this definition">¶</a></dt>
|
||||
<dd><p>Attempts to find the next page, if there is one. If <code class="docutils literal notranslate"><span class="pre">fetch</span></code>
|
||||
is <code class="docutils literal notranslate"><span class="pre">True</span></code> (default), returns <a class="reference internal" href="#requests_html.HTML" title="requests_html.HTML"><code class="xref py py-class docutils literal notranslate"><span class="pre">HTML</span></code></a> object of
|
||||
next page. If <code class="docutils literal notranslate"><span class="pre">fetch</span></code> is <code class="docutils literal notranslate"><span class="pre">False</span></code>, simply returns the next URL.</p>
|
||||
</dd></dl>
|
||||
|
||||
<dl class="attribute">
|
||||
<dt id="requests_html.Element.pq">
|
||||
<code class="descname">pq</code><a class="headerlink" href="#requests_html.Element.pq" title="Permalink to this definition">¶</a></dt>
|
||||
@@ -654,8 +641,8 @@ style. Defaults to a Chrome-style User-Agent.</p>
|
||||
amongst other things.</p>
|
||||
<dl class="method">
|
||||
<dt id="requests_html.HTMLSession.close">
|
||||
<code class="descname">close</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="headerlink" href="#requests_html.HTMLSession.close" title="Permalink to this definition">¶</a></dt>
|
||||
<dd><p>Closes all adapters and as such the session</p>
|
||||
<code class="descname">close</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/requests_html.html#HTMLSession.close"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#requests_html.HTMLSession.close" title="Permalink to this definition">¶</a></dt>
|
||||
<dd><p>If a browser was created close it first.</p>
|
||||
</dd></dl>
|
||||
|
||||
<dl class="method">
|
||||
|
||||
BIN
Binary file not shown.
+1
-1
File diff suppressed because one or more lines are too long
Reference in New Issue
Block a user