mirror of
https://github.com/kennethreitz/requests-html.git
synced 2026-06-05 23:00:20 +00:00
updates
This commit is contained in:
+107
-23
@@ -32,8 +32,10 @@
|
||||
<h1>Source code for requests_html</h1><div class="highlight"><pre>
|
||||
<span></span><span class="kn">import</span> <span class="nn">sys</span>
|
||||
<span class="kn">import</span> <span class="nn">asyncio</span>
|
||||
<span class="kn">from</span> <span class="nn">urllib.parse</span> <span class="k">import</span> <span class="n">urlparse</span><span class="p">,</span> <span class="n">urlunparse</span>
|
||||
<span class="kn">from</span> <span class="nn">urllib.parse</span> <span class="k">import</span> <span class="n">urlparse</span><span class="p">,</span> <span class="n">urlunparse</span><span class="p">,</span> <span class="n">urljoin</span>
|
||||
<span class="kn">from</span> <span class="nn">concurrent.futures</span> <span class="k">import</span> <span class="n">ThreadPoolExecutor</span>
|
||||
<span class="kn">from</span> <span class="nn">concurrent.futures._base</span> <span class="k">import</span> <span class="ne">TimeoutError</span>
|
||||
<span class="kn">from</span> <span class="nn">functools</span> <span class="k">import</span> <span class="n">partial</span>
|
||||
<span class="kn">from</span> <span class="nn">typing</span> <span class="k">import</span> <span class="n">Set</span><span class="p">,</span> <span class="n">Union</span><span class="p">,</span> <span class="n">List</span><span class="p">,</span> <span class="n">MutableMapping</span><span class="p">,</span> <span class="n">Optional</span>
|
||||
|
||||
<span class="kn">import</span> <span class="nn">pyppeteer</span>
|
||||
@@ -41,9 +43,11 @@
|
||||
<span class="kn">from</span> <span class="nn">pyquery</span> <span class="k">import</span> <span class="n">PyQuery</span>
|
||||
|
||||
<span class="kn">from</span> <span class="nn">fake_useragent</span> <span class="k">import</span> <span class="n">UserAgent</span>
|
||||
<span class="kn">from</span> <span class="nn">lxml.html.clean</span> <span class="k">import</span> <span class="n">Cleaner</span>
|
||||
<span class="kn">import</span> <span class="nn">lxml</span>
|
||||
<span class="kn">from</span> <span class="nn">lxml</span> <span class="k">import</span> <span class="n">etree</span>
|
||||
<span class="kn">from</span> <span class="nn">lxml.html</span> <span class="k">import</span> <span class="n">HtmlElement</span>
|
||||
<span class="kn">from</span> <span class="nn">lxml.html</span> <span class="k">import</span> <span class="n">tostring</span> <span class="k">as</span> <span class="n">lxml_html_tostring</span>
|
||||
<span class="kn">from</span> <span class="nn">lxml.html.soupparser</span> <span class="k">import</span> <span class="n">fromstring</span> <span class="k">as</span> <span class="n">soup_parse</span>
|
||||
<span class="kn">from</span> <span class="nn">parse</span> <span class="k">import</span> <span class="n">search</span> <span class="k">as</span> <span class="n">parse_search</span>
|
||||
<span class="kn">from</span> <span class="nn">parse</span> <span class="k">import</span> <span class="n">findall</span><span class="p">,</span> <span class="n">Result</span>
|
||||
@@ -53,6 +57,10 @@
|
||||
<span class="n">DEFAULT_URL</span> <span class="o">=</span> <span class="s1">'https://example.org/'</span>
|
||||
<span class="n">DEFAULT_USER_AGENT</span> <span class="o">=</span> <span class="s1">'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/603.3.8 (KHTML, like Gecko) Version/10.1.2 Safari/603.3.8'</span>
|
||||
|
||||
<span class="n">cleaner</span> <span class="o">=</span> <span class="n">Cleaner</span><span class="p">()</span>
|
||||
<span class="n">cleaner</span><span class="o">.</span><span class="n">javascript</span> <span class="o">=</span> <span class="kc">True</span>
|
||||
<span class="n">cleaner</span><span class="o">.</span><span class="n">style</span> <span class="o">=</span> <span class="kc">True</span>
|
||||
|
||||
<span class="n">useragent</span> <span class="o">=</span> <span class="kc">None</span>
|
||||
|
||||
<span class="c1"># Typing.</span>
|
||||
@@ -124,7 +132,11 @@
|
||||
<span class="k">return</span> <span class="n">etree</span><span class="o">.</span><span class="n">tostring</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">element</span><span class="p">,</span> <span class="n">encoding</span><span class="o">=</span><span class="s1">'unicode'</span><span class="p">)</span><span class="o">.</span><span class="n">strip</span><span class="p">()</span>
|
||||
|
||||
<span class="nd">@html</span><span class="o">.</span><span class="n">setter</span>
|
||||
<span class="k">def</span> <span class="nf">html</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">html</span><span class="p">:</span> <span class="nb">bytes</span><span class="p">)</span> <span class="o">-></span> <span class="kc">None</span><span class="p">:</span>
|
||||
<span class="k">def</span> <span class="nf">html</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">html</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="kc">None</span><span class="p">:</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">_html</span> <span class="o">=</span> <span class="n">html</span><span class="o">.</span><span class="n">encode</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">encoding</span><span class="p">)</span>
|
||||
|
||||
<span class="nd">@raw_html</span><span class="o">.</span><span class="n">setter</span>
|
||||
<span class="k">def</span> <span class="nf">raw_html</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">html</span><span class="p">:</span> <span class="nb">bytes</span><span class="p">)</span> <span class="o">-></span> <span class="kc">None</span><span class="p">:</span>
|
||||
<span class="sd">"""Property setter for self.raw_html."""</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">_html</span> <span class="o">=</span> <span class="n">html</span>
|
||||
|
||||
@@ -225,11 +237,12 @@
|
||||
<span class="k">else</span><span class="p">:</span>
|
||||
<span class="k">return</span> <span class="n">url</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">find</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">selector</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"*"</span><span class="p">,</span> <span class="n">containing</span><span class="p">:</span> <span class="n">_Containing</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> <span class="n">first</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">,</span> <span class="n">_encoding</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="kc">None</span><span class="p">)</span> <span class="o">-></span> <span class="n">_Find</span><span class="p">:</span>
|
||||
<span class="k">def</span> <span class="nf">find</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">selector</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"*"</span><span class="p">,</span> <span class="o">*</span><span class="p">,</span> <span class="n">containing</span><span class="p">:</span> <span class="n">_Containing</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> <span class="n">clean</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">,</span> <span class="n">first</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">,</span> <span class="n">_encoding</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="kc">None</span><span class="p">)</span> <span class="o">-></span> <span class="n">_Find</span><span class="p">:</span>
|
||||
<span class="sd">"""Given a CSS Selector, returns a list of</span>
|
||||
<span class="sd"> :class:`Element <Element>` objects or a single one.</span>
|
||||
|
||||
<span class="sd"> :param selector: CSS Selector to use.</span>
|
||||
<span class="sd"> :param clean: Whether or not to sanitize the found HTML of ``<script>`` and ``<style>`` tags.</span>
|
||||
<span class="sd"> :param containing: If specified, only return elements that contain the provided text.</span>
|
||||
<span class="sd"> :param first: Whether or not to return just the first result.</span>
|
||||
<span class="sd"> :param _encoding: The encoding format.</span>
|
||||
@@ -269,13 +282,23 @@
|
||||
|
||||
<span class="n">elements</span><span class="o">.</span><span class="n">reverse</span><span class="p">()</span>
|
||||
|
||||
<span class="c1"># Sanitize the found HTML.</span>
|
||||
<span class="k">if</span> <span class="n">clean</span><span class="p">:</span>
|
||||
<span class="n">elements_copy</span> <span class="o">=</span> <span class="n">elements</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span>
|
||||
<span class="n">elements</span> <span class="o">=</span> <span class="p">[]</span>
|
||||
|
||||
<span class="k">for</span> <span class="n">element</span> <span class="ow">in</span> <span class="n">elements_copy</span><span class="p">:</span>
|
||||
<span class="n">element</span><span class="o">.</span><span class="n">raw_html</span> <span class="o">=</span> <span class="n">lxml_html_tostring</span><span class="p">(</span><span class="n">cleaner</span><span class="o">.</span><span class="n">clean_html</span><span class="p">(</span><span class="n">element</span><span class="o">.</span><span class="n">lxml</span><span class="p">))</span>
|
||||
<span class="n">elements</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">element</span><span class="p">)</span>
|
||||
|
||||
<span class="k">return</span> <span class="n">_get_first_or_list</span><span class="p">(</span><span class="n">elements</span><span class="p">,</span> <span class="n">first</span><span class="p">)</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">xpath</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">selector</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="n">first</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">,</span> <span class="n">_encoding</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="kc">None</span><span class="p">)</span> <span class="o">-></span> <span class="n">_XPath</span><span class="p">:</span>
|
||||
<span class="k">def</span> <span class="nf">xpath</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">selector</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="o">*</span><span class="p">,</span> <span class="n">clean</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">,</span> <span class="n">first</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">,</span> <span class="n">_encoding</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="kc">None</span><span class="p">)</span> <span class="o">-></span> <span class="n">_XPath</span><span class="p">:</span>
|
||||
<span class="sd">"""Given an XPath selector, returns a list of</span>
|
||||
<span class="sd"> :class:`Element <Element>` objects or a single one.</span>
|
||||
|
||||
<span class="sd"> :param selector: XPath Selector to use.</span>
|
||||
<span class="sd"> :param clean: Whether or not to sanitize the found HTML of ``<script>`` and ``<style>`` tags.</span>
|
||||
<span class="sd"> :param first: Whether or not to return just the first result.</span>
|
||||
<span class="sd"> :param _encoding: The encoding format.</span>
|
||||
|
||||
@@ -297,6 +320,15 @@
|
||||
<span class="k">for</span> <span class="n">selection</span> <span class="ow">in</span> <span class="n">selected</span>
|
||||
<span class="p">]</span>
|
||||
|
||||
<span class="c1"># Sanitize the found HTML.</span>
|
||||
<span class="k">if</span> <span class="n">clean</span><span class="p">:</span>
|
||||
<span class="n">elements_copy</span> <span class="o">=</span> <span class="n">elements</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span>
|
||||
<span class="n">elements</span> <span class="o">=</span> <span class="p">[]</span>
|
||||
|
||||
<span class="k">for</span> <span class="n">element</span> <span class="ow">in</span> <span class="n">elements_copy</span><span class="p">:</span>
|
||||
<span class="n">element</span><span class="o">.</span><span class="n">raw_html</span> <span class="o">=</span> <span class="n">lxml_html_tostring</span><span class="p">(</span><span class="n">cleaner</span><span class="o">.</span><span class="n">clean_html</span><span class="p">(</span><span class="n">element</span><span class="o">.</span><span class="n">lxml</span><span class="p">))</span>
|
||||
<span class="n">elements</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">element</span><span class="p">)</span>
|
||||
|
||||
<span class="k">return</span> <span class="n">_get_first_or_list</span><span class="p">(</span><span class="n">elements</span><span class="p">,</span> <span class="n">first</span><span class="p">)</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">search</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">template</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="n">Result</span><span class="p">:</span>
|
||||
@@ -337,15 +369,20 @@
|
||||
<span class="c1"># Parse the link with stdlib.</span>
|
||||
<span class="n">parsed</span> <span class="o">=</span> <span class="n">urlparse</span><span class="p">(</span><span class="n">link</span><span class="p">)</span><span class="o">.</span><span class="n">_asdict</span><span class="p">()</span>
|
||||
|
||||
<span class="c1"># Appears to be a relative link:</span>
|
||||
<span class="c1"># If link is relative, then join it with base_url.</span>
|
||||
<span class="k">if</span> <span class="ow">not</span> <span class="n">parsed</span><span class="p">[</span><span class="s1">'netloc'</span><span class="p">]:</span>
|
||||
<span class="n">parsed</span><span class="p">[</span><span class="s1">'netloc'</span><span class="p">]</span> <span class="o">=</span> <span class="n">urlparse</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">base_url</span><span class="p">)</span><span class="o">.</span><span class="n">netloc</span>
|
||||
<span class="k">return</span> <span class="n">urljoin</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">base_url</span><span class="p">,</span> <span class="n">link</span><span class="p">)</span>
|
||||
|
||||
<span class="c1"># Link is absolute; if it lacks a scheme, add one from base_url.</span>
|
||||
<span class="k">if</span> <span class="ow">not</span> <span class="n">parsed</span><span class="p">[</span><span class="s1">'scheme'</span><span class="p">]:</span>
|
||||
<span class="n">parsed</span><span class="p">[</span><span class="s1">'scheme'</span><span class="p">]</span> <span class="o">=</span> <span class="n">urlparse</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">base_url</span><span class="p">)</span><span class="o">.</span><span class="n">scheme</span>
|
||||
|
||||
<span class="c1"># Re-construct URL, with new data.</span>
|
||||
<span class="n">parsed</span> <span class="o">=</span> <span class="p">(</span><span class="n">v</span> <span class="k">for</span> <span class="n">v</span> <span class="ow">in</span> <span class="n">parsed</span><span class="o">.</span><span class="n">values</span><span class="p">())</span>
|
||||
<span class="k">return</span> <span class="n">urlunparse</span><span class="p">(</span><span class="n">parsed</span><span class="p">)</span>
|
||||
<span class="c1"># Reconstruct the URL to incorporate the new scheme.</span>
|
||||
<span class="n">parsed</span> <span class="o">=</span> <span class="p">(</span><span class="n">v</span> <span class="k">for</span> <span class="n">v</span> <span class="ow">in</span> <span class="n">parsed</span><span class="o">.</span><span class="n">values</span><span class="p">())</span>
|
||||
<span class="k">return</span> <span class="n">urlunparse</span><span class="p">(</span><span class="n">parsed</span><span class="p">)</span>
|
||||
|
||||
<span class="c1"># Link is absolute and complete with scheme; nothing to be done here.</span>
|
||||
<span class="k">return</span> <span class="n">link</span>
|
||||
|
||||
|
||||
<span class="nd">@property</span>
|
||||
@@ -372,9 +409,15 @@
|
||||
<span class="k">if</span> <span class="n">result</span><span class="p">:</span>
|
||||
<span class="k">return</span> <span class="n">result</span>
|
||||
|
||||
<span class="n">url</span> <span class="o">=</span> <span class="s1">'/'</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">url</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s1">'/'</span><span class="p">)[:</span><span class="o">-</span><span class="mi">1</span><span class="p">])</span>
|
||||
<span class="k">if</span> <span class="n">url</span><span class="o">.</span><span class="n">endswith</span><span class="p">(</span><span class="s1">'/'</span><span class="p">):</span>
|
||||
<span class="n">url</span> <span class="o">=</span> <span class="n">url</span><span class="p">[:</span><span class="o">-</span><span class="mi">1</span><span class="p">]</span>
|
||||
<span class="c1"># Parse the url to separate out the path</span>
|
||||
<span class="n">parsed</span> <span class="o">=</span> <span class="n">urlparse</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">url</span><span class="p">)</span><span class="o">.</span><span class="n">_asdict</span><span class="p">()</span>
|
||||
|
||||
<span class="c1"># Remove any part of the path after the last '/'</span>
|
||||
<span class="n">path</span> <span class="o">=</span> <span class="s1">'/'</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">parsed</span><span class="p">[</span><span class="s1">'path'</span><span class="p">]</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s1">'/'</span><span class="p">)[:</span><span class="o">-</span><span class="mi">1</span><span class="p">])</span>
|
||||
|
||||
<span class="c1"># Reconstruct the url with the modified path</span>
|
||||
<span class="n">parsed</span> <span class="o">=</span> <span class="p">(</span><span class="n">v</span> <span class="k">for</span> <span class="n">v</span> <span class="ow">in</span> <span class="n">parsed</span><span class="o">.</span><span class="n">values</span><span class="p">())</span>
|
||||
<span class="n">url</span> <span class="o">=</span> <span class="n">urlunparse</span><span class="p">(</span><span class="n">parsed</span><span class="p">)</span>
|
||||
|
||||
<span class="k">return</span> <span class="n">url</span>
|
||||
|
||||
@@ -389,12 +432,13 @@
|
||||
|
||||
<span class="vm">__slots__</span> <span class="o">=</span> <span class="p">[</span>
|
||||
<span class="s1">'element'</span><span class="p">,</span> <span class="s1">'url'</span><span class="p">,</span> <span class="s1">'skip_anchors'</span><span class="p">,</span> <span class="s1">'default_encoding'</span><span class="p">,</span> <span class="s1">'_encoding'</span><span class="p">,</span>
|
||||
<span class="s1">'_encoding'</span><span class="p">,</span> <span class="s1">'_html'</span><span class="p">,</span> <span class="s1">'_lxml'</span><span class="p">,</span> <span class="s1">'_pq'</span><span class="p">,</span> <span class="s1">'session'</span>
|
||||
<span class="s1">'_html'</span><span class="p">,</span> <span class="s1">'_lxml'</span><span class="p">,</span> <span class="s1">'_pq'</span><span class="p">,</span> <span class="s1">'_attrs'</span><span class="p">,</span> <span class="s1">'session'</span>
|
||||
<span class="p">]</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">*</span><span class="p">,</span> <span class="n">element</span><span class="p">,</span> <span class="n">url</span><span class="p">:</span> <span class="n">_URL</span><span class="p">,</span> <span class="n">default_encoding</span><span class="p">:</span> <span class="n">_DefaultEncoding</span> <span class="o">=</span> <span class="kc">None</span><span class="p">)</span> <span class="o">-></span> <span class="kc">None</span><span class="p">:</span>
|
||||
<span class="nb">super</span><span class="p">(</span><span class="n">Element</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="n">element</span><span class="o">=</span><span class="n">element</span><span class="p">,</span> <span class="n">url</span><span class="o">=</span><span class="n">url</span><span class="p">,</span> <span class="n">default_encoding</span><span class="o">=</span><span class="n">default_encoding</span><span class="p">)</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">element</span> <span class="o">=</span> <span class="n">element</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">_attrs</span> <span class="o">=</span> <span class="kc">None</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">__repr__</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">str</span><span class="p">:</span>
|
||||
<span class="n">attrs</span> <span class="o">=</span> <span class="p">[</span><span class="s1">'</span><span class="si">{}</span><span class="s1">=</span><span class="si">{}</span><span class="s1">'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">attr</span><span class="p">,</span> <span class="nb">repr</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">attrs</span><span class="p">[</span><span class="n">attr</span><span class="p">]))</span> <span class="k">for</span> <span class="n">attr</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">attrs</span><span class="p">]</span>
|
||||
@@ -405,14 +449,15 @@
|
||||
<span class="sd">"""Returns a dictionary of the attributes of the :class:`Element <Element>`</span>
|
||||
<span class="sd"> (`learn more <https://www.w3schools.com/tags/ref_attributes.asp>`_).</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="n">attrs</span> <span class="o">=</span> <span class="p">{</span><span class="n">k</span><span class="p">:</span> <span class="n">v</span> <span class="k">for</span> <span class="n">k</span><span class="p">,</span> <span class="n">v</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">element</span><span class="o">.</span><span class="n">items</span><span class="p">()}</span>
|
||||
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">_attrs</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">_attrs</span> <span class="o">=</span> <span class="p">{</span><span class="n">k</span><span class="p">:</span> <span class="n">v</span> <span class="k">for</span> <span class="n">k</span><span class="p">,</span> <span class="n">v</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">element</span><span class="o">.</span><span class="n">items</span><span class="p">()}</span>
|
||||
|
||||
<span class="c1"># Split class and rel up, as there are usually many of them:</span>
|
||||
<span class="k">for</span> <span class="n">attr</span> <span class="ow">in</span> <span class="p">[</span><span class="s1">'class'</span><span class="p">,</span> <span class="s1">'rel'</span><span class="p">]:</span>
|
||||
<span class="k">if</span> <span class="n">attr</span> <span class="ow">in</span> <span class="n">attrs</span><span class="p">:</span>
|
||||
<span class="n">attrs</span><span class="p">[</span><span class="n">attr</span><span class="p">]</span> <span class="o">=</span> <span class="nb">tuple</span><span class="p">(</span><span class="n">attrs</span><span class="p">[</span><span class="n">attr</span><span class="p">]</span><span class="o">.</span><span class="n">split</span><span class="p">())</span>
|
||||
<span class="c1"># Split class and rel up, as there are usually many of them:</span>
|
||||
<span class="k">for</span> <span class="n">attr</span> <span class="ow">in</span> <span class="p">[</span><span class="s1">'class'</span><span class="p">,</span> <span class="s1">'rel'</span><span class="p">]:</span>
|
||||
<span class="k">if</span> <span class="n">attr</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">_attrs</span><span class="p">:</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">_attrs</span><span class="p">[</span><span class="n">attr</span><span class="p">]</span> <span class="o">=</span> <span class="nb">tuple</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_attrs</span><span class="p">[</span><span class="n">attr</span><span class="p">]</span><span class="o">.</span><span class="n">split</span><span class="p">())</span>
|
||||
</div>
|
||||
<span class="k">return</span> <span class="n">attrs</span>
|
||||
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_attrs</span>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="HTML"><a class="viewcode-back" href="../index.html#requests_html.HTML">[docs]</a><span class="k">class</span> <span class="nc">HTML</span><span class="p">(</span><span class="n">BaseParser</span><span class="p">):</span>
|
||||
@@ -436,6 +481,7 @@
|
||||
<span class="n">url</span><span class="o">=</span><span class="n">url</span><span class="p">,</span>
|
||||
<span class="n">default_encoding</span><span class="o">=</span><span class="n">default_encoding</span>
|
||||
<span class="p">)</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">page</span> <span class="o">=</span> <span class="kc">None</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">__repr__</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">str</span><span class="p">:</span>
|
||||
<span class="k">return</span> <span class="n">f</span><span class="s2">"<HTML url=</span><span class="si">{self.url!r}</span><span class="s2">>"</span>
|
||||
@@ -451,6 +497,9 @@
|
||||
<span class="k">except</span> <span class="ne">AttributeError</span><span class="p">:</span>
|
||||
<span class="k">break</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">__next__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||||
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">next</span><span class="p">(</span><span class="n">fetch</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span><span class="o">.</span><span class="n">html</span>
|
||||
|
||||
<div class="viewcode-block" id="HTML.render"><a class="viewcode-back" href="../index.html#requests_html.HTML.render">[docs]</a> <span class="k">def</span> <span class="nf">render</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">retries</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">8</span><span class="p">,</span> <span class="n">script</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> <span class="n">wait</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">0.2</span><span class="p">,</span> <span class="n">scrolldown</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span> <span class="n">sleep</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">0</span><span class="p">,</span> <span class="n">reload</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">,</span> <span class="n">timeout</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="nb">float</span><span class="p">,</span> <span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="mf">8.0</span><span class="p">):</span>
|
||||
<span class="sd">"""Reloads the response in Chromium, and replaces HTML content</span>
|
||||
<span class="sd"> with an updated version, with JavaScript executed.</span>
|
||||
@@ -496,7 +545,7 @@
|
||||
<span class="sd"> """</span>
|
||||
<span class="k">async</span> <span class="k">def</span> <span class="nf">_async_render</span><span class="p">(</span><span class="o">*</span><span class="p">,</span> <span class="n">url</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="n">script</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> <span class="n">scrolldown</span><span class="p">,</span> <span class="n">sleep</span><span class="p">:</span> <span class="nb">int</span><span class="p">,</span> <span class="n">wait</span><span class="p">:</span> <span class="nb">float</span><span class="p">,</span> <span class="n">reload</span><span class="p">,</span> <span class="n">content</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">],</span> <span class="n">timeout</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="nb">float</span><span class="p">,</span> <span class="nb">int</span><span class="p">]):</span>
|
||||
<span class="k">try</span><span class="p">:</span>
|
||||
<span class="n">browser</span> <span class="o">=</span> <span class="n">pyppeteer</span><span class="o">.</span><span class="n">launch</span><span class="p">(</span><span class="n">headless</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
|
||||
<span class="n">browser</span> <span class="o">=</span> <span class="n">pyppeteer</span><span class="o">.</span><span class="n">launch</span><span class="p">(</span><span class="n">headless</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span> <span class="n">args</span><span class="o">=</span><span class="p">[</span><span class="s1">'--no-sandbox'</span><span class="p">])</span>
|
||||
<span class="n">page</span> <span class="o">=</span> <span class="k">await</span> <span class="n">browser</span><span class="o">.</span><span class="n">newPage</span><span class="p">()</span>
|
||||
|
||||
<span class="c1"># Wait before rendering the page, to prevent timeouts.</span>
|
||||
@@ -524,7 +573,7 @@
|
||||
|
||||
<span class="c1"># Return the content of the page, JavaScript evaluated.</span>
|
||||
<span class="n">content</span> <span class="o">=</span> <span class="k">await</span> <span class="n">page</span><span class="o">.</span><span class="n">content</span><span class="p">()</span>
|
||||
<span class="k">return</span> <span class="n">content</span><span class="p">,</span> <span class="n">result</span>
|
||||
<span class="k">return</span> <span class="n">content</span><span class="p">,</span> <span class="n">result</span><span class="p">,</span> <span class="n">page</span>
|
||||
<span class="k">except</span> <span class="ne">TimeoutError</span><span class="p">:</span>
|
||||
<span class="k">return</span> <span class="kc">None</span>
|
||||
|
||||
@@ -539,12 +588,13 @@
|
||||
<span class="k">if</span> <span class="ow">not</span> <span class="n">content</span><span class="p">:</span>
|
||||
<span class="k">try</span><span class="p">:</span>
|
||||
|
||||
<span class="n">content</span><span class="p">,</span> <span class="n">result</span> <span class="o">=</span> <span class="n">loop</span><span class="o">.</span><span class="n">run_until_complete</span><span class="p">(</span><span class="n">_async_render</span><span class="p">(</span><span class="n">url</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">url</span><span class="p">,</span> <span class="n">script</span><span class="o">=</span><span class="n">script</span><span class="p">,</span> <span class="n">sleep</span><span class="o">=</span><span class="n">sleep</span><span class="p">,</span> <span class="n">wait</span><span class="o">=</span><span class="n">wait</span><span class="p">,</span> <span class="n">content</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">html</span><span class="p">,</span> <span class="n">reload</span><span class="o">=</span><span class="n">reload</span><span class="p">,</span> <span class="n">scrolldown</span><span class="o">=</span><span class="n">scrolldown</span><span class="p">,</span> <span class="n">timeout</span><span class="o">=</span><span class="n">timeout</span><span class="p">))</span>
|
||||
<span class="n">content</span><span class="p">,</span> <span class="n">result</span><span class="p">,</span> <span class="n">page</span> <span class="o">=</span> <span class="n">loop</span><span class="o">.</span><span class="n">run_until_complete</span><span class="p">(</span><span class="n">_async_render</span><span class="p">(</span><span class="n">url</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">url</span><span class="p">,</span> <span class="n">script</span><span class="o">=</span><span class="n">script</span><span class="p">,</span> <span class="n">sleep</span><span class="o">=</span><span class="n">sleep</span><span class="p">,</span> <span class="n">wait</span><span class="o">=</span><span class="n">wait</span><span class="p">,</span> <span class="n">content</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">html</span><span class="p">,</span> <span class="n">reload</span><span class="o">=</span><span class="n">reload</span><span class="p">,</span> <span class="n">scrolldown</span><span class="o">=</span><span class="n">scrolldown</span><span class="p">,</span> <span class="n">timeout</span><span class="o">=</span><span class="n">timeout</span><span class="p">))</span>
|
||||
<span class="k">except</span> <span class="ne">TimeoutError</span><span class="p">:</span>
|
||||
<span class="k">pass</span>
|
||||
|
||||
<span class="n">html</span> <span class="o">=</span> <span class="n">HTML</span><span class="p">(</span><span class="n">url</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">url</span><span class="p">,</span> <span class="n">html</span><span class="o">=</span><span class="n">content</span><span class="o">.</span><span class="n">encode</span><span class="p">(</span><span class="n">DEFAULT_ENCODING</span><span class="p">),</span> <span class="n">default_encoding</span><span class="o">=</span><span class="n">DEFAULT_ENCODING</span><span class="p">)</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="vm">__dict__</span><span class="o">.</span><span class="n">update</span><span class="p">(</span><span class="n">html</span><span class="o">.</span><span class="vm">__dict__</span><span class="p">)</span></div></div>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="vm">__dict__</span><span class="o">.</span><span class="n">update</span><span class="p">(</span><span class="n">html</span><span class="o">.</span><span class="vm">__dict__</span><span class="p">)</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">page</span> <span class="o">=</span> <span class="n">page</span></div></div>
|
||||
<span class="k">return</span> <span class="n">result</span>
|
||||
|
||||
|
||||
@@ -624,6 +674,40 @@
|
||||
<span class="n">r</span> <span class="o">=</span> <span class="nb">super</span><span class="p">(</span><span class="n">HTMLSession</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="n">request</span><span class="p">(</span><span class="o">*</span><span class="n">args</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span>
|
||||
</div></div>
|
||||
<span class="k">return</span> <span class="n">HTMLResponse</span><span class="o">.</span><span class="n">_from_response</span><span class="p">(</span><span class="n">r</span><span class="p">)</span>
|
||||
|
||||
|
||||
<span class="k">class</span> <span class="nc">AsyncHTMLSession</span><span class="p">(</span><span class="n">requests</span><span class="o">.</span><span class="n">Session</span><span class="p">):</span>
|
||||
<span class="sd">""" An async consumable session. """</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">loop</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">workers</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span>
|
||||
<span class="n">mock_browser</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">,</span> <span class="o">*</span><span class="n">args</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">):</span>
|
||||
<span class="sd">""" Set or create an event loop and a thread pool.</span>
|
||||
|
||||
<span class="sd"> :param loop: Asyncio loop to use.</span>
|
||||
<span class="sd"> :param workers: Amount of threads to use for executing async calls.</span>
|
||||
<span class="sd"> If not passed, it will default to the number of processors on the</span>
|
||||
<span class="sd"> machine, multiplied by 5. """</span>
|
||||
<span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="o">*</span><span class="n">args</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span>
|
||||
|
||||
<span class="c1"># Mock a web browser's user agent.</span>
|
||||
<span class="k">if</span> <span class="n">mock_browser</span><span class="p">:</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">headers</span><span class="p">[</span><span class="s1">'User-Agent'</span><span class="p">]</span> <span class="o">=</span> <span class="n">user_agent</span><span class="p">()</span>
|
||||
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">hooks</span><span class="p">[</span><span class="s2">"response"</span><span class="p">]</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">response_hook</span><span class="p">)</span>
|
||||
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">loop</span> <span class="o">=</span> <span class="n">loop</span> <span class="ow">or</span> <span class="n">asyncio</span><span class="o">.</span><span class="n">get_event_loop</span><span class="p">()</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">thread_pool</span> <span class="o">=</span> <span class="n">ThreadPoolExecutor</span><span class="p">(</span><span class="n">max_workers</span><span class="o">=</span><span class="n">workers</span><span class="p">)</span>
|
||||
|
||||
<span class="nd">@staticmethod</span>
|
||||
<span class="k">def</span> <span class="nf">response_hook</span><span class="p">(</span><span class="n">response</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span> <span class="o">-></span> <span class="n">HTMLResponse</span><span class="p">:</span>
|
||||
<span class="sd">""" Change response encoding and replace it with an HTMLResponse. """</span>
|
||||
<span class="n">response</span><span class="o">.</span><span class="n">encoding</span> <span class="o">=</span> <span class="n">DEFAULT_ENCODING</span>
|
||||
<span class="k">return</span> <span class="n">HTMLResponse</span><span class="o">.</span><span class="n">_from_response</span><span class="p">(</span><span class="n">response</span><span class="p">)</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">request</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">*</span><span class="n">args</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">):</span>
|
||||
<span class="sd">""" Partial original request func and run it in a thread. """</span>
|
||||
<span class="n">func</span> <span class="o">=</span> <span class="n">partial</span><span class="p">(</span><span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="n">request</span><span class="p">,</span> <span class="o">*</span><span class="n">args</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span>
|
||||
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">loop</span><span class="o">.</span><span class="n">run_in_executor</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">thread_pool</span><span class="p">,</span> <span class="n">func</span><span class="p">)</span>
|
||||
</pre></div>
|
||||
|
||||
</div>
|
||||
|
||||
@@ -144,7 +144,7 @@ XPath is also supported (`learn more <https://msdn.microsoft.com/en-us/library/m
|
||||
>>> r.html.xpath('a')
|
||||
[<Element 'a' class='btn' href='https://help.github.com/articles/supported-browsers'>]
|
||||
|
||||
You can also select only elements containing certian text:
|
||||
You can also select only elements containing certain text:
|
||||
|
||||
.. code-block:: pycon
|
||||
|
||||
|
||||
+9
-5
@@ -126,7 +126,7 @@ simple and intuitive as possible.</p>
|
||||
<span class="go">[<Element 'a' class='btn' href='https://help.github.com/articles/supported-browsers'>]</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>You can also select only elements containing certian text:</p>
|
||||
<p>You can also select only elements containing certain text:</p>
|
||||
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">r</span> <span class="o">=</span> <span class="n">session</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'http://python-requests.org/'</span><span class="p">)</span>
|
||||
<span class="gp">>>> </span><span class="n">r</span><span class="o">.</span><span class="n">html</span><span class="o">.</span><span class="n">find</span><span class="p">(</span><span class="s1">'a'</span><span class="p">,</span> <span class="n">containing</span><span class="o">=</span><span class="s1">'kenneth'</span><span class="p">)</span>
|
||||
<span class="go">[<Element 'a' href='http://kennethreitz.com/pages/open-projects.html'>, <Element 'a' href='http://kennethreitz.org/'>, <Element 'a' href='https://twitter.com/kennethreitz' class=('twitter-follow-button',) data-show-count='false'>, <Element 'a' class=('reference', 'internal') href='dev/contributing/#kenneth-reitz-s-code-style'>]</span>
|
||||
@@ -248,7 +248,7 @@ once.</p>
|
||||
|
||||
<dl class="method">
|
||||
<dt id="requests_html.HTML.find">
|
||||
<code class="descname">find</code><span class="sig-paren">(</span><em>selector: str = '*'</em>, <em>containing: Union[str</em>, <em>typing.List[str]] = None</em>, <em>first: bool = False</em>, <em>_encoding: str = None</em><span class="sig-paren">)</span> → Union[typing.List[_ForwardRef('Element')], _ForwardRef('Element')]<a class="headerlink" href="#requests_html.HTML.find" title="Permalink to this definition">¶</a></dt>
|
||||
<code class="descname">find</code><span class="sig-paren">(</span><em>selector: str = '*'</em>, <em>*</em>, <em>containing: Union[str</em>, <em>typing.List[str]] = None</em>, <em>clean: bool = False</em>, <em>first: bool = False</em>, <em>_encoding: str = None</em><span class="sig-paren">)</span> → Union[typing.List[_ForwardRef('Element')], _ForwardRef('Element')]<a class="headerlink" href="#requests_html.HTML.find" title="Permalink to this definition">¶</a></dt>
|
||||
<dd><p>Given a CSS Selector, returns a list of
|
||||
<a class="reference internal" href="#requests_html.Element" title="requests_html.Element"><code class="xref py py-class docutils literal notranslate"><span class="pre">Element</span></code></a> objects or a single one.</p>
|
||||
<table class="docutils field-list" frame="void" rules="none">
|
||||
@@ -257,6 +257,7 @@ once.</p>
|
||||
<tbody valign="top">
|
||||
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
|
||||
<li><strong>selector</strong> – CSS Selector to use.</li>
|
||||
<li><strong>clean</strong> – Whether or not to sanitize the found HTML of <code class="docutils literal notranslate"><span class="pre"><script></span></code> and <code class="docutils literal notranslate"><span class="pre"><style></span></code> tags.</li>
|
||||
<li><strong>containing</strong> – If specified, only return elements that contain the provided text.</li>
|
||||
<li><strong>first</strong> – Whether or not to return just the first result.</li>
|
||||
<li><strong>_encoding</strong> – The encoding format.</li>
|
||||
@@ -413,7 +414,7 @@ template.</p>
|
||||
|
||||
<dl class="method">
|
||||
<dt id="requests_html.HTML.xpath">
|
||||
<code class="descname">xpath</code><span class="sig-paren">(</span><em>selector: str</em>, <em>first: bool = False</em>, <em>_encoding: str = None</em><span class="sig-paren">)</span> → Union[typing.List[str], typing.List[_ForwardRef('Element')], str, _ForwardRef('Element')]<a class="headerlink" href="#requests_html.HTML.xpath" title="Permalink to this definition">¶</a></dt>
|
||||
<code class="descname">xpath</code><span class="sig-paren">(</span><em>selector: str</em>, <em>*</em>, <em>clean: bool = False</em>, <em>first: bool = False</em>, <em>_encoding: str = None</em><span class="sig-paren">)</span> → Union[typing.List[str], typing.List[_ForwardRef('Element')], str, _ForwardRef('Element')]<a class="headerlink" href="#requests_html.HTML.xpath" title="Permalink to this definition">¶</a></dt>
|
||||
<dd><p>Given an XPath selector, returns a list of
|
||||
<a class="reference internal" href="#requests_html.Element" title="requests_html.Element"><code class="xref py py-class docutils literal notranslate"><span class="pre">Element</span></code></a> objects or a single one.</p>
|
||||
<table class="docutils field-list" frame="void" rules="none">
|
||||
@@ -422,6 +423,7 @@ template.</p>
|
||||
<tbody valign="top">
|
||||
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
|
||||
<li><strong>selector</strong> – XPath Selector to use.</li>
|
||||
<li><strong>clean</strong> – Whether or not to sanitize the found HTML of <code class="docutils literal notranslate"><span class="pre"><script></span></code> and <code class="docutils literal notranslate"><span class="pre"><style></span></code> tags.</li>
|
||||
<li><strong>first</strong> – Whether or not to return just the first result.</li>
|
||||
<li><strong>_encoding</strong> – The encoding format.</li>
|
||||
</ul>
|
||||
@@ -486,7 +488,7 @@ for more details.</p>
|
||||
|
||||
<dl class="method">
|
||||
<dt id="requests_html.Element.find">
|
||||
<code class="descname">find</code><span class="sig-paren">(</span><em>selector: str = '*'</em>, <em>containing: Union[str</em>, <em>typing.List[str]] = None</em>, <em>first: bool = False</em>, <em>_encoding: str = None</em><span class="sig-paren">)</span> → Union[typing.List[_ForwardRef('Element')], _ForwardRef('Element')]<a class="headerlink" href="#requests_html.Element.find" title="Permalink to this definition">¶</a></dt>
|
||||
<code class="descname">find</code><span class="sig-paren">(</span><em>selector: str = '*'</em>, <em>*</em>, <em>containing: Union[str</em>, <em>typing.List[str]] = None</em>, <em>clean: bool = False</em>, <em>first: bool = False</em>, <em>_encoding: str = None</em><span class="sig-paren">)</span> → Union[typing.List[_ForwardRef('Element')], _ForwardRef('Element')]<a class="headerlink" href="#requests_html.Element.find" title="Permalink to this definition">¶</a></dt>
|
||||
<dd><p>Given a CSS Selector, returns a list of
|
||||
<a class="reference internal" href="#requests_html.Element" title="requests_html.Element"><code class="xref py py-class docutils literal notranslate"><span class="pre">Element</span></code></a> objects or a single one.</p>
|
||||
<table class="docutils field-list" frame="void" rules="none">
|
||||
@@ -495,6 +497,7 @@ for more details.</p>
|
||||
<tbody valign="top">
|
||||
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
|
||||
<li><strong>selector</strong> – CSS Selector to use.</li>
|
||||
<li><strong>clean</strong> – Whether or not to sanitize the found HTML of <code class="docutils literal notranslate"><span class="pre"><script></span></code> and <code class="docutils literal notranslate"><span class="pre"><style></span></code> tags.</li>
|
||||
<li><strong>containing</strong> – If specified, only return elements that contain the provided text.</li>
|
||||
<li><strong>first</strong> – Whether or not to return just the first result.</li>
|
||||
<li><strong>_encoding</strong> – The encoding format.</li>
|
||||
@@ -603,7 +606,7 @@ template.</p>
|
||||
|
||||
<dl class="method">
|
||||
<dt id="requests_html.Element.xpath">
|
||||
<code class="descname">xpath</code><span class="sig-paren">(</span><em>selector: str</em>, <em>first: bool = False</em>, <em>_encoding: str = None</em><span class="sig-paren">)</span> → Union[typing.List[str], typing.List[_ForwardRef('Element')], str, _ForwardRef('Element')]<a class="headerlink" href="#requests_html.Element.xpath" title="Permalink to this definition">¶</a></dt>
|
||||
<code class="descname">xpath</code><span class="sig-paren">(</span><em>selector: str</em>, <em>*</em>, <em>clean: bool = False</em>, <em>first: bool = False</em>, <em>_encoding: str = None</em><span class="sig-paren">)</span> → Union[typing.List[str], typing.List[_ForwardRef('Element')], str, _ForwardRef('Element')]<a class="headerlink" href="#requests_html.Element.xpath" title="Permalink to this definition">¶</a></dt>
|
||||
<dd><p>Given an XPath selector, returns a list of
|
||||
<a class="reference internal" href="#requests_html.Element" title="requests_html.Element"><code class="xref py py-class docutils literal notranslate"><span class="pre">Element</span></code></a> objects or a single one.</p>
|
||||
<table class="docutils field-list" frame="void" rules="none">
|
||||
@@ -612,6 +615,7 @@ template.</p>
|
||||
<tbody valign="top">
|
||||
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
|
||||
<li><strong>selector</strong> – XPath Selector to use.</li>
|
||||
<li><strong>clean</strong> – Whether or not to sanitize the found HTML of <code class="docutils literal notranslate"><span class="pre"><script></span></code> and <code class="docutils literal notranslate"><span class="pre"><style></span></code> tags.</li>
|
||||
<li><strong>first</strong> – Whether or not to return just the first result.</li>
|
||||
<li><strong>_encoding</strong> – The encoding format.</li>
|
||||
</ul>
|
||||
|
||||
+1
-1
File diff suppressed because one or more lines are too long
Reference in New Issue
Block a user