From 21ce89ea4d384def792aad324f2170af0defeef1 Mon Sep 17 00:00:00 2001
From: Chyroc <chen_yunpeng@foxmail.com>
Date: Wed, 28 Feb 2018 20:08:54 +0800
Subject: [PATCH 1/8] clean and format code

---
 requests_html.py | 62 ++++++++++++++++++++----------------------------
 1 file changed, 26 insertions(+), 36 deletions(-)
diff --git a/requests_html.py b/requests_html.py
index ee2f6b2..0745edb 100644
--- a/requests_html.py
+++ b/requests_html.py
@@ -15,14 +15,12 @@ from parse import search as parse_search
 from parse import findall, Result
 from w3lib.encoding import html_to_unicode
 
-
 DEFAULT_ENCODING = 'utf-8'
 DEFAULT_URL = 'https://example.org/'
 
 useragent = UserAgent()
 
 
-
 class BaseParser:
     """A basic HTML/Element Parser, for Humans."""
 
@@ -114,13 +112,7 @@ class BaseParser:
             for found in self.pq(selector)
         ]
 
-        if first:
-            try:
-                return elements[0]
-            except IndexError:
-                return None
-        else:
-            return elements
+        return _get_first_or_list(elements, first)
 
     def xpath(self, selector: str, first: bool = False, _encoding: str = None):
         """Given an XPath selector, returns a list of :class:`Element <Element>` objects.
@@ -129,13 +121,8 @@ class BaseParser:
 
         If ``first`` is ``True``, only returns the first :class:`Element <Element>` found."""
         c = [Element(element=e, url=self.url, default_encoding=_encoding or self.encoding) for e in self.lxml.xpath(selector)]
-        if first:
-            try:
-                return c[0]
-            except IndexError:
-                return None
-        else:
-            return c
+
+        return _get_first_or_list(c, first)
 
     def search(self, template: str) -> Result:
         """Searches the :class:`Element <Element>` for the given parse template."""
@@ -150,14 +137,14 @@ class BaseParser:
     @property
     def links(self) -> Set[str]:
         """All found links on page, in as–is form."""
+
         def gen():
             for link in self.find('a'):
 
                 try:
                     href = link.attrs['href'].strip()
-                    if not(href.startswith('#') and self.skip_anchors) and href not in ['javascript:;']:
-                        if href:
-                            yield href
+                    if href and not (href.startswith('#') and self.skip_anchors) and href not in ['javascript:;']:
+                        yield href
                 except KeyError:
                     pass
 
@@ -168,6 +155,7 @@ class BaseParser:
         """All found links on page, in absolute form
         (`learn more <https://www.navegabem.com/absolute-or-relative-links.html>`_).
         """
+
         def gen():
             for link in self.links:
                 # Parse the link with stdlib.
@@ -197,12 +185,11 @@ class BaseParser:
         if base:
             return base.attrs['href'].strip()
 
-        else:
-            url = '/'.join(self.url.split('/')[:-1])
-            if url.endswith('/'):
-                url = url[:-1]
+        url = '/'.join(self.url.split('/')[:-1])
+        if url.endswith('/'):
+            url = url[:-1]
 
-            return url
+        return url
 
 
 class Element(BaseParser):
@@ -213,10 +200,7 @@ class Element(BaseParser):
         self.element = element
 
     def __repr__(self) -> str:
-        attrs = []
-        for attr in self.attrs:
-            attrs.append('{}={}'.format(attr, repr(self.attrs[attr])))
-
+        attrs = ['{}={}'.format(attr, repr(self.attrs[attr])) for attr in self.attrs]
         return "<Element {} {}>".format(repr(self.element.tag), ' '.join(attrs))
 
     @property
@@ -289,6 +273,7 @@ class HTML(BaseParser):
         Warning: the first time you run this method, it will download
         Chromium into your home directory (``~/.pyppeteer``).
         """
+
         async def _async_render(*, url: str, script: str = None, scrolldown, sleep: int):
             try:
                 browser = pyppeteer.launch(headless=True)
@@ -344,10 +329,9 @@ class HTMLResponse(requests.Response):
 
     @property
     def html(self) -> HTML:
-        if self._html:
-            return self._html
+        if not self._html:
+            self._html = HTML(url=self.url, html=self.content, default_encoding=self.encoding)
 
-        self._html = HTML(url=self.url, html=self.content, default_encoding=self.encoding)
         return self._html
 
     @classmethod
@@ -362,10 +346,17 @@ def user_agent(style='chrome') -> str:
     style. Defaults to a Chrome-style User-Agent.
     """
 
-    if not style:
-        return useragent.random
+    return useragent[style] if style else useragent.random
+
+
+def _get_first_or_list(l, first=True):
+    if first:
+        try:
+            return l[0]
+        except IndexError:
+            return None
     else:
-        return useragent[style]
+        return l
 
 
 class HTMLSession(requests.Session):
@@ -395,6 +386,5 @@ class HTMLSession(requests.Session):
     def request(self, *args, **kwargs) -> HTMLResponse:
         # Convert Request object into HTTPRequest object.
         r = super(HTMLSession, self).request(*args, **kwargs)
-        html_r = HTMLResponse._from_response(r)
 
-        return html_r
+        return HTMLResponse._from_response(r)

From 2504efb35fa7c6262de5dc8d75d46d7e10e848d2 Mon Sep 17 00:00:00 2001
From: Kenneth Reitz <me@kennethreitz.org>
Date: Wed, 28 Feb 2018 08:17:46 -0500
Subject: [PATCH 2/8] support render of non-loaded websites

Signed-off-by: Kenneth Reitz <me@kennethreitz.org>
---
 requests_html.py | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/requests_html.py b/requests_html.py
index 0be722c..082ca5f 100644
--- a/requests_html.py
+++ b/requests_html.py
@@ -2,7 +2,7 @@ import sys
 import asyncio
 from urllib.parse import urlparse, urlunparse
 from concurrent.futures._base import TimeoutError
-from typing import Set, Union, List, MutableMapping
+from typing import Set, Union, List, MutableMapping, Optional
 
 import pyppeteer
 import requests
@@ -325,7 +325,7 @@ class HTML(BaseParser):
     def __repr__(self) -> str:
         return "<HTML url={}>".format(repr(self.url))
 
-    def render(self, retries: int = 8, script: str = None, scrolldown=False, sleep: int = 0):
+    def render(self, retries: int = 8, script: str = None, scrolldown=False, sleep: int = 0, reload: bool = True):
         """Reloads the response in Chromium, and replaces HTML content
         with an updated version, with JavaScript executed.
 
@@ -361,13 +361,16 @@ class HTML(BaseParser):
         Warning: the first time you run this method, it will download
         Chromium into your home directory (``~/.pyppeteer``).
         """
-        async def _async_render(*, url: str, script: str = None, scrolldown, sleep: int):
+        async def _async_render(*, url: str, script: str = None, scrolldown, sleep: int, reload: bool = True, content: Optional[str]):
             try:
                 browser = pyppeteer.launch(headless=True)
                 page = await browser.newPage()
 
                 # Load the given page (GET request, obviously.)
-                await page.goto(url)
+                if reload:
+                    await page.goto(url)
+                else:
+                    await page.setContent(content)
 
                 result = None
                 if script:
@@ -395,7 +398,7 @@ class HTML(BaseParser):
         for i in range(retries):
             if not content:
                 try:
-                    content, result = loop.run_until_complete(_async_render(url=self.url, script=script, sleep=sleep, scrolldown=scrolldown))
+                    content, result = loop.run_until_complete(_async_render(url=self.url, script=script, sleep=sleep, content=self.html, reload=reload, scrolldown=scrolldown))
                 except TimeoutError:
                     pass
 

From e9c162f5f76c87452fb84a833287b313ae7932a4 Mon Sep 17 00:00:00 2001
From: Kenneth Reitz <me@kennethreitz.org>
Date: Wed, 28 Feb 2018 08:20:34 -0500
Subject: [PATCH 3/8] next version

Signed-off-by: Kenneth Reitz <me@kennethreitz.org>
---
 requests_html.py | 6 ++++++
 setup.py         | 2 +-
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/requests_html.py b/requests_html.py
index 082ca5f..7311667 100644
--- a/requests_html.py
+++ b/requests_html.py
@@ -329,6 +329,12 @@ class HTML(BaseParser):
         """Reloads the response in Chromium, and replaces HTML content
         with an updated version, with JavaScript executed.
 
+        :param retries: The number of times to retry loading the page in Chromium.
+        :param script: JavaScript to execute upon page load (optional).
+        :param scrolldown: Integer, if provided, of how many times to page down.
+        :param sleep: Integer, if provided, of how long to sleep after initial render.
+        :param reload: If ``False``, content will not be loaded from the browser, but will be provided from memory.
+
         If ``scrolldown`` is specified, the page will scrolldown the specified
         number of times, after sleeping the specified amount of time
         (e.g. ``scrolldown=10, sleep=1``).
diff --git a/setup.py b/setup.py
index 873dabe..b28f984 100644
--- a/setup.py
+++ b/setup.py
@@ -17,7 +17,7 @@ DESCRIPTION = 'HTML Parsing for Humans.'
 URL = 'https://github.com/kennethreitz/requests-html'
 EMAIL = 'me@kennethreitz.org'
 AUTHOR = 'Kenneth Reitz'
-VERSION = '0.6.7'
+VERSION = '0.6.8'
 
 # What packages are required for this module to be executed?
 REQUIRED = [

From 565cd496c2a37973182ab01575e991a58ca8c209 Mon Sep 17 00:00:00 2001
From: Kenneth Reitz <me@kennethreitz.org>
Date: Wed, 28 Feb 2018 08:39:58 -0500
Subject: [PATCH 4/8] bare render tests

Signed-off-by: Kenneth Reitz <me@kennethreitz.org>
---
 tests/test_requests_html.py | 43 +++++++++++++++++++++++++++++++++++--
 1 file changed, 41 insertions(+), 2 deletions(-)

diff --git a/tests/test_requests_html.py b/tests/test_requests_html.py
index 5780bde..dbf02d8 100644
--- a/tests/test_requests_html.py
+++ b/tests/test_requests_html.py
@@ -44,8 +44,8 @@ def test_links():
     r = get()
     about = r.html.find('#about', first=True)
 
-    len(about.links) == 6
-    len(about.absolute_links) == 6
+    assert len(about.links) == 6
+    assert len(about.absolute_links) == 6
 
 
 def test_search():
@@ -79,5 +79,44 @@ def test_anchor_links():
     assert '#site-map' in r.html.links
 
 
+def test_render():
+    r = get()
+    script = """
+    () => {
+        return {
+            width: document.documentElement.clientWidth,
+            height: document.documentElement.clientHeight,
+            deviceScaleFactor: window.devicePixelRatio,
+        }
+    }
+    """
+    val = r.html.render(script=script)
+    for value in ('width', 'height', 'deviceScaleFactor'):
+        assert value in val
+
+    about = r.html.find('#about', first=True)
+    assert len(about.links) == 6
+
+
+def test_bare_render():
+    doc = """<a href='https://httpbin.org'>"""
+    html = HTML(html=doc)
+    script = """
+        () => {
+            return {
+                width: document.documentElement.clientWidth,
+                height: document.documentElement.clientHeight,
+                deviceScaleFactor: window.devicePixelRatio,
+            }
+        }
+    """
+    val = html.render(script=script, reload=False)
+    for value in ('width', 'height', 'deviceScaleFactor'):
+        assert value in val
+
+    assert html.find('html')
+    assert 'https://httpbin.org' in html.links
+
+
 if __name__ == '__main__':
     test_xpath()

From f2a99240655c18a1b719f75c2642fdb7615d2ff6 Mon Sep 17 00:00:00 2001
From: Kenneth Reitz <me@kennethreitz.org>
Date: Wed, 28 Feb 2018 08:43:42 -0500
Subject: [PATCH 5/8] render without requests

Signed-off-by: Kenneth Reitz <me@kennethreitz.org>
---
 docs/source/index.rst | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/docs/source/index.rst b/docs/source/index.rst
index feb45aa..86182c7 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -177,6 +177,27 @@ You can also use this library without Requests:
     >>> html.links
     {'https://httpbin.org'}
 
+You can also render JavaScript pages without Requests:
+
+.. code-block:: pycon
+
+    # ^^ proceeding from above ^^
+    >>> script = """
+                () => {
+            return {
+                width: document.documentElement.clientWidth,
+                height: document.documentElement.clientHeight,
+                deviceScaleFactor: window.devicePixelRatio,
+            }
+        }
+        """
+    >>> val = html.render(script=script, reload=False)
+
+    >>> print(val)
+    {'width': 800, 'height': 600, 'deviceScaleFactor': 1}
+
+    >>> print(html.html)
+    <html><head></head><body><a href="https://httpbin.org"></a></body></html>
 
 API Documentation
 =================

From dcb828f3a341a656f69aa54169c874da8cbc6e5c Mon Sep 17 00:00:00 2001
From: Kenneth Reitz <me@kennethreitz.org>
Date: Wed, 28 Feb 2018 08:44:24 -0500
Subject: [PATCH 6/8] fix

Signed-off-by: Kenneth Reitz <me@kennethreitz.org>
---
 docs/source/index.rst | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/docs/source/index.rst b/docs/source/index.rst
index 86182c7..40b8905 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -183,13 +183,13 @@ You can also render JavaScript pages without Requests:
 
     # ^^ proceeding from above ^^
     >>> script = """
-                () => {
-            return {
-                width: document.documentElement.clientWidth,
-                height: document.documentElement.clientHeight,
-                deviceScaleFactor: window.devicePixelRatio,
+            () => {
+                return {
+                    width: document.documentElement.clientWidth,
+                    height: document.documentElement.clientHeight,
+                    deviceScaleFactor: window.devicePixelRatio,
+                }
             }
-        }
         """
     >>> val = html.render(script=script, reload=False)
 

From e531e5cab2105805ec8328a76dab4de2e4e109dd Mon Sep 17 00:00:00 2001
From: Kenneth Reitz <me@kennethreitz.org>
Date: Wed, 28 Feb 2018 08:49:38 -0500
Subject: [PATCH 7/8] updated tests for travis

Signed-off-by: Kenneth Reitz <me@kennethreitz.org>
---
 Pipfile                     |  2 +-
 Pipfile.lock                | 28 +---------------------------
 tests/test_requests_html.py | 11 +++++++++++
 3 files changed, 13 insertions(+), 28 deletions(-)

diff --git a/Pipfile b/Pipfile
index c0dda7f..62550b6 100644
--- a/Pipfile
+++ b/Pipfile
@@ -28,4 +28,4 @@ mypy = "*"
 
 [scripts]
 
-tests = "pytest"
+tests = "pytest -v -m ok"
diff --git a/Pipfile.lock b/Pipfile.lock
index 03398aa..55c5b45 100644
--- a/Pipfile.lock
+++ b/Pipfile.lock
@@ -1,7 +1,7 @@
 {
     "_meta": {
         "hash": {
-            "sha256": "ef6f9504ed9751cf2f4c5aef06e59838981c79d84fa1d36fb5ce258d8dba189f"
+            "sha256": "cf67076e9c185c3bc951910b2a44b8b548ce954e0e3ff2a5bef1942d13275e8e"
         },
         "host-environment-markers": {
             "implementation_name": "cpython",
@@ -127,15 +127,6 @@
             ],
             "version": "==0.0.10"
         },
-        "pyqt5": {
-            "hashes": [
-                "sha256:128285176240e990fce9c50293105ffd0d2884d8910bb338118f867b171ec6e8",
-                "sha256:dbd1777d8e7540a6e7350482f1d7c981a073ce1b7195ac2cd21c204b3a28df57",
-                "sha256:3563ac935fca8e8b1dbd4856d8eedc982b5de90c53f0280e8fca8060a262d4f4",
-                "sha256:2ce953cb849e5265b9d1abe075471148ad5fb6d7e6a9881f37dfe05590571d23"
-            ],
-            "version": "==5.10"
-        },
         "pyquery": {
             "hashes": [
                 "sha256:07987c2ed2aed5cba29ff18af95e56e9eb04a2249f42ce47bddfb37f487229a3",
@@ -150,23 +141,6 @@
             ],
             "version": "==2.18.4"
         },
-        "sip": {
-            "hashes": [
-                "sha256:f31bb63e63a958f65887ae27f06e62af9f9cb818ba7456a99f78a5ec3082d3dd",
-                "sha256:776e169da554729f80337070348db49a6742d8aa317aec931a4d0f47b7ef535d",
-                "sha256:beac2bc1b9457a693fb3122c797cad5678a168ecff6ccbad4aa3a9f1ff1a2d86",
-                "sha256:1c5a1ad409e97833a4a873fae5bcd7a365651f7372806992d03891082821bc41",
-                "sha256:248ecca386d4832138f6a044dceb0bfc38fb8503b7ffbfeb474073f56930144b",
-                "sha256:ab338095e32ebb2047b6184f1383c667c47b9822d7320fdfb93870567a972343",
-                "sha256:ebea4619e9626e2eb197835049807c8173f11e2023b05140cbee4b274a91ef5e",
-                "sha256:2db24e65c99b7d20a67fa461f6bc2e15bddb6cd5fde52e37d6609566d79a69a1",
-                "sha256:3b45eecf6f68a29f5629dc064079e919987b030628bb6614da7f4eefedbe145e",
-                "sha256:2120e9d713120687558b6699cf5ff6a8f7b070776b19d6c7fc96fc64ea8ca056",
-                "sha256:18350ebf82beaef6a73d2c14320f19961242ed424670407df6fd5a9b65f0e7fc",
-                "sha256:92413edcb4fea75ebd1f8142c882dc5db398025eb8a0a273385838fd791de73c"
-            ],
-            "version": "==4.19.7"
-        },
         "six": {
             "hashes": [
                 "sha256:832dc0e10feb1aa2c68dcc57dbb658f1c7e65b9b61af69048abc87a2db00a0eb",
diff --git a/tests/test_requests_html.py b/tests/test_requests_html.py
index dbf02d8..3dbcd2e 100644
--- a/tests/test_requests_html.py
+++ b/tests/test_requests_html.py
@@ -1,5 +1,6 @@
 import os
 
+import pytest
 from requests_html import HTMLSession, HTML
 from requests_file import FileAdapter
 
@@ -14,11 +15,13 @@ def get():
     return session.get(url)
 
 
+@pytest.mark.ok
 def test_file_get():
     r = get()
     assert r.status_code == 200
 
 
+@pytest.mark.ok
 def test_css_selector():
     r = get()
 
@@ -32,6 +35,7 @@ def test_css_selector():
         assert menu_item in about.full_text.split('\n')
 
 
+@pytest.mark.ok
 def test_attrs():
     r = get()
     about = r.html.find('#about', first=True)
@@ -40,6 +44,7 @@ def test_attrs():
     assert len(about.attrs['class']) == 2
 
 
+@pytest.mark.ok
 def test_links():
     r = get()
     about = r.html.find('#about', first=True)
@@ -48,12 +53,14 @@ def test_links():
     assert len(about.absolute_links) == 6
 
 
+@pytest.mark.ok
 def test_search():
     r = get()
     style = r.html.search('Python is a {} language')[0]
     assert style == 'programming'
 
 
+@pytest.mark.ok
 def test_xpath():
     r = get()
     html = r.html.xpath('/html', first=True)
@@ -63,6 +70,7 @@ def test_xpath():
     assert '#site-map' in a_hrefs
 
 
+@pytest.mark.ok
 def test_html_loading():
     doc = """<a href='https://httpbin.org'>"""
     html = HTML(html=doc)
@@ -72,6 +80,7 @@ def test_html_loading():
     assert isinstance(html.html, str)
 
 
+@pytest.mark.ok
 def test_anchor_links():
     r = get()
     r.html.skip_anchors = False
@@ -79,6 +88,7 @@ def test_anchor_links():
     assert '#site-map' in r.html.links
 
 
+@pytest.mark.render
 def test_render():
     r = get()
     script = """
@@ -98,6 +108,7 @@ def test_render():
     assert len(about.links) == 6
 
 
+@pytest.mark.render
 def test_bare_render():
     doc = """<a href='https://httpbin.org'>"""
     html = HTML(html=doc)

From 0b8fe5f6c68a55e7747c1575e79c95a8d3424729 Mon Sep 17 00:00:00 2001
From: Kenneth Reitz <me@kennethreitz.org>
Date: Wed, 28 Feb 2018 08:53:45 -0500
Subject: [PATCH 8/8] working

Signed-off-by: Kenneth Reitz <me@kennethreitz.org>
---
 docs/source/index.rst | 1 +
 1 file changed, 1 insertion(+)

diff --git a/docs/source/index.rst b/docs/source/index.rst
index 40b8905..ce87fe4 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -199,6 +199,7 @@ You can also render JavaScript pages without Requests:
     >>> print(html.html)
     <html><head></head><body><a href="https://httpbin.org"></a></body></html>
 
+
 API Documentation
 =================