From 14da46f03d193012c750fd5e6b180900d1a91f3e Mon Sep 17 00:00:00 2001
From: Andrew Gorcester <gorcester@google.com>
Date: Tue, 6 Mar 2018 16:06:23 -0800
Subject: [PATCH 1/3] Add tests for ._make_absolute() and make them pass.

---
 requests_html.py            | 29 ++++++++++++++++++++---------
 tests/test_requests_html.py | 25 +++++++++++++++++++++++++
 2 files changed, 45 insertions(+), 9 deletions(-)
diff --git a/requests_html.py b/requests_html.py
index 9c14ff5..fee56bb 100644
--- a/requests_html.py
+++ b/requests_html.py
@@ -1,6 +1,6 @@
 import sys
 import asyncio
-from urllib.parse import urlparse, urlunparse
+from urllib.parse import urlparse, urlunparse, urljoin
 from concurrent.futures import ThreadPoolExecutor
 from concurrent.futures._base import TimeoutError
 from functools import partial
@@ -307,15 +307,20 @@ class BaseParser:
         # Parse the link with stdlib.
         parsed = urlparse(link)._asdict()
 
-        # Appears to be a relative link:
+        # If link is relative, then join it with base_url.
         if not parsed['netloc']:
-            parsed['netloc'] = urlparse(self.base_url).netloc
+            return urljoin(self.base_url, link)
+
+        # Link is absolute; if it lacks a scheme, add one from base_url.
         if not parsed['scheme']:
             parsed['scheme'] = urlparse(self.base_url).scheme
 
-        # Re-construct URL, with new data.
-        parsed = (v for v in parsed.values())
-        return urlunparse(parsed)
+            # Reconstruct the URL to incorporate the new scheme.
+            parsed = (v for v in parsed.values())
+            return urlunparse(parsed)
+
+        # Link is absolute and complete with scheme; nothing to be done here.
+        return link
 
 
     @property
@@ -342,9 +347,15 @@ class BaseParser:
             if result:
                 return result
 
-        url = '/'.join(self.url.split('/')[:-1])
-        if url.endswith('/'):
-            url = url[:-1]
+        # Run the page url through the stdlib parser.
+        parsed = urlparse(self.url)
+
+        # NOTE: the path is deliberately left untouched. urljoin() already
+        # drops the last path segment when resolving a relative link, and
+        # fragment-only links (e.g. '#top') need the full page path kept.
+
+        # Rebuild the url from its parsed components.
+        url = urlunparse(parsed)
 
         return url
 
diff --git a/tests/test_requests_html.py b/tests/test_requests_html.py
index 5f35aa7..a2cd4e9 100644
--- a/tests/test_requests_html.py
+++ b/tests/test_requests_html.py
@@ -137,6 +137,31 @@ def test_anchor_links():
     assert '#site-map' in r.html.links
 
 
+@pytest.mark.ok
+@pytest.mark.parametrize('url,link,expected', [
+    ('http://example.com/', 'test.html', 'http://example.com/test.html'),
+    ('http://example.com', 'test.html', 'http://example.com/test.html'),
+    ('http://example.com/foo/', 'test.html', 'http://example.com/foo/test.html'),
+    ('http://example.com/foo/bar', 'test.html', 'http://example.com/foo/test.html'),
+    ('http://example.com/foo/', '/test.html', 'http://example.com/test.html'),
+    ('http://example.com/', 'http://xkcd.com/about/', 'http://xkcd.com/about/'),
+    ('http://example.com/', '//xkcd.com/about/', 'http://xkcd.com/about/'),
+])
+def test_absolute_links(url, link, expected):
+    """Check that `link` resolves to `expected` when `url` is the base."""
+
+    anchor = """<body><a href='{}'>Next</a></body>""".format(link)
+    base_tag = """<head><base href='{}'></head>""".format(url)
+
+    # Case 1: no `<base>` tag, so the page url itself acts as the base.
+    page = HTML(html=anchor, url=url)
+    assert page.absolute_links.pop() == expected
+
+    # Case 2: a `<base>` tag carries the base; the page url is unrelated.
+    page = HTML(html=base_tag + anchor, url='http://example.com/foobar/')
+    assert page.absolute_links.pop() == expected
+
+
 @pytest.mark.render
 def test_render():
     r = get()

From 016054fc8323257916943548d60eb366a60420e9 Mon Sep 17 00:00:00 2001
From: John Ludwig <johnludwigm@users.noreply.github.com>
Date: Wed, 7 Mar 2018 18:46:03 -0600
Subject: [PATCH 2/3] Change website to Amazon Smile

---
 tests/test_internet.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_internet.py b/tests/test_internet.py
index 05cfff5..19527bb 100644
--- a/tests/test_internet.py
+++ b/tests/test_internet.py
@@ -6,7 +6,7 @@ def test_pagination():
     pages = (
         'https://xkcd.com/1957/',
         'https://reddit.com/',
-        'https://pornhub.com/',
+        'https://smile.amazon.com/',
         'https://theverge.com/archives'
     )
 

From fcaac85af9e6387a91efb6c4f79a62f0fc9c8bf1 Mon Sep 17 00:00:00 2001
From: bkcsfi <bkc@sfi.ca>
Date: Thu, 8 Mar 2018 12:33:06 -0500
Subject: [PATCH 3/3] fix typo in index.rst

---
 docs/source/index.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/source/index.rst b/docs/source/index.rst
index ef794b3..4f51fe7 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -144,7 +144,7 @@ XPath is also supported (`learn more <https://msdn.microsoft.com/en-us/library/m
    >>> r.html.xpath('a')
    [<Element 'a' class='btn' href='https://help.github.com/articles/supported-browsers'>]
 
-You can also select only elements containing certian text:
+You can also select only elements containing certain text:
 
 .. code-block:: pycon