From b0bd9783923e702ef964c35e0366e2ed2924e067 Mon Sep 17 00:00:00 2001
From: Carey Metcalfe <carey@cmetcalfe.ca>
Date: Wed, 28 Feb 2018 19:27:41 -0500
Subject: [PATCH 1/3] Don't initialize the UserAgent object until one is
 requested

This avoids hitting external servers to get a plausible user agent
string when the module is imported. When the first user agent string is
requested the object is initialized as usual.
---
 requests_html.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/requests_html.py b/requests_html.py
index d6ddecd..7964625 100644
--- a/requests_html.py
+++ b/requests_html.py
@@ -19,7 +19,7 @@ from w3lib.encoding import html_to_unicode
 DEFAULT_ENCODING = 'utf-8'
 DEFAULT_URL = 'https://example.org/'
 
-useragent = UserAgent()
+useragent = None
 
 # Typing.
 _Find = Union[List['Element'], 'Element']
@@ -431,6 +431,9 @@ def user_agent(style='chrome') -> _UserAgent:
     """Returns a random user-agent, if not requested one of a specific
     style. Defaults to a Chrome-style User-Agent.
     """
+    global useragent
+    if not useragent:
+        useragent = UserAgent()
 
     return useragent[style] if style else useragent.random
 

From 9ed0bac87b1edaed3da31374679f3714269787ac Mon Sep 17 00:00:00 2001
From: alxia <aldridgexia@gmail.com>
Date: Wed, 28 Feb 2018 22:45:10 -0500
Subject: [PATCH 2/3] UserWarning fix

Fix the UserWarning by passing a default parser (`features='html.parser'`)
through **bsargs to lxml.
---
 requests_html.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requests_html.py b/requests_html.py
index 7964625..82a72c2 100644
--- a/requests_html.py
+++ b/requests_html.py
@@ -117,7 +117,7 @@ class BaseParser:
         """`lxml <http://lxml.de>`_ representation of the
         :class:`Element <Element>` or :class:`HTML <HTML>`.
         """
-        return soup_parse(self.html)
+        return soup_parse(self.html, features='html.parser')
 
     @property
     def text(self) -> _Text:

From 64e67d46ff802766c5ef531aa0cb99dc9db9de4b Mon Sep 17 00:00:00 2001
From: camper42 <camper.xlii@gmail.com>
Date: Thu, 1 Mar 2018 20:34:56 +0800
Subject: [PATCH 3/3] ignore javascript links

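Ignore any link whose href attribute starts with `javascript:`.

This also corrects the anchor check: the old condition required the href
to both start with `#` and equal `javascript:;`, which can never be true,
so anchors were never skipped even when `skip_anchors` was set.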
---
 requests_html.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requests_html.py b/requests_html.py
index 7964625..bd41ba8 100644
--- a/requests_html.py
+++ b/requests_html.py
@@ -212,7 +212,7 @@ class BaseParser:
 
                 try:
                     href = link.attrs['href'].strip()
-                    if href and not (href.startswith('#') and self.skip_anchors and href in ['javascript:;']):
+                    if href and not (href.startswith('#') and self.skip_anchors) and not href.startswith('javascript:'):
                         yield href
                 except KeyError:
                     pass