From 05ff6e87ca7206a241d3f1ba7ff49ebf7cda7217 Mon Sep 17 00:00:00 2001
From: Angus Dippenaar <angusdippenaar@gmail.com>
Date: Sat, 7 Apr 2018 17:15:51 +0200
Subject: [PATCH] Replace errors when decoding raw_html

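Some websites serve bytes that are not valid in their declared encoding, even when the encoding is specified explicitly. I'm not 100% sure that replacing the "bad" bytes is the correct way to fix the problem, but it resolves the decoding failures I've run into with some sites. Note that with errors='replace' the decode call inside the try/except UnicodeDecodeError block no longer raises for malformed bytes, so the fallback to default_encoding in that block will no longer be triggered by them.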
---
 requests_html.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/requests_html.py b/requests_html.py
index ea8f3bd..6cdab39 100644
--- a/requests_html.py
+++ b/requests_html.py
@@ -102,7 +102,7 @@ class BaseParser:
         (`learn more <http://www.diveintopython3.net/strings.html>`_).
         """
         if self._html:
-            return self.raw_html.decode(self.encoding)
+            return self.raw_html.decode(self.encoding, errors='replace')
         else:
             return etree.tostring(self.element, encoding='unicode').strip()
 
@@ -128,7 +128,7 @@ class BaseParser:
             self._encoding = html_to_unicode(self.default_encoding, self._html)[0]
             # Fall back to requests' detected encoding if decode fails.
             try:
-                self.raw_html.decode(self.encoding)
+                self.raw_html.decode(self.encoding, errors='replace')
             except UnicodeDecodeError:
                 self._encoding = self.default_encoding