Merge pull request #162 from SN9NV/patch-2

Replace errors when decoding raw_html
This commit is contained in:
2018-09-18 02:52:37 -04:00
committed by GitHub
+2 -2
View File
@@ -102,7 +102,7 @@ class BaseParser:
(`learn more <http://www.diveintopython3.net/strings.html>`_).
"""
if self._html:
-return self.raw_html.decode(self.encoding)
+return self.raw_html.decode(self.encoding, errors='replace')
else:
return etree.tostring(self.element, encoding='unicode').strip()
@@ -128,7 +128,7 @@ class BaseParser:
self._encoding = html_to_unicode(self.default_encoding, self._html)[0]
# Fall back to requests' detected encoding if decode fails.
try:
-self.raw_html.decode(self.encoding)
+self.raw_html.decode(self.encoding, errors='replace')
except UnicodeDecodeError:
self._encoding = self.default_encoding