diff --git a/requests_html.py b/requests_html.py
index 879ba1d..d752423 100644
--- a/requests_html.py
+++ b/requests_html.py
@@ -9,7 +9,7 @@ from lxml.html.soupparser import fromstring
from parse import search as parse_search
from parse import findall
-
+DEFAULT_ENCODING = 'utf-8'
useragent = UserAgent()
@@ -95,9 +95,9 @@ class BaseParser:
else:
return c
- def xpath(self, selector, first=False):
+ def xpath(self, selector, first=False, _encoding=None):
"""Given an XPath selector, returns a list of element objects."""
- c = [Element(element=e, url=self.url, encoding=self.encoding) for e in self.lxml.xpath(selector)]
+ c = [Element(element=e, url=self.url, default_encoding=_encoding or self.encoding) for e in self.lxml.xpath(selector)]
if first:
try:
return c[0]
@@ -250,8 +250,11 @@ class Session(requests.Session):
"""Requests HTTP Response handler. Attaches .html property to Response
objects.
"""
+ if not response.encoding:
+ response.encoding = DEFAULT_ENCODING
response.html = HTML(response=response)
+
return response
diff --git a/tests/test_requests_html.py b/tests/test_requests_html.py
index 6ba02c9..a8a22c3 100644
--- a/tests/test_requests_html.py
+++ b/tests/test_requests_html.py
@@ -11,7 +11,10 @@ def get():
path = os.path.sep.join((os.path.dirname(os.path.abspath(__file__)), 'python.html'))
url = 'file://{}'.format(path)
- return session.get(url)
+ r = session.get(url)
+ r.encoding = 'utf-8'
+
+ return r
def test_file_get():
@@ -21,6 +24,7 @@ def test_file_get():
def test_css_selector():
r = get()
+
about = r.html.find('#about', first=True)
for menu_item in (
@@ -57,3 +61,6 @@ def test_xpath():
html = r.html.xpath('/html', first=True)
assert 'no-js' in html.attrs['class']
+if __name__ == '__main__':
+ # test_file_get() is disabled here; only test_css_selector() runs when this file is executed directly.
+ test_css_selector()