From 00f5dcf4e1357150112435c7fb671313dcc7bf27 Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Mon, 26 Feb 2018 10:10:24 -0500 Subject: [PATCH] fix things Signed-off-by: Kenneth Reitz --- requests_html.py | 9 ++++++--- tests/test_requests_html.py | 9 ++++++++- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/requests_html.py b/requests_html.py index 879ba1d..d752423 100644 --- a/requests_html.py +++ b/requests_html.py @@ -9,7 +9,7 @@ from lxml.html.soupparser import fromstring from parse import search as parse_search from parse import findall - +DEFAULT_ENCODING = 'utf-8' useragent = UserAgent() @@ -95,9 +95,9 @@ class BaseParser: else: return c - def xpath(self, selector, first=False): + def xpath(self, selector, first=False, _encoding=None): """Given an XPath selector, returns a list of element objects.""" - c = [Element(element=e, url=self.url, encoding=self.encoding) for e in self.lxml.xpath(selector)] + c = [Element(element=e, url=self.url, default_encoding=_encoding or self.encoding) for e in self.lxml.xpath(selector)] if first: try: return c[0] @@ -250,8 +250,11 @@ class Session(requests.Session): """Requests HTTP Response handler. Attaches .html property to Response objects. """ + if not response.encoding: + response.encoding = DEFAULT_ENCODING response.html = HTML(response=response) + return response diff --git a/tests/test_requests_html.py b/tests/test_requests_html.py index 6ba02c9..a8a22c3 100644 --- a/tests/test_requests_html.py +++ b/tests/test_requests_html.py @@ -11,7 +11,10 @@ def get(): path = os.path.sep.join((os.path.dirname(os.path.abspath(__file__)), 'python.html')) url = 'file://{}'.format(path) - return session.get(url) + r = session.get(url) + r.encoding = 'utf-8' + + return r def test_file_get(): @@ -21,6 +24,7 @@ def test_file_get(): def test_css_selector(): r = get() + about = r.html.find('#about', first=True) for menu_item in ( @@ -57,3 +61,6 @@ def test_xpath(): html = r.html.xpath('/html', first=True) assert 'no-js' in html.attrs['class'] +if __name__ == '__main__': + # test_file_get() + test_css_selector()