Fall back to utf-8 when a response has no encoding; add an `_encoding` override to xpath()

Signed-off-by: Kenneth Reitz <me@kennethreitz.org>
This commit is contained in:
2018-02-26 10:10:24 -05:00
parent 5a83a8837e
commit 00f5dcf4e1
2 changed files with 14 additions and 4 deletions
+6 -3
View File
@@ -9,7 +9,7 @@ from lxml.html.soupparser import fromstring
from parse import search as parse_search
from parse import findall
DEFAULT_ENCODING = 'utf-8'
useragent = UserAgent()
@@ -95,9 +95,9 @@ class BaseParser:
else:
return c
def xpath(self, selector, first=False):
def xpath(self, selector, first=False, _encoding=None):
"""Given an XPath selector, returns a list of element objects."""
c = [Element(element=e, url=self.url, encoding=self.encoding) for e in self.lxml.xpath(selector)]
c = [Element(element=e, url=self.url, default_encoding=_encoding or self.encoding) for e in self.lxml.xpath(selector)]
if first:
try:
return c[0]
@@ -250,8 +250,11 @@ class Session(requests.Session):
"""Requests HTTP Response handler. Attaches .html property to Response
objects.
"""
if not response.encoding:
response.encoding = DEFAULT_ENCODING
response.html = HTML(response=response)
return response
+8 -1
View File
@@ -11,7 +11,10 @@ def get():
path = os.path.sep.join((os.path.dirname(os.path.abspath(__file__)), 'python.html'))
url = 'file://{}'.format(path)
return session.get(url)
r = session.get(url)
r.encoding = 'utf-8'
return r
def test_file_get():
@@ -21,6 +24,7 @@ def test_file_get():
def test_css_selector():
r = get()
about = r.html.find('#about', first=True)
for menu_item in (
@@ -57,3 +61,6 @@ def test_xpath():
html = r.html.xpath('/html', first=True)
assert 'no-js' in html.attrs['class']
if __name__ == '__main__':
    # Direct-run hook: when this module is executed as a script, only
    # test_css_selector() runs. The test_file_get() call is kept disabled:
    # test_file_get()
    test_css_selector()