mirror of
https://github.com/kennethreitz/requests-html.git
synced 2026-06-05 23:00:20 +00:00
+6
-3
@@ -9,7 +9,7 @@ from lxml.html.soupparser import fromstring
|
||||
from parse import search as parse_search
|
||||
from parse import findall
|
||||
|
||||
|
||||
DEFAULT_ENCODING = 'utf-8'
|
||||
|
||||
useragent = UserAgent()
|
||||
|
||||
@@ -95,9 +95,9 @@ class BaseParser:
|
||||
else:
|
||||
return c
|
||||
|
||||
def xpath(self, selector, first=False):
|
||||
def xpath(self, selector, first=False, _encoding=None):
|
||||
"""Given an XPath selector, returns a list of element objects."""
|
||||
c = [Element(element=e, url=self.url, encoding=self.encoding) for e in self.lxml.xpath(selector)]
|
||||
c = [Element(element=e, url=self.url, default_encoding=_encoding or self.encoding) for e in self.lxml.xpath(selector)]
|
||||
if first:
|
||||
try:
|
||||
return c[0]
|
||||
@@ -250,8 +250,11 @@ class Session(requests.Session):
|
||||
"""Requests HTTP Response handler. Attaches .html property to Response
|
||||
objects.
|
||||
"""
|
||||
if not response.encoding:
|
||||
response.encoding = DEFAULT_ENCODING
|
||||
|
||||
response.html = HTML(response=response)
|
||||
|
||||
return response
|
||||
|
||||
|
||||
|
||||
@@ -11,7 +11,10 @@ def get():
|
||||
path = os.path.sep.join((os.path.dirname(os.path.abspath(__file__)), 'python.html'))
|
||||
url = 'file://{}'.format(path)
|
||||
|
||||
return session.get(url)
|
||||
r = session.get(url)
|
||||
r.encoding = 'utf-8'
|
||||
|
||||
return r
|
||||
|
||||
|
||||
def test_file_get():
|
||||
@@ -21,6 +24,7 @@ def test_file_get():
|
||||
|
||||
def test_css_selector():
|
||||
r = get()
|
||||
|
||||
about = r.html.find('#about', first=True)
|
||||
|
||||
for menu_item in (
|
||||
@@ -57,3 +61,6 @@ def test_xpath():
|
||||
html = r.html.xpath('/html', first=True)
|
||||
assert 'no-js' in html.attrs['class']
|
||||
|
||||
if __name__ == '__main__':
|
||||
# test_file_get()
|
||||
test_css_selector()
|
||||
|
||||
Reference in New Issue
Block a user