mirror of
https://github.com/kennethreitz/requests-html.git
synced 2026-06-05 23:00:20 +00:00
+15
-2
@@ -1,6 +1,8 @@
|
||||
import requests
|
||||
from pyquery import PyQuery
|
||||
|
||||
from lxml.etree import tostring
|
||||
|
||||
class Element:
|
||||
"""docstring for Element"""
|
||||
def __init__(self, element):
|
||||
@@ -27,6 +29,18 @@ class Element:
|
||||
def text(self):
|
||||
return self.pq.text()
|
||||
|
||||
@property
|
||||
def html(self):
|
||||
return tostring(self.element).decode('utf-8').strip()
|
||||
|
||||
def find(self, selector):
|
||||
def gen():
|
||||
for found in self.pq(selector):
|
||||
yield Element(found)
|
||||
|
||||
return [g for g in gen()]
|
||||
|
||||
|
||||
|
||||
class HTML(object):
|
||||
"""docstring for HTML"""
|
||||
@@ -87,7 +101,6 @@ class HTML(object):
|
||||
return PyQuery(self.html)
|
||||
|
||||
|
||||
|
||||
def handle_response(response, **kwargs):
|
||||
response.html = HTML(response)
|
||||
return response
|
||||
@@ -97,4 +110,4 @@ session = requests.Session()
|
||||
session.hooks = {'response': handle_response}
|
||||
|
||||
r = session.get('https://kennethreitz.org/')
|
||||
print(r.html.absolute_links)
|
||||
print(r.html.find('a')[0].html)
|
||||
|
||||
Reference in New Issue
Block a user