markdown support

Signed-off-by: Kenneth Reitz <me@kennethreitz.org>
This commit is contained in:
2018-02-24 16:50:35 -05:00
parent 53c7409d7c
commit ca40d0d0ce
2 changed files with 22 additions and 4 deletions
+21 -3
View File
@@ -2,6 +2,10 @@ import requests
from pyquery import PyQuery
from lxml.etree import tostring
import html2text
# Rebind the name `html2text` from the imported module to a single
# HTML2Text converter instance. After this line the module itself is no
# longer reachable under that name here; the `.markdown` properties below
# call `html2text.handle(...)` on this instance.
html2text = html2text.HTML2Text()
# TODO: Markdown converter.
class Element:
"""docstring for Element"""
@@ -29,6 +33,14 @@ class Element:
def text(self):
return self.pq.text()
@property
def full_text(self):
    """Return whatever ``self.pq.text_content()`` produces for this element."""
    content = self.pq.text_content()
    return content
@property
def markdown(self):
    """Return this element's ``html`` converted by the module-level ``html2text`` object."""
    source_html = self.html
    return html2text.handle(source_html)
@property
def html(self):
    """Serialize ``self.element`` with ``tostring`` and return it as stripped UTF-8 text."""
    serialized = tostring(self.element)
    return serialized.decode('utf-8').strip()
@@ -50,15 +62,19 @@ class HTML(object):
self.skip_anchors = True
def __repr__(self):
return repr(self.html)
return repr("<HTML url={}>".format(repr(self.url)))
def find(self, selector=None):
    """Return the matches for ``selector`` as a list of ``Element`` objects.

    ``selector`` is passed unchanged to ``self.pq``; every item that call
    yields is wrapped in ``Element``. The list is built eagerly.
    """
    # A single comprehension replaces the nested generator that was
    # immediately re-collected into a list.
    return [Element(found) for found in self.pq(selector)]
@property
def markdown(self):
    """Return ``self.html`` run through the module-level ``html2text`` object."""
    document_html = self.html
    return html2text.handle(document_html)
@property
def links(self):
def gen():
@@ -107,4 +123,6 @@ def handle_response(response, **kwargs):
session = requests.Session()
# Install handle_response as the session's only 'response' hook
# (the assignment replaces the whole hooks mapping).
session.hooks = {'response': handle_response}

# Manual smoke test: fetch httpbin.org and print the response's
# `.html.markdown`. Guarded so that merely importing this module performs
# no network request and prints nothing.
if __name__ == '__main__':
    print(session.get('http://httpbin.org/').html.markdown)
+1 -1
View File
@@ -21,7 +21,7 @@ VERSION = '0.0.1'
# What packages are required for this module to be executed?
REQUIRED = [
    # One entry per line with trailing commas, so adjacent string literals
    # can never fuse through implicit concatenation.
    'requests',
    'pyquery',
    'html2text',
]
# The rest you shouldn't have to touch too much :)