diff --git a/requests_html.py b/requests_html.py
index 436ccb5..95ff295 100644
--- a/requests_html.py
+++ b/requests_html.py
@@ -2,6 +2,10 @@ import requests
from pyquery import PyQuery
from lxml.etree import tostring
+import html2text
+html2text = html2text.HTML2Text()
+
+# TODO: Markdown converter.
class Element:
"""docstring for Element"""
@@ -29,6 +33,14 @@ class Element:
def text(self):
return self.pq.text()
+ @property
+ def full_text(self):
+ return self.element.text_content()  # text_content() is an lxml.html element method, so call it on the wrapped node
+
+ @property
+ def markdown(self):
+ return html2text.handle(self.html)
+
@property
def html(self):
return tostring(self.element).decode('utf-8').strip()
@@ -50,15 +62,19 @@ class HTML(object):
self.skip_anchors = True
def __repr__(self):
- return repr(self.html)
+ return "<HTML url={}>".format(repr(self.url))
- def find(self, selector):
+ def find(self, selector=None):
def gen():
for found in self.pq(selector):
yield Element(found)
return [g for g in gen()]
+ @property
+ def markdown(self):
+ return html2text.handle(self.html)
+
@property
def links(self):
def gen():
@@ -107,4 +123,6 @@ def handle_response(response, **kwargs):
session = requests.Session()
-session.hooks = {'response': handle_response}
\ No newline at end of file
+session.hooks = {'response': handle_response}
+
+if __name__ == '__main__': print(session.get('http://httpbin.org/').html.markdown)  # demo only: no network call on import
\ No newline at end of file
diff --git a/setup.py b/setup.py
index 3c953f8..e5d709b 100644
--- a/setup.py
+++ b/setup.py
@@ -21,7 +21,7 @@ VERSION = '0.0.1'
# What packages are required for this module to be executed?
REQUIRED = [
- 'requests', 'pyquery'
+ 'requests', 'pyquery', 'html2text'
]
# The rest you shouldn't have to touch too much :)