Merge pull request #56 from Chyroc/clean_and_format_code

clean and format code
This commit is contained in:
2018-02-28 09:13:59 -05:00
committed by GitHub
+24 -36
View File
@@ -16,7 +16,6 @@ from parse import search as parse_search
from parse import findall, Result
from w3lib.encoding import html_to_unicode
DEFAULT_ENCODING = 'utf-8'
DEFAULT_URL = 'https://example.org/'
@@ -45,7 +44,6 @@ try:
except AssertionError:
raise RuntimeError('Requests-HTML requires Python 3.6+!')
class BaseParser:
"""A basic HTML/Element Parser, for Humans.
@@ -151,13 +149,7 @@ class BaseParser:
for found in self.pq(selector)
]
if first:
try:
return elements[0]
except IndexError:
return None
else:
return elements
return _get_first_or_list(elements, first)
def xpath(self, selector: str, first: bool = False, _encoding: str = None) -> _XPath:
"""Given an XPath selector, returns a list of
@@ -185,13 +177,7 @@ class BaseParser:
element = str(selection)
c.append(element)
if first:
try:
return c[0]
except IndexError:
return None
else:
return c
return _get_first_or_list(c, first)
def search(self, template: str) -> Result:
"""Searches the :class:`Element <Element>` for the given Parse template.
@@ -212,14 +198,14 @@ class BaseParser:
@property
def links(self) -> _Links:
"""All found links on page, in asis form."""
def gen():
for link in self.find('a'):
try:
href = link.attrs['href'].strip()
if not(href.startswith('#') and self.skip_anchors) and href not in ['javascript:;']:
if href:
yield href
if href and not (href.startswith('#') and self.skip_anchors and href in ['javascript:;']):
yield href
except KeyError:
pass
@@ -230,6 +216,7 @@ class BaseParser:
"""All found links on page, in absolute form
(`learn more <https://www.navegabem.com/absolute-or-relative-links.html>`_).
"""
def gen():
for link in self.links:
# Parse the link with stdlib.
@@ -259,12 +246,11 @@ class BaseParser:
if base:
return base.attrs['href'].strip()
else:
url = '/'.join(self.url.split('/')[:-1])
if url.endswith('/'):
url = url[:-1]
url = '/'.join(self.url.split('/')[:-1])
if url.endswith('/'):
url = url[:-1]
return url
return url
class Element(BaseParser):
@@ -280,10 +266,7 @@ class Element(BaseParser):
self.element = element
def __repr__(self) -> str:
attrs = []
for attr in self.attrs:
attrs.append('{}={}'.format(attr, repr(self.attrs[attr])))
attrs = ['{}={}'.format(attr, repr(self.attrs[attr])) for attr in self.attrs]
return "<Element {} {}>".format(repr(self.element.tag), ' '.join(attrs))
@property
@@ -424,10 +407,9 @@ class HTMLResponse(requests.Response):
@property
def html(self) -> HTML:
if self._html:
return self._html
if not self._html:
self._html = HTML(url=self.url, html=self.content, default_encoding=self.encoding)
self._html = HTML(url=self.url, html=self.content, default_encoding=self.encoding)
return self._html
@classmethod
@@ -442,10 +424,17 @@ def user_agent(style='chrome') -> _UserAgent:
style. Defaults to a Chrome-style User-Agent.
"""
if not style:
return useragent.random
return useragent[style] if style else useragent.random
def _get_first_or_list(l, first=True):
if first:
try:
return l[0]
except IndexError:
return None
else:
return useragent[style]
return l
class HTMLSession(requests.Session):
@@ -478,6 +467,5 @@ class HTMLSession(requests.Session):
"""
# Convert Request object into HTTPRequest object.
r = super(HTMLSession, self).request(*args, **kwargs)
html_r = HTMLResponse._from_response(r)
return html_r
return HTMLResponse._from_response(r)