diff --git a/requests_html.py b/requests_html.py
index 6203ba4..54f3d05 100644
--- a/requests_html.py
+++ b/requests_html.py
@@ -70,9 +70,8 @@ class BaseParser:
"""
- def __init__(self, *, element, session: 'HTTPSession' = None, default_encoding: _DefaultEncoding = None, html: _HTML = None, url: _URL) -> None:
+ def __init__(self, *, element, default_encoding: _DefaultEncoding = None, html: _HTML = None, url: _URL) -> None:
self.element = element
- self.session = session or HTMLSession()
self.url = url
self.skip_anchors = True
self.default_encoding = default_encoding
@@ -166,47 +165,6 @@ class BaseParser:
"""
return self.lxml.text_content()
- def next(self, fetch: bool = False, next_symbol: _NextSymbol = DEFAULT_NEXT_SYMBOL) -> _Next:
- """Attempts to find the next page, if there is one. If ``fetch``
- is ``True`` (default), returns :class:`HTML ` object of
- next page. If ``fetch`` is ``False``, simply returns the next URL.
-
- """
-
- def get_next():
- candidates = self.find('a', containing=next_symbol)
-
- for candidate in candidates:
- if candidate.attrs.get('href'):
- # Support 'next' rel (e.g. reddit).
- if 'next' in candidate.attrs.get('rel', []):
- return candidate.attrs['href']
-
- # Support 'next' in classnames.
- for _class in candidate.attrs.get('class', []):
- if 'next' in _class:
- return candidate.attrs['href']
-
- if 'page' in candidate.attrs['href']:
- return candidate.attrs['href']
-
- try:
- # Resort to the last candidate.
- return candidates[-1].attrs['href']
- except IndexError:
- return None
-
- next = get_next()
- if next:
- url = self._make_absolute(next)
- else:
- return None
-
- if fetch:
- return self.session.get(url)
- else:
- return url
-
def find(self, selector: str = "*", *, containing: _Containing = None, clean: bool = False, first: bool = False, _encoding: str = None) -> _Find:
"""Given a CSS Selector, returns a list of
:class:`Element ` objects or a single one.
@@ -438,7 +396,7 @@ class HTML(BaseParser):
:param default_encoding: Which encoding to default to.
"""
- def __init__(self, *, url: str = DEFAULT_URL, html: _HTML, default_encoding: str = DEFAULT_ENCODING) -> None:
+ def __init__(self, *, session: 'HTTPSession' = None, url: str = DEFAULT_URL, html: _HTML, default_encoding: str = DEFAULT_ENCODING) -> None:
# Convert incoming unicode HTML into bytes.
if isinstance(html, str):
@@ -451,12 +409,54 @@ class HTML(BaseParser):
url=url,
default_encoding=default_encoding
)
+ self.session = session or HTMLSession()
self.page = None
self.next_symbol = DEFAULT_NEXT_SYMBOL
def __repr__(self) -> str:
return f""
+ def _next(self, fetch: bool = False, next_symbol: _NextSymbol = DEFAULT_NEXT_SYMBOL) -> _Next:
+ """Attempts to find the next page, if there is one. If ``fetch``
+ is ``True`` (default), returns :class:`HTML ` object of
+ next page. If ``fetch`` is ``False``, simply returns the next URL.
+
+ """
+
+ def get_next():
+ candidates = self.find('a', containing=next_symbol)
+
+ for candidate in candidates:
+ if candidate.attrs.get('href'):
+ # Support 'next' rel (e.g. reddit).
+ if 'next' in candidate.attrs.get('rel', []):
+ return candidate.attrs['href']
+
+ # Support 'next' in classnames.
+ for _class in candidate.attrs.get('class', []):
+ if 'next' in _class:
+ return candidate.attrs['href']
+
+ if 'page' in candidate.attrs['href']:
+ return candidate.attrs['href']
+
+ try:
+ # Resort to the last candidate.
+ return candidates[-1].attrs['href']
+ except IndexError:
+ return None
+
+ __next = get_next()
+ if __next:
+ url = self._make_absolute(__next)
+ else:
+ return None
+
+ if fetch:
+ return self.session.get(url)
+ else:
+ return url
+
def __iter__(self):
next = self
@@ -464,12 +464,12 @@ class HTML(BaseParser):
while True:
yield next
try:
- next = next.next(fetch=True, next_symbol=self.next_symbol).html
+ next = next._next(fetch=True, next_symbol=self.next_symbol).html
except AttributeError:
break
def __next__(self):
- return self.next(fetch=True, next_symbol=self.next_symbol).html
+ return self._next(fetch=True, next_symbol=self.next_symbol).html
def add_next_symbol(self, next_symbol):
self.next_symbol.append(next_symbol)