diff --git a/docs/source/index.rst b/docs/source/index.rst index ba7013e..23bde53 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -144,6 +144,15 @@ XPath is also supported (`learn more >> r.html.xpath('a') [] +You can also select only elements containing certain text: + +.. code-block:: pycon + + >>> r = session.get('http://python-requests.org/') + >>> r.html.find('a', containing=('kenneth',)) + [, , , ] + + JavaScript Support ================== @@ -162,6 +171,32 @@ Note, the first time you ever run the ``render()`` method, it will download Chromium into your home directory (e.g. ``~/.pyppeteer/``). This only happens once. +Pagination +========== + +There's also intelligent pagination support (always improving): + +.. code-block:: pycon + + >>> r = session.get('https://reddit.com') + >>> for html in r.html: + ... print(html) + + + + + + + + … + +You can also just request the next URL easily: + +.. code-block:: pycon + + >>> r = session.get('https://reddit.com') + >>> r.html.next() + 'https://www.reddit.com/?count=25&after=t3_81pm82' Using without Requests ====================== diff --git a/requests_html.py b/requests_html.py index 786e7f9..fab76a9 100644 --- a/requests_html.py +++ b/requests_html.py @@ -37,6 +37,7 @@ _Encoding = str _LXML = HtmlElement _Text = str _Search = Result +_Containing = Union[str, List[str]] _Links = Set[str] _Attrs = MutableMapping _Next = Union['HTML', List[str]] @@ -59,11 +60,6 @@ class BaseParser: """ - __slots__ = [ - 'element', 'url', 'skip_anchors', 'default_encoding', '_encoding', - '_encoding', '_html', '_lxml', '_pq', 'session' - ] - def __init__(self, *, element, session: 'HTTPSession' = None, default_encoding: _DefaultEncoding = None, html: _HTML = None, url: _URL) -> None: self.element = element self.session = session or HTMLSession() @@ -156,7 +152,7 @@ class BaseParser: """ return self.lxml.text_content() - def next(self, fetch: bool = True) -> _Next: + def next(self, fetch: bool = False) -> _Next: 
"""Attempts to find the next page, if there is one. If ``fetch`` is ``True`` (default), returns :class:`HTML ` object of next page. If ``fetch`` is ``False``, simply returns the next URL. @@ -186,7 +182,6 @@ class BaseParser: except IndexError: return None - next = get_next() if next: url = self._make_absolute(next) @@ -198,8 +193,7 @@ class BaseParser: else: return url - - def find(self, selector: str = "*", containing: Optional[str] = None, first: bool = False, _encoding: str = None) -> _Find: + def find(self, selector: str = "*", containing: _Containing = None, first: bool = False, _encoding: str = None) -> _Find: """Given a CSS Selector, returns a list of :class:`Element ` objects or a single one. @@ -223,6 +217,10 @@ class BaseParser: :class:`Element ` found. """ + # Convert a single containing into a list. + if isinstance(containing, str): + containing = [containing] + encoding = _encoding or self.encoding elements = [ Element(element=found, url=self.url, default_encoding=encoding) @@ -357,7 +355,10 @@ class Element(BaseParser): :param default_encoding: Which encoding to default to. """ - __slots__ = BaseParser.__slots__ + __slots__ = [ + 'element', 'url', 'skip_anchors', 'default_encoding', '_encoding', + '_encoding', '_html', '_lxml', '_pq', 'session' + ] def __init__(self, *, element, url: _URL, default_encoding: _DefaultEncoding = None) -> None: super(Element, self).__init__(element=element, url=url, default_encoding=default_encoding)