improvements

Signed-off-by: Kenneth Reitz <me@kennethreitz.org>
2026-06-05 23:00:20 +00:00 · 2018-03-03 10:05:22 -05:00
parent df1b037f74
commit 90de9b7ac5
2 changed files with 46 additions and 10 deletions
@@ -144,6 +144,15 @@ XPath is also supported (`learn more <https://msdn.microsoft.com/en-us/library/m
   >>> r.html.xpath('a')
   [<Element 'a' class='btn' href='https://help.github.com/articles/supported-browsers'>]

+You can also select only elements containing certain text:
+
+.. code-block:: pycon
+
+    >>> r = session.get('http://python-requests.org/')
+    >>> r.html.find('a', containing=('kenneth',))
+    [<Element 'a' href='http://kennethreitz.com/pages/open-projects.html'>, <Element 'a' href='http://kennethreitz.org/'>, <Element 'a' href='https://twitter.com/kennethreitz' class=('twitter-follow-button',) data-show-count='false'>, <Element 'a' class=('reference', 'internal') href='dev/contributing/#kenneth-reitz-s-code-style'>]
+
+
 JavaScript Support
 ==================

@@ -162,6 +171,32 @@ Note, the first time you ever run the ``render()`` method, it will download
 Chromium into your home directory (e.g. ``~/.pyppeteer/``). This only happens
 once.

+Pagination
+==========
+
+There's also intelligent pagination support (always improving):
+
+.. code-block:: pycon
+
+    >>> r = session.get('https://reddit.com')
+    >>> for html in r.html:
+    ...     print(html)
+    <HTML url='https://www.reddit.com/'>
+    <HTML url='https://www.reddit.com/?count=25&after=t3_81puu5'>
+    <HTML url='https://www.reddit.com/?count=50&after=t3_81nevg'>
+    <HTML url='https://www.reddit.com/?count=75&after=t3_81lqtp'>
+    <HTML url='https://www.reddit.com/?count=100&after=t3_81k1c8'>
+    <HTML url='https://www.reddit.com/?count=125&after=t3_81p438'>
+    <HTML url='https://www.reddit.com/?count=150&after=t3_81nrcd'>
+    …
+
+You can also just request the next URL easily:
+
+.. code-block:: pycon
+
+    >>> r = session.get('https://reddit.com')
+    >>> r.html.next()
+    'https://www.reddit.com/?count=25&after=t3_81pm82'

 Using without Requests
 ======================
@@ -37,6 +37,7 @@ _Encoding = str
 _LXML = HtmlElement
 _Text = str
 _Search = Result
+_Containing = Union[str, List[str]]
 _Links = Set[str]
 _Attrs = MutableMapping
 _Next = Union['HTML', List[str]]
@@ -59,11 +60,6 @@ class BaseParser:

    """

-    __slots__ = [
-        'element', 'url', 'skip_anchors', 'default_encoding', '_encoding',
-        '_encoding', '_html', '_lxml', '_pq', 'session'
-    ]
-
    def __init__(self, *, element, session: 'HTTPSession' = None, default_encoding: _DefaultEncoding = None, html: _HTML = None, url: _URL) -> None:
        self.element = element
        self.session = session or HTMLSession()
@@ -156,7 +152,7 @@ class BaseParser:
        """
        return self.lxml.text_content()

-    def next(self, fetch: bool = True) -> _Next:
+    def next(self, fetch: bool = False) -> _Next:
         """Attempts to find the next page, if there is one. If ``fetch``
         is ``True``, returns :class:`HTML <HTML>` object of next page.
         If ``fetch`` is ``False`` (default), simply returns the next URL.
@@ -186,7 +182,6 @@ class BaseParser:
            except IndexError:
                return None

-
        next = get_next()
        if next:
            url = self._make_absolute(next)
@@ -198,8 +193,7 @@ class BaseParser:
        else:
            return url

-
-    def find(self, selector: str = "*", containing: Optional[str] = None, first: bool = False, _encoding: str = None) -> _Find:
+    def find(self, selector: str = "*", containing: _Containing = None, first: bool = False, _encoding: str = None) -> _Find:
        """Given a CSS Selector, returns a list of
        :class:`Element <Element>` objects or a single one.

@@ -223,6 +217,10 @@ class BaseParser:
        :class:`Element <Element>` found.
        """

+        # Convert a single containing into a list.
+        if isinstance(containing, str):
+            containing = [containing]
+
        encoding = _encoding or self.encoding
        elements = [
            Element(element=found, url=self.url, default_encoding=encoding)
@@ -357,7 +355,10 @@ class Element(BaseParser):
    :param default_encoding: Which encoding to default to.
    """

-    __slots__ = BaseParser.__slots__
+    __slots__ = [
+        'element', 'url', 'skip_anchors', 'default_encoding', '_encoding',
+        '_encoding', '_html', '_lxml', '_pq', 'session'
+    ]

    def __init__(self, *, element, url: _URL, default_encoding: _DefaultEncoding = None) -> None:
        super(Element, self).__init__(element=element, url=url, default_encoding=default_encoding)