From 72a7e0be69b6748ef1e4b003d9ade5dcd3c15fb2 Mon Sep 17 00:00:00 2001
From: Kenneth Reitz
Date: Sun, 4 Mar 2018 15:23:54 -0500
Subject: [PATCH] Merge branch 'master' of github.com:kennethreitz/requests-html
 # Please enter a commit message to explain why this merge is necessary,
 # especially if it merges an updated upstream into a topic branch.
 #
 # Lines starting with '#' will be ignored, and an empty message aborts
 # the commit.

---
Review note (this section is ignored by `git am`):
The loop variable in the iterator hunk below is named `next`, which makes
`next` a local name for the whole method. `next(next)` therefore calls the
yielded object rather than the builtin, and the resulting error is not the
AttributeError the surrounding `try` handles. The added line now calls
`__next__()` on the object directly, which still delegates to the new
`HTML.__next__`. Hunk sizes and the diffstat are unchanged; the blob ids on
the first `index` line predate this amendment. Indentation was restored
from a whitespace-collapsed copy and should be checked against the tree.

 requests_html.py       |  5 ++++-
 tests/test_internet.py | 16 ++++++++++++++++
 2 files changed, 20 insertions(+), 1 deletion(-)
 create mode 100644 tests/test_internet.py

diff --git a/requests_html.py b/requests_html.py
index eacd4f0..2ddb41c 100644
--- a/requests_html.py
+++ b/requests_html.py
@@ -418,10 +418,13 @@ class HTML(BaseParser):
         while True:
             yield next
             try:
-                next = next.next(fetch=True).html
+                next = next.__next__()  # local `next` shadows the builtin
             except AttributeError:
                 break
 
+    def __next__(self):
+        return self.next(fetch=True).html
+
     def render(self, retries: int = 8, script: str = None, wait: float = 0.2, scrolldown=False, sleep: int = 0, reload: bool = True, timeout: Union[float, int] = 8.0):
         """Reloads the response in Chromium, and replaces HTML content
         with an updated version, with JavaScript executed.
diff --git a/tests/test_internet.py b/tests/test_internet.py
new file mode 100644
index 0000000..05cfff5
--- /dev/null
+++ b/tests/test_internet.py
@@ -0,0 +1,16 @@
+from requests_html import HTMLSession
+
+session = HTMLSession()
+
+def test_pagination():
+    pages = (
+        'https://xkcd.com/1957/',
+        'https://reddit.com/',
+        'https://pornhub.com/',
+        'https://theverge.com/archives'
+    )
+
+    for page in pages:
+        r = session.get(page)
+        assert next(r.html)
+