From 881687d6b1c414efcb78f794e397e3000762a5ae Mon Sep 17 00:00:00 2001 From: Sarcastic-Pharm <28820955+Sarcastic-Pharm@users.noreply.github.com> Date: Fri, 26 Oct 2018 02:47:01 +1000 Subject: [PATCH] Updated README.rst to fix async and render examples --- README.rst | 90 ++++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 74 insertions(+), 16 deletions(-) diff --git a/README.rst b/README.rst index a55a984..db3f19e 100644 --- a/README.rst +++ b/README.rst @@ -36,7 +36,6 @@ Make a GET request to 'python.org', using Requests: >>> from requests_html import HTMLSession >>> session = HTMLSession() - >>> r = session.get('https://python.org/') Try async and get some sites at the same time: @@ -45,17 +44,30 @@ Try async and get some sites at the same time: >>> from requests_html import AsyncHTMLSession >>> asession = AsyncHTMLSession() - >>> async def get_pythonorg(): ... r = await asession.get('https://python.org/') - + ... return r + ... >>> async def get_reddit(): ... r = await asession.get('https://reddit.com/') - + ... return r + ... >>> async def get_google(): ... r = await asession.get('https://google.com/') + ... return r + ... + >>> results = asession.run(get_pythonorg, get_reddit, get_google) + >>> results # check the requests all returned a 200 (success) code + [<Response [200]>, <Response [200]>, <Response [200]>] + >>> # Each item in the results list is a response object and can be interacted with as such + >>> for result in results: + ... print(result.html.url) + ... + https://www.python.org/ + https://www.google.com/ + https://www.reddit.com/ - >>> result = session.run(get_pythonorg, get_reddit, get_google) +Note that the order of the objects in the results list represents the order they were returned in, not the order that the coroutines are passed to the ``run`` method, which is shown in the example by the order being different. 
Grab a list of all links on the page, as–is (anchors excluded): @@ -133,7 +145,6 @@ More complex CSS Selector example (copied from Chrome dev tools): >>> r = session.get('https://github.com/') >>> sel = 'body > div.application-main > div.jumbotron.jumbotron-codelines > div > div > div.col-md-7.text-center.text-md-left > p' - >>> print(r.html.find(sel, first=True).text) GitHub is a development platform inspired by the way you work. From open source to business, you can host and review code, manage projects, and build software alongside millions of other developers. @@ -148,27 +159,75 @@ XPath is also supported: JavaScript Support ================== -Let's grab some text that's rendered by JavaScript: +Let's grab some text that's rendered by JavaScript. Until 2020, the Python 2.7 countdown clock (https://pythonclock.org) will serve as a good test page: .. code-block:: pycon - >>> r = session.get('http://python-requests.org') + >>> r = session.get('https://pythonclock.org') + +Let's try and see the dynamically rendered code (The countdown clock). To do that quickly at first, we'll search between the last text we see before it ('Python 2.7 will retire in...') and the first text we see after it ('Enable Guido Mode'). + +.. code-block:: pycon + + >>> r.html.search('Python 2.7 will retire in...{}Enable Guido Mode')[0] + '\n \n
\n
\n
\n
\n
1Year2Months28Days16Hours52Minutes46Seconds
\n
\n
\n
\n' + '
1Year2Months28Days16Hours52Minutes46Seconds
\n' + '
\n' + '
\n' + '