From d23ce77a6d4969495b2f10310b9d11570631601a Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Mon, 26 Feb 2018 08:19:34 -0500 Subject: [PATCH] tests, better code Signed-off-by: Kenneth Reitz --- Pipfile | 7 + Pipfile.lock | 127 ++++- requests_html.py | 44 +- tests/python.html | 1053 +++++++++++++++++++++++++++++++++++ tests/test_requests_html.py | 59 ++ 5 files changed, 1273 insertions(+), 17 deletions(-) create mode 100644 tests/python.html create mode 100644 tests/test_requests_html.py diff --git a/Pipfile b/Pipfile index b2bbb78..e3eea50 100644 --- a/Pipfile +++ b/Pipfile @@ -18,3 +18,10 @@ parse = "*" [dev-packages] twine = "*" +requests-file = "*" +pytest = "*" +"e1839a8" = {path = ".", editable = true} + +[scripts] + +tests = "pytest" \ No newline at end of file diff --git a/Pipfile.lock b/Pipfile.lock index 3a773fd..292a498 100644 --- a/Pipfile.lock +++ b/Pipfile.lock @@ -1,7 +1,7 @@ { "_meta": { "hash": { - "sha256": "75c60031e7b2733e07a2cf09fc24895c85b0c5032161ebbca6b7fc68f6c933a3" + "sha256": "0472672d45b7404f1329bdf8d01dc6429fa5668ecff3367094b1048fc4be65a9" }, "host-environment-markers": { "implementation_name": "cpython", @@ -144,6 +144,27 @@ } }, "develop": { + "attrs": { + "hashes": [ + "sha256:a17a9573a6f475c99b551c0e0a812707ddda1ec9653bed04c13841404ed6f450", + "sha256:1c7960ccfd6a005cd9f7ba884e6316b5e430a3f1a6c37c5f87d8b43f83b54ec9" + ], + "version": "==17.4.0" + }, + "beautifulsoup4": { + "hashes": [ + "sha256:7015e76bf32f1f574636c4288399a6de66ce08fb7b2457f628a8d70c0fbabb11", + "sha256:11a9a27b7d3bddc6d86f59fb76afb70e921a25ac2d6cc55b40d072bd68435a76", + "sha256:808b6ac932dccb0a4126558f7dfdcf41710dd44a4ef497a0bb59a77f9f078e89" + ], + "version": "==4.6.0" + }, + "bs4": { + "hashes": [ + "sha256:36ecea1fd7cc5c0c6e4a1ff075df26d50da647b75376626cc186e2212886dd3a" + ], + "version": "==0.0.1" + }, "certifi": { "hashes": [ "sha256:14131608ad2fd56836d33a71ee60fa1c82bc9d2c8d98b7bdbc631fe1b3cd1296", @@ -158,6 +179,30 @@ ], "version": "==3.0.4" }, + 
"cssselect": { + "hashes": [ + "sha256:3b5103e8789da9e936a68d993b70df732d06b8bb9a337a05ed4eb52c17ef7206", + "sha256:066d8bc5229af09617e24b3ca4d52f1f9092d9e061931f4184cd572885c23204" + ], + "version": "==1.0.3" + }, + "e1839a8": { + "editable": true, + "path": "." + }, + "fake-useragent": { + "hashes": [ + "sha256:cc9b9ddcebc708b3deac846f5fccb16e37c02ee47435a4ec7132271dd96aec8c" + ], + "version": "==0.1.10" + }, + "html2text": { + "hashes": [ + "sha256:490db40fe5b2cd79c461cf56be4d39eb8ca68191ae41ba3ba79f6cb05b7dd662", + "sha256:627514fb30e7566b37be6900df26c2c78a030cc9e6211bda604d8181233bcdd4" + ], + "version": "==2018.1.9" + }, "idna": { "hashes": [ "sha256:8c7309c718f94b3a625cb648ace320157ad16ff131ae0af362c9f21b80ef6ec4", @@ -165,6 +210,45 @@ ], "version": "==2.6" }, + "lxml": { + "hashes": [ + "sha256:41f59cbdab232f11680d5d4dec9f2e6782fd24d78e37ee833447702e34e675f4", + "sha256:e7e41d383f19bab9d57f5f3b18d158655bcd682e7e723f441b9e183e1e35a6b5", + "sha256:155521c337acecf8202091cff85bb9f709f238130ebadf04280fb1db11f5ad8b", + "sha256:d2c985d2460b81c6ca5feb8b86f1bc594ad59405d0bdf68626b85852b701553c", + "sha256:950e63387514aa1b881eba5ac6cb2ec51a118b3dafe99dd80ca19d8fb0142f30", + "sha256:470d7ce41e8047208ba1a376560bad17f1468df1f3097bc83902b26cfafdbb0c", + "sha256:e608839a5ee2180164424ccf279c8e2d9bbe8816d002c58fd97d6b621ba4aa94", + "sha256:87a66bcadac270fc010cb029022a93fc722bf1204a8b03e782d4c790f0edf7ca", + "sha256:2dedfeeecc2d5a939cf622602f5a1ce443ca82407f386880f739f1a9f08053ad", + "sha256:ba05732e4bcf59e948f61588851dcf620fd60d5bbd9d704203e5f59bbaa60219", + "sha256:2190266059fec3c5a55f9d6c30532c64c6d414d3228909c0af573fe4907e78d1", + "sha256:dd291debfaa535d9cb6cee8d7aca2328775e037d02d13f1634e57f49bc302cc4", + "sha256:29a36e354c39b2e24bc4ee103de53417ebb80f976a6ab9e8d093d559e2ac03e1", + "sha256:e37427d5a27eefbcfc48847e0b37f348113fac7280bc857421db39ffc6372570", + "sha256:b106d4d2383382399ad82108fd187e92f40b1c90f55c2d36bbcb1c44bcf940fc", + 
"sha256:0ee07da52d240f1dc3c83eef5cd5f1b7f018226c1121f2a54d446645779a6d17", + "sha256:3b33549fb8f91b38a7500078242b03cca513f3412a2cdae722e89bf83f95971d", + "sha256:4c12e90886d9c53ab434c8d0cebea122321cce19614c3c6b6d1a7700d7cc6212", + "sha256:79322000279cda10b53c374d53ca632ead3bc51c6aebf8e62c8fa93a4d08b750", + "sha256:6cba398eb37e0631e60e0e080c101cfe91769b2c8267105b64b4625e2581ea21", + "sha256:49a655956f8de69e1258bc0fcfc43eb3bd1e038655784d77d1869b4b81444e37", + "sha256:af8a5373241d09b8fc53e0490e1719ce5dc90a21b19db89b6596c1adcdd52270", + "sha256:e6b6698415c7e8d227a47a3b1038e1b37c2b438a1b48c2db7ad9e74ddbcd1149", + "sha256:155c916cf2645b4a8f2bd5d09065e92d1b67b8d464bdc001e0b524af84bedf6f", + "sha256:fa7320679ced5e25b20203d157280680fc84eb783b6cc650cb0c98e1858b7dd3", + "sha256:4187c4b0cefc3353181db048c51f42c489d9ac51e40b86c4851dc0671372971d", + "sha256:d5d29663e979e83b3fc361e97200f959cddb3a14797391d15273d84a5a8ae44b", + "sha256:940caef1ec7c78e0c34b0f6b94fe42d0f2022915ffc78643d28538a5cfd0f40e" + ], + "version": "==4.1.1" + }, + "parse": { + "hashes": [ + "sha256:8048dde3f5ca07ad7ac7350460952d83b63eaacecdac1b37f45fd74870d849d2" + ], + "version": "==1.8.2" + }, "pkginfo": { "hashes": [ "sha256:31a49103180ae1518b65d3f4ce09c784e2bc54e338197668b4fb7dc539521024", @@ -172,6 +256,33 @@ ], "version": "==1.4.1" }, + "pluggy": { + "hashes": [ + "sha256:7f8ae7f5bdf75671a718d2daf0a64b7885f74510bcd98b1a0bb420eb9a9d0cff" + ], + "version": "==0.6.0" + }, + "py": { + "hashes": [ + "sha256:8cca5c229d225f8c1e3085be4fcf306090b00850fefad892f9d96c7b6e2f310f", + "sha256:ca18943e28235417756316bfada6cd96b23ce60dd532642690dcfdaba988a76d" + ], + "version": "==1.5.2" + }, + "pyquery": { + "hashes": [ + "sha256:07987c2ed2aed5cba29ff18af95e56e9eb04a2249f42ce47bddfb37f487229a3", + "sha256:4771db76bd14352eba006463656aef990a0147a0eeaf094725097acfa90442bf" + ], + "version": "==1.4.0" + }, + "pytest": { + "hashes": [ + "sha256:8970e25181e15ab14ae895599a0a0e0ade7d1f1c4c8ca1072ce16f25526a184d", + 
"sha256:9ddcb879c8cc859d2540204b5399011f842e5e8823674bf429f70ada281b3cc6" + ], + "version": "==3.4.1" + }, "requests": { "hashes": [ "sha256:6a1b267aa90cac58ac3a765d067950e7dbbf75b1da07e895d1f594193a40a38b", @@ -179,6 +290,13 @@ ], "version": "==2.18.4" }, + "requests-file": { + "hashes": [ + "sha256:75c175eed739270aec3c5279ffd74e6527dada275c5c0d76b5817e9c86bb7dea", + "sha256:8f04aa6201bacda0567e7ac7f677f1499b0fc76b22140c54bc06edf1ba92e2fa" + ], + "version": "==1.4.3" + }, "requests-toolbelt": { "hashes": [ "sha256:42c9c170abc2cacb78b8ab23ac957945c7716249206f90874651971a4acff237", @@ -186,6 +304,13 @@ ], "version": "==0.8.0" }, + "six": { + "hashes": [ + "sha256:832dc0e10feb1aa2c68dcc57dbb658f1c7e65b9b61af69048abc87a2db00a0eb", + "sha256:70e8a77beed4562e7f14fe23a786b54f6296e34344c23bc42f07b15018ff98e9" + ], + "version": "==1.11.0" + }, "tqdm": { "hashes": [ "sha256:4c041f8019f7be65b8028ddde9a836f7ccc51c4637f1ff2ba9b5813d38d19d5a", diff --git a/requests_html.py b/requests_html.py index c90ce1a..a672721 100644 --- a/requests_html.py +++ b/requests_html.py @@ -62,9 +62,16 @@ class BaseParser: else: return c - def xpath(self, selector): + def xpath(self, selector, first=False): """Given an XPath selector, returns a list of element objects.""" - return [Element(element=e, url=self.url) for e in self.lxml.xpath(selector)] + c = [Element(element=e, url=self.url) for e in self.lxml.xpath(selector)] + if first: + try: + return c[0] + except IndexError: + return None + else: + return c def search(self, template): """Searches the element for the given parse template.""" @@ -146,7 +153,7 @@ class HTML(BaseParser): """An HTML document.""" def __init__(self, *, response): super(HTML, self).__init__( - element=fromstring(self.html), + element=fromstring(response.text), html=response.text, url=response.url ) @@ -174,21 +181,26 @@ def user_agent(style=None): else: return useragent[style] -def get_session(mock_browser=True): - """Returns a consumable session, for cookie persistence 
and connection - pooling, amongst other things. - """ - # Requests Session. - session = requests.Session() +class Session(requests.Session): + """A consumable session, for cookie persistence and connection pooling, + amongst other things.""" + def __init__(self, mock_browser=True, *args, **kwargs): + super(Session, self).__init__(*args, **kwargs) - # Mock a web browser's user agent. - if mock_browser: - session.headers['User-Agent'] = user_agent() + # Mock a web browser's user agent. + if mock_browser: + self.headers['User-Agent'] = user_agent() - # Hook into Requests. - session.hooks = {'response': _handle_response} + self.hooks = {'response': self._handle_response} - return session + @staticmethod + def _handle_response(response, **kwargs): + """Requests HTTP Response handler. Attaches .html property to Response + objects. + """ -session = get_session() + response.html = HTML(response=response) + return response + +session = Session() diff --git a/tests/python.html b/tests/python.html new file mode 100644 index 0000000..ce95f3a --- /dev/null +++ b/tests/python.html @@ -0,0 +1,1053 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Welcome to Python.org + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ +
+

Notice: While Javascript is not essential for this website, your interaction with the content will be limited. Please turn Javascript on for the full experience.

+
+ + + + +
+ + + +
+ + + + +
+ +
+ +
+ + + + + + + + +
+ +
+

Get Started

+

Whether you're new to programming or an experienced developer, it's easy to learn and use Python.

+

Start with our Beginner’s Guide

+
+ +
+

Download

+

Python source code and installers are available for download for all versions! Not sure which version to use? Check here.

+

Latest: Python 3.6.4 - Python 2.7.14

+
+ +
+

Docs

+

Documentation for Python's standard library, along with tutorials and guides, are available online.

+

docs.python.org

+
+ +
+

Jobs

+

Looking for work or have a Python related position that you're trying to hire for? Our relaunched community-run job board is the place to go.

+

jobs.python.org

+
+ +
+ + + + + + +
+ +

+ >>> Python Enhancement Proposals (PEPs): The future of Python is discussed here. + +

+ + + + +
+ +
+ + + +

+ >>> Python Software Foundation +

+

The mission of the Python Software Foundation is to promote, protect, and advance the Python programming language, and to support and facilitate the growth of a diverse and international community of Python programmers. Learn more

+

+ Become a Member + Donate to the PSF +

+
+ + + + +
+ + + + + + + + +
+
+ + + + +
+ + + + + + + + + + + + + + + + + + + + +
diff --git a/tests/test_requests_html.py b/tests/test_requests_html.py
new file mode 100644
index 0000000..6ba02c9
--- /dev/null
+++ b/tests/test_requests_html.py
@@ -0,0 +1,59 @@
+import os
+
+from requests_html import Session
+from requests_file import FileAdapter
+
+session = Session()
+session.mount('file://', FileAdapter())  # let the session resolve file:// URLs
+
+
+def get():  # fetch the python.html fixture that sits next to this module
+    path = os.path.sep.join((os.path.dirname(os.path.abspath(__file__)), 'python.html'))
+    url = 'file://{}'.format(path)
+
+    return session.get(url)
+
+
+def test_file_get():
+    r = get()
+    assert r.status_code == 200
+
+
+def test_css_selector():
+    r = get()
+    about = r.html.find('#about', first=True)
+
+    for menu_item in (
+        'About', 'Applications', 'Quotes', 'Getting Started', 'Help',
+        'Python Brochure'
+    ):
+        assert menu_item in about.text.split('\n')
+
+
+def test_attrs():
+    r = get()
+    about = r.html.find('#about', first=True)
+
+    assert 'aria-haspopup' in about.attrs
+    assert len(about.attrs['class']) == 2
+
+
+def test_links():
+    r = get()
+    about = r.html.find('#about', first=True)
+
+    assert len(about.links) == 6  # was a bare comparison: result discarded, test could never fail
+    assert len(about.absolute_links) == 6  # same fix: actually check the count
+
+
+def test_search():
+    r = get()
+    style = r.html.search('Python is a {} language')[0]
+    assert style == 'programming'
+
+
+def test_xpath():
+    r = get()
+    html = r.html.xpath('/html', first=True)
+    assert 'no-js' in html.attrs['class']
+