mirror of
https://github.com/kennethreitz/requests-html.git
synced 2026-06-05 23:00:20 +00:00
Merge branch 'master' of github.com:kennethreitz/requests-html
This commit is contained in:
@@ -24,6 +24,7 @@ pytest = "*"
|
||||
"e1839a8" = {path = ".", editable = true}
|
||||
sphinx = "*"
|
||||
mypy = "*"
|
||||
pytest-asyncio = "*"
|
||||
|
||||
|
||||
[scripts]
|
||||
|
||||
+37
-1
@@ -1,7 +1,9 @@
|
||||
import sys
|
||||
import asyncio
|
||||
from urllib.parse import urlparse, urlunparse
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
from concurrent.futures._base import TimeoutError
|
||||
from functools import partial
|
||||
from typing import Set, Union, List, MutableMapping, Optional
|
||||
|
||||
import pyppeteer
|
||||
@@ -357,7 +359,7 @@ class Element(BaseParser):
|
||||
|
||||
__slots__ = [
|
||||
'element', 'url', 'skip_anchors', 'default_encoding', '_encoding',
|
||||
'_encoding', '_html', '_lxml', '_pq', '_attrs', 'session'
|
||||
'_html', '_lxml', '_pq', '_attrs', 'session'
|
||||
]
|
||||
|
||||
def __init__(self, *, element, url: _URL, default_encoding: _DefaultEncoding = None) -> None:
|
||||
@@ -599,3 +601,37 @@ class HTMLSession(requests.Session):
|
||||
r = super(HTMLSession, self).request(*args, **kwargs)
|
||||
|
||||
return HTMLResponse._from_response(r)
|
||||
|
||||
|
||||
class AsyncHTMLSession(requests.Session):
    """ An async consumable session.

    Requests are executed in a thread pool and handed back to the event
    loop as awaitables. """

    def __init__(self, loop=None, workers=None,
                 mock_browser: bool = True, *args, **kwargs):
        """ Set or create an event loop and a thread pool.

        :param loop: Asyncio loop to use. When not given, the loop returned
            by ``asyncio.get_event_loop()`` is used.
        :param workers: Amount of threads to use for executing async calls.
            Forwarded to ``ThreadPoolExecutor`` as ``max_workers``; if not
            passed, the executor picks its own default (which depends on the
            Python version).
        :param mock_browser: When true, set the ``User-Agent`` header to the
            value returned by ``user_agent()``. """
        super().__init__(*args, **kwargs)

        # Mock a web browser's user agent.
        if mock_browser:
            self.headers['User-Agent'] = user_agent()

        # Every response produced by this session goes through the hook.
        self.hooks["response"].append(self.response_hook)

        self.loop = loop or asyncio.get_event_loop()
        self.thread_pool = ThreadPoolExecutor(max_workers=workers)

    @staticmethod
    def response_hook(response, **kwargs) -> HTMLResponse:
        """ Fill in a missing response encoding and replace the response
        by a HTMLResponse. """
        # Only fall back to the default when the response has no encoding of
        # its own; overwriting unconditionally would throw away an encoding
        # that was already determined for this response.
        if not response.encoding:
            response.encoding = DEFAULT_ENCODING
        return HTMLResponse._from_response(response)

    def request(self, *args, **kwargs):
        """ Partial original request func and run it in a thread.

        :returns: The awaitable produced by ``loop.run_in_executor``; await
            it to obtain the result of the underlying request. """
        func = partial(super().request, *args, **kwargs)
        return self.loop.run_in_executor(self.thread_pool, func)
|
||||
|
||||
@@ -1,7 +1,8 @@
|
||||
import os
|
||||
from functools import partial
|
||||
|
||||
import pytest
|
||||
from requests_html import HTMLSession, HTML
|
||||
from requests_html import HTMLSession, AsyncHTMLSession, HTML
|
||||
from requests_file import FileAdapter
|
||||
|
||||
session = HTMLSession()
|
||||
@@ -15,12 +16,31 @@ def get():
|
||||
return session.get(url)
|
||||
|
||||
|
||||
@pytest.fixture
def async_get(event_loop):
    """ Return a callable that fetches the local ``python.html`` through a
    fresh AsyncHTMLSession.

    The session is built inside the fixture rather than at module level,
    since a global one would create a different loop from pytest-asyncio. """
    here = os.path.dirname(os.path.abspath(__file__))
    html_file = os.path.sep.join((here, 'python.html'))

    html_session = AsyncHTMLSession()
    html_session.mount('file://', FileAdapter())

    return partial(html_session.get, 'file://{}'.format(html_file))
|
||||
|
||||
|
||||
@pytest.mark.ok
def test_file_get():
    """ Fetching the local fixture file must report status 200. """
    response = get()
    assert response.status_code == 200
|
||||
|
||||
|
||||
@pytest.mark.ok
@pytest.mark.asyncio
async def test_async_file_get(async_get):
    """ Awaiting the async getter must yield a response with status 200. """
    response = await async_get()
    assert response.status_code == 200
|
||||
|
||||
|
||||
@pytest.mark.ok
|
||||
def test_class_seperation():
|
||||
r = get()
|
||||
@@ -53,6 +73,7 @@ def test_containing():
|
||||
for e in python:
|
||||
assert 'python' in e.full_text.lower()
|
||||
|
||||
|
||||
@pytest.mark.ok
|
||||
def test_attrs():
|
||||
r = get()
|
||||
@@ -71,6 +92,16 @@ def test_links():
|
||||
assert len(about.absolute_links) == 6
|
||||
|
||||
|
||||
@pytest.mark.ok
@pytest.mark.asyncio
async def test_async_links(async_get):
    """ The ``#about`` element of the async-fetched page must expose six
    links and six absolute links. """
    response = await async_get()
    about_section = response.html.find('#about', first=True)

    # Same order as before: relative links are checked first, then absolute.
    for found in (about_section.links, about_section.absolute_links):
        assert len(found) == 6
|
||||
|
||||
|
||||
@pytest.mark.ok
|
||||
def test_search():
|
||||
r = get()
|
||||
|
||||
Reference in New Issue
Block a user