From b1e353d1f19bccbde714b0c74338368615b4ca5a Mon Sep 17 00:00:00 2001 From: yech Date: Sun, 4 Mar 2018 14:25:13 +0800 Subject: [PATCH 1/2] add args '--no-sandbox' add args '--no-sandbox' --- requests_html.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requests_html.py b/requests_html.py index fab76a9..5e222a2 100644 --- a/requests_html.py +++ b/requests_html.py @@ -464,7 +464,7 @@ class HTML(BaseParser): """ async def _async_render(*, url: str, script: str = None, scrolldown, sleep: int, wait: float, reload, content: Optional[str], timeout: Union[float, int]): try: - browser = pyppeteer.launch(headless=True) + browser = pyppeteer.launch(headless=True, args=['--no-sandbox']) page = await browser.newPage() # Wait before rendering the page, to prevent timeouts. From 98d19d73f1be64bef6f9c1b7e607a4244b2b15d8 Mon Sep 17 00:00:00 2001 From: Prodesire Date: Sun, 4 Mar 2018 23:05:15 +0800 Subject: [PATCH 2/2] when calling element.attrs, using cache if available --- requests_html.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/requests_html.py b/requests_html.py index 5e222a2..d128f88 100644 --- a/requests_html.py +++ b/requests_html.py @@ -357,12 +357,13 @@ class Element(BaseParser): __slots__ = [ 'element', 'url', 'skip_anchors', 'default_encoding', '_encoding', - '_encoding', '_html', '_lxml', '_pq', 'session' + '_encoding', '_html', '_lxml', '_pq', '_attrs', 'session' ] def __init__(self, *, element, url: _URL, default_encoding: _DefaultEncoding = None) -> None: super(Element, self).__init__(element=element, url=url, default_encoding=default_encoding) self.element = element + self._attrs = None def __repr__(self) -> str: attrs = ['{}={}'.format(attr, repr(self.attrs[attr])) for attr in self.attrs] @@ -373,14 +374,15 @@ class Element(BaseParser): """Returns a dictionary of the attributes of the :class:`Element ` (`learn more `_). 
""" - attrs = {k: v for k, v in self.element.items()} + if self._attrs is None: + self._attrs = {k: v for k, v in self.element.items()} - # Split class and rel up, as there are ussually many of them: - for attr in ['class', 'rel']: - if attr in attrs: - attrs[attr] = tuple(attrs[attr].split()) + # Split class and rel up, as there are usually many of them: + for attr in ['class', 'rel']: + if attr in self._attrs: + self._attrs[attr] = tuple(self._attrs[attr].split()) - return attrs + return self._attrs class HTML(BaseParser):