diff --git a/_modules/requests_html.html b/_modules/requests_html.html index 0eb042f..18011c8 100644 --- a/_modules/requests_html.html +++ b/_modules/requests_html.html @@ -56,6 +56,7 @@ DEFAULT_ENCODING = 'utf-8' DEFAULT_URL = 'https://example.org/' DEFAULT_USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/603.3.8 (KHTML, like Gecko) Version/10.1.2 Safari/603.3.8' +DEFAULT_NEXT_SYMBOL = ['next', 'more', 'older'] cleaner = Cleaner() cleaner.javascript = True @@ -81,6 +82,7 @@ _Links = Set[str] _Attrs = MutableMapping _Next = Union['HTML', List[str]] +_NextSymbol = List[str] # Sanity checking. try: @@ -90,6 +92,12 @@ raise RuntimeError('Requests-HTML requires Python 3.6+!') +class MaxRetries(Exception): + + def __init__(self, message): + self.message = message + + class BaseParser: """A basic HTML/Element Parser, for Humans. @@ -100,9 +108,8 @@ """ - def __init__(self, *, element, session: 'HTTPSession' = None, default_encoding: _DefaultEncoding = None, html: _HTML = None, url: _URL) -> None: + def __init__(self, *, element, default_encoding: _DefaultEncoding = None, html: _HTML = None, url: _URL) -> None: self.element = element - self.session = session or HTMLSession() self.url = url self.skip_anchors = True self.default_encoding = default_encoding @@ -151,6 +158,12 @@ # Scan meta tags for charset. if self._html: self._encoding = html_to_unicode(self.default_encoding, self._html)[0] + # Fall back to requests' detected encoding if decode fails. + try: + self.raw_html.decode(self.encoding) + except UnicodeDecodeError: + self._encoding = self.default_encoding + return self._encoding if self._encoding else self.default_encoding @@ -196,47 +209,6 @@ """ return self.lxml.text_content() - def next(self, fetch: bool = False) -> _Next: - """Attempts to find the next page, if there is one. If ``fetch`` - is ``True`` (default), returns :class:`HTML <HTML>` object of - next page. If ``fetch`` is ``False``, simply returns the next URL. - - """ - - def get_next(): - candidates = self.find('a', containing=('next', 'more', 'older')) - - for candidate in candidates: - if candidate.attrs.get('href'): - # Support 'next' rel (e.g. reddit). - if 'next' in candidate.attrs.get('rel', []): - return candidate.attrs['href'] - - # Support 'next' in classnames. - for _class in candidate.attrs.get('class', []): - if 'next' in _class: - return candidate.attrs['href'] - - if 'page' in candidate.attrs['href']: - return candidate.attrs['href'] - - try: - # Resort to the last candidate. - return candidates[-1].attrs['href'] - except IndexError: - return None - - next = get_next() - if next: - url = self._make_absolute(next) - else: - return None - - if fetch: - return self.session.get(url) - else: - return url - def find(self, selector: str = "*", *, containing: _Containing = None, clean: bool = False, first: bool = False, _encoding: str = None) -> _Find: """Given a CSS Selector, returns a list of :class:`Element <Element>` objects or a single one. @@ -356,7 +328,7 @@ try: href = link.attrs['href'].strip() - if href and not (href.startswith('#') and self.skip_anchors) and not href.startswith('javascript:'): + if href and not (href.startswith('#') and self.skip_anchors) and not href.startswith(('javascript:', 'mailto:')): yield href except KeyError: pass @@ -405,7 +377,7 @@ # Support for <base> tag. base = self.find('base', first=True) if base: - result = base.attrs['href'].strip() + result = base.attrs.get('href', '').strip() if result: return result @@ -413,7 +385,7 @@ parsed = urlparse(self.url)._asdict() # Remove any part of the path after the last '/' - path = '/'.join(parsed['path'].split('/')[:-1]) + parsed['path'] = '/'.join(parsed['path'].split('/')[:-1]) + '/' # Reconstruct the url with the modified path parsed = (v for v in parsed.values()) @@ -468,7 +440,7 @@ :param default_encoding: Which encoding to default to. """ - def __init__(self, *, url: str = DEFAULT_URL, html: _HTML, default_encoding: str = DEFAULT_ENCODING) -> None: + def __init__(self, *, session: Union['HTTPSession', 'AsyncHTMLSession'] = None, url: str = DEFAULT_URL, html: _HTML, default_encoding: str = DEFAULT_ENCODING) -> None: # Convert incoming unicode HTML into bytes. if isinstance(html, str): @@ -481,11 +453,54 @@ url=url, default_encoding=default_encoding ) + self.session = session or HTMLSession() self.page = None + self.next_symbol = DEFAULT_NEXT_SYMBOL def __repr__(self) -> str: return f"<HTML url={self.url!r}>" + def _next(self, fetch: bool = False, next_symbol: _NextSymbol = DEFAULT_NEXT_SYMBOL) -> _Next: + """Attempts to find the next page, if there is one. If ``fetch`` + is ``True`` (default), returns :class:`HTML <HTML>` object of + next page. If ``fetch`` is ``False``, simply returns the next URL. + + """ + + def get_next(): + candidates = self.find('a', containing=next_symbol) + + for candidate in candidates: + if candidate.attrs.get('href'): + # Support 'next' rel (e.g. reddit). + if 'next' in candidate.attrs.get('rel', []): + return candidate.attrs['href'] + + # Support 'next' in classnames. + for _class in candidate.attrs.get('class', []): + if 'next' in _class: + return candidate.attrs['href'] + + if 'page' in candidate.attrs['href']: + return candidate.attrs['href'] + + try: + # Resort to the last candidate. + return candidates[-1].attrs['href'] + except IndexError: + return None + + __next = get_next() + if __next: + url = self._make_absolute(__next) + else: + return None + + if fetch: + return self.session.get(url) + else: + return url + def __iter__(self): next = self @@ -493,14 +508,17 @@ while True: yield next try: - next = next.next(fetch=True).html + next = next._next(fetch=True, next_symbol=self.next_symbol).html except AttributeError: break def __next__(self): - return self.next(fetch=True).html + return self._next(fetch=True, next_symbol=self.next_symbol).html -
[docs] def render(self, retries: int = 8, script: str = None, wait: float = 0.2, scrolldown=False, sleep: int = 0, reload: bool = True, timeout: Union[float, int] = 8.0): + def add_next_symbol(self, next_symbol): + self.next_symbol.append(next_symbol) + +
[docs] def render(self, retries: int = 8, script: str = None, wait: float = 0.2, scrolldown=False, sleep: int = 0, reload: bool = True, timeout: Union[float, int] = 8.0, keep_page: bool = False): """Reloads the response in Chromium, and replaces HTML content with an updated version, with JavaScript executed. @@ -510,6 +528,7 @@ :param scrolldown: Integer, if provided, of how many times to page down. :param sleep: Integer, if provided, of how many long to sleep after initial render. :param reload: If ``False``, content will not be loaded from the browser, but will be provided from memory. + :param keep_page: If ``True`` will allow you to interact with the browser page through ``r.html.page``. If ``scrolldown`` is specified, the page will scrolldown the specified number of times, after sleeping the specified amount of time @@ -540,13 +559,15 @@ >>> r.html.render(script=script) {'width': 800, 'height': 600, 'deviceScaleFactor': 1} + Warning: If you use keep_page, you're responsable for closing each page, since + opening to many at scale may crach the browser. + Warning: the first time you run this method, it will download Chromium into your home directory (``~/.pyppeteer``). """ - async def _async_render(*, url: str, script: str = None, scrolldown, sleep: int, wait: float, reload, content: Optional[str], timeout: Union[float, int]): + async def _async_render(*, url: str, script: str = None, scrolldown, sleep: int, wait: float, reload, content: Optional[str], timeout: Union[float, int], keep_page: bool): try: - browser = pyppeteer.launch(headless=True, args=['--no-sandbox']) - page = await browser.newPage() + page = await self.session.browser.newPage() # Wait before rendering the page, to prevent timeouts. await asyncio.sleep(wait) @@ -573,11 +594,14 @@ # Return the content of the page, JavaScript evaluated. content = await page.content() + if not keep_page: + await page.close() + page = None return content, result, page except TimeoutError: return None - loop = asyncio.get_event_loop() + self.session.browser # Automatycally create a event loop and browser content = None # Automatically set Reload to False, if example URL is being used. @@ -588,9 +612,14 @@ if not content: try: - content, result, page = loop.run_until_complete(_async_render(url=self.url, script=script, sleep=sleep, wait=wait, content=self.html, reload=reload, scrolldown=scrolldown, timeout=timeout)) - except TimeoutError: + content, result, page = self.session.loop.run_until_complete(_async_render(url=self.url, script=script, sleep=sleep, wait=wait, content=self.html, reload=reload, scrolldown=scrolldown, timeout=timeout, keep_page=keep_page)) + except TypeError: pass + else: + break + + if not content: + raise MaxRetries("Unable to render the page. Try increasing timeout") html = HTML(url=self.url, html=content.encode(DEFAULT_ENCODING), default_encoding=DEFAULT_ENCODING) self.__dict__.update(html.__dict__) @@ -603,20 +632,21 @@ Effectively the same, but with an intelligent ``.html`` property added. """ - def __init__(self) -> None: + def __init__(self, session: Union['HTMLSession', 'AsyncHTMLSession']) -> None: super(HTMLResponse, self).__init__() self._html = None # type: HTML + self.session = session @property def html(self) -> HTML: if not self._html: - self._html = HTML(url=self.url, html=self.content, default_encoding=self.encoding) + self._html = HTML(session=self.session, url=self.url, html=self.content, default_encoding=self.encoding) return self._html @classmethod - def _from_response(cls, response): - html_r = cls() + def _from_response(cls, response, session: Union['HTMLSession', 'AsyncHTMLSession']): + html_r = cls(session=session) html_r.__dict__.update(response.__dict__) return html_r @@ -672,8 +702,21 @@ """ # Convert Request object into HTTPRequest object. r = super(HTMLSession, self).request(*args, **kwargs) -
- return HTMLResponse._from_response(r) + + return HTMLResponse._from_response(r, self) + + @property + def browser(self): + if not hasattr(self, "_browser"): + self.loop = asyncio.get_event_loop() + self._browser = self.loop.run_until_complete(pyppeteer.launch(headless=True, args=['--no-sandbox'])) + return self._browser + +
[docs] def close(self): + """ If a browser was created close it first. """ + if hasattr(self, "_browser"): + self.loop.run_until_complete(self._browser.close())
+ super().close() class AsyncHTMLSession(requests.Session): @@ -693,16 +736,15 @@ if mock_browser: self.headers['User-Agent'] = user_agent() - self.hooks["response"].append(self.response_hook) + self.hooks['response'].append(self.response_hook) self.loop = loop or asyncio.get_event_loop() self.thread_pool = ThreadPoolExecutor(max_workers=workers) - @staticmethod - def response_hook(response, **kwargs) -> HTMLResponse: + def response_hook(self, response, **kwargs) -> HTMLResponse: """ Change response enconding and replace it by a HTMLResponse. """ response.encoding = DEFAULT_ENCODING - return HTMLResponse._from_response(response) + return HTMLResponse._from_response(response, self) def request(self, *args, **kwargs): """ Partial original request func and run it in a thread. """ diff --git a/genindex.html b/genindex.html index f20b8cf..807070b 100644 --- a/genindex.html +++ b/genindex.html @@ -44,7 +44,6 @@ | H | L | M - | N | O | P | R @@ -200,18 +199,6 @@ -

N

- - -
-

O

    diff --git a/index.html b/index.html index 7934e14..836c4ea 100644 --- a/index.html +++ b/index.html @@ -210,7 +210,7 @@ once.

    These classes are the main interface to requests-html:

    -class requests_html.HTML(*, url: str = 'https://example.org/', html: Union[str, bytes], default_encoding: str = 'utf-8') → None[source]
    +class requests_html.HTML(*, session: Union[_ForwardRef('HTTPSession'), _ForwardRef('AsyncHTMLSession')] = None, url: str = 'https://example.org/', html: Union[str, bytes], default_encoding: str = 'utf-8') → None[source]

    An HTML document, ready for parsing.

    @@ -306,14 +306,6 @@ for more details.

    Element or HTML.

    -
    -
    -next(fetch: bool = False) → Union[_ForwardRef('HTML'), typing.List[str]]
    -

    Attempts to find the next page, if there is one. If fetch -is True (default), returns HTML object of -next page. If fetch is False, simply returns the next URL.

    -
    -
    pq
    @@ -330,7 +322,7 @@ of the
    -render(retries: int = 8, script: str = None, wait: float = 0.2, scrolldown=False, sleep: int = 0, reload: bool = True, timeout: Union[float, int] = 8.0)[source]
    +render(retries: int = 8, script: str = None, wait: float = 0.2, scrolldown=False, sleep: int = 0, reload: bool = True, timeout: Union[float, int] = 8.0, keep_page: bool = False)[source]

    Reloads the response in Chromium, and replaces HTML content with an updated version, with JavaScript executed.

    @@ -344,6 +336,7 @@ with an updated version, with JavaScript executed.

  • scrolldown – Integer, if provided, of how many times to page down.
  • sleep – Integer, if provided, of how many long to sleep after initial render.
  • reload – If False, content will not be loaded from the browser, but will be provided from memory.
  • +
  • keep_page – If True will allow you to interact with the browser page through r.html.page.
  • @@ -372,6 +365,8 @@ runtime. Example:

    {'width': 800, 'height': 600, 'deviceScaleFactor': 1} +

    Warning: If you use keep_page, you’re responsable for closing each page, since +opening to many at scale may crach the browser.

    Warning: the first time you run this method, it will download Chromium into your home directory (~/.pyppeteer).

    @@ -546,14 +541,6 @@ for more details.

    Element or HTML.

    -
    -
    -next(fetch: bool = False) → Union[_ForwardRef('HTML'), typing.List[str]]
    -

    Attempts to find the next page, if there is one. If fetch -is True (default), returns HTML object of -next page. If fetch is False, simply returns the next URL.

    -
    -
    pq
    @@ -654,8 +641,8 @@ style. Defaults to a Chrome-style User-Agent.

    amongst other things.

    -close()
    -

    Closes all adapters and as such the session

    +close()[source] +

    If a browser was created close it first.

    diff --git a/objects.inv b/objects.inv index bcfdbe1..00083a2 100644 Binary files a/objects.inv and b/objects.inv differ diff --git a/searchindex.js b/searchindex.js index 3921a9b..7bd2025 100644 --- a/searchindex.js +++ b/searchindex.js @@ -1 +1 @@ -Search.setIndex({docnames:["index"],envversion:53,filenames:["index.rst"],objects:{"":{requests_html:[0,0,0,"-"]},"requests_html.Element":{absolute_links:[0,2,1,""],attrs:[0,2,1,""],base_url:[0,2,1,""],encoding:[0,2,1,""],find:[0,3,1,""],full_text:[0,2,1,""],html:[0,2,1,""],links:[0,2,1,""],lxml:[0,2,1,""],next:[0,3,1,""],pq:[0,2,1,""],raw_html:[0,2,1,""],search:[0,3,1,""],search_all:[0,3,1,""],text:[0,2,1,""],xpath:[0,3,1,""]},"requests_html.HTML":{absolute_links:[0,2,1,""],base_url:[0,2,1,""],encoding:[0,2,1,""],find:[0,3,1,""],full_text:[0,2,1,""],html:[0,2,1,""],links:[0,2,1,""],lxml:[0,2,1,""],next:[0,3,1,""],pq:[0,2,1,""],raw_html:[0,2,1,""],render:[0,3,1,""],search:[0,3,1,""],search_all:[0,3,1,""],text:[0,2,1,""],xpath:[0,3,1,""]},"requests_html.HTMLSession":{"delete":[0,3,1,""],close:[0,3,1,""],get:[0,3,1,""],get_adapter:[0,3,1,""],get_redirect_target:[0,3,1,""],head:[0,3,1,""],merge_environment_settings:[0,3,1,""],mount:[0,3,1,""],options:[0,3,1,""],patch:[0,3,1,""],post:[0,3,1,""],prepare_request:[0,3,1,""],put:[0,3,1,""],rebuild_auth:[0,3,1,""],rebuild_method:[0,3,1,""],rebuild_proxies:[0,3,1,""],request:[0,3,1,""],resolve_redirects:[0,3,1,""],send:[0,3,1,""]},requests_html:{Element:[0,1,1,""],HTML:[0,1,1,""],HTMLSession:[0,1,1,""],user_agent:[0,4,1,""]}},objnames:{"0":["py","module","Python module"],"1":["py","class","Python class"],"2":["py","attribute","Python attribute"],"3":["py","method","Python method"],"4":["py","function","Python function"]},objtypes:{"0":"py:module","1":"py:class","2":"py:attribute","3":"py:method","4":"py:function"},terms:{"348rc1":0,"355rc1":0,"370a4":0,"370b1":0,"4c4":0,"byte":0,"case":0,"default":0,"float":0,"import":0,"int":0,"long":0,"new":0,"return":0,"true":0,The:0,There:0,These:0,_blank:0,_encod:0,_forwardref:0,abil:0,about:0,abov:0,absolut:0,absolute_link:0,account:0,adapt:0,adapter_kwarg:0,after:0,agent:0,all:0,alongsid:0,also:0,altern:0,alwai:0,amobel8b8mc:0,amongst:0,amount:0,anchor:0,ani:0,ansibl:0,app:0,appar:0,applic:0,appropri:0,arg:0,argument:0,aria:0,art:0,articl:0,attempt:0,attr:0,attribut:0,authent:0,author:0,automat:0,avail:0,avoid:0,award:0,base:0,base_url:0,baseadapt:0,befor:0,beginnersguid:0,behavior:0,being:0,blog:0,blogspot:0,bodi:0,bool:0,bottlepi:0,brochur:0,browser:0,btn:0,bug:0,build:0,buildbot:0,busi:0,button:0,calcul:0,calendar:0,can:0,center:0,cert:0,certain:0,certian:[],chang:0,check:0,chrome:0,chromium:0,clean:0,clientheight:0,clientwidth:0,close:0,code:0,codelin:0,codeofconduct:0,col:0,com:0,commun:0,complex:0,configur:0,connect:0,consid:0,construct:0,consum:0,contain:0,content:0,contribut:0,controlflow:0,cooki:0,copi:0,core:0,count:0,cover:0,credenti:0,css:0,data:0,default_encod:0,defin:0,delet:0,descend:0,detail:0,dev:0,develop:0,devguid:0,devicepixelratio:0,devicescalefactor:0,dict:0,dictionari:0,directori:0,div:0,divers:0,djangoproject:0,doc:0,documentel:0,donat:0,down:0,download:0,easili:0,edgewal:0,educ:0,element:0,encod:0,engin:0,environ:0,essai:0,evalu:0,event:0,ever:0,exampl:0,exclud:0,execut:0,experi:0,extract:0,facebook:0,faint:0,fals:0,faq:0,feedproxi:0,fetch:0,file:0,find:0,first:0,flask:0,follow:0,form:0,format:0,forum:0,found:0,fref:0,from:0,full:0,full_text:0,gener:0,get:0,get_adapt:0,get_redirect_target:0,getpython:0,gettingstart:0,github:0,given:0,gnome:0,googl:0,govern:0,grab:0,group:0,happen:0,has:0,haspopup:0,head:0,header:0,heart:0,height:0,help:0,hidden:0,home:0,host:0,how:0,href:0,htmlrespons:0,htmlsession:0,http:0,httpbin:0,httprespons:0,improv:0,includ:0,index:0,industri:0,info:0,initi:0,inspir:0,instanc:0,integ:0,intellig:0,intend:0,interfac:0,intern:0,intro:0,introduct:0,introspect:0,intuit:0,ipython:0,irc:0,issu:0,job:0,jqueri:0,json:0,jumbotron:0,just:0,kei:0,kenneth:0,kennethreitz:0,kihd2dw98yi:0,kivi:0,know:0,kwarg:0,land:0,languag:0,leak:0,learn:0,left:0,legal:0,legit:0,length:0,let:0,librari:0,licens:0,light:0,like:0,link:0,list:0,listinfo:0,load:0,login:0,logo:0,loss:0,love:0,lxml:0,mac:0,magic:0,mai:0,mail:0,mailman:0,make:0,manag:0,mani:0,membership:0,memori:0,mentorship:0,menu:0,merchandis:0,merg:0,merge_environment_set:0,method:0,million:0,miss:0,mock:0,mock_brows:0,modul:0,moin:0,month:0,more:0,mount:0,multipl:0,n7q:0,necessari:0,net:0,next:0,no_proxi:0,none:0,note:0,now:0,number:0,object:0,onc:0,one:0,onli:0,open:0,openstack:0,option:0,order:0,org:0,origin:0,osx:0,other:0,otherwis:0,out:0,page:0,panda:0,paramet:0,past:0,patch:0,pep:0,persist:0,pipenv:0,planetpython:0,platform:0,plu:0,pocoo:0,pool:0,possibl:0,post:0,prefix:0,prepar:0,prepare_request:0,prepared_request:0,preparedrequest:0,prevent:0,previou:0,print:0,privaci:0,proceed:0,program:0,project:0,provid:0,proxi:0,psf:0,put:0,pycon:0,pydata:0,pyfound:0,pygobject:0,pylonsproject:0,pypi:0,pyppet:0,pyqt:0,pyqueri:0,pysid:0,python2orpython3:0,pythonbook:0,pythondotorg:0,pythoneventscalendar:0,pythoninsid:0,pythonlang:0,quot:0,raw_html:0,readi:0,real:0,reappli:0,rebuild_auth:0,rebuild_method:0,rebuild_proxi:0,receiv:0,reddit:0,redirect:0,refer:0,regist:0,reitz:0,releas:0,reload:0,remov:0,render:0,replac:0,represent:0,req:0,requests_html:0,resolve_redirect:0,resp:0,respons:0,result:0,retir:0,retri:0,review:0,riverbankcomput:0,role:0,roundup:0,rss:0,run:0,runtim:0,saltstack:0,sanit:0,scientif:0,scipi:0,scrape:0,script:0,scrolldown:0,search:0,search_al:0,second:0,see:0,sel:0,select:0,selector:0,send:0,set:0,shell:0,show:0,sig:0,signup:0,simpl:0,simpli:0,singl:0,sleep:0,softwar:0,some:0,someclass:0,someid:0,sort:0,sourc:0,sourceforg:0,spec:0,specif:0,specifi:0,sponsor:0,sponsorship:0,start:0,statu:0,stori:0,str:0,stream:0,string:0,strip:0,style:0,sub:0,submitting_an_ev:0,subnav:0,success:0,t3_81k1c8:0,t3_81lqtp:0,t3_81nevg:0,t3_81nrcd:0,t3_81p438:0,t3_81pm82:0,t3_81puu5:0,tag:0,take:0,target:0,templat:0,test:0,text:0,thank:0,thei:0,thepsf:0,thi:0,thing:0,those:0,tier:0,time:0,timeout:0,titl:0,tkinter:0,tool:0,tornadoweb:0,trac:0,transmiss:0,treeitem:0,twitter:0,type:0,unicod:0,union:0,updat:0,upon:0,uri:0,url:0,use:0,used:0,user:0,user_ag:0,using:0,utf:0,val:0,valu:0,variabl:0,verifi:0,version:0,w3school:0,wai:0,wait:0,want:0,warn:0,web2pi:0,web:0,were:0,wh73_1a:0,when:0,where:0,whether:0,which:0,width:0,wiki:0,window:0,within:0,work:0,workshop:0,www:0,wxpython:0,x_c9d0:0,xpath:0,yield_request:0,you:0,your:0,zvc80sq9s00:0},titles:["Requests-HTML: HTML Parsing for Humans (writing Python 3)!"],titleterms:{"class":0,"function":0,Using:0,api:0,document:0,html:0,human:0,indic:0,instal:0,javascript:0,main:0,pagin:0,pars:0,python:0,request:0,session:0,support:0,tabl:0,tutori:0,usag:0,util:0,without:0,write:0}}) \ No newline at end of file +Search.setIndex({docnames:["index"],envversion:53,filenames:["index.rst"],objects:{"":{requests_html:[0,0,0,"-"]},"requests_html.Element":{absolute_links:[0,2,1,""],attrs:[0,2,1,""],base_url:[0,2,1,""],encoding:[0,2,1,""],find:[0,3,1,""],full_text:[0,2,1,""],html:[0,2,1,""],links:[0,2,1,""],lxml:[0,2,1,""],pq:[0,2,1,""],raw_html:[0,2,1,""],search:[0,3,1,""],search_all:[0,3,1,""],text:[0,2,1,""],xpath:[0,3,1,""]},"requests_html.HTML":{absolute_links:[0,2,1,""],base_url:[0,2,1,""],encoding:[0,2,1,""],find:[0,3,1,""],full_text:[0,2,1,""],html:[0,2,1,""],links:[0,2,1,""],lxml:[0,2,1,""],pq:[0,2,1,""],raw_html:[0,2,1,""],render:[0,3,1,""],search:[0,3,1,""],search_all:[0,3,1,""],text:[0,2,1,""],xpath:[0,3,1,""]},"requests_html.HTMLSession":{"delete":[0,3,1,""],close:[0,3,1,""],get:[0,3,1,""],get_adapter:[0,3,1,""],get_redirect_target:[0,3,1,""],head:[0,3,1,""],merge_environment_settings:[0,3,1,""],mount:[0,3,1,""],options:[0,3,1,""],patch:[0,3,1,""],post:[0,3,1,""],prepare_request:[0,3,1,""],put:[0,3,1,""],rebuild_auth:[0,3,1,""],rebuild_method:[0,3,1,""],rebuild_proxies:[0,3,1,""],request:[0,3,1,""],resolve_redirects:[0,3,1,""],send:[0,3,1,""]},requests_html:{Element:[0,1,1,""],HTML:[0,1,1,""],HTMLSession:[0,1,1,""],user_agent:[0,4,1,""]}},objnames:{"0":["py","module","Python module"],"1":["py","class","Python class"],"2":["py","attribute","Python attribute"],"3":["py","method","Python method"],"4":["py","function","Python function"]},objtypes:{"0":"py:module","1":"py:class","2":"py:attribute","3":"py:method","4":"py:function"},terms:{"348rc1":0,"355rc1":0,"370a4":0,"370b1":0,"4c4":0,"byte":0,"case":0,"default":0,"float":0,"import":0,"int":0,"long":0,"new":0,"return":0,"true":0,The:0,There:0,These:0,_blank:0,_encod:0,_forwardref:0,abil:0,about:0,abov:0,absolut:0,absolute_link:0,account:0,adapt:0,adapter_kwarg:0,after:0,agent:0,all:0,allow:0,alongsid:0,also:0,altern:0,alwai:0,amobel8b8mc:0,amongst:0,amount:0,anchor:0,ani:0,ansibl:0,app:0,appar:0,applic:0,appropri:0,arg:0,argument:0,aria:0,art:0,articl:0,asynchtmlsess:0,attempt:[],attr:0,attribut:0,authent:0,author:0,automat:0,avail:0,avoid:0,award:0,base:0,base_url:0,baseadapt:0,befor:0,beginnersguid:0,behavior:0,being:0,blog:0,blogspot:0,bodi:0,bool:0,bottlepi:0,brochur:0,browser:0,btn:0,bug:0,build:0,buildbot:0,busi:0,button:0,calcul:0,calendar:0,can:0,center:0,cert:0,certain:0,certian:[],chang:0,check:0,chrome:0,chromium:0,clean:0,clientheight:0,clientwidth:0,close:0,code:0,codelin:0,codeofconduct:0,col:0,com:0,commun:0,complex:0,configur:0,connect:0,consid:0,construct:0,consum:0,contain:0,content:0,contribut:0,controlflow:0,cooki:0,copi:0,core:0,count:0,cover:0,crach:0,creat:0,credenti:0,css:0,data:0,default_encod:0,defin:0,delet:0,descend:0,detail:0,dev:0,develop:0,devguid:0,devicepixelratio:0,devicescalefactor:0,dict:0,dictionari:0,directori:0,div:0,divers:0,djangoproject:0,doc:0,documentel:0,donat:0,down:0,download:0,each:0,easili:0,edgewal:0,educ:0,element:0,encod:0,engin:0,environ:0,essai:0,evalu:0,event:0,ever:0,exampl:0,exclud:0,execut:0,experi:0,extract:0,facebook:0,faint:0,fals:0,faq:0,feedproxi:0,fetch:[],file:0,find:0,first:0,flask:0,follow:0,form:0,format:0,forum:0,found:0,fref:0,from:0,full:0,full_text:0,gener:0,get:0,get_adapt:0,get_redirect_target:0,getpython:0,gettingstart:0,github:0,given:0,gnome:0,googl:0,govern:0,grab:0,group:0,happen:0,has:0,haspopup:0,head:0,header:0,heart:0,height:0,help:0,hidden:0,home:0,host:0,how:0,href:0,htmlrespons:0,htmlsession:0,http:0,httpbin:0,httprespons:0,httpsession:0,improv:0,includ:0,index:0,industri:0,info:0,initi:0,inspir:0,instanc:0,integ:0,intellig:0,intend:0,interact:0,interfac:0,intern:0,intro:0,introduct:0,introspect:0,intuit:0,ipython:0,irc:0,issu:0,job:0,jqueri:0,json:0,jumbotron:0,just:0,keep_pag:0,kei:0,kenneth:0,kennethreitz:0,kihd2dw98yi:0,kivi:0,know:0,kwarg:0,land:0,languag:0,leak:0,learn:0,left:0,legal:0,legit:0,length:0,let:0,librari:0,licens:0,light:0,like:0,link:0,list:0,listinfo:0,load:0,login:0,logo:0,loss:0,love:0,lxml:0,mac:0,magic:0,mai:0,mail:0,mailman:0,make:0,manag:0,mani:0,membership:0,memori:0,mentorship:0,menu:0,merchandis:0,merg:0,merge_environment_set:0,method:0,million:0,miss:0,mock:0,mock_brows:0,modul:0,moin:0,month:0,more:0,mount:0,multipl:0,n7q:0,necessari:0,net:0,next:0,no_proxi:0,none:0,note:0,now:0,number:0,object:0,onc:0,one:0,onli:0,open:0,openstack:0,option:0,order:0,org:0,origin:0,osx:0,other:0,otherwis:0,out:0,page:0,panda:0,paramet:0,past:0,patch:0,pep:0,persist:0,pipenv:0,planetpython:0,platform:0,plu:0,pocoo:0,pool:0,possibl:0,post:0,prefix:0,prepar:0,prepare_request:0,prepared_request:0,preparedrequest:0,prevent:0,previou:0,print:0,privaci:0,proceed:0,program:0,project:0,provid:0,proxi:0,psf:0,put:0,pycon:0,pydata:0,pyfound:0,pygobject:0,pylonsproject:0,pypi:0,pyppet:0,pyqt:0,pyqueri:0,pysid:0,python2orpython3:0,pythonbook:0,pythondotorg:0,pythoneventscalendar:0,pythoninsid:0,pythonlang:0,quot:0,raw_html:0,readi:0,real:0,reappli:0,rebuild_auth:0,rebuild_method:0,rebuild_proxi:0,receiv:0,reddit:0,redirect:0,refer:0,regist:0,reitz:0,releas:0,reload:0,remov:0,render:0,replac:0,represent:0,req:0,requests_html:0,resolve_redirect:0,resp:0,respons:0,result:0,retir:0,retri:0,review:0,riverbankcomput:0,role:0,roundup:0,rss:0,run:0,runtim:0,saltstack:0,sanit:0,scale:0,scientif:0,scipi:0,scrape:0,script:0,scrolldown:0,search:0,search_al:0,second:0,see:0,sel:0,select:0,selector:0,send:0,set:0,shell:0,show:0,sig:0,signup:0,simpl:0,simpli:[],sinc:0,singl:0,sleep:0,softwar:0,some:0,someclass:0,someid:0,sort:0,sourc:0,sourceforg:0,spec:0,specif:0,specifi:0,sponsor:0,sponsorship:0,start:0,statu:0,stori:0,str:0,stream:0,string:0,strip:0,style:0,sub:0,submitting_an_ev:0,subnav:0,success:0,t3_81k1c8:0,t3_81lqtp:0,t3_81nevg:0,t3_81nrcd:0,t3_81p438:0,t3_81pm82:0,t3_81puu5:0,tag:0,take:0,target:0,templat:0,test:0,text:0,thank:0,thei:0,thepsf:0,thi:0,thing:0,those:0,through:0,tier:0,time:0,timeout:0,titl:0,tkinter:0,tool:0,tornadoweb:0,trac:0,transmiss:0,treeitem:0,twitter:0,type:0,unicod:0,union:0,updat:0,upon:0,uri:0,url:0,use:0,used:0,user:0,user_ag:0,using:0,utf:0,val:0,valu:0,variabl:0,verifi:0,version:0,w3school:0,wai:0,wait:0,want:0,warn:0,web2pi:0,web:0,were:0,wh73_1a:0,when:0,where:0,whether:0,which:0,width:0,wiki:0,window:0,within:0,work:0,workshop:0,www:0,wxpython:0,x_c9d0:0,xpath:0,yield_request:0,you:0,your:0,zvc80sq9s00:0},titles:["Requests-HTML: HTML Parsing for Humans (writing Python 3)!"],titleterms:{"class":0,"function":0,Using:0,api:0,document:0,html:0,human:0,indic:0,instal:0,javascript:0,main:0,pagin:0,pars:0,python:0,request:0,session:0,support:0,tabl:0,tutori:0,usag:0,util:0,without:0,write:0}}) \ No newline at end of file