From 64e67d46ff802766c5ef531aa0cb99dc9db9de4b Mon Sep 17 00:00:00 2001 From: camper42 Date: Thu, 1 Mar 2018 20:34:56 +0800 Subject: [PATCH] ignore javascript links Ignore any link whose href attribute starts with `javascript:`. --- requests_html.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requests_html.py b/requests_html.py index 7964625..bd41ba8 100644 --- a/requests_html.py +++ b/requests_html.py @@ -212,7 +212,7 @@ class BaseParser: try: href = link.attrs['href'].strip() - if href and not (href.startswith('#') and self.skip_anchors and href in ['javascript:;']): + if href and not (href.startswith('#') and self.skip_anchors) and not href.startswith('javascript:'): yield href except KeyError: pass