diff --git a/requests_html.py b/requests_html.py index fee56bb..2277ce6 100644 --- a/requests_html.py +++ b/requests_html.py @@ -294,7 +294,7 @@ class BaseParser: try: href = link.attrs['href'].strip() - if href and not (href.startswith('#') and self.skip_anchors) and not href.startswith('javascript:'): + if href and not (href.startswith('#') and self.skip_anchors) and not href.startswith('javascript:') and not href.startswith('mailto:'): yield href except KeyError: pass @@ -343,7 +343,7 @@ class BaseParser: # Support for <base> tag. base = self.find('base', first=True) if base: - result = base.attrs['href'].strip() + result = base.attrs.get('href', '').strip() if result: return result @@ -351,7 +351,7 @@ class BaseParser: parsed = urlparse(self.url)._asdict() # Remove any part of the path after the last '/' - path = '/'.join(parsed['path'].split('/')[:-1]) + parsed['path'] = '/'.join(parsed['path'].split('/')[:-1]) + '/' # Reconstruct the url with the modified path parsed = (v for v in parsed.values())