mirror of
https://github.com/kennethreitz/requests-html.git
synced 2026-06-05 23:00:20 +00:00
Fix bugs related to links
* #121 Fix KeyError raised when a <base> tag has no 'href' attribute * #124 Exclude 'mailto:' links from the extracted links
This commit is contained in:
+3
-3
@@ -294,7 +294,7 @@ class BaseParser:
|
||||
|
||||
try:
|
||||
href = link.attrs['href'].strip()
|
||||
if href and not (href.startswith('#') and self.skip_anchors) and not href.startswith('javascript:'):
|
||||
if href and not (href.startswith('#') and self.skip_anchors) and not href.startswith('javascript:') and not href.startswith('mailto:'):
|
||||
yield href
|
||||
except KeyError:
|
||||
pass
|
||||
@@ -343,7 +343,7 @@ class BaseParser:
|
||||
# Support for <base> tag.
|
||||
base = self.find('base', first=True)
|
||||
if base:
|
||||
result = base.attrs['href'].strip()
|
||||
result = base.attrs.get('href', '').strip()
|
||||
if result:
|
||||
return result
|
||||
|
||||
@@ -351,7 +351,7 @@ class BaseParser:
|
||||
parsed = urlparse(self.url)._asdict()
|
||||
|
||||
# Remove any part of the path after the last '/'
|
||||
path = '/'.join(parsed['path'].split('/')[:-1])
|
||||
parsed['path'] = '/'.join(parsed['path'].split('/')[:-1]) + '/'
|
||||
|
||||
# Reconstruct the url with the modified path
|
||||
parsed = (v for v in parsed.values())
|
||||
|
||||
Reference in New Issue
Block a user