auto-remove proxy.lib.pdx.edu from urls
This commit is contained in:
parent
9c7da548e1
commit
cf7c1b78e1
@ -42,6 +42,8 @@ def download(phenny, input, verbose=True):
|
||||
if len(line) < 5 or (not "http://" in line and not "https://" in line) or not line.startswith("http"):
|
||||
return
|
||||
for line in re.findall('http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+', line):
|
||||
line = filter_fix(line)
|
||||
|
||||
translation_url = "http://localhost:1969/web"
|
||||
|
||||
headers = {
|
||||
@ -234,3 +236,11 @@ def extract_meta_content(tree, meta_name):
|
||||
else:
|
||||
return content
|
||||
|
||||
def filter_fix(url):
    """
    Fixes some common problems in urls.

    Currently this strips the ".proxy.lib.pdx.edu" suffix from the host
    part of the url. Only the host (the text between "://" and the first
    "/", "?" or "#") is touched, so a path or query string that merely
    mentions the proxy domain is left unchanged.

    url -- the url string to clean up

    Returns the cleaned url; a url without the suffix in its host is
    returned unchanged.
    """
    proxy_suffix = ".proxy.lib.pdx.edu"

    scheme, separator, remainder = url.partition("://")
    if not separator:
        # No scheme present: treat the whole string as host + path.
        scheme, remainder = "", url

    # The host (plus optional port) ends at the first "/", "?" or "#".
    cuts = [remainder.find(mark) for mark in "/?#"]
    cuts = [cut for cut in cuts if cut != -1]
    host_end = min(cuts) if cuts else len(remainder)

    # str.replace is a no-op when the suffix is absent, so no guard needed.
    host = remainder[:host_end].replace(proxy_suffix, "")
    return scheme + separator + host + remainder[host_end:]
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user