auto-remove proxy.lib.pdx.edu from urls
This commit is contained in:
parent
9c7da548e1
commit
cf7c1b78e1
@ -42,6 +42,8 @@ def download(phenny, input, verbose=True):
|
|||||||
if len(line) < 5 or (not "http://" in line and not "https://" in line) or not line.startswith("http"):
|
if len(line) < 5 or (not "http://" in line and not "https://" in line) or not line.startswith("http"):
|
||||||
return
|
return
|
||||||
for line in re.findall('http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+', line):
|
for line in re.findall('http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+', line):
|
||||||
|
line = filter_fix(line)
|
||||||
|
|
||||||
translation_url = "http://localhost:1969/web"
|
translation_url = "http://localhost:1969/web"
|
||||||
|
|
||||||
headers = {
|
headers = {
|
||||||
@ -234,3 +236,11 @@ def extract_meta_content(tree, meta_name):
|
|||||||
else:
|
else:
|
||||||
return content
|
return content
|
||||||
|
|
||||||
|
def filter_fix(url):
    """
    Fixes some common problems in urls.

    Currently this strips every occurrence of the ".proxy.lib.pdx.edu"
    library-proxy suffix from the given string, so a proxied address such as
    "http://host.example.proxy.lib.pdx.edu/path" becomes
    "http://host.example/path".

    url -- the url string to clean up.

    Returns the cleaned url; the input is returned unchanged when the
    suffix does not appear in it.
    """
    # str.replace leaves the string untouched when the substring is absent,
    # so no separate membership test is needed before calling it.
    return url.replace(".proxy.lib.pdx.edu", "")
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user