Fail less catastrophically for a weird ScienceDirect URL (one containing "ShoppingCart")
This commit is contained in:
parent
2c3df4e2ef
commit
f7d7eaa6cb
@ -172,7 +172,7 @@ def download_url(url):
|
|||||||
extension = ".pdf"
|
extension = ".pdf"
|
||||||
title = citation_title
|
title = citation_title
|
||||||
else:
|
else:
|
||||||
if "sciencedirect.com" in url:
|
if "sciencedirect.com" in url and not "ShoppingCart" in url:
|
||||||
title = tree.xpath("//h1[@class='svTitle']")[0].text
|
title = tree.xpath("//h1[@class='svTitle']")[0].text
|
||||||
pdf_url = tree.xpath("//a[@id='pdfLink']/@href")[0]
|
pdf_url = tree.xpath("//a[@id='pdfLink']/@href")[0]
|
||||||
response = requests.get(pdf_url, headers={"User-Agent": "sdf-macross"})
|
response = requests.get(pdf_url, headers={"User-Agent": "sdf-macross"})
|
||||||
|
Loading…
Reference in New Issue
Block a user