Fail less catastrophically on a weird ScienceDirect URL (one containing "ShoppingCart")

This commit is contained in:
Bryan Bishop 2013-01-23 19:58:24 -06:00
parent 2c3df4e2ef
commit f7d7eaa6cb

View File

@@ -172,7 +172,7 @@ def download_url(url):
extension = ".pdf" extension = ".pdf"
title = citation_title title = citation_title
else: else:
if "sciencedirect.com" in url: if "sciencedirect.com" in url and not "ShoppingCart" in url:
title = tree.xpath("//h1[@class='svTitle']")[0].text title = tree.xpath("//h1[@class='svTitle']")[0].text
pdf_url = tree.xpath("//a[@id='pdfLink']/@href")[0] pdf_url = tree.xpath("//a[@id='pdfLink']/@href")[0]
response = requests.get(pdf_url, headers={"User-Agent": "sdf-macross"}) response = requests.get(pdf_url, headers={"User-Agent": "sdf-macross"})