From 8b3abe92221845cad85da23e6397197ed3b17309 Mon Sep 17 00:00:00 2001 From: Bryan Bishop Date: Wed, 23 Jan 2013 20:01:03 -0600 Subject: [PATCH] possibly better sciencedirect handling --- modules/papers.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/modules/papers.py b/modules/papers.py index 9c41253..09bba89 100644 --- a/modules/papers.py +++ b/modules/papers.py @@ -173,12 +173,18 @@ def download_url(url): title = citation_title else: if "sciencedirect.com" in url and not "ShoppingCart" in url: - title = tree.xpath("//h1[@class='svTitle']")[0].text - pdf_url = tree.xpath("//a[@id='pdfLink']/@href")[0] - response = requests.get(pdf_url, headers={"User-Agent": "sdf-macross"}) - content = response.content - if "pdf" in response.headers["content-type"]: - extension = ".pdf" + try: + title = tree.xpath("//h1[@class='svTitle']")[0].text + pdf_url = tree.xpath("//a[@id='pdfLink']/@href")[0] + new_response = requests.get(pdf_url, headers={"User-Agent": "sdf-macross"}) + new_content = new_response.content + if "pdf" in new_response.headers["content-type"]: + extension = ".pdf" + except Exception: + pass + else: + content = new_content + response = new_response elif "h1 class=\"articleTitle" in content: try: title = tree.xpath("//h1[@class='articleTitle']")[0].text