From a89129b4240326d8e53fa8f6b040bf1f7e299584 Mon Sep 17 00:00:00 2001 From: Bryan Bishop Date: Sun, 20 Jan 2013 22:19:57 -0600 Subject: [PATCH] fix sciencedirect.com parsing --- modules/papers.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/modules/papers.py b/modules/papers.py index e0d13c7..00891d0 100644 --- a/modules/papers.py +++ b/modules/papers.py @@ -169,7 +169,10 @@ def download_url(url): extension = ".pdf" title = citation_title else: - if "h1 class=\"articleTitle" in content: + if "sciencedirect.com" in url: + title = tree.xpath("//h1[@class='svTitle']")[0].text + pdf_url = tree.xpath("//a[@id='pdfLink']/@href")[0] + elif "h1 class=\"articleTitle" in content: try: title = tree.xpath("//h1[@class='articleTitle']")[0].text title = title.encode("ascii", "ignore")