fix sciencedirect.com parsing

2013-01-20 22:19:57 -06:00 · 2013-01-20 22:19:57 -06:00 · a89129b424
commit a89129b424
parent cf7c1b78e1
1 changed file with 4 additions and 1 deletion
--- a/modules/papers.py
+++ b/modules/papers.py
@@ -169,7 +169,10 @@ def download_url(url):
                extension = ".pdf"
                title = citation_title
        else:
-            if "h1 class=\"articleTitle" in content:
+            if "sciencedirect.com" in url:
+                title = tree.xpath("//h1[@class='svTitle']")[0].text
+                pdf_url = tree.xpath("//a[@id='pdfLink']/@href")[0]
+            elif "h1 class=\"articleTitle" in content:
                try:
                    title = tree.xpath("//h1[@class='articleTitle']")[0].text
                    title = title.encode("ascii", "ignore")