Fix JSTOR title determination

2013-02-21 17:29:53 -06:00 · 2013-02-21 17:29:53 -06:00 · 04644364e2
commit 04644364e2
parent 16c7f4d4db
1 changed file with 6 additions and 3 deletions
--- a/modules/papers.py
+++ b/modules/papers.py
@@ -217,6 +217,9 @@ def download_url(url):
                    url = url[0:url.find("?")]

                # not all pages have the <input type="hidden" name="ppv-title"> element
+                try:
+                    title = tree.xpath("//div[@class='hd title']")[0].text
+                except Exception:
                    try:
                        title = tree.xpath("//input[@name='ppv-title']/@value")[0]
                    except Exception:
@@ -234,7 +237,7 @@ def download_url(url):
                if document_id.isdigit():
                    try:
                        pdf_url = "http://www.jstor.org/stable/pdfplus/" + document_id + ".pdf?acceptTC=true"
-                        new_response = requests.get(pdf_url, header={"User-Agent": "time-machine/1.1"})
+                        new_response = requests.get(pdf_url, headers={"User-Agent": "time-machine/1.1"})
                        new_content = new_response.content
                        if "pdf" in new_response.headers["content-type"]:
                            extension = ".pdf"