From 04644364e2990f9b35140ea2f3f70b7bbdc720ea Mon Sep 17 00:00:00 2001 From: Bryan Bishop Date: Thu, 21 Feb 2013 17:29:53 -0600 Subject: [PATCH] fix jstor title determination --- modules/papers.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/modules/papers.py b/modules/papers.py index ba9eb59..3504222 100644 --- a/modules/papers.py +++ b/modules/papers.py @@ -218,9 +218,12 @@ def download_url(url): # not all pages have the element try: - title = tree.xpath("//input[@name='ppv-title']/@value")[0] + title = tree.xpath("//div[@class='hd title']")[0].text except Exception: - pass + try: + title = tree.xpath("//input[@name='ppv-title']/@value")[0] + except Exception: + pass # get the document id document_id = None @@ -234,7 +237,7 @@ def download_url(url): if document_id.isdigit(): try: pdf_url = "http://www.jstor.org/stable/pdfplus/" + document_id + ".pdf?acceptTC=true" - new_response = requests.get(pdf_url, header={"User-Agent": "time-machine/1.1"}) + new_response = requests.get(pdf_url, headers={"User-Agent": "time-machine/1.1"}) new_content = new_response.content if "pdf" in new_response.headers["content-type"]: extension = ".pdf"