From b1dcaf0e2371f865ec151c85163572032aa637f6 Mon Sep 17 00:00:00 2001 From: Bryan Bishop Date: Wed, 16 Jan 2013 02:25:04 -0600 Subject: [PATCH] fix title encoding for another pdf case --- modules/papers.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/modules/papers.py b/modules/papers.py index b01f6e7..1a2aeb4 100644 --- a/modules/papers.py +++ b/modules/papers.py @@ -159,9 +159,10 @@ def download_url(url): # extract some metadata with xpaths citation_pdf_url = find_citation_pdf_url(tree, url) citation_title = find_citation_title(tree) + citation_title = citation_title.encode("ascii", "ignore") if citation_pdf_url and citation_title: - response = requests.get(citation_pdf_url, headers={"User-Agent": "gundam-gdf"}) + response = requests.get(citation_pdf_url, headers={"User-Agent": "pdf-defense-force"}) content = response.content if "pdf" in response.headers["content-type"]: extension = ".pdf"