fix title encoding for another pdf case

This commit is contained in:
Bryan Bishop 2013-01-16 02:25:04 -06:00
parent 723b9f18d7
commit b1dcaf0e23
1 changed file with 2 additions and 1 deletion

View File

@@ -159,9 +159,10 @@ def download_url(url):
# extract some metadata with xpaths
citation_pdf_url = find_citation_pdf_url(tree, url)
citation_title = find_citation_title(tree)
citation_title = citation_title.encode("ascii", "ignore")
if citation_pdf_url and citation_title:
response = requests.get(citation_pdf_url, headers={"User-Agent": "gundam-gdf"})
response = requests.get(citation_pdf_url, headers={"User-Agent": "pdf-defense-force"})
content = response.content
if "pdf" in response.headers["content-type"]:
extension = ".pdf"