Fix title encoding for another PDF case
This commit is contained in:
parent
723b9f18d7
commit
b1dcaf0e23
@@ -159,9 +159,10 @@ def download_url(url):
|
|||||||
# extract some metadata with xpaths
|
# extract some metadata with xpaths
|
||||||
citation_pdf_url = find_citation_pdf_url(tree, url)
|
citation_pdf_url = find_citation_pdf_url(tree, url)
|
||||||
citation_title = find_citation_title(tree)
|
citation_title = find_citation_title(tree)
|
||||||
|
citation_title = citation_title.encode("ascii", "ignore")
|
||||||
|
|
||||||
if citation_pdf_url and citation_title:
|
if citation_pdf_url and citation_title:
|
||||||
response = requests.get(citation_pdf_url, headers={"User-Agent": "gundam-gdf"})
|
response = requests.get(citation_pdf_url, headers={"User-Agent": "pdf-defense-force"})
|
||||||
content = response.content
|
content = response.content
|
||||||
if "pdf" in response.headers["content-type"]:
|
if "pdf" in response.headers["content-type"]:
|
||||||
extension = ".pdf"
|
extension = ".pdf"
|
||||||
|
Loading…
Reference in New Issue
Block a user