pass StringIO to pdfparanoia

This commit is contained in:
Bryan Bishop 2013-02-09 07:45:53 -06:00
parent 53de3f3648
commit db58d53c10

View File

@@ -101,7 +101,7 @@ def download(phenny, input, verbose=True):
data = response.content data = response.content
if "pdf" in response.headers["content-type"]: if "pdf" in response.headers["content-type"]:
data = pdfparanoia.scrub(data) data = pdfparanoia.scrub(StringIO(data))
# grr.. # grr..
title = title.encode("ascii", "ignore") title = title.encode("ascii", "ignore")
@@ -207,7 +207,6 @@ def download_url(url):
new_response = requests.get(pdf_url, headers={"User-Agent": "time-machine/1.0"}) new_response = requests.get(pdf_url, headers={"User-Agent": "time-machine/1.0"})
new_content = new_response.content new_content = new_response.content
if "pdf" in new_response.headers["content-type"]: if "pdf" in new_response.headers["content-type"]:
new_content = pdfparanoia.scrub(new_content)
extension = ".pdf" extension = ".pdf"
except Exception: except Exception:
pass pass
@@ -251,7 +250,7 @@ def download_url(url):
path = os.path.join("/home/bryan/public_html/papers2/paperbot/", title + extension) path = os.path.join("/home/bryan/public_html/papers2/paperbot/", title + extension)
if extension in [".pdf", "pdf"]: if extension in [".pdf", "pdf"]:
content = pdfparanoia.scrub(content) content = pdfparanoia.scrub(StringIO(content))
file_handler = open(path, "w") file_handler = open(path, "w")
file_handler.write(content) file_handler.write(content)