Attempt at multi-URL downloads

This commit is contained in:
Nathan McCorkle 2013-01-10 21:08:36 -08:00
parent f186b7d009
commit e4074d2b3d

View File

@@ -1,7 +1,7 @@
""" """
Fetches papers. Fetches papers.
""" """
import re
import os import os
import json import json
import random import random
@@ -39,7 +39,7 @@ def download(phenny, input, verbose=True):
# don't bother if there's nothing there # don't bother if there's nothing there
if len(line) < 5 or (not "http://" in line and not "https://" in line) or not line.startswith("http"): if len(line) < 5 or (not "http://" in line and not "https://" in line) or not line.startswith("http"):
return return
for line in re.findall('http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+', line):
translation_url = "http://localhost:1969/web" translation_url = "http://localhost:1969/web"
headers = { headers = {
@@ -84,10 +84,10 @@ def download(phenny, input, verbose=True):
# detect failure # detect failure
if response.status_code == 401: if response.status_code == 401:
phenny.say("HTTP 401 unauthorized " + str(pdf_url)) phenny.say("HTTP 401 unauthorized " + str(pdf_url))
return continue
elif response.status_code != 200: elif response.status_code != 200:
phenny.say("HTTP " + str(response.status_code) + " " + str(pdf_url)) phenny.say("HTTP " + str(response.status_code) + " " + str(pdf_url))
return continue
data = response.content data = response.content
@@ -104,25 +104,26 @@ def download(phenny, input, verbose=True):
url = "http://diyhpl.us/~bryan/papers2/paperbot/" + filename + ".pdf" url = "http://diyhpl.us/~bryan/papers2/paperbot/" + filename + ".pdf"
phenny.say(url) phenny.say(url)
return continue
elif verbose and explicit: elif verbose and explicit:
phenny.say("error: didn't find any pdfs on " + line) phenny.say("error: didn't find any pdfs on " + line)
phenny.say(download_url(line)) phenny.say(download_url(line))
return continue
elif verbose and explicit: elif verbose and explicit:
phenny.say("error: dunno how to find the pdf on " + line) phenny.say("error: dunno how to find the pdf on " + line)
phenny.say(download_url(line)) phenny.say(download_url(line))
return continue
elif verbose and explicit: elif verbose and explicit:
if response.status_code == 501: if response.status_code == 501:
if verbose: if verbose:
phenny.say("no translator available, raw dump: " + download_url(line)) phenny.say("no translator available, raw dump: " + download_url(line))
return continue
else: else:
if verbose: if verbose:
phenny.say("error: HTTP " + str(response.status_code) + " " + download_url(line)) phenny.say("error: HTTP " + str(response.status_code) + " " + download_url(line))
return continue
else: else:
continue
return return
download.commands = ["fetch", "get", "download"] download.commands = ["fetch", "get", "download"]
download.priority = "high" download.priority = "high"