remove phenny, tweak some things

This commit is contained in:
Antoine Amarilli 2013-05-11 11:57:28 +02:00
parent 960e86327e
commit 86c2e11a8c
2 changed files with 36 additions and 48 deletions

modules/papers.py → papers.py Normal file → Executable file

@@ -1,44 +1,23 @@
+#!/usr/bin/python
 """
 Fetches papers.
 """
 import re
 import os
 import json
+import params
 import random
 import requests
 import lxml.etree
+import sys
 from StringIO import StringIO
 
 import pdfparanoia
 
-def download(phenny, input, verbose=True):
+def download(line, verbose=True):
     """
     Downloads a paper.
     """
-    # only accept requests in a channel
-    if not input.sender.startswith('#'):
-        # unless the user is an admin, of course
-        if not input.admin:
-            phenny.say("i only take requests in the ##hplusroadmap channel.")
-            return
-        else:
-            # just give a warning message to the admin.. not a big deal.
-            phenny.say("okay i'll try, but please send me requests in ##hplusroadmap in the future.")
-
-    # get the input
-    line = input.group()
-
-    # was this an explicit command?
-    explicit = False
-    if line.startswith(phenny.nick):
-        explicit = True
-        line = line[len(phenny.nick):]
-
-        if line.startswith(",") or line.startswith(":"):
-            line = line[1:]
-
-        if line.startswith(" "):
-            line = line.strip()
 
     # don't bother if there's nothing there
     if len(line) < 5 or (not "http://" in line and not "https://" in line) or not line.startswith("http"):
@@ -50,7 +29,7 @@ def download(phenny, input, verbose=True):
         line = fix_ieee_login_urls(line)
         line = fix_jstor_pdf_urls(line)
 
-        translation_url = "http://localhost:1969/web"
+        translation_url = params.server
 
         headers = {
             "Content-Type": "application/json",
@@ -93,10 +72,10 @@ def download(phenny, input, verbose=True):
 
                 # detect failure
                 if response.status_code == 401:
-                    phenny.say("HTTP 401 unauthorized " + str(pdf_url))
+                    print("HTTP 401 unauthorized " + str(pdf_url))
                     continue
                 elif response.status_code != 200:
-                    phenny.say("HTTP " + str(response.status_code) + " " + str(pdf_url))
+                    print("HTTP " + str(response.status_code) + " " + str(pdf_url))
                     continue
 
                 data = response.content
@@ -111,7 +90,7 @@ def download(phenny, input, verbose=True):
                 # grr..
                 title = title.encode("ascii", "ignore")
 
-                path = os.path.join("/home/bryan/public_html/papers2/paperbot/", title + ".pdf")
+                path = os.path.join(params.folder, title + ".pdf")
 
                 file_handler = open(path, "w")
                 file_handler.write(data)
@@ -124,27 +103,23 @@ def download(phenny, input, verbose=True):
                 if filename[-1] == ".":
                     filename = filename[:-1]
 
-                url = "http://diyhpl.us/~bryan/papers2/paperbot/" + filename + ".pdf"
-                phenny.say(url)
+                url = params.url + filename + ".pdf"
+                print(url)
                 continue
-            elif verbose and explicit:
-                phenny.say(download_url(line))
+            else:
+                print(download_url(line))
                 continue
-        elif verbose and explicit:
-            phenny.say(download_url(line))
+        else:
+            print(download_url(line))
             continue
-    elif verbose and explicit:
+    else:
         if response.status_code == 501:
             if verbose:
-                phenny.say("no translator available, raw dump: " + download_url(line))
-            continue
+                print("no translator available, raw dump: " + download_url(line))
         else:
             if verbose:
-                phenny.say("error: HTTP " + str(response.status_code) + " " + download_url(line))
-            continue
-    else:
-        continue
+                print("error: HTTP " + str(response.status_code) + " " + download_url(line))
     return
 
 download.commands = ["fetch", "get", "download"]
 download.priority = "high"
@@ -296,8 +271,10 @@ def download_url(url):
 
     # can't create directories
     title = title.replace("/", "_")
+    title = title.replace(" ", "_")
+    title = title[:20]
 
-    path = os.path.join("/home/bryan/public_html/papers2/paperbot/", title + extension)
+    path = os.path.join(params.folder, title + extension)
 
     if extension in [".pdf", "pdf"]:
         try:
@@ -311,7 +288,7 @@ def download_url(url):
 
         file_handler.close()
 
     title = title.encode("ascii", "ignore")
-    url = "http://diyhpl.us/~bryan/papers2/paperbot/" + requests.utils.quote(title) + extension
+    url = params.url + requests.utils.quote(title) + extension
 
     return url
@@ -395,3 +372,15 @@ def fix_jstor_pdf_urls(url):
         url += "?acceptTC=true"
 
     return url
+
+
+if __name__ == '__main__':
+    if len(sys.argv) > 1:
+        for a in sys.argv[1:]:
+            download(a)
+    else:
+        while True:
+            l = sys.stdin.readline()
+            if not l:
+                break
+            download(l)
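
The rewritten script now pulls its settings from a params module (params.server, params.folder, params.url) that is not included in this diff. A minimal sketch of what such a module could contain, reusing the hard-coded values the old code drops as placeholder defaults:

# params.py -- hypothetical sketch, not part of this commit
# Zotero translation-server endpoint (the old code hard-coded this value)
server = "http://localhost:1969/web"
# directory where fetched PDFs are written (previously hard-coded)
folder = "/home/bryan/public_html/papers2/paperbot/"
# public URL prefix under which that directory is served (previously hard-coded)
url = "http://diyhpl.us/~bryan/papers2/paperbot/"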

phenny

@@ -1 +0,0 @@
-Subproject commit 7752b56cc2f8325883bad052ad31bf2e7feb706e
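
With the phenny submodule removed, papers.py is meant to run on its own: the new __main__ block fetches each URL passed on the command line, or, when no arguments are given, reads one URL per line from standard input. An invocation along the lines of `python papers.py http://example.com/paper.pdf`, or piping a list of URLs into `python papers.py`, would be the expected usage (the URL here is a placeholder).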