remove phenny, tweak some things
This commit is contained in:
parent
960e86327e
commit
86c2e11a8c
77
modules/papers.py → papers.py
Normal file → Executable file
77
modules/papers.py → papers.py
Normal file → Executable file
@ -1,44 +1,23 @@
|
|||||||
|
#!/usr/bin/python
|
||||||
"""
|
"""
|
||||||
Fetches papers.
|
Fetches papers.
|
||||||
"""
|
"""
|
||||||
import re
|
import re
|
||||||
import os
|
import os
|
||||||
import json
|
import json
|
||||||
|
import params
|
||||||
import random
|
import random
|
||||||
import requests
|
import requests
|
||||||
import lxml.etree
|
import lxml.etree
|
||||||
|
import sys
|
||||||
from StringIO import StringIO
|
from StringIO import StringIO
|
||||||
|
|
||||||
import pdfparanoia
|
import pdfparanoia
|
||||||
|
|
||||||
def download(phenny, input, verbose=True):
|
def download(line, verbose=True):
|
||||||
"""
|
"""
|
||||||
Downloads a paper.
|
Downloads a paper.
|
||||||
"""
|
"""
|
||||||
# only accept requests in a channel
|
|
||||||
if not input.sender.startswith('#'):
|
|
||||||
# unless the user is an admin, of course
|
|
||||||
if not input.admin:
|
|
||||||
phenny.say("i only take requests in the ##hplusroadmap channel.")
|
|
||||||
return
|
|
||||||
else:
|
|
||||||
# just give a warning message to the admin.. not a big deal.
|
|
||||||
phenny.say("okay i'll try, but please send me requests in ##hplusroadmap in the future.")
|
|
||||||
|
|
||||||
# get the input
|
|
||||||
line = input.group()
|
|
||||||
|
|
||||||
# was this an explicit command?
|
|
||||||
explicit = False
|
|
||||||
if line.startswith(phenny.nick):
|
|
||||||
explicit = True
|
|
||||||
line = line[len(phenny.nick):]
|
|
||||||
|
|
||||||
if line.startswith(",") or line.startswith(":"):
|
|
||||||
line = line[1:]
|
|
||||||
|
|
||||||
if line.startswith(" "):
|
|
||||||
line = line.strip()
|
|
||||||
|
|
||||||
# don't bother if there's nothing there
|
# don't bother if there's nothing there
|
||||||
if len(line) < 5 or (not "http://" in line and not "https://" in line) or not line.startswith("http"):
|
if len(line) < 5 or (not "http://" in line and not "https://" in line) or not line.startswith("http"):
|
||||||
@ -50,7 +29,7 @@ def download(phenny, input, verbose=True):
|
|||||||
line = fix_ieee_login_urls(line)
|
line = fix_ieee_login_urls(line)
|
||||||
line = fix_jstor_pdf_urls(line)
|
line = fix_jstor_pdf_urls(line)
|
||||||
|
|
||||||
translation_url = "http://localhost:1969/web"
|
translation_url = params.server
|
||||||
|
|
||||||
headers = {
|
headers = {
|
||||||
"Content-Type": "application/json",
|
"Content-Type": "application/json",
|
||||||
@ -93,10 +72,10 @@ def download(phenny, input, verbose=True):
|
|||||||
|
|
||||||
# detect failure
|
# detect failure
|
||||||
if response.status_code == 401:
|
if response.status_code == 401:
|
||||||
phenny.say("HTTP 401 unauthorized " + str(pdf_url))
|
print("HTTP 401 unauthorized " + str(pdf_url))
|
||||||
continue
|
continue
|
||||||
elif response.status_code != 200:
|
elif response.status_code != 200:
|
||||||
phenny.say("HTTP " + str(response.status_code) + " " + str(pdf_url))
|
print("HTTP " + str(response.status_code) + " " + str(pdf_url))
|
||||||
continue
|
continue
|
||||||
|
|
||||||
data = response.content
|
data = response.content
|
||||||
@ -111,7 +90,7 @@ def download(phenny, input, verbose=True):
|
|||||||
# grr..
|
# grr..
|
||||||
title = title.encode("ascii", "ignore")
|
title = title.encode("ascii", "ignore")
|
||||||
|
|
||||||
path = os.path.join("/home/bryan/public_html/papers2/paperbot/", title + ".pdf")
|
path = os.path.join(params.folder, title + ".pdf")
|
||||||
|
|
||||||
file_handler = open(path, "w")
|
file_handler = open(path, "w")
|
||||||
file_handler.write(data)
|
file_handler.write(data)
|
||||||
@ -124,27 +103,23 @@ def download(phenny, input, verbose=True):
|
|||||||
if filename[-1] == ".":
|
if filename[-1] == ".":
|
||||||
filename = filename[:-1]
|
filename = filename[:-1]
|
||||||
|
|
||||||
url = "http://diyhpl.us/~bryan/papers2/paperbot/" + filename + ".pdf"
|
url = params.url + filename + ".pdf"
|
||||||
|
|
||||||
phenny.say(url)
|
print(url)
|
||||||
continue
|
continue
|
||||||
elif verbose and explicit:
|
else:
|
||||||
phenny.say(download_url(line))
|
print(download_url(line))
|
||||||
continue
|
continue
|
||||||
elif verbose and explicit:
|
else:
|
||||||
phenny.say(download_url(line))
|
print(download_url(line))
|
||||||
continue
|
continue
|
||||||
elif verbose and explicit:
|
else:
|
||||||
if response.status_code == 501:
|
if response.status_code == 501:
|
||||||
if verbose:
|
if verbose:
|
||||||
phenny.say("no translator available, raw dump: " + download_url(line))
|
print("no translator available, raw dump: " + download_url(line))
|
||||||
continue
|
|
||||||
else:
|
else:
|
||||||
if verbose:
|
if verbose:
|
||||||
phenny.say("error: HTTP " + str(response.status_code) + " " + download_url(line))
|
print("error: HTTP " + str(response.status_code) + " " + download_url(line))
|
||||||
continue
|
|
||||||
else:
|
|
||||||
continue
|
|
||||||
return
|
return
|
||||||
download.commands = ["fetch", "get", "download"]
|
download.commands = ["fetch", "get", "download"]
|
||||||
download.priority = "high"
|
download.priority = "high"
|
||||||
@ -296,8 +271,10 @@ def download_url(url):
|
|||||||
|
|
||||||
# can't create directories
|
# can't create directories
|
||||||
title = title.replace("/", "_")
|
title = title.replace("/", "_")
|
||||||
|
title = title.replace(" ", "_")
|
||||||
|
title = title[:20]
|
||||||
|
|
||||||
path = os.path.join("/home/bryan/public_html/papers2/paperbot/", title + extension)
|
path = os.path.join(params.folder, title + extension)
|
||||||
|
|
||||||
if extension in [".pdf", "pdf"]:
|
if extension in [".pdf", "pdf"]:
|
||||||
try:
|
try:
|
||||||
@ -311,7 +288,7 @@ def download_url(url):
|
|||||||
file_handler.close()
|
file_handler.close()
|
||||||
|
|
||||||
title = title.encode("ascii", "ignore")
|
title = title.encode("ascii", "ignore")
|
||||||
url = "http://diyhpl.us/~bryan/papers2/paperbot/" + requests.utils.quote(title) + extension
|
url = params.url + requests.utils.quote(title) + extension
|
||||||
|
|
||||||
return url
|
return url
|
||||||
|
|
||||||
@ -395,3 +372,15 @@ def fix_jstor_pdf_urls(url):
|
|||||||
url += "?acceptTC=true"
|
url += "?acceptTC=true"
|
||||||
return url
|
return url
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Script entry point.
    #
    # With command-line arguments, each argument is passed to download()
    # as-is. Without arguments, lines are read from standard input until
    # EOF and each one is passed to download().
    if len(sys.argv) > 1:
        for arg in sys.argv[1:]:
            download(arg)
    else:
        # readline() returns "" only at EOF, so it doubles as the loop
        # sentinel; a blank input line arrives as "\n" and does not stop
        # the loop.
        for raw_line in iter(sys.stdin.readline, ""):
            # readline() keeps the trailing newline. Strip it (and any
            # surrounding whitespace) so it does not end up embedded in
            # the URL that download() checks with startswith("http") and
            # then rewrites/forwards.
            download(raw_line.strip())
|
||||||
|
|
||||||
|
|
1
phenny
1
phenny
@ -1 +0,0 @@
|
|||||||
Subproject commit 7752b56cc2f8325883bad052ad31bf2e7feb706e
|
|
Loading…
Reference in New Issue
Block a user