From 8d930c95d3e65c4362ee0351948f88c6f6d19f07 Mon Sep 17 00:00:00 2001 From: Bryan Bishop Date: Mon, 7 Jan 2013 22:27:46 -0800 Subject: [PATCH] initial commit --- .gitignore | 7 +++ modules/papers.py | 127 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 134 insertions(+) create mode 100644 .gitignore create mode 100644 modules/papers.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..9d8a08b --- /dev/null +++ b/.gitignore @@ -0,0 +1,7 @@ +# precompiled python +*.pyc + +# editor leftovers +*~ +.*.sw* + diff --git a/modules/papers.py b/modules/papers.py new file mode 100644 index 0000000..3009dd0 --- /dev/null +++ b/modules/papers.py @@ -0,0 +1,127 @@ +""" +Fetches papers. +""" + +import os +import json +import requests + +def download(phenny, input, verbose=True): + """ + Downloads a paper. + """ + # only accept requests in a channel + if not input.sender.startswith('#'): + # unless the user is an admin, of course + if not input.admin: + phenny.say("i only take requests in the ##hplusroadmap channel.") + return + else: + # just give a warning message to the admin.. not a big deal. + phenny.say("okay i'll try, but please send me requests in ##hplusroadmap in the future.") + + # get the input + line = input.group() + + # was this an explicit command? + explicit = False + if line.startswith(phenny.nick): + explicit = True + line = line[len(phenny.nick):] + + if line.startswith(",") or line.startswith(":"): + line = line[1:] + + if line.startswith(" "): + line = line.strip() + + # don't bother if there's nothing there + if len(line) < 5 or (not "http://" in line and not "https://" in line) or not line.startswith("http"): + return + + translation_url = "http://localhost:1969/web" + + headers = { + "Content-Type": "application/json", + } + + data = { + "url": line, + "sessionid": "what" + } + + data = json.dumps(data) + + response = requests.post(translation_url, data=data, headers=headers) + + if response.status_code == 200: + # see if there are any attachments + content = json.loads(response.content) + item = content[0] + title = item["title"] + + if item.has_key("attachments"): + pdf_url = None + for attachment in item["attachments"]: + if attachment.has_key("mimeType") and "application/pdf" in attachment["mimeType"]: + pdf_url = attachment["url"] + break + + if pdf_url: + user_agent = "Mozilla/5.0 (X11; Linux i686 (x86_64)) AppleWebKit/536.11 (KHTML, like Gecko) Chrome/20.0.1132.57 Safari/536.11" + + headers = { + "User-Agent": user_agent, + } + + response = None + if pdf_url.startswith("https://"): + response = requests.get(pdf_url, headers=headers, verify=False) + else: + response = requests.get(pdf_url, headers=headers) + + # detect failure + if response.status_code == 401: + phenny.say("HTTP 401 unauthorized " + str(pdf_url)) + return + elif response.status_code != 200: + phenny.say("HTTP " + str(response.status_code) + " " + str(pdf_url)) + return + + data = response.content + + path = os.path.join("/home/bryan/public_html/papers2/paperbot/", title + ".pdf") + + file_handler = open(path, "w") + file_handler.write(data) + file_handler.close() + + # grr.. + title = title.encode("ascii", "ignore") + + filename = requests.utils.quote(title) + url = "http://diyhpl.us/~bryan/papers2/paperbot/" + filename + ".pdf" + + phenny.say(url) + return + elif verbose and explicit: + phenny.say("error: didn't find any pdfs on " + line) + return + elif verbose and explicit: + phenny.say("error: dunno how to find the pdf on " + line) + return + elif verbose and explicit: + if response.status_code == 501: + if verbose: + phenny.say("error: HTTP " + str(response.status_code) + " " + line + " (battle station not fully operational)") + return + else: + if verbose: + phenny.say("error: HTTP " + str(response.status_code) + " " + line) + return + else: + return +download.commands = ["fetch", "get", "download"] +download.priority = "high" +download.rule = r'(.*)' +