From c62d56684dc77906ccb4adff3efae3d7019722eb Mon Sep 17 00:00:00 2001 From: "Phyks (Lucas Verney)" Date: Wed, 10 Feb 2016 15:35:26 +0100 Subject: [PATCH] Initial commit --- .gitignore | 2 ++ README.md | 22 ++++++++++++++++ config.py.example | 4 +++ main.py | 64 ++++++++++++++++++++++++++++++++++++++++++++++ requirements.txt | 2 ++ views/template.tpl | 22 ++++++++++++++++ 6 files changed, 116 insertions(+) create mode 100644 .gitignore create mode 100644 README.md create mode 100644 config.py.example create mode 100755 main.py create mode 100644 requirements.txt create mode 100644 views/template.tpl diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..3afd512 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +__pycache__ +config.py diff --git a/README.md b/README.md new file mode 100644 index 0000000..cb47061 --- /dev/null +++ b/README.md @@ -0,0 +1,22 @@ +CitationExtractor +================= + + +This is a simple web interface on top of +[libbmc](https://github.com/Phyks/libbmc/) and especially +[CERMINE](https://github.com/CeON/CERMINE) to extract references from a given +PDF file. + + +## Installation + +* Clone this repository. +* Install requirements: `pip install -r requirements.txt` +* Create a config file: `cp config.py.example config.py` and edit it according + to your needs. +* Run it: `python3 ./main.py`. Head to `http://localhost:8080` to see it live. + + +## LICENSE + +This repo is under an MIT license. diff --git a/config.py.example b/config.py.example new file mode 100644 index 0000000..3bc223b --- /dev/null +++ b/config.py.example @@ -0,0 +1,4 @@ +HOST = "0.0.0.0" # Host to listen on +PORT = 8080 # Port to listen on +DEBUG = True # Enable debug in Bottle? +CERMINE_PATH = None # Should be set to the absolute path to CERMINE standalone JAR to prevent using the API. 
diff --git a/main.py b/main.py
new file mode 100755
index 0000000..552ea0a
--- /dev/null
+++ b/main.py
@@ -0,0 +1,81 @@
+#!/usr/bin/env python3
+"""
+Simple web interface to extract references from an uploaded PDF file.
+"""
+import config
+from libbmc.citations import pdf
+import os
+import tempfile
+
+from bottle import redirect, request, route, run, view
+
+
+@route("/upload", method="POST")
+@view("template.tpl")
+def do_upload():
+    """
+    Handle uploaded PDF files and process them for citations.
+
+    The file is read from the ``upload`` field of the POSTed form. If no file
+    was provided, the client is redirected to ``/``.
+
+    :returns: A dict rendered by ``template.tpl``. Its ``params`` entry holds
+        either an ``error`` message or the extracted ``citations``.
+    """
+    # Check uploaded file has been provided
+    upload = request.files.get('upload')
+    if upload is None:
+        return redirect("/")
+
+    # Check file is a PDF file. The extension is compared as a whole and
+    # case-insensitively, so that "paper.PDF" is accepted and files without
+    # a ".pdf" extension are rejected.
+    _, ext = os.path.splitext(upload.filename)
+    if ext.lower() != ".pdf":
+        return {
+            "params": {
+                "error": "File does not seem to be a valid PDF file."
+            }
+        }
+
+    # Process citations
+    with tempfile.NamedTemporaryFile() as fh:
+        upload.save(fh)
+        # The file is handed over by name, so buffered data has to be written
+        # out before it gets read again.
+        fh.flush()
+        citations = pdf.cermine_dois(fh.name,
+                                     override_local=config.CERMINE_PATH)
+
+    return {
+        "params": {
+            "citations": citations
+        }
+    }
+
+
+@route("/<:re:.+>")
+def redirect_catchall():
+    """
+    Redirect everything else to /.
+    """
+    return redirect("/")
+
+
+@route("/")
+@view("template.tpl")
+def index():
+    """
+    Main index view, upload form.
+
+    :returns: A dict with an empty ``params`` entry, rendered by
+        ``template.tpl``.
+    """
+    return {
+        "params": {
+        }
+    }
+
+
+if __name__ == "__main__":
+    run(host=config.HOST, port=config.PORT, debug=config.DEBUG)
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..b3b5996
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,2 @@
+bottle>=0.12.9
+libbmc>=0.1.2
diff --git a/views/template.tpl b/views/template.tpl
new file mode 100644
index 0000000..10b97c6
--- /dev/null
+++ b/views/template.tpl
@@ -0,0 +1,22 @@
+
+
+
+
+ CitationExtractor
+
+
+ %if "error" in params:
+

Error: {{ params["error"] }}

+ % end + + % if "citations" in params: +
{{ params["citations"] }}
+ % else: +
+

+

+
+ % end + + +