Initial commit

This commit is contained in:
Lucas Verney 2016-02-10 15:35:26 +01:00
commit c62d56684d
6 changed files with 116 additions and 0 deletions

2
.gitignore vendored Normal file
View File

@ -0,0 +1,2 @@
__pycache__
config.py

22
README.md Normal file
View File

@ -0,0 +1,22 @@
CitationExtractor
=================
This is a simple web interface built on top of
[libbmc](https://github.com/Phyks/libbmc/) and especially
[CERMINE](https://github.com/CeON/CERMINE) to extract references from a given
PDF file.
## Installation
* Clone this repository.
* Install requirements: `pip install -r requirements.txt`
* Create a config file: `cp config.py.example config.py` and edit it according
to your needs.
* Run it: `python3 ./main.py`. Head to `http://localhost:8080` to see it live.
## LICENSE
This repo is under an MIT license.

4
config.py.example Normal file
View File

@ -0,0 +1,4 @@
HOST = "0.0.0.0" # Host to listen on
PORT = 8080 # Port to listen on
DEBUG = True # Enable debug in Bottle?
CERMINE_PATH = None # Should be set to the absolute path to CERMINE standalone JAR to prevent using the API.

64
main.py Executable file
View File

@ -0,0 +1,64 @@
#!/usr/bin/env python3
import config
from libbmc.citations import pdf
import os
import tempfile
from bottle import redirect, request, route, run, view
@route("/upload", method="POST")
@view("template.tpl")
def do_upload():
    """
    Handle an uploaded PDF file and process it for citations.

    Reads the ``upload`` file field of the submitted form. When no file was
    provided, the client is redirected to ``/``. When the file name does not
    end with a ``.pdf`` extension (compared case-insensitively), an error
    message is handed to the template. Otherwise the upload is written to a
    temporary file whose path is passed to ``pdf.cermine_dois``.

    :returns: A dict with a single ``params`` key, holding either an
        ``error`` message or the extracted ``citations``.
    """
    # Check uploaded file has been provided
    upload = request.files.get('upload')
    if upload is None:
        return redirect("/")

    # Check file is a PDF file. Compare the whole extension: a membership
    # test against the bare string ".pdf" would be a substring test and
    # would let through an empty extension, ".p", ".pd", ...
    _, ext = os.path.splitext(upload.filename)
    if ext.lower() != ".pdf":
        return {
            "params": {
                "error": "File does not seem to be a valid PDF file."
            }
        }

    # Process citations
    with tempfile.NamedTemporaryFile() as fh:
        upload.save(fh)
        # The file is handed over by name below, so push any data still
        # buffered in this handle to disk before it is read.
        fh.flush()
        citations = pdf.cermine_dois(fh.name,
                                     override_local=config.CERMINE_PATH)

    return {
        "params": {
            "citations": citations
        }
    }
@route("/<:re:.+>")
def redirect_catchall():
    """
    Send any request matched by the catch-all pattern back to /.
    """
    home = "/"
    return redirect(home)
@route("/")
@view("template.tpl")
def index():
    """
    Landing page: render the template with no parameters set.
    """
    # Neither "error" nor "citations" is set for the plain index view.
    template_params = {}
    return {"params": template_params}
if __name__ == "__main__":
    # Start the Bottle server with the settings taken from config.py.
    run(
        host=config.HOST,
        port=config.PORT,
        debug=config.DEBUG,
    )

2
requirements.txt Normal file
View File

@ -0,0 +1,2 @@
bottle>=0.12.9
libbmc>=0.1.2

22
views/template.tpl Normal file
View File

@ -0,0 +1,22 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8" />
<title>CitationExtractor</title>
</head>
<body>
%if "error" in params:
<p><strong>Error: {{ params["error"] }}</strong></p>
% end
% if "citations" in params:
<pre>{{ params["citations"] }}</pre>
% else:
<form action="/upload" method="post" enctype="multipart/form-data">
<p><label for="upload">Select a file: </label><input type="file" name="upload" id="upload" /></p>
<p><input type="submit" value="Extract citations!" /></p>
</form>
% end
</body>
</html>