citationextractor/main.py

91 lines
2.0 KiB
Python
Raw Permalink Normal View History

2016-02-10 15:35:26 +01:00
#!/usr/bin/env python3
import config
2016-02-10 16:17:19 +01:00
import html
2016-02-10 15:35:26 +01:00
import os
import tempfile
from bottle import redirect, request, route, run, view
2016-02-17 16:42:46 +01:00
import libbmc.doi as doi
import libbmc.citations.pdf as pdf
2016-02-10 15:35:26 +01:00
def format_citation(doi_url):
    """
    Build the template-ready description of a single citation.

    :param doi_url: DOI (or DOI URL) attached to the citation, or ``None`` \
            when no DOI is available for it.
    :returns: A dict with the keys ``doi``, ``oa`` and ``sharable``. All \
            three are ``None`` when ``doi_url`` is ``None``.
    """
    # Nothing to look up without a DOI: every field stays empty.
    if doi_url is None:
        return dict.fromkeys(("doi", "oa", "sharable"))

    canonical = doi.to_canonical(doi_url)
    policy = doi.get_oa_policy(canonical)
    # Sharable as soon as one entry of the policy is "can"; stays None
    # when no policy was returned at all.
    can_share = None if policy is None else "can" in policy.values()
    return {
        "doi": canonical,
        "oa": doi.get_oa_version(canonical),
        "sharable": can_share,
    }
2016-02-10 15:35:26 +01:00
@route("/upload", method="POST")
@view("template.tpl")
def do_upload():
    """
    Handle uploaded PDF files and process them for citations.

    Reads the ``upload`` file field of the current request.

    :returns: A redirect to ``/`` when no file was provided; a dict with an \
            ``error`` message under ``params`` when the file name does not \
            end in ``.pdf``; otherwise a dict whose ``params`` hold the \
            extracted ``citations`` and the ``upload_name``.
    """
    # Check uploaded file has been provided
    upload = request.files.get('upload')
    if upload is None:
        return redirect("/")

    # Check file is a PDF file.
    # Compare the extension for equality: ``ext not in (".pdf")`` is a
    # substring test against the string ".pdf" (the parentheses do not make
    # a tuple), which accepts "", ".p", "pdf", ... Lower-casing also lets
    # ".PDF" through.
    _, ext = os.path.splitext(upload.filename)
    if ext.lower() != ".pdf":
        return {
            "params": {
                "error": "File does not seem to be a valid PDF file."
            }
        }

    # Process citations
    with tempfile.NamedTemporaryFile() as fh:
        upload.save(fh)
        # The extractor is given the file *name*, so buffered bytes must be
        # written out before it opens the file.
        fh.flush()
        raw_citations = pdf.cermine_dois(fh.name,
                                         override_local=config.CERMINE_PATH)

    # Citation texts are HTML-unescaped before being used as display keys.
    citations = {
        html.unescape(k): format_citation(v)
        for k, v in raw_citations.items()
    }
    return {
        "params": {
            "citations": citations,
            "upload_name": upload.filename
        }
    }
@route("/<:re:.+>")
def redirect_catchall():
    """
    Send every URL matched by the catch-all pattern back to /.
    """
    home = "/"
    return redirect(home)
@route("/")
@view("template.tpl")
def index():
    """
    Main index view: the upload form, rendered through template.tpl.
    """
    # No citations and no error yet, so the template gets empty params.
    template_params = {}
    return {"params": template_params}
if __name__ == "__main__":
    # Start the Bottle server with the settings from the config module;
    # only when executed as a script, not on import.
    server_options = {
        "host": config.HOST,
        "port": config.PORT,
        "debug": config.DEBUG,
    }
    run(**server_options)