Basic API to put and fetch some papers
This commit is contained in:
parent
a4e3538102
commit
357873d10c
40
database.py
Normal file
40
database.py
Normal file
@ -0,0 +1,40 @@
|
||||
"""
|
||||
This file contains the database schema in SQLAlchemy format.
|
||||
"""
|
||||
from sqlalchemy import Column, Integer, String
|
||||
from sqlalchemy.ext.declarative import declarative_base
|
||||
|
||||
# Declarative base class that the ORM models declared in this file inherit
# from.
Base = declarative_base()
|
||||
|
||||
|
||||
class Paper(Base):
    """
    ORM model for a paper, stored in the `papers` table.

    A paper has an internal integer id and, optionally, a DOI and/or an arXiv
    eprint id. Both external identifiers are nullable and unique.
    """
    __tablename__ = 'papers'
    # Internal primary key.
    id = Column(Integer, primary_key=True)
    # DOI of the paper, if known.
    doi = Column(String(), nullable=True, unique=True)
    # arXiv eprint id of the paper, if known (declared as String(25)).
    arxiv_id = Column(String(25), nullable=True, unique=True)

    def __repr__(self):
        """
        Return a debug representation of the paper.
        """
        # The id is formatted with %s rather than %d: it is None as long as
        # no primary key has been assigned, and %d would raise a TypeError.
        return "<Paper(id='%s', doi='%s', arxiv_id='%s')>" % (
            self.id,
            self.doi,
            self.arxiv_id,
        )

    def json_api_repr(self):
        """
        Dict to dump for the JSON API.

        Returns a resource object with the `type`, `id`, `attributes`,
        `links` and `relationships` members. The `self` link is built from
        the integer id, so the paper must already have an id.
        """
        return {
            # JSON API resource objects use the singular "type" member.
            "type": self.__tablename__,
            "id": self.id,
            "attributes": {
                "doi": self.doi,
                "arxiv_id": self.arxiv_id,
            },
            "links": {
                "self": "/papers/%d" % (self.id,)
            },
            "relationships": {
                # TODO
            }
        }
|
@ -15,4 +15,4 @@ if __name__ == "__main__":
|
||||
if os.path.isfile(sys.argv[1]):
|
||||
pprint.pprint(bbl.get_dois(sys.argv[1]))
|
||||
else:
|
||||
pprint.pprint(arxiv.get_dois(sys.argv[1]))
|
||||
pprint.pprint(arxiv.get_cited_dois(sys.argv[1]))
|
||||
|
67
main.py
67
main.py
@ -1,31 +1,56 @@
|
||||
#!/usr/bin/env python3
|
||||
from bottle import get, post, run
|
||||
import bottle
|
||||
from bottle.ext import sqlalchemy
|
||||
from sqlalchemy import create_engine, Column, Integer, Sequence, String
|
||||
from sqlalchemy.ext.declarative import declarative_base
|
||||
from sqlalchemy import create_engine, event
|
||||
from sqlalchemy.engine import Engine
|
||||
|
||||
import database
|
||||
import routes
|
||||
|
||||
# Initialize db and include the SQLAlchemy plugin in bottle
# The engine points at an in-memory SQLite database and is created with
# echo=True.
engine = create_engine('sqlite:///:memory:', echo=True)

# Application object on which the plugin and the routes are registered.
app = bottle.Bottle()
plugin = sqlalchemy.Plugin(
    # SQLAlchemy engine created with create_engine function.
    engine,
    # SQLAlchemy metadata, required only if create=True.
    database.Base.metadata,
    # Keyword used to inject session database in a route (default 'db').
    keyword='db',
    # If it is true, execute `metadata.create_all(engine)` when plugin is
    # applied (default False).
    create=True,
    # If it is true, plugin commit changes after route is executed (default
    # True).
    commit=True,
    # If it is true and keyword is not defined, plugin uses **kwargs argument
    # to inject session database (default False).
    use_kwargs=False
)

# Register the plugin on the application.
app.install(plugin)
|
||||
|
||||
|
||||
@get("/doi/<doi:path>")
def doi(doi):
    """
    GET /doi/<DOI>

    {}

    Placeholder handler: the body is not implemented yet and returns None.

    Params:
        - doi is the DOI captured from the URL path.
    """
    # TODO
    pass
|
||||
# Auto enable foreign keys for SQLite
|
||||
@event.listens_for(Engine, "connect")
def set_sqlite_pragma(dbapi_connection, connection_record):
    """
    Turn on foreign key support for a freshly opened connection.

    Registered as a listener for the "connect" event of Engine.

    Params:
        - dbapi_connection is the raw DBAPI connection that was just opened.
        - connection_record is passed by the event and is not used here.
    """
    pragma_cursor = dbapi_connection.cursor()
    pragma_cursor.execute("PRAGMA foreign_keys=ON")
    pragma_cursor.close()
|
||||
|
||||
|
||||
@post("/doi/<doi:path>")
|
||||
def doi_post(doi):
|
||||
"""
|
||||
POST /doi/<DOI>
|
||||
# Routes
|
||||
app.get("/papers", callback=routes.get.fetch_papers)
|
||||
app.get("/papers/<id:int>", callback=routes.get.fetch_by_id)
|
||||
|
||||
{}
|
||||
"""
|
||||
# TODO
|
||||
pass
|
||||
# TODO: Fetch relationships
|
||||
|
||||
|
||||
app.post("/papers", callback=routes.post.create_paper)
|
||||
|
||||
# TODO: Update relationships
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
run(host='localhost', port=8080, debug=True)
|
||||
app.run(host='localhost', port=8080, debug=True)
|
||||
|
@ -4,6 +4,7 @@ This file contains all the arXiv-specific functions.
|
||||
import io
|
||||
import requests
|
||||
import tarfile
|
||||
import xml.etree.ElementTree
|
||||
|
||||
from . import bbl
|
||||
|
||||
@ -38,7 +39,7 @@ def bbl_from_arxiv(eprint):
|
||||
return bbl_files
|
||||
|
||||
|
||||
def get_dois(eprint):
|
||||
def get_cited_dois(eprint):
|
||||
"""
|
||||
Get the .bbl files (if any) of a given preprint.
|
||||
|
||||
@ -52,3 +53,46 @@ def get_dois(eprint):
|
||||
for bbl_file in bbl_files:
|
||||
dois.update(bbl.get_dois(bbl_file))
|
||||
return dois
|
||||
|
||||
|
||||
def get_arxiv_eprint_from_doi(doi):
    """
    Look up the arXiv eprint id matching a DOI through the arXiv API.

    Params:
        - doi is the DOI of the resource to look for.

    Returns the arXiv eprint id of the first entry of the API response, or
    None if the response contains no entry.
    """
    atom_ns = "{http://www.w3.org/2005/Atom}"
    query = {
        "search_query": "doi:%s" % (doi,),
        "max_results": 1
    }
    response = requests.get("http://export.arxiv.org/api/query",
                            params=query)
    feed = xml.etree.ElementTree.fromstring(response.content)

    # Only the first entry of the feed is ever considered.
    first_entry = next(feed.iter(atom_ns + "entry"), None)
    if first_entry is None:
        return None

    # The entry id is the abstract URL; strip the prefix to keep the id only.
    abs_url = first_entry.find(atom_ns + "id").text
    return abs_url.replace("http://arxiv.org/abs/", "")
|
||||
|
||||
|
||||
def get_doi(eprint):
    """
    Look up the DOI associated with an arXiv eprint through the arXiv API.

    Params:
        - eprint is the arXiv eprint id.

    Returns the text of the first DOI element found among the entries of the
    API response, or None if no entry carries a DOI.
    """
    entry_tag = "{http://www.w3.org/2005/Atom}entry"
    doi_tag = "{http://arxiv.org/schemas/atom}doi"

    query = {
        "id_list": eprint,
        "max_results": 1
    }
    response = requests.get("http://export.arxiv.org/api/query",
                            params=query)
    feed = xml.etree.ElementTree.fromstring(response.content)

    # Walk the entries and stop at the first one that has a DOI element.
    doi_nodes = (entry.find(doi_tag) for entry in feed.iter(entry_tag))
    return next(
        (node.text for node in doi_nodes if node is not None),
        None
    )
|
||||
|
4
routes/__init__.py
Normal file
4
routes/__init__.py
Normal file
@ -0,0 +1,4 @@
|
||||
from . import get
|
||||
from . import post
|
||||
|
||||
__all__ = ["get", "post"]
|
161
routes/get.py
Normal file
161
routes/get.py
Normal file
@ -0,0 +1,161 @@
|
||||
"""
|
||||
This file contains the handlers for the GET routes.
|
||||
"""
|
||||
import bottle
|
||||
|
||||
import database
|
||||
import tools
|
||||
|
||||
|
||||
def fetch_papers(db):
    """
    Fetch all the papers matching the request parameters.

    ```
    GET /papers
    Accept: application/vnd.api+json
    ```

    Results can be filtered with the `id=ID`, `doi=DOI` and
    `arxiv_id=ARXIV_ID` request parameters, alone or combined. Any other
    parameter is ignored.

    ```
    {
        "data": [
            {
                "type": "papers",
                "id": 1,
                "attributes": {
                    "doi": "TODO",
                    "arxiv_id": "TODO"
                },
                "links": {
                    "self": "TODO"
                },
                "relationships": {
                }
            }
        ]
    }
    ```

    Params:
        - db is the database session used to run the query.

    Returns the JSON document above, or a 404 error if no paper matches.
    """
    allowed_filters = ["id", "doi", "arxiv_id"]

    # Keep only the request parameters that are valid filters.
    filters = {}
    for key in bottle.request.params:
        if key in allowed_filters:
            filters[key] = bottle.request.params[key]

    papers = db.query(database.Paper).filter_by(**filters).all()
    if not papers:
        return bottle.HTTPError(404, "Not found")

    return tools.pretty_json({
        "data": [paper.json_api_repr() for paper in papers]
    })
|
||||
|
||||
|
||||
def fetch_by_id(id, db):
    """
    Fetch the paper having the given internal id.

    ```
    GET /papers/<id>
    Accept: application/vnd.api+json
    ```

    ```
    {
        "data": {
            "type": "papers",
            "id": 1,
            "attributes": {
                "doi": "TODO",
                "arxiv_id": "TODO"
            },
            "links": {
                "self": "TODO"
            },
            "relationships": {
            }
        }
    }
    ```

    Params:
        - id is the internal id of the paper.
        - db is the database session used to run the query.

    Returns the JSON document above, or a 404 error if there is no such
    paper.
    """
    paper = db.query(database.Paper).filter_by(id=id).first()
    if not paper:
        return bottle.HTTPError(404, "Not found")

    document = {
        "data": paper.json_api_repr()
    }
    return tools.pretty_json(document)
|
||||
|
||||
|
||||
def fetch_by_doi(doi, db):
    """
    Fetch the paper having the given DOI.

    ```
    GET /doi/<DOI>
    Accept: application/vnd.api+json
    ```

    ```
    {
        "data": {
            "type": "papers",
            "id": 1,
            "attributes": {
                "doi": "TODO",
                "arxiv_id": "TODO"
            },
            "links": {
                "self": "TODO"
            },
            "relationships": {
            }
        }
    }
    ```

    Params:
        - doi is the DOI of the paper.
        - db is the database session used to run the query.

    Returns the JSON document above, or a 404 error if there is no such
    paper.
    """
    paper = db.query(database.Paper).filter_by(doi=doi).first()
    if not paper:
        return bottle.HTTPError(404, "Not found")

    document = {
        "data": paper.json_api_repr()
    }
    return tools.pretty_json(document)
|
||||
|
||||
|
||||
def fetch_by_arxiv(arxiv, db):
    """
    Fetch the paper having the given arXiv eprint id.

    ```
    GET /arxiv/<arxiv_eprint_id>
    Accept: application/vnd.api+json
    ```

    ```
    {
        "data": {
            "type": "papers",
            "id": 1,
            "attributes": {
                "doi": "TODO",
                "arxiv_id": "TODO"
            },
            "links": {
                "self": "TODO"
            },
            "relationships": {
            }
        }
    }
    ```

    Params:
        - arxiv is the arXiv eprint id of the paper.
        - db is the database session used to run the query.

    Returns the JSON document above, or a 404 error if there is no such
    paper.
    """
    paper = db.query(database.Paper).filter_by(arxiv_id=arxiv).first()
    if not paper:
        return bottle.HTTPError(404, "Not found")

    document = {
        "data": paper.json_api_repr()
    }
    return tools.pretty_json(document)
|
113
routes/post.py
Normal file
113
routes/post.py
Normal file
@ -0,0 +1,113 @@
|
||||
"""
|
||||
This file contains the handlers for the POST routes.
|
||||
"""
|
||||
import bottle
|
||||
import json
|
||||
from sqlalchemy.exc import IntegrityError
|
||||
|
||||
import database
|
||||
import tools
|
||||
from reference_fetcher import arxiv
|
||||
|
||||
|
||||
def create_paper(db):
    """
    Create a new resource identified by its DOI or arXiv eprint id.

    ```
    POST /papers
    Content-Type: application/vnd.api+json
    Accept: application/vnd.api+json

    {
        "data": {
            "type": "papers",
            "doi": "DOI",
            // OR
            "arxiv_id": "ARXIV_ID"
        }
    }
    ```

    ```
    {} TODO
    ```

    Params:
        - db is the database session used to store the paper.

    Returns:
        - a 202 response with the created resource on success,
        - a 400 error if the body is not valid UTF-8 encoded JSON,
        - a 403 error if the document is not a valid creation request,
        - a 409 error if the paper could not be inserted.
    """
    try:
        payload = json.loads(bottle.request.body.read().decode("utf-8"))
    except ValueError:
        # Both decoding errors and JSON parsing errors are ValueErrors.
        return bottle.HTTPError(400, "Bad Request")

    # Validate the request: "data" must be an object of type "papers"
    # carrying at least one of the supported identifiers.
    data = payload.get("data") if isinstance(payload, dict) else None
    if (not isinstance(data, dict) or
            data.get("type") != "papers" or
            ("doi" not in data and "arxiv_id" not in data)):
        return bottle.HTTPError(403, "Forbidden")

    # The DOI takes precedence when both identifiers are provided.
    if "doi" in data:
        paper = create_by_doi(data["doi"], db)
    else:
        paper = create_by_arxiv(data["arxiv_id"], db)

    if paper is None:
        return bottle.HTTPError(409, "Conflict")

    # Return the resource
    response = {
        "data": paper.json_api_repr()
    }
    # Note: Return a 202 as the resource has been accepted but is not yet
    # processed, especially since its relationships have not yet been fetched.
    # TODO: Redirection
    return bottle.HTTPResponse(status=202, body=tools.pretty_json(response))
|
||||
|
||||
|
||||
def create_by_doi(doi, db):
    """
    Insert a paper identified by its DOI, unless it already exists.

    The matching arXiv eprint id is looked up and stored along with the DOI
    when one is found.

    Params:
        - doi is the DOI of the paper to create.
        - db is the database session used to store the paper.

    Return None if insertion failed, the Paper object otherwise.
    """
    new_paper = database.Paper(doi=doi)

    # Attach the arXiv eprint id when the lookup finds one.
    eprint = arxiv.get_arxiv_eprint_from_doi(doi)
    if eprint:
        new_paper.arxiv_id = eprint

    try:
        db.add(new_paper)
        db.flush()
    except IntegrityError:
        # A unique constraint was violated: the paper is already stored.
        db.rollback()
        return None
    else:
        return new_paper
|
||||
|
||||
|
||||
def create_by_arxiv(arxiv, db):
    """
    Create a new resource identified by its arXiv eprint ID, if it does not
    exist.

    The associated DOI is looked up and stored along with the arXiv eprint
    id when one is found.

    Params:
        - arxiv is the arXiv eprint id of the paper to create.
        - db is the database session used to store the paper.

    Return None if insertion failed, the Paper object otherwise.
    """
    # The `arxiv` parameter shadows the module of the same name, so the
    # module is imported here under a distinct name.
    from reference_fetcher import arxiv as arxiv_fetcher

    paper = database.Paper(arxiv_id=arxiv)

    # Try to fetch a DOI
    doi = arxiv_fetcher.get_doi(arxiv)
    if doi:
        paper.doi = doi

    # Add it to the database
    try:
        db.add(paper)
        db.flush()
    except IntegrityError:
        # Unique constraint violation, paper already exists
        db.rollback()
        return None

    # Return the paper
    return paper
|
Loading…
Reference in New Issue
Block a user