Browse Source

Basic API to put and fetch some papers

master
Lucas Verney 7 years ago
parent
commit
357873d10c
  1. 40
      database.py
  2. 2
      fetch_references.py
  3. 65
      main.py
  4. 46
      reference_fetcher/arxiv.py
  5. 4
      routes/__init__.py
  6. 161
      routes/get.py
  7. 113
      routes/post.py
  8. 14
      tools.py

40
database.py

@ -0,0 +1,40 @@ @@ -0,0 +1,40 @@
"""
This file contains the database schema in SQLAlchemy format.
"""
from sqlalchemy import Column, Integer, String
from sqlalchemy.ext.declarative import declarative_base
Base = declarative_base()
class Paper(Base):
    """
    A paper stored in the `papers` table, known by its DOI and / or its arXiv
    eprint id. Both identifiers are optional, but unique when set.
    """
    __tablename__ = 'papers'
    # Internal identifier (primary key).
    id = Column(Integer, primary_key=True)
    # External identifiers.
    doi = Column(String(), nullable=True, unique=True)
    arxiv_id = Column(String(25), nullable=True, unique=True)

    def __repr__(self):
        # Format the id with %s rather than %d: formatting None with %d
        # raises a TypeError, which would make repr() unusable on an object
        # whose id has not been set.
        return "<Paper(id='%s', doi='%s', arxiv_id='%s')>" % (
            self.id,
            self.doi,
            self.arxiv_id,
        )

    def json_api_repr(self):
        """
        Dict to dump for the JSON API.

        Returns a resource object with the `type`, `id`, `attributes`,
        `links` and `relationships` members. The `self` link requires the
        id to be an integer.
        """
        return {
            # JSON API resource objects carry a "type" member (singular).
            "type": self.__tablename__,
            "id": self.id,
            "attributes": {
                "doi": self.doi,
                "arxiv_id": self.arxiv_id,
            },
            "links": {
                "self": "/papers/%d" % (self.id,)
            },
            "relationships": {
                # TODO
            }
        }

2
fetch_references.py

@ -15,4 +15,4 @@ if __name__ == "__main__": @@ -15,4 +15,4 @@ if __name__ == "__main__":
if os.path.isfile(sys.argv[1]):
pprint.pprint(bbl.get_dois(sys.argv[1]))
else:
pprint.pprint(arxiv.get_dois(sys.argv[1]))
pprint.pprint(arxiv.get_cited_dois(sys.argv[1]))

65
main.py

@ -1,31 +1,56 @@ @@ -1,31 +1,56 @@
#!/usr/bin/env python3
from bottle import get, post, run
import bottle
from bottle.ext import sqlalchemy
from sqlalchemy import create_engine, Column, Integer, Sequence, String
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy import create_engine, event
from sqlalchemy.engine import Engine
import database
import routes
@get("/doi/<doi:path>")
def doi(doi):
"""
GET /doi/<DOI>
# Initialize db and include the SQLAlchemy plugin in bottle
engine = create_engine('sqlite:///:memory:', echo=True)
{}
"""
# TODO
pass
# Bottle application on which the plugin and the routes are registered.
app = bottle.Bottle()
# SQLAlchemy plugin, in charge of handing a database session to the routes.
plugin = sqlalchemy.Plugin(
    # Engine built with `create_engine`.
    engine,
    # Metadata of the schema; only needed because `create` is True.
    database.Base.metadata,
    # Do not fall back on **kwargs to inject the session when the keyword is
    # not defined (this is the default).
    use_kwargs=False,
    # Commit the changes once the route has been executed (this is the
    # default).
    commit=True,
    # Run `metadata.create_all(engine)` when the plugin is applied (default
    # is False).
    create=True,
    # Name of the route argument receiving the session (default is 'db').
    keyword='db',
)
app.install(plugin)
@post("/doi/<doi:path>")
def doi_post(doi):
    """
    Placeholder for `POST /doi/<DOI>`.

    Not implemented yet: the DOI is ignored and None is returned.
    """
    # TODO
    return None
# Auto enable foreign keys for SQLite
@event.listens_for(Engine, "connect")
def set_sqlite_pragma(dbapi_connection, connection_record):
    """
    Turn on foreign keys support on every new database connection.

    Params:
        - dbapi_connection is the raw DBAPI connection that was just opened.
        - connection_record is unused here.
    """
    cursor = dbapi_connection.cursor()
    try:
        cursor.execute("PRAGMA foreign_keys=ON")
    finally:
        # Release the cursor even if the PRAGMA statement fails.
        cursor.close()
# Routes
# Papers collection and single paper lookups.
app.route("/papers", method="GET", callback=routes.get.fetch_papers)
app.route("/papers/<id:int>", method="GET", callback=routes.get.fetch_by_id)
# TODO: Fetch relationships
# Paper creation.
app.route("/papers", method="POST", callback=routes.post.create_paper)
# TODO: Update relationships
if __name__ == "__main__":
run(host='localhost', port=8080, debug=True)
app.run(host='localhost', port=8080, debug=True)

46
reference_fetcher/arxiv.py

@ -4,6 +4,7 @@ This file contains all the arXiv-specific functions. @@ -4,6 +4,7 @@ This file contains all the arXiv-specific functions.
import io
import requests
import tarfile
import xml.etree.ElementTree
from . import bbl
@ -38,7 +39,7 @@ def bbl_from_arxiv(eprint): @@ -38,7 +39,7 @@ def bbl_from_arxiv(eprint):
return bbl_files
def get_dois(eprint):
def get_cited_dois(eprint):
"""
Get the .bbl files (if any) of a given preprint.
@ -52,3 +53,46 @@ def get_dois(eprint): @@ -52,3 +53,46 @@ def get_dois(eprint):
for bbl_file in bbl_files:
dois.update(bbl.get_dois(bbl_file))
return dois
def get_arxiv_eprint_from_doi(doi):
    """
    Get the arXiv eprint id for a given DOI.

    Params:
        - doi is the DOI of the resource to look for.

    Returns the arXiv eprint id, or None if not found.
    """
    r = requests.get("http://export.arxiv.org/api/query",
                     params={
                         "search_query": "doi:%s" % (doi,),
                         "max_results": 1
                     })
    feed = xml.etree.ElementTree.fromstring(r.content)
    atom = "{http://www.w3.org/2005/Atom}"
    abs_prefixes = ("http://arxiv.org/abs/", "https://arxiv.org/abs/")
    for entry in feed.iter(atom + "entry"):
        # findtext returns None when the entry has no <id> element, instead
        # of failing on a missing node.
        entry_id = entry.findtext(atom + "id")
        if not entry_id:
            continue
        # Only ids pointing to an abstract page are eprint ids. Anything else
        # (e.g. the error entries the arXiv API can return) is not a match.
        for prefix in abs_prefixes:
            if entry_id.startswith(prefix):
                return entry_id[len(prefix):]
    return None
def get_doi(eprint):
    """
    Look up the DOI registered on arXiv for an eprint.

    Params:
        - eprint is the arXiv eprint id.

    Returns the text of the first DOI element found in the API answer, or
    None if no entry carries one.
    """
    query = {
        "id_list": eprint,
        "max_results": 1
    }
    response = requests.get("http://export.arxiv.org/api/query", params=query)
    feed = xml.etree.ElementTree.fromstring(response.content)
    # Lazily look for the DOI element of each returned entry.
    doi_nodes = (
        entry.find("{http://arxiv.org/schemas/atom}doi")
        for entry in feed.iter("{http://www.w3.org/2005/Atom}entry")
    )
    return next(
        (node.text for node in doi_nodes if node is not None),
        None
    )

4
routes/__init__.py

@ -0,0 +1,4 @@ @@ -0,0 +1,4 @@
from . import get
from . import post
__all__ = ["get", "post"]

161
routes/get.py

@ -0,0 +1,161 @@ @@ -0,0 +1,161 @@
"""
This file contains GET routes methods.
"""
import bottle
import database
import tools
def fetch_papers(db):
    """
    List the papers matching the request.

    ```
    GET /papers
    Accept: application/vnd.api+json
    ```

    The `id`, `doi` and `arxiv_id` request parameters, alone or combined, are
    used as equality filters. Any other parameter is ignored.

    ```
    {
        "data": [
            {
                "type": "papers",
                "id": 1,
                "attributes": {
                    "doi": "TODO",
                    "arxiv_id": "TODO"
                },
                "links": {
                    "self": "TODO"
                },
                "relationships": {
                }
            }
        ]
    }
    ```

    Params:
        - db is the database session.

    Returns the JSON document as a string, or a 404 error if no paper
    matches.
    """
    params = bottle.request.params
    # Keep only the parameters that map to a column we allow filtering on.
    filters = {}
    for key in params:
        if key in ("id", "doi", "arxiv_id"):
            filters[key] = params[key]
    papers = db.query(database.Paper).filter_by(**filters).all()
    if not papers:
        return bottle.HTTPError(404, "Not found")
    payload = {
        "data": [paper.json_api_repr() for paper in papers]
    }
    return tools.pretty_json(payload)
def fetch_by_id(id, db):
    """
    Fetch a single paper from its internal id.

    ```
    GET /papers/<id>
    Accept: application/vnd.api+json
    ```

    ```
    {
        "data": {
            "type": "papers",
            "id": 1,
            "attributes": {
                "doi": "TODO",
                "arxiv_id": "TODO"
            },
            "links": {
                "self": "TODO"
            },
            "relationships": {
            }
        }
    }
    ```

    Params:
        - id is the internal id of the paper.
        - db is the database session.

    Returns the JSON document as a string, or a 404 error if there is no
    such paper.
    """
    paper = db.query(database.Paper).filter_by(id=id).first()
    if not paper:
        return bottle.HTTPError(404, "Not found")
    payload = {
        "data": paper.json_api_repr()
    }
    return tools.pretty_json(payload)
def fetch_by_doi(doi, db):
    """
    Fetch a single paper from its DOI.

    ```
    GET /doi/<DOI>
    Accept: application/vnd.api+json
    ```

    ```
    {
        "data": {
            "type": "papers",
            "id": 1,
            "attributes": {
                "doi": "TODO",
                "arxiv_id": "TODO"
            },
            "links": {
                "self": "TODO"
            },
            "relationships": {
            }
        }
    }
    ```

    Params:
        - doi is the DOI of the paper.
        - db is the database session.

    Returns the JSON document as a string, or a 404 error if there is no
    such paper.
    """
    paper = db.query(database.Paper).filter_by(doi=doi).first()
    if not paper:
        return bottle.HTTPError(404, "Not found")
    payload = {
        "data": paper.json_api_repr()
    }
    return tools.pretty_json(payload)
def fetch_by_arxiv(arxiv, db):
    """
    Fetch a single paper from its arXiv eprint id.

    ```
    GET /arxiv/<arxiv_eprint_id>
    Accept: application/vnd.api+json
    ```

    ```
    {
        "data": {
            "type": "papers",
            "id": 1,
            "attributes": {
                "doi": "TODO",
                "arxiv_id": "TODO"
            },
            "links": {
                "self": "TODO"
            },
            "relationships": {
            }
        }
    }
    ```

    Params:
        - arxiv is the arXiv eprint id of the paper.
        - db is the database session.

    Returns the JSON document as a string, or a 404 error if there is no
    such paper.
    """
    paper = db.query(database.Paper).filter_by(arxiv_id=arxiv).first()
    if not paper:
        return bottle.HTTPError(404, "Not found")
    payload = {
        "data": paper.json_api_repr()
    }
    return tools.pretty_json(payload)

113
routes/post.py

@ -0,0 +1,113 @@ @@ -0,0 +1,113 @@
"""
This file contains POST routes methods.
"""
import bottle
import json
from sqlalchemy.exc import IntegrityError
import database
import tools
from reference_fetcher import arxiv
def create_paper(db):
    """
    Create a new resource identified by its DOI or arXiv eprint id.

    ```
    POST /papers
    Content-Type: application/vnd.api+json
    Accept: application/vnd.api+json

    {
        "data": {
            "type": "papers",
            "doi": "DOI",
            // OR
            "arxiv_id": "ARXIV_ID"
        }
    }
    ```

    ```
    {} TODO
    ```

    Params:
        - db is the database session.

    Returns a 202 response with the created resource, a 400 error if the
    body is not valid UTF-8 encoded JSON, a 403 error if the document does
    not have the expected structure, or a 409 error if the paper could not
    be inserted.
    """
    try:
        data = json.loads(bottle.request.body.read().decode("utf-8"))
    except ValueError:
        # Both decoding errors and JSON parsing errors are ValueError
        # subclasses.
        return bottle.HTTPError(400, "Bad Request")
    # Validate the request. The isinstance checks reject documents such as
    # `[]` or `{"data": "foo"}`, which would otherwise break the lookups.
    if (not isinstance(data, dict) or
            not isinstance(data.get("data"), dict) or
            data["data"].get("type") != "papers" or
            ("doi" not in data["data"] and
             "arxiv_id" not in data["data"])):
        return bottle.HTTPError(403, "Forbidden")

    data = data["data"]
    # The DOI takes precedence when both identifiers are provided.
    if "doi" in data:
        paper = create_by_doi(data["doi"], db)
    else:
        # Validation above ensures "arxiv_id" is present in this branch.
        paper = create_by_arxiv(data["arxiv_id"], db)

    if paper is None:
        return bottle.HTTPError(409, "Conflict")

    # Return the resource
    response = {
        "data": paper.json_api_repr()
    }
    # Note: Return a 202 as the resource has been accepted but is not yet
    # processed, especially since its relationships have not yet been fetched.
    # TODO: Redirection
    return bottle.HTTPResponse(status=202, body=tools.pretty_json(response))
def create_by_doi(doi, db):
    """
    Insert a paper known by its DOI, unless it is already stored.

    Params:
        - doi is the DOI of the paper.
        - db is the database session.

    Returns the new Paper object, or None if the insertion failed.
    """
    # Look for a matching arXiv eprint, to store it along with the DOI.
    eprint = arxiv.get_arxiv_eprint_from_doi(doi)
    new_paper = database.Paper(doi=doi)
    if eprint:
        new_paper.arxiv_id = eprint

    try:
        db.add(new_paper)
        db.flush()
    except IntegrityError:
        # A unique constraint was violated: the paper is already there.
        db.rollback()
        return None
    else:
        return new_paper
def create_by_arxiv(arxiv, db):
    """
    Create a new resource identified by its arXiv eprint ID, if it does not
    exist.

    Params:
        - arxiv is the arXiv eprint id of the paper.
        - db is the database session.

    Return None if insertion failed, the Paper object otherwise.
    """
    # The `arxiv` parameter hides the `arxiv` module imported at the top of
    # the file, so `arxiv.get_doi` would be looked up on the eprint id itself.
    # Import the module under another name to reach its functions.
    from reference_fetcher import arxiv as arxiv_fetcher

    paper = database.Paper(arxiv_id=arxiv)
    # Try to fetch a DOI
    doi = arxiv_fetcher.get_doi(arxiv)
    if doi:
        paper.doi = doi
    # Add it to the database
    try:
        db.add(paper)
        db.flush()
    except IntegrityError:
        # Unique constraint violation, paper already exists
        db.rollback()
        return None
    # Return the paper
    return paper

14
tools.py

@ -0,0 +1,14 @@ @@ -0,0 +1,14 @@
"""
Various utility functions.
"""
import json
def pretty_json(data):
    """
    Dump `data` as a human-readable JSON string.

    Keys are sorted and nested levels are indented by four spaces.
    """
    dump_options = {
        "indent": 4,
        "separators": (',', ': '),
        "sort_keys": True,
    }
    return json.dumps(data, **dump_options)
Loading…
Cancel
Save