Add tags support
Also fix misc bugs * Update README.md doc. * Fix relationships not shown in JSON API for a given paper if relationship was not referenced. * Handle tags
This commit is contained in:
parent
7e80aacdb7
commit
4a67e974b8
153
README.md
153
README.md
@ -3,6 +3,30 @@ Metadata for arXiv
|
||||
|
||||
The goal of this repository is to provide a minimal API to put metadata on arXiv papers.
|
||||
|
||||
Disclaimer: This code is not scalable nor ready to run in production. In
|
||||
particular, it might be error-prone, and does not try to be resilient or to keep
|
||||
trace of errors. It is here as a proof of concept and to back [this
|
||||
article](TODO) with some code. However, the `reference_fetcher` part is working
|
||||
quite well, and was able to extract most of the references from arXiv papers I
|
||||
tested it on. Note that it takes quite a long time to run on a paper, mainly due to
|
||||
the latency in [Crossref API](http://search.crossref.org/).
|
||||
|
||||
|
||||
## Special thanks
|
||||
|
||||
Under the hood, this code uses the wonderful [Crossref
|
||||
API](http://search.crossref.org/) for reference parsing to DOI, which works
|
||||
really well and with a very large index.
|
||||
|
||||
It also uses the [Dissemin API](http://beta.dissem.in/) in the
|
||||
`reference_fetcher` to try to find Open access versions of referenced papers.
|
||||
|
||||
It works using the Open access [arXiv.org](http://arxiv.org) repository,
|
||||
without which it would be really difficult to achieve a similar thing, due to
|
||||
paywalls and lack of sources. It also uses their
|
||||
[API](http://arxiv.org/help/api) to fetch DOIs from arXiv id and conversely.
|
||||
|
||||
|
||||
## Introduction
|
||||
|
||||
Most of the published scientific papers are available online, as preprints. For
|
||||
@ -218,6 +242,95 @@ Accept: application/vnd.api+json
|
||||
```
|
||||
|
||||
|
||||
### Get tags
|
||||
|
||||
```
|
||||
GET /tags
|
||||
Accept: application/vnd.api+json
|
||||
```
|
||||
|
||||
Filtering is possible using ``id=ID``, ``name=NAME`` or any combination of
|
||||
these GET parameters. Other parameters are ignored.
|
||||
|
||||
```json
|
||||
{
|
||||
"data": [
|
||||
{
|
||||
"type": "tags",
|
||||
"id": 1,
|
||||
"attributes": {
|
||||
"name": "foobar"
|
||||
},
|
||||
"links": {
|
||||
"self": "/tags/1"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
|
||||
### Get a tag by id
|
||||
|
||||
```
|
||||
GET /tags/1
|
||||
Accept: application/vnd.api+json
|
||||
```
|
||||
|
||||
```json
|
||||
{
|
||||
"data": {
|
||||
"type": "tags",
|
||||
"id": 1,
|
||||
"attributes": {
|
||||
"name": "foobar"
|
||||
},
|
||||
"links": {
|
||||
"self": "/tags/1"
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### Create a tag
|
||||
|
||||
```
|
||||
POST /tags
|
||||
Content-Type: application/vnd.api+json
|
||||
Accept: application/vnd.api+json
|
||||
|
||||
{
|
||||
"data": {
|
||||
"type": "tags", "name": "foobar"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
```json
|
||||
{
|
||||
"data": {
|
||||
"type": "tags",
|
||||
"id": 1,
|
||||
"attributes": {
|
||||
"name": "foobar"
|
||||
},
|
||||
"links": {
|
||||
"self": "/tags/1"
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
|
||||
### Create a relationship between two papers
|
||||
|
||||
```
|
||||
@ -236,6 +349,26 @@ Accept: application/vnd.api+json
|
||||
Response is empty HTTP 204.
|
||||
|
||||
|
||||
### Add a tag to a paper
|
||||
|
||||
```
|
||||
POST /papers/1/relationships/tags
|
||||
Content-Type: application/vnd.api+json
|
||||
Accept: application/vnd.api+json
|
||||
|
||||
{
|
||||
"data": [
|
||||
{ "type": "tags", "id": "2" },
|
||||
...
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
`id` is the id of the tag, which has to be created previously.
|
||||
|
||||
Response is empty HTTP 204.
|
||||
|
||||
|
||||
### Delete a paper and associated relationships
|
||||
|
||||
```
|
||||
@ -264,6 +397,26 @@ Accept: application/vnd.api+json
|
||||
Response is empty HTTP 204.
|
||||
|
||||
|
||||
### Delete a tag from a paper
|
||||
|
||||
```
|
||||
DELETE /papers/1/relationships/tags
|
||||
Content-Type: application/vnd.api+json
|
||||
Accept: application/vnd.api+json
|
||||
|
||||
{
|
||||
"data": [
|
||||
{ "type": "tags", "id": "2" },
|
||||
...
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
`id` is the id of the tag.
|
||||
|
||||
Response is empty HTTP 204.
|
||||
|
||||
|
||||
## Associated library
|
||||
|
||||
`reference_fetcher` is a module you can use to:
|
||||
|
79
database.py
79
database.py
@ -4,7 +4,7 @@ This file contains the database schema in SQLAlchemy format.
|
||||
import sqlite3
|
||||
|
||||
from sqlalchemy import event
|
||||
from sqlalchemy import Column, ForeignKey, Integer, String
|
||||
from sqlalchemy import Column, ForeignKey, Integer, String, Table
|
||||
from sqlalchemy.engine import Engine
|
||||
from sqlalchemy.ext.declarative import declarative_base
|
||||
from sqlalchemy.orm import relationship as sqlalchemy_relationship
|
||||
@ -24,9 +24,9 @@ def set_sqlite_pragma(dbapi_connection, connection_record):
|
||||
cursor.close()
|
||||
|
||||
|
||||
class Association(Base):
|
||||
class RelationshipAssociation(Base):
|
||||
# Relationships are to be read "left RELATION right"
|
||||
__tablename__ = "association"
|
||||
__tablename__ = "relationship_association"
|
||||
id = Column(Integer, primary_key=True)
|
||||
left_id = Column(Integer, ForeignKey("papers.id", ondelete="CASCADE"))
|
||||
right_id = Column(Integer, ForeignKey("papers.id", ondelete="CASCADE"))
|
||||
@ -45,6 +45,12 @@ class Association(Base):
|
||||
back_populates="related_to",
|
||||
passive_deletes=True)
|
||||
|
||||
# Many-to-many link table between papers and tags. Both foreign keys cascade on
# delete, so removing a paper or a tag also removes the rows linking them.
tag_association_table = Table(
    "tag_association",
    Base.metadata,
    Column(
        "paper_id",
        Integer,
        ForeignKey("papers.id", ondelete="CASCADE"),
    ),
    Column(
        "tag_id",
        Integer,
        ForeignKey("tags.id", ondelete="CASCADE"),
    ),
)
|
||||
|
||||
|
||||
class Paper(Base):
|
||||
__tablename__ = "papers"
|
||||
@ -52,15 +58,19 @@ class Paper(Base):
|
||||
doi = Column(String(), nullable=True, unique=True)
|
||||
arxiv_id = Column(String(25), nullable=True, unique=True)
|
||||
# related_to are papers related to this paper (this_paper R …)
|
||||
related_to = sqlalchemy_relationship("Association",
|
||||
foreign_keys="Association.left_id",
|
||||
related_to = sqlalchemy_relationship("RelationshipAssociation",
|
||||
foreign_keys="RelationshipAssociation.left_id",
|
||||
back_populates="left_paper",
|
||||
passive_deletes=True)
|
||||
# related_by are papers referencing this paper (… R this_paper)
|
||||
related_by = sqlalchemy_relationship("Association",
|
||||
foreign_keys="Association.right_id",
|
||||
related_by = sqlalchemy_relationship("RelationshipAssociation",
|
||||
foreign_keys="RelationshipAssociation.right_id",
|
||||
back_populates="right_paper",
|
||||
passive_deletes=True)
|
||||
# Tags relationship
|
||||
tags = sqlalchemy_relationship("Tag",
|
||||
secondary=tag_association_table,
|
||||
backref="papers")
|
||||
|
||||
def __repr__(self):
|
||||
return "<Paper(id='%d', doi='%s', arxiv_id='%s')>" % (
|
||||
@ -69,22 +79,12 @@ class Paper(Base):
|
||||
self.arxiv_id,
|
||||
)
|
||||
|
||||
def json_api_repr(self):
|
||||
def json_api_repr(self, db):
|
||||
"""
|
||||
Dict to dump for the JSON API.
|
||||
"""
|
||||
relationships = [a.relationship.name for a in self.related_to]
|
||||
return {
|
||||
"types": self.__tablename__,
|
||||
"id": self.id,
|
||||
"attributes": {
|
||||
"doi": self.doi,
|
||||
"arxiv_id": self.arxiv_id,
|
||||
},
|
||||
"links": {
|
||||
"self": "/papers/%d" % (self.id,)
|
||||
},
|
||||
"relationships": {
|
||||
relationships = [i.name for i in db.query(Relationship).all()]
|
||||
relationships_dict = {
|
||||
k: {
|
||||
"links": {
|
||||
"related": (
|
||||
@ -95,6 +95,22 @@ class Paper(Base):
|
||||
}
|
||||
for k in relationships
|
||||
}
|
||||
relationships_dict["tags"] = {
|
||||
"links": {
|
||||
"related": "/papers/%d/relationships/tags" % (self.id,)
|
||||
}
|
||||
}
|
||||
return {
|
||||
"types": self.__tablename__,
|
||||
"id": self.id,
|
||||
"attributes": {
|
||||
"doi": self.doi,
|
||||
"arxiv_id": self.arxiv_id,
|
||||
},
|
||||
"links": {
|
||||
"self": "/papers/%d" % (self.id,)
|
||||
},
|
||||
"relationships": relationships_dict
|
||||
}
|
||||
|
||||
|
||||
@ -102,6 +118,27 @@ class Relationship(Base):
|
||||
__tablename__ = "relationships"
|
||||
id = Column(Integer, primary_key=True)
|
||||
name = Column(String(), unique=True)
|
||||
associations = sqlalchemy_relationship("Association",
|
||||
associations = sqlalchemy_relationship("RelationshipAssociation",
|
||||
back_populates="relationship",
|
||||
passive_deletes=True)
|
||||
|
||||
|
||||
class Tag(Base):
    """
    A named tag, stored in the ``tags`` table.
    """
    __tablename__ = "tags"
    id = Column(Integer, primary_key=True)
    # Names are unique: inserting a second tag with the same name violates the
    # constraint at flush time.
    name = Column(String(), unique=True)

    def json_api_repr(self):
        """
        Dict to dump for the JSON API.

        :returns: A JSON API resource object (``type``, ``id``, \
                ``attributes`` and ``links`` members) describing this tag.
        """
        return {
            # JSON API resource objects carry a ``type`` member (singular key)
            "type": self.__tablename__,
            "id": self.id,
            "attributes": {
                "name": self.name,
            },
            "links": {
                "self": "/tags/%d" % (self.id,)
            }
        }
|
||||
|
6
main.py
6
main.py
@ -41,7 +41,7 @@ def index():
|
||||
}))
|
||||
|
||||
app.get("/papers", callback=routes.get.fetch_papers)
|
||||
app.get("/papers/<id:int>", callback=routes.get.fetch_by_id)
|
||||
app.get("/papers/<id:int>", callback=routes.get.fetch_papers_by_id)
|
||||
app.get("/papers/<id:int>/relationships/<name>",
|
||||
callback=routes.get.fetch_relationship)
|
||||
app.get("/papers/<id:int>/<name>",
|
||||
@ -51,8 +51,12 @@ app.route("/papers/<id:int>", method="DELETE",
|
||||
app.route("/papers/<id:int>/relationships/<name>", method="DELETE",
|
||||
callback=routes.delete.delete_relationship)
|
||||
|
||||
app.get("/tags", callback=routes.get.fetch_tags)
|
||||
app.get("/tags/<id:int>", callback=routes.get.fetch_tags_by_id)
|
||||
|
||||
|
||||
app.post("/papers", callback=routes.post.create_paper)
|
||||
app.post("/tags", callback=routes.post.create_tag)
|
||||
|
||||
app.post("/papers/<id:int>/relationships/<name>",
|
||||
callback=routes.post.update_relationships)
|
||||
|
@ -73,7 +73,21 @@ def delete_relationship(id, name, db):
|
||||
return bottle.HTTPError(403, "Forbidden")
|
||||
# Delete all the requested relationships
|
||||
for i in data:
|
||||
relationship = (db.query(database.Association)
|
||||
if i["type"] == "tags":
|
||||
# Handle tags separately
|
||||
tag = db.query(database.Tag).filter_by(id=i["id"]).first()
|
||||
paper = db.query(database.Paper).filter_by(id=id).first()
|
||||
if paper is None or tag is None:
|
||||
# An error occurred => 403
|
||||
return bottle.HTTPError(403, "Forbidden")
|
||||
try:
|
||||
paper.tags.remove(tag)
|
||||
except ValueError:
|
||||
# An error occurred => 403
|
||||
return bottle.HTTPError(403, "Forbidden")
|
||||
db.flush()
|
||||
else:
|
||||
relationship = (db.query(database.RelationshipAssociation)
|
||||
.filter_by(left_id=id, right_id=i["id"])
|
||||
.filter(database.Relationship.name == name)
|
||||
.first())
|
||||
|
107
routes/get.py
107
routes/get.py
@ -56,14 +56,14 @@ def fetch_papers(db):
|
||||
resources = db.query(database.Paper).filter_by(**filters).all()
|
||||
if resources:
|
||||
return tools.APIResponse(tools.pretty_json({
|
||||
"data": [resource.json_api_repr() for resource in resources]
|
||||
"data": [resource.json_api_repr(db) for resource in resources]
|
||||
}))
|
||||
return bottle.HTTPError(404, "Not found")
|
||||
|
||||
|
||||
def fetch_by_id(id, db):
|
||||
def fetch_papers_by_id(id, db):
|
||||
"""
|
||||
Fetch a resource identified by its internal id.
|
||||
Fetch a paper identified by its internal id.
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
@ -102,7 +102,7 @@ def fetch_by_id(id, db):
|
||||
resource = db.query(database.Paper).filter_by(id=id).first()
|
||||
if resource:
|
||||
return tools.APIResponse(tools.pretty_json({
|
||||
"data": resource.json_api_repr()
|
||||
"data": resource.json_api_repr(db)
|
||||
}))
|
||||
return bottle.HTTPError(404, "Not found")
|
||||
|
||||
@ -152,6 +152,14 @@ def fetch_relationship(id, name, db):
|
||||
"data": [
|
||||
]
|
||||
}
|
||||
# Tags are handled differently
|
||||
if name == "tags":
|
||||
for t in resource.tags:
|
||||
response["data"].append({
|
||||
"type": name,
|
||||
"id": t.id
|
||||
})
|
||||
else:
|
||||
if reversed:
|
||||
relationships = resource.related_by
|
||||
else:
|
||||
@ -161,3 +169,94 @@ def fetch_relationship(id, name, db):
|
||||
response["data"].append({"type": name, "id": r.right_id})
|
||||
return tools.APIResponse(tools.pretty_json(response))
|
||||
return bottle.HTTPError(404, "Not found")
|
||||
|
||||
|
||||
def fetch_tags(db):
    """
    Fetch all matching tags.

    .. code-block:: bash

        GET /tags
        Accept: application/vnd.api+json


    Filtering is possible using ``id=ID``, ``name=NAME`` or any combination of
    these GET parameters. Other parameters are ignored.


    .. code-block:: json

        {
            "data": [
                {
                    "type": "tags",
                    "id": 1,
                    "attributes": {
                        "name": "foobar"
                    },
                    "links": {
                        "self": "/tags/1"
                    }
                }
            ]
        }

    :param db: A database session, injected by the ``Bottle`` plugin.
    :returns: An ``HTTPResponse`` listing the matching tags, or an HTTP 404 \
            error when no tag matches.
    """
    # Only whitelisted request parameters are turned into column filters
    allowed = ("id", "name")
    params = bottle.request.params
    filters = {k: params[k] for k in params if k in allowed}
    # The model class is ``Tag`` (singular); only the table is named "tags"
    resources = db.query(database.Tag).filter_by(**filters).all()
    if resources:
        return tools.APIResponse(tools.pretty_json({
            "data": [resource.json_api_repr() for resource in resources]
        }))
    return bottle.HTTPError(404, "Not found")
|
||||
|
||||
|
||||
def fetch_tags_by_id(id, db):
    """
    Fetch a tag identified by its internal id.

    .. code-block:: bash

        GET /tags/1
        Accept: application/vnd.api+json


    .. code-block:: json

        {
            "data": {
                "type": "tags",
                "id": 1,
                "attributes": {
                    "name": "foobar"
                },
                "links": {
                    "self": "/tags/1"
                }
            }
        }

    :param id: The id of the requested tag.
    :param db: A database session, injected by the ``Bottle`` plugin.
    :returns: An ``HTTPResponse`` describing the tag, or an HTTP 404 error \
            when no tag has this id.
    """
    # The model class is ``Tag`` (singular); only the table is named "tags"
    resource = db.query(database.Tag).filter_by(id=id).first()
    if resource:
        return tools.APIResponse(tools.pretty_json({
            "data": resource.json_api_repr()
        }))
    return bottle.HTTPError(404, "Not found")
|
||||
|
@ -12,7 +12,7 @@ from reference_fetcher import arxiv
|
||||
|
||||
def create_paper(db):
|
||||
"""
|
||||
Create a new resource identified by its DOI or arXiv eprint id.
|
||||
Create a new paper identified by its DOI or arXiv eprint id.
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
@ -76,7 +76,7 @@ def create_paper(db):
|
||||
|
||||
# Return the resource
|
||||
response = {
|
||||
"data": paper.json_api_repr()
|
||||
"data": paper.json_api_repr(db)
|
||||
}
|
||||
# Import "cite" relation
|
||||
add_cite_relationship(paper, db)
|
||||
@ -153,7 +153,6 @@ def add_cite_relationship(paper, db):
|
||||
:param db: A database session
|
||||
:returns: Nothing.
|
||||
"""
|
||||
# TODO: Known bug: too many levels of recursion!
|
||||
# If paper is on arXiv
|
||||
if paper.arxiv_id is not None:
|
||||
# Get the cited DOIs
|
||||
@ -169,7 +168,8 @@ def add_cite_relationship(paper, db):
|
||||
# If paper does not exist in db, add it
|
||||
right_paper = create_by_doi(doi, db)
|
||||
# Update cite relationship for this paper, recursively
|
||||
add_cite_relationship(right_paper, db)
|
||||
# TODO: Known bug: too many levels of recursion!
|
||||
# add_cite_relationship(right_paper, db)
|
||||
# Update the relationships
|
||||
update_relationship_backend(paper.id, right_paper.id, "cite", db)
|
||||
# If paper is not on arXiv, nothing to do
|
||||
@ -216,6 +216,17 @@ def update_relationships(id, name, db):
|
||||
return bottle.HTTPError(403, "Forbidden")
|
||||
# Update all the relationships
|
||||
for i in data:
|
||||
if i["type"] == "tags":
|
||||
# Handle tags separately
|
||||
tag = db.query(database.Tag).filter_by(id=i["id"]).first()
|
||||
paper = db.query(database.Paper).filter_by(id=id).first()
|
||||
if paper is None or tag is None:
|
||||
# An error occurred => 403
|
||||
return bottle.HTTPError(403, "Forbidden")
|
||||
paper.tags.append(tag)
|
||||
db.add(paper)
|
||||
db.flush()
|
||||
else:
|
||||
updated = update_relationship_backend(id, i["id"], name, db)
|
||||
if updated is None:
|
||||
# An error occurred => 403
|
||||
@ -246,7 +257,7 @@ def update_relationship_backend(left_id, right_id, name, db):
|
||||
db.add(relationship)
|
||||
db.flush()
|
||||
# Update the relationship
|
||||
a = database.Association(relationship_id=relationship.id)
|
||||
a = database.RelationshipAssociation(relationship_id=relationship.id)
|
||||
a.right_paper = right_paper
|
||||
left_paper.related_to.append(a)
|
||||
try:
|
||||
@ -257,3 +268,70 @@ def update_relationship_backend(left_id, right_id, name, db):
|
||||
db.rollback()
|
||||
return None
|
||||
return left_paper
|
||||
|
||||
|
||||
def create_tag(db):
    """
    Create a new tag.

    .. code-block:: bash

        POST /tags
        Content-Type: application/vnd.api+json
        Accept: application/vnd.api+json

        {
            "data": {
                "type": "tags",
                "name": "foobar"
            }
        }


    .. code-block:: json

        {
            "data": {
                "type": "tags",
                "id": 1,
                "attributes": {
                    "name": "foobar"
                },
                "links": {
                    "self": "/tags/1"
                }
            }
        }

    :param db: A database session, injected by the ``Bottle`` plugin.
    :returns: An ``HTTPResponse``: 200 with the created tag and a \
            ``Location`` header on success, 403 if the request body is \
            malformed, 409 if a tag with this name already exists.
    """
    try:
        data = json.loads(bottle.request.body.read().decode("utf-8"))
    except ValueError:
        # Body is not valid UTF-8 / JSON => reject it like any other
        # malformed request instead of crashing
        return bottle.HTTPError(403, "Forbidden")

    # Validate the request: "data" must be an object of type "tags" carrying
    # a "name" member
    payload = data.get("data") if isinstance(data, dict) else None
    if (not isinstance(payload, dict) or
            payload.get("type") != "tags" or
            "name" not in payload):
        return bottle.HTTPError(403, "Forbidden")

    tag = database.Tag(name=payload["name"])

    # Add it to the database
    try:
        db.add(tag)
        # Flush now so that the unique constraint is checked and the id of
        # the tag is available for the response
        db.flush()
    except IntegrityError:
        # Unique constraint violation, tag already exists
        db.rollback()
        return bottle.HTTPError(409, "Conflict")

    # Return the resource
    response = {
        "data": tag.json_api_repr()
    }
    # Return 200 with the correct body
    headers = {"Location": "/tags/%d" % (tag.id,)}
    return tools.APIResponse(status=200,
                             body=tools.pretty_json(response),
                             headers=headers)
|
||||
|
Loading…
Reference in New Issue
Block a user