From 4a67e974b82462398e008bdf63dd98cbfd3f2f95 Mon Sep 17 00:00:00 2001
From: "Phyks (Lucas Verney)" <phyks@phyks.me>
Date: Sat, 26 Dec 2015 01:25:38 +0100
Subject: [PATCH] Add tags support Also fix misc bugs

* Update README.md doc.
* Fix relationships not shown in JSON API for a given paper if relationship was
not referenced.
* Handle tags
---
 README.md        | 153 +++++++++++++++++++++++++++++++++++++++++++++++
 database.py      |  79 +++++++++++++++++-------
 main.py          |   6 +-
 routes/delete.py |  30 +++++++---
 routes/get.py    | 119 ++++++++++++++++++++++++++++++++----
 routes/post.py   |  96 ++++++++++++++++++++++++++---
 6 files changed, 434 insertions(+), 49 deletions(-)

diff --git a/README.md b/README.md
index f5dce56..baea074 100644
--- a/README.md
+++ b/README.md
@@ -3,6 +3,30 @@ Metadata for arXiv
 
 The goal of this repository is to provide a minimal API to put metadata on arXiv papers.
 
+Disclaimer: This code is neither scalable nor ready to run in production. In
+particular, it might be error-prone, and it does not try to be resilient or to
+keep track of errors. It is here as a proof of concept and to back [this
+article](TODO) with some code. However, the `reference_fetcher` part works
+quite well, and was able to extract most of the references from the arXiv
+papers I tested it on. Note that running it on a paper takes quite long, mainly
+due to the latency of the [Crossref API](http://search.crossref.org/).
+
+
+## Special thanks
+
+Under the hood, this code uses the wonderful [Crossref
+API](http://search.crossref.org/) for reference parsing to DOI, which works
+really well and with a very large index.
+
+It also uses the [Dissemin API](http://beta.dissem.in/) in the
+`reference_fetcher` to try to find Open access versions of referenced papers.
+
+It works using the Open access [arXiv.org](http://arxiv.org) repository,
+without which it would be really difficult to achieve a similar thing, due to
+paywalls and lack of sources. It also uses their
+[API](http://arxiv.org/help/api) to fetch DOIs from arXiv ids and conversely.
+
+
 ## Introduction
 
 Most of the published scientific papers are availabe online, as preprints. For
@@ -218,6 +242,95 @@ Accept: application/vnd.api+json
 ```
 
 
+### Get tags
+
+```
+GET /tags
+Accept: application/vnd.api+json
+```
+
+Filtering is possible using ``id=ID``, ``name=NAME`` or any combination of
+these GET parameters. Other parameters are ignored.
+
+```json
+{
+    "data": [
+        {
+            "type": "tags",
+            "id": 1,
+            "attributes": {
+                "name": "foobar"
+            },
+            "links": {
+                "self": "/tags/1"
+            }
+        }
+    ]
+}
+```
+
+
+### Get a tag by id
+
+```
+GET /tags/1
+Accept: application/vnd.api+json
+```
+
+```json
+{
+    "data": {
+        "type": "tags",
+        "id": 1,
+        "attributes": {
+            "name": "foobar"
+        },
+        "links": {
+            "self": "/tags/1"
+        }
+    }
+}
+```
+
+If no tag has the requested id, the response is an HTTP 404.
+
+To list the tags attached to a given paper, use the tags relationship of that
+paper instead:
+
+```
+GET /papers/1/relationships/tags
+```
+
+### Create a tag
+
+```
+POST /tags
+Content-Type: application/vnd.api+json
+Accept: application/vnd.api+json
+
+{
+    "data": {
+        "type": "tags",
+        "name": "foobar"
+    }
+}
+```
+
+```json
+{
+    "data": {
+        "type": "tags",
+        "id": 1,
+        "attributes": {
+            "name": "foobar"
+        },
+        "links": {
+            "self": "/tags/1"
+        }
+    }
+}
+```
+
 ### Create a relationship between two papers
 
 ```
@@ -236,6 +349,26 @@ Accept: application/vnd.api+json
 Response is empty HTTP 204.
 
 
+### Add a tag to a paper
+
+```
+POST /papers/1/relationships/tags
+Content-Type: application/vnd.api+json
+Accept: application/vnd.api+json
+
+{
+    "data": [
+        { "type": "tags", "id": "2" },
+        ...
+    ]
+}
+```
+
+`id` is the id of the tag, which has to be created previously.
+
+Response is empty HTTP 204.
+
+
 ### Delete a paper and associated relationships
 
 ```
@@ -264,6 +397,26 @@ Accept: application/vnd.api+json
 Response is empty HTTP 204.
 
 
+### Delete a tag from a paper
+
+```
+DELETE /papers/1/relationships/tags
+Content-Type: application/vnd.api+json
+Accept: application/vnd.api+json
+
+{
+    "data": [
+        { "type": "tags", "id": "2" },
+        ...
+    ]
+}
+```
+
+`id` is the id of the tag.
+
+Response is empty HTTP 204.
+
+
 ## Associated library
 
 `reference_fetcher` is a module you can use to:
diff --git a/database.py b/database.py
index 96bb217..8078c6c 100644
--- a/database.py
+++ b/database.py
@@ -4,7 +4,7 @@ This file contains the database schema in SQLAlchemy format.
 import sqlite3
 
 from sqlalchemy import event
-from sqlalchemy import Column, ForeignKey, Integer, String
+from sqlalchemy import Column, ForeignKey, Integer, String, Table
 from sqlalchemy.engine import Engine
 from sqlalchemy.ext.declarative import declarative_base
 from sqlalchemy.orm import relationship as sqlalchemy_relationship
@@ -24,9 +24,9 @@ def set_sqlite_pragma(dbapi_connection, connection_record):
         cursor.close()
 
 
-class Association(Base):
+class RelationshipAssociation(Base):
     # Relationships are to be read "left RELATION right"
-    __tablename__ = "association"
+    __tablename__ = "relationship_association"
     id = Column(Integer, primary_key=True)
     left_id = Column(Integer, ForeignKey("papers.id", ondelete="CASCADE"))
     right_id = Column(Integer, ForeignKey("papers.id", ondelete="CASCADE"))
@@ -45,6 +45,12 @@ class Association(Base):
                                          back_populates="related_to",
                                          passive_deletes=True)
 
+tag_association_table = Table(
+    'tag_association', Base.metadata,  # link table behind Paper.tags
+    Column('paper_id', Integer, ForeignKey('papers.id', ondelete="CASCADE")),
+    Column('tag_id', Integer, ForeignKey('tags.id', ondelete="CASCADE"))
+)
+
 
 class Paper(Base):
     __tablename__ = "papers"
@@ -52,15 +58,19 @@ class Paper(Base):
     doi = Column(String(), nullable=True, unique=True)
     arxiv_id = Column(String(25), nullable=True, unique=True)
     # related_to are papers related to this paper (this_paper R …)
-    related_to = sqlalchemy_relationship("Association",
-                                         foreign_keys="Association.left_id",
+    related_to = sqlalchemy_relationship("RelationshipAssociation",
+                                         foreign_keys="RelationshipAssociation.left_id",
                                          back_populates="left_paper",
                                          passive_deletes=True)
     # related_by are papers referenced by this paper (… R this_paper)
-    related_by = sqlalchemy_relationship("Association",
-                                         foreign_keys="Association.right_id",
+    related_by = sqlalchemy_relationship("RelationshipAssociation",
+                                         foreign_keys="RelationshipAssociation.right_id",
                                          back_populates="right_paper",
                                          passive_deletes=True)
+    # Tags relationship
+    tags = sqlalchemy_relationship("Tag",
+                                   secondary=tag_association_table,
+                                   backref="papers")
 
     def __repr__(self):
         return "<Paper(id='%d', doi='%s', arxiv_id='%s')>" % (
@@ -69,11 +79,27 @@ class Paper(Base):
             self.arxiv_id,
         )
 
-    def json_api_repr(self):
+    def json_api_repr(self, db):
         """
         Dict to dump for the JSON API.
         """
-        relationships = [a.relationship.name for a in self.related_to]
+        relationships = [i.name for i in db.query(Relationship).all()]
+        relationships_dict = {
+            k: {
+                "links": {
+                    "related": (
+                        "/papers/%d/relationships/%s?reverse={reverse}" %
+                        (self.id, k)
+                    )
+                }
+            }
+            for k in relationships
+        }
+        relationships_dict["tags"] = {
+            "links": {
+                "related": "/papers/%d/relationships/tags" % (self.id,)
+            }
+        }
         return {
             "types": self.__tablename__,
             "id": self.id,
@@ -84,17 +110,7 @@ class Paper(Base):
             "links": {
                 "self": "/papers/%d" % (self.id,)
             },
-            "relationships": {
-                k: {
-                    "links": {
-                        "related": (
-                            "/papers/%d/relationships/%s?reverse={reverse}" %
-                            (self.id, k)
-                        )
-                    }
-                }
-                for k in relationships
-            }
+            "relationships": relationships_dict
         }
 
 
@@ -102,6 +118,27 @@ class Relationship(Base):
     __tablename__ = "relationships"
     id = Column(Integer, primary_key=True)
     name = Column(String(), unique=True)
-    associations = sqlalchemy_relationship("Association",
+    associations = sqlalchemy_relationship("RelationshipAssociation",
                                            back_populates="relationship",
                                            passive_deletes=True)
+
+
+class Tag(Base):
+    __tablename__ = "tags"
+    id = Column(Integer, primary_key=True)
+    name = Column(String(), unique=True)  # a given tag name can exist once
+
+    def json_api_repr(self):
+        """
+        Dict to dump for the JSON API (a resource object of type ``tags``).
+        """
+        return {
+            "type": self.__tablename__,  # JSON API member is "type"
+            "id": self.id,
+            "attributes": {
+                "name": self.name,
+            },
+            "links": {
+                "self": "/tags/%d" % (self.id,)
+            }
+        }
diff --git a/main.py b/main.py
index b568b23..7a8023c 100755
--- a/main.py
+++ b/main.py
@@ -41,7 +41,7 @@ def index():
     }))
 
 app.get("/papers", callback=routes.get.fetch_papers)
-app.get("/papers/<id:int>", callback=routes.get.fetch_by_id)
+app.get("/papers/<id:int>", callback=routes.get.fetch_papers_by_id)
 app.get("/papers/<id:int>/relationships/<name>",
         callback=routes.get.fetch_relationship)
 app.get("/papers/<id:int>/<name>",
@@ -51,8 +51,12 @@ app.route("/papers/<id:int>", method="DELETE",
 app.route("/papers/<id:int>/relationships/<name>", method="DELETE",
           callback=routes.delete.delete_relationship)
 
+app.get("/tags", callback=routes.get.fetch_tags)
+app.get("/tags/<id:int>", callback=routes.get.fetch_tags_by_id)
+
 
 app.post("/papers", callback=routes.post.create_paper)
+app.post("/tags", callback=routes.post.create_tag)
 
 app.post("/papers/<id:int>/relationships/<name>",
          callback=routes.post.update_relationships)
diff --git a/routes/delete.py b/routes/delete.py
index cb79a3c..d9b2c8e 100644
--- a/routes/delete.py
+++ b/routes/delete.py
@@ -73,13 +73,27 @@ def delete_relationship(id, name, db):
         return bottle.HTTPError(403, "Forbidden")
     # Delete all the requested relationships
     for i in data:
-        relationship = (db.query(database.Association)
-                        .filter_by(left_id=id, right_id=i["id"])
-                        .filter(database.Relationship.name == name)
-                        .first())
-        if relationship is None:
-            # An error occurred => 403
-            return bottle.HTTPError(403, "Forbidden")
-        db.delete(relationship)
+        if i["type"] == "tags":
+            # Handle tags separately
+            tag = db.query(database.Tag).filter_by(id=i["id"]).first()
+            paper = db.query(database.Paper).filter_by(id=id).first()
+            if paper is None or tag is None:
+                # An error occurred => 403
+                return bottle.HTTPError(403, "Forbidden")
+            try:
+                paper.tags.remove(tag)
+            except ValueError:
+                # An error occurred => 403
+                return bottle.HTTPError(403, "Forbidden")
+            db.flush()
+        else:
+            relationship = (db.query(database.RelationshipAssociation)
+                            .filter_by(left_id=id, right_id=i["id"])
+                            .filter(database.Relationship.name == name)
+                            .first())
+            if relationship is None:
+                # An error occurred => 403
+                return bottle.HTTPError(403, "Forbidden")
+            db.delete(relationship)
     # Return an empty 204 on success
     return tools.APIResponse(status=204, body="")
diff --git a/routes/get.py b/routes/get.py
index 7a38ef6..1977f6c 100644
--- a/routes/get.py
+++ b/routes/get.py
@@ -56,14 +56,14 @@ def fetch_papers(db):
     resources = db.query(database.Paper).filter_by(**filters).all()
     if resources:
         return tools.APIResponse(tools.pretty_json({
-            "data": [resource.json_api_repr() for resource in resources]
+            "data": [resource.json_api_repr(db) for resource in resources]
         }))
     return bottle.HTTPError(404, "Not found")
 
 
-def fetch_by_id(id, db):
+def fetch_papers_by_id(id, db):
     """
-    Fetch a resource identified by its internal id.
+    Fetch a paper identified by its internal id.
 
     .. code-block:: bash
 
@@ -102,7 +102,7 @@ def fetch_by_id(id, db):
     resource = db.query(database.Paper).filter_by(id=id).first()
     if resource:
         return tools.APIResponse(tools.pretty_json({
-            "data": resource.json_api_repr()
+            "data": resource.json_api_repr(db)
         }))
     return bottle.HTTPError(404, "Not found")
 
@@ -152,12 +152,111 @@ def fetch_relationship(id, name, db):
             "data": [
             ]
         }
-        if reversed:
-            relationships = resource.related_by
+        # Tags are handled differently
+        if name == "tags":
+            for t in resource.tags:
+                response["data"].append({
+                    "type": name,
+                    "id": t.id
+                })
         else:
-            relationships = resource.related_to
-        for r in relationships:
-            if r.relationship.name == name:
-                response["data"].append({"type": name, "id": r.right_id})
+            if reversed:
+                relationships = resource.related_by
+            else:
+                relationships = resource.related_to
+            for r in relationships:
+                if r.relationship.name == name:
+                    response["data"].append({"type": name, "id": r.right_id})
         return tools.APIResponse(tools.pretty_json(response))
     return bottle.HTTPError(404, "Not found")
+
+
+def fetch_tags(db):
+    """
+    Fetch all matching tags.
+
+    .. code-block:: bash
+
+        GET /tags
+        Accept: application/vnd.api+json
+
+
+    Filtering is possible using ``id=ID``, ``name=NAME`` or any combination of
+    these GET parameters. Other parameters are ignored. If no tag matches, an
+    HTTP 404 error is returned.
+
+    .. code-block:: json
+
+        {
+            "data": [
+                {
+                    "type": "tags",
+                    "id": 1,
+                    "attributes": {
+                        "name": "foobar"
+                    },
+                    "links": {
+                        "self": "/tags/1"
+                    }
+                }
+            ]
+        }
+
+    :param db: A database session, injected by the ``Bottle`` plugin.
+    :returns: An ``HTTPResponse``.
+    """
+    filters = {k: bottle.request.params[k]
+               for k in bottle.request.params
+               if k in ["id", "name"]}
+    resources = db.query(database.Tag).filter_by(**filters).all()
+    if resources:
+        return tools.APIResponse(tools.pretty_json({
+            "data": [resource.json_api_repr() for resource in resources]
+        }))
+    return bottle.HTTPError(404, "Not found")
+
+
+def fetch_tags_by_id(id, db):
+    """
+    Fetch a tag identified by its internal id.
+
+    .. code-block:: bash
+
+        GET /tags/1
+        Accept: application/vnd.api+json
+
+
+    .. code-block:: json
+
+        {
+            "data": {
+                "type": "tags",
+                "id": 1,
+                "attributes": {
+                    "name": "foobar"
+                },
+                "links": {
+                    "self": "/tags/1"
+                }
+            }
+        }
+
+    The ``data`` member holds the single matching tag, serialized with
+    ``Tag.json_api_repr``. Unlike ``GET /tags``, it is an object and not a
+    list.
+
+    If no tag has the requested id, an HTTP 404 error is returned.
+
+    To list the tags attached to a paper, see ``fetch_relationship``
+    (``GET /papers/<id>/relationships/tags``).
+
+    :param id: The id of the requested tag.
+    :param db: A database session, injected by the ``Bottle`` plugin.
+    :returns: An ``HTTPResponse``.
+    """
+    resource = db.query(database.Tag).filter_by(id=id).first()
+    if resource:
+        return tools.APIResponse(tools.pretty_json({
+            "data": resource.json_api_repr()
+        }))
+    return bottle.HTTPError(404, "Not found")
diff --git a/routes/post.py b/routes/post.py
index be4ad3e..96f4988 100644
--- a/routes/post.py
+++ b/routes/post.py
@@ -12,7 +12,7 @@ from reference_fetcher import arxiv
 
 def create_paper(db):
     """
-    Create a new resource identified by its DOI or arXiv eprint id.
+    Create a new paper identified by its DOI or arXiv eprint id.
 
     .. code-block:: bash
 
@@ -76,7 +76,7 @@ def create_paper(db):
 
     # Return the resource
     response = {
-        "data": paper.json_api_repr()
+        "data": paper.json_api_repr(db)
     }
     # Import "cite" relation
     add_cite_relationship(paper, db)
@@ -153,7 +153,6 @@ def add_cite_relationship(paper, db):
     :param db: A database session
     :returns: Nothing.
     """
-    # TODO: Known bug: too many levels of recursion!
     # If paper is on arXiv
     if paper.arxiv_id is not None:
         # Get the cited DOIs
@@ -169,7 +168,8 @@ def add_cite_relationship(paper, db):
                 # If paper does not exist in db, add it
                 right_paper = create_by_doi(doi, db)
                 # Update cite relationship for this paper, recursively
-                add_cite_relationship(right_paper, db)
+                # TODO: Known bug: too many levels of recursion!
+                # add_cite_relationship(right_paper, db)
             # Update the relationships
             update_relationship_backend(paper.id, right_paper.id, "cite", db)
     # If paper is not on arXiv, nothing to do
@@ -216,10 +216,21 @@ def update_relationships(id, name, db):
         return bottle.HTTPError(403, "Forbidden")
     # Update all the relationships
     for i in data:
-        updated = update_relationship_backend(id, i["id"], name, db)
-        if updated is None:
-            # An error occurred => 403
-            return bottle.HTTPError(403, "Forbidden")
+        if i["type"] == "tags":
+            # Handle tags separately
+            tag = db.query(database.Tag).filter_by(id=i["id"]).first()
+            paper = db.query(database.Paper).filter_by(id=id).first()
+            if paper is None or tag is None:
+                # An error occurred => 403
+                return bottle.HTTPError(403, "Forbidden")
+            paper.tags.append(tag)
+            db.add(paper)
+            db.flush()
+        else:
+            updated = update_relationship_backend(id, i["id"], name, db)
+            if updated is None:
+                # An error occurred => 403
+                return bottle.HTTPError(403, "Forbidden")
     # Return an empty 204 on success
     return tools.APIResponse(status=204, body="")
 
@@ -246,7 +257,7 @@ def update_relationship_backend(left_id, right_id, name, db):
         db.add(relationship)
         db.flush()
     # Update the relationship
-    a = database.Association(relationship_id=relationship.id)
+    a = database.RelationshipAssociation(relationship_id=relationship.id)
     a.right_paper = right_paper
     left_paper.related_to.append(a)
     try:
@@ -257,3 +268,70 @@ def update_relationship_backend(left_id, right_id, name, db):
         db.rollback()
         return None
     return left_paper
+
+
+def create_tag(db):
+    """
+    Create a new tag.
+
+    .. code-block:: bash
+
+        POST /tags
+        Content-Type: application/vnd.api+json
+        Accept: application/vnd.api+json
+
+        {
+            "data": {
+                "type": "tags",
+                "name": "foobar"
+            }
+        }
+
+    .. code-block:: json
+
+        {
+            "data": {
+                "type": "tags",
+                "id": 1,
+                "attributes": {
+                    "name": "foobar"
+                },
+                "links": {
+                    "self": "/tags/1"
+                }
+            }
+        }
+
+    :param db: A database session, injected by the ``Bottle`` plugin.
+    :returns: An ``HTTPResponse`` (403 on invalid body, 409 on duplicate name).
+    """
+    data = json.loads(bottle.request.body.read().decode("utf-8"))
+    # Validate the request: "type" must be "tags" and "name" must be present
+    if("data" not in data or
+       "type" not in data["data"] or
+       data["data"]["type"] != "tags" or
+       "name" not in data["data"]):
+        return bottle.HTTPError(403, "Forbidden")
+
+    data = data["data"]
+
+    tag = database.Tag(name=data["name"])
+
+    # Add it to the database
+    try:
+        db.add(tag)
+        db.flush()
+    except IntegrityError:
+        # Unique constraint violation, tag already exists
+        db.rollback()
+        return bottle.HTTPError(409, "Conflict")
+
+    # Return the resource
+    response = {
+        "data": tag.json_api_repr()
+    }
+    # Return 200 with the resource as body and its URL in the Location header
+    headers = {"Location": "/tags/%d" % (tag.id,)}
+    return tools.APIResponse(status=200,
+                             body=tools.pretty_json(response),
+                             headers=headers)