diff --git a/.gitignore b/.gitignore index bee8a64..f78e190 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,2 @@ __pycache__ +docs/ diff --git a/database.py b/database.py index 9b79f17..93b55d0 100644 --- a/database.py +++ b/database.py @@ -1,12 +1,23 @@ """ This file contains the database schema in SQLAlchemy format. """ -from sqlalchemy import Column, Integer, String +from sqlalchemy import event, Column, Integer, String +from sqlalchemy.engine import Engine from sqlalchemy.ext.declarative import declarative_base Base = declarative_base() +@event.listens_for(Engine, "connect") +def set_sqlite_pragma(dbapi_connection, connection_record): + """ + Auto enable foreign keys for SQLite. + """ + cursor = dbapi_connection.cursor() + cursor.execute("PRAGMA foreign_keys=ON") + cursor.close() + + class Paper(Base): __tablename__ = 'papers' id = Column(Integer, primary_key=True) diff --git a/main.py b/main.py index f861e10..d741517 100755 --- a/main.py +++ b/main.py @@ -1,8 +1,7 @@ #!/usr/bin/env python3 import bottle from bottle.ext import sqlalchemy -from sqlalchemy import create_engine, event -from sqlalchemy.engine import Engine +from sqlalchemy import create_engine import database import routes @@ -32,14 +31,6 @@ plugin = sqlalchemy.Plugin( app.install(plugin) -# Auto enable foreign keys for SQLite -@event.listens_for(Engine, "connect") -def set_sqlite_pragma(dbapi_connection, connection_record): - cursor = dbapi_connection.cursor() - cursor.execute("PRAGMA foreign_keys=ON") - cursor.close() - - # Routes app.get("/papers", callback=routes.get.fetch_papers) app.get("/papers/", callback=routes.get.fetch_by_id) diff --git a/reference_fetcher/arxiv.py b/reference_fetcher/arxiv.py index 1293a8c..e975da1 100644 --- a/reference_fetcher/arxiv.py +++ b/reference_fetcher/arxiv.py @@ -13,10 +13,8 @@ def sources_from_arxiv(eprint): """ Download sources on arXiv for a given preprint. - Params: - - eprint is the arXiv id (e.g. 1401.2910 or 1401.2910v1). 
- - Returns a TarFile object of the sources of the arXiv preprint. + :param eprint: The arXiv id (e.g. ``1401.2910`` or ``1401.2910v1``). + :returns: A ``TarFile`` object of the sources of the arXiv preprint. """ r = requests.get("http://arxiv.org/e-print/%s" % (eprint,)) file_object = io.BytesIO(r.content) @@ -27,10 +25,8 @@ def bbl_from_arxiv(eprint): """ Get the .bbl files (if any) of a given preprint. - Params: - - eprint is the arXiv id (e.g. 1401.2910 or 1401.2910v1). - - Returns a list of the .bbl files as text (if any) or None. + :param eprint: The arXiv id (e.g. ``1401.2910`` or ``1401.2910v1``). + :returns: A list of the ``.bbl`` files as text (if any) or ``None``. """ tf = sources_from_arxiv(eprint) bbl_files = [i for i in tf.getmembers() if i.name.endswith(".bbl")] @@ -43,10 +39,8 @@ def get_cited_dois(eprint): """ Get the .bbl files (if any) of a given preprint. - Params: - - eprint is the arXiv id (e.g. 1401.2910 or 1401.2910v1). - - Returns a dict of cleaned plaintext citations and their associated doi. + :param eprint: The arXiv id (e.g. ``1401.2910`` or ``1401.2910v1``). + :returns: A dict of cleaned plaintext citations and their associated doi. """ bbl_files = bbl_from_arxiv(eprint) dois = {} @@ -59,10 +53,8 @@ def get_arxiv_eprint_from_doi(doi): """ Get the arXiv eprint id for a given DOI. - Params: - - doi is the DOI of the resource to look for. - - Returns the arXiv eprint id, or None if not found. + :param doi: The DOI of the resource to look for. + :returns: The arXiv eprint id, or ``None`` if not found. """ r = requests.get("http://export.arxiv.org/api/query", params={ @@ -80,10 +72,8 @@ def get_doi(eprint): """ Get the associated DOI for a given arXiv eprint. - Params: - - eprint is the arXiv eprint id. - - Returns the DOI if any, or None. + :param eprint: The arXiv eprint id. + :returns: The DOI if any, or ``None``. 
""" r = requests.get("http://export.arxiv.org/api/query", params={ diff --git a/reference_fetcher/bbl.py b/reference_fetcher/bbl.py index fa81022..add1da8 100644 --- a/reference_fetcher/bbl.py +++ b/reference_fetcher/bbl.py @@ -13,12 +13,10 @@ from . import tools def clean_bibitem(bibitem): """ - Return a plaintext representation of the bibitem from the bbl file. + Return a plaintext representation of the bibitem from the ``.bbl`` file. - Params: - - bibitem is the text content of the bibitem. - - Returns a cleaned plaintext citation from the bibitem. + :param bibitem: The text content of the bibitem. + :returns: A cleaned plaintext citation from the bibitem. """ script_dir = os.path.dirname(os.path.abspath(__file__)) output = subprocess.check_output(["%s/opendetex/delatex" % (script_dir,), @@ -31,12 +29,11 @@ def clean_bibitem(bibitem): def parse(bbl): """ - Parse a *.bbl file to get a clean list of plaintext citations. + Parse a ``*.bbl`` file to get a clean list of plaintext citations. - Params: - - bbl is either the path to the .bbl file or the content of a bbl file. - - Returns a list of cleaned plaintext citations. + :param bbl: Either the path to the .bbl file or the content of a ``.bbl`` \ + file. + :returns: A list of cleaned plaintext citations. """ # Handle path or content if os.path.isfile(bbl): @@ -59,11 +56,10 @@ def get_dois(bbl_input): """ Get the papers cited by the paper identified by the given DOI. - Params: - - bbl_input is either the path to the .bbl file or the content of a bbl - file. + :param bbl_input: Either the path to the .bbl file or the content of a \ + bbl file. - Returns a dict of cleaned plaintext citations and their associated doi. + :returns: A dict of cleaned plaintext citations and their associated doi. 
""" cleaned_citations_with_URLs = parse(bbl_input) dois = {} diff --git a/reference_fetcher/doi.py b/reference_fetcher/doi.py index 415118e..5f8a33b 100644 --- a/reference_fetcher/doi.py +++ b/reference_fetcher/doi.py @@ -10,6 +10,9 @@ from . import tools def extract_doi_links(urls): """ Try to find a DOI from a given list of URLs. + + :param urls: A list of URLs. + :returns: First matching DOI URL, or ``None``. """ doi_urls = [url for url in urls if "/doi/" in url] if len(doi_urls) > 0: @@ -22,6 +25,9 @@ def extract_doi_links(urls): def extract_arxiv_links(urls): """ Try to find an arXiv link from a given list of URLs. + + :param urls: A list of URLs. + :returns: First matching arXiv URL, or ``None``. """ arxiv_urls = [url for url in urls if "://arxiv.org" in url] if len(arxiv_urls) > 0: @@ -35,9 +41,14 @@ def match_doi_or_arxiv(text, only=["DOI", "arXiv"]): Search for a valid article ID (DOI or ArXiv) in the given text (regex-based). - Returns a tuple (type, first matching ID) or None if not found. - From : http://en.dogeno.us/2010/02/release-a-python-script-for-organizing-scientific-papers-pyrenamepdf-py/ - and https://github.com/minad/bibsync/blob/3fdf121016f6187a2fffc66a73cd33b45a20e55d/lib/bibsync/utils.rb + From \ + http://en.dogeno.us/2010/02/release-a-python-script-for-organizing-scientific-papers-pyrenamepdf-py/ \ + and \ + https://github.com/minad/bibsync/blob/3fdf121016f6187a2fffc66a73cd33b45a20e55d/lib/bibsync/utils.rb. + + :param text: Input text on which matching is to be done. + :param only: List of matches to look for (DOI/arXiv). + :returns: a tuple ``(type, first matching ID)`` or ``None`` if not found. """ text = text.lower() # Try to extract DOI @@ -86,10 +97,8 @@ def get_oa_version(doi): """ Get an OA version for a given DOI. - Params: - - doi is a DOI or a dx.doi.org link. - - Returns the URL of the OA version of the given DOI, or None. + :param doi: A DOI or a dx.doi.org link. 
+ :returns: The URL of the OA version of the given DOI, or ``None``. """ # If DOI is a link, truncate it if "dx.doi.org" in doi: diff --git a/reference_fetcher/tools.py b/reference_fetcher/tools.py index e0728fd..1c3612e 100644 --- a/reference_fetcher/tools.py +++ b/reference_fetcher/tools.py @@ -3,15 +3,17 @@ This file contains various utility functions. """ -def replaceAll(text, dic): - """Replace all the dic keys by the associated item in text""" - for i, j in dic.items(): +def replaceAll(text, replace_dict): + """ + Replace all the ``replace_dict`` keys by their associated item in ``text``. + """ + for i, j in replace_dict.items(): text = text.replace(i, j) return text def clean_whitespaces(text): """ - Remove double whitespaces and trailing . and , from text. + Remove double whitespaces and trailing "." and "," from text. """ return ' '.join(text.strip().rstrip(".,").split()) diff --git a/routes/get.py b/routes/get.py index 3411a80..1f55829 100644 --- a/routes/get.py +++ b/routes/get.py @@ -11,34 +11,39 @@ def fetch_papers(db): """ Fetch all matching papers. - ``` - GET /papers - Accept: application/vnd.api+json - ``` + .. code-block:: bash - Filtering is possible using `id=ID`, `doi=DOI`, `arxiv_id=ARXIV_ID` or any - combination of these GET parameters. Other parameters are ignored. + GET /papers + Accept: application/vnd.api+json - ``` - { - "data": [ - { - "type": "papers", - "id": 1, - "attributes": { - "doi": "10.1126/science.1252319", - "arxiv_id": "1401.2910" - }, - "links": { - "self": "/papers/1" - }, - "relationships": { - TODO + + Filtering is possible using ``id=ID``, ``doi=DOI``, ``arxiv_id=ARXIV_ID`` \ + or any combination of these GET parameters. Other parameters are ignored. + + + .. 
code-block:: json + + { + "data": [ + { + "type": "papers", + "id": 1, + "attributes": { + "doi": "10.1126/science.1252319", + "arxiv_id": "1401.2910" + }, + "links": { + "self": "/papers/1" + }, + "relationships": { + TODO + } } - } - ] - } - ``` + ] + } + + :param db: A database session, injected by the ``Bottle`` plugin. + :returns: An ``HTTPResponse``. """ filters = {k: bottle.request.params[k] for k in bottle.request.params @@ -55,31 +60,36 @@ def fetch_by_id(id, db): """ Fetch a resource identified by its internal id. - ``` - GET /id/ - Accept: application/vnd.api+json - ``` + .. code-block:: bash - ``` - { - "data": { - { - "type": "papers", - "id": 1, - "attributes": { - "doi": "10.1126/science.1252319", - "arxiv_id": "1401.2910" - }, - "links": { - "self": "/papers/1" - }, - "relationships": { - TODO + GET /id/ + Accept: application/vnd.api+json + + + .. code-block:: json + + { + "data": { + { + "type": "papers", + "id": 1, + "attributes": { + "doi": "10.1126/science.1252319", + "arxiv_id": "1401.2910" + }, + "links": { + "self": "/papers/1" + }, + "relationships": { + TODO + } } } } - } - ``` + + :param id: The id of the requested article. + :param db: A database session, injected by the ``Bottle`` plugin. + :returns: An ``HTTPResponse``. """ resource = db.query(database.Paper).filter_by(id=id).first() if resource: diff --git a/routes/post.py b/routes/post.py index 577047b..7443faf 100644 --- a/routes/post.py +++ b/routes/post.py @@ -14,40 +14,44 @@ def create_paper(db): """ Create a new resource identified by its DOI or arXiv eprint id. - ``` - POST /papers - Content-Type: application/vnd.api+json - Accept: application/vnd.api+json + .. 
code-block:: bash - { - "data": { - "doi": "10.1126/science.1252319", - // OR - "arxiv_id": "1401.2910" + POST /papers + Content-Type: application/vnd.api+json + Accept: application/vnd.api+json + + { + "data": { + "doi": "10.1126/science.1252319", + // OR + "arxiv_id": "1401.2910" + } } - } - ``` - ``` - { - "data": { - { - "type": "papers", - "id": 1, - "attributes": { - "doi": "10.1126/science.1252319", - "arxiv_id": "1401.2910" - }, - "links": { - "self": "/papers/1" - }, - "relationships": { - TODO + + .. code-block:: json + + { + "data": { + { + "type": "papers", + "id": 1, + "attributes": { + "doi": "10.1126/science.1252319", + "arxiv_id": "1401.2910" + }, + "links": { + "self": "/papers/1" + }, + "relationships": { + TODO + } } } } - } - ``` + + :param db: A database session, injected by the ``Bottle`` plugin. + :returns: An ``HTTPResponse``. """ data = json.loads(bottle.request.body.read().decode("utf-8")) # Validate the request @@ -83,7 +87,9 @@ def create_by_doi(doi, db): """ Create a new resource identified by its DOI, if it does not exist. - Return None if insertion failed, the Paper object otherwise. + :param doi: The DOI of the paper. + :param db: A database session. + :returns: ``None`` if insertion failed, the ``Paper`` object otherwise. """ paper = database.Paper(doi=doi) @@ -110,7 +116,9 @@ def create_by_arxiv(arxiv, db): Create a new resource identified by its arXiv eprint ID, if it does not exist. - Return None if insertion failed, the Paper object otherwise. + :param arxiv: The arXiv eprint ID. + :param db: A database session. + :returns: ``None`` if insertion failed, the ``Paper`` object otherwise. """ paper = database.Paper(arxiv_id=arxiv) diff --git a/tools.py b/tools.py index 23b5d4e..3221592 100644 --- a/tools.py +++ b/tools.py @@ -7,7 +7,10 @@ import json def pretty_json(data): """ - Return pretty printed JSON-formatted string. + Return pretty-printed JSON-formatted string. + + :param data: A JSON-serializable object to be converted. 
+ :returns: A pretty-printed JSON-formatted string. """ return json.dumps(data, sort_keys=True,