Browse Source

Update doc to rst format for generation with Sphinx

master
Lucas Verney 7 years ago
parent
commit
ec97c0a320
  1. 1
      .gitignore
  2. 13
      database.py
  3. 11
      main.py
  4. 30
      reference_fetcher/arxiv.py
  5. 24
      reference_fetcher/bbl.py
  6. 23
      reference_fetcher/doi.py
  7. 10
      reference_fetcher/tools.py
  8. 108
      routes/get.py
  9. 72
      routes/post.py
  10. 5
      tools.py

1
.gitignore vendored

@ -1 +1,2 @@ @@ -1 +1,2 @@
__pycache__
docs/

13
database.py

@ -1,12 +1,23 @@ @@ -1,12 +1,23 @@
"""
This file contains the database schema in SQLAlchemy format.
"""
from sqlalchemy import Column, Integer, String
from sqlalchemy import event, Column, Integer, String
from sqlalchemy.engine import Engine
from sqlalchemy.ext.declarative import declarative_base
Base = declarative_base()
@event.listens_for(Engine, "connect")
def set_sqlite_pragma(dbapi_connection, connection_record):
"""
Auto enable foreign keys for SQLite.
"""
cursor = dbapi_connection.cursor()
cursor.execute("PRAGMA foreign_keys=ON")
cursor.close()
class Paper(Base):
__tablename__ = 'papers'
id = Column(Integer, primary_key=True)

11
main.py

@ -1,8 +1,7 @@ @@ -1,8 +1,7 @@
#!/usr/bin/env python3
import bottle
from bottle.ext import sqlalchemy
from sqlalchemy import create_engine, event
from sqlalchemy.engine import Engine
from sqlalchemy import create_engine
import database
import routes
@ -32,14 +31,6 @@ plugin = sqlalchemy.Plugin( @@ -32,14 +31,6 @@ plugin = sqlalchemy.Plugin(
app.install(plugin)
# Auto enable foreign keys for SQLite
@event.listens_for(Engine, "connect")
def set_sqlite_pragma(dbapi_connection, connection_record):
cursor = dbapi_connection.cursor()
cursor.execute("PRAGMA foreign_keys=ON")
cursor.close()
# Routes
app.get("/papers", callback=routes.get.fetch_papers)
app.get("/papers/<id:int>", callback=routes.get.fetch_by_id)

30
reference_fetcher/arxiv.py

@ -13,10 +13,8 @@ def sources_from_arxiv(eprint): @@ -13,10 +13,8 @@ def sources_from_arxiv(eprint):
"""
Download sources on arXiv for a given preprint.
Params:
- eprint is the arXiv id (e.g. 1401.2910 or 1401.2910v1).
Returns a TarFile object of the sources of the arXiv preprint.
:param eprint: The arXiv id (e.g. ``1401.2910`` or ``1401.2910v1``).
:returns: A ``TarFile`` object of the sources of the arXiv preprint.
"""
r = requests.get("http://arxiv.org/e-print/%s" % (eprint,))
file_object = io.BytesIO(r.content)
@ -27,10 +25,8 @@ def bbl_from_arxiv(eprint): @@ -27,10 +25,8 @@ def bbl_from_arxiv(eprint):
"""
Get the .bbl files (if any) of a given preprint.
Params:
- eprint is the arXiv id (e.g. 1401.2910 or 1401.2910v1).
Returns a list of the .bbl files as text (if any) or None.
:param eprint: The arXiv id (e.g. ``1401.2910`` or ``1401.2910v1``).
:returns: A list of the ``.bbl`` files as text (if any) or ``None``.
"""
tf = sources_from_arxiv(eprint)
bbl_files = [i for i in tf.getmembers() if i.name.endswith(".bbl")]
@ -43,10 +39,8 @@ def get_cited_dois(eprint): @@ -43,10 +39,8 @@ def get_cited_dois(eprint):
"""
Get the DOIs of the papers cited by a given preprint.
Params:
- eprint is the arXiv id (e.g. 1401.2910 or 1401.2910v1).
Returns a dict of cleaned plaintext citations and their associated doi.
:param eprint: The arXiv id (e.g. ``1401.2910`` or ``1401.2910v1``).
:returns: A dict of cleaned plaintext citations and their associated doi.
"""
bbl_files = bbl_from_arxiv(eprint)
dois = {}
@ -59,10 +53,8 @@ def get_arxiv_eprint_from_doi(doi): @@ -59,10 +53,8 @@ def get_arxiv_eprint_from_doi(doi):
"""
Get the arXiv eprint id for a given DOI.
Params:
- doi is the DOI of the resource to look for.
Returns the arXiv eprint id, or None if not found.
:param doi: The DOI of the resource to look for.
:returns: The arXiv eprint id, or ``None`` if not found.
"""
r = requests.get("http://export.arxiv.org/api/query",
params={
@ -80,10 +72,8 @@ def get_doi(eprint): @@ -80,10 +72,8 @@ def get_doi(eprint):
"""
Get the associated DOI for a given arXiv eprint.
Params:
- eprint is the arXiv eprint id.
Returns the DOI if any, or None.
:param eprint: The arXiv eprint id.
:returns: The DOI if any, or ``None``.
"""
r = requests.get("http://export.arxiv.org/api/query",
params={

24
reference_fetcher/bbl.py

@ -13,12 +13,10 @@ from . import tools @@ -13,12 +13,10 @@ from . import tools
def clean_bibitem(bibitem):
"""
Return a plaintext representation of the bibitem from the bbl file.
Return a plaintext representation of the bibitem from the ``.bbl`` file.
Params:
- bibitem is the text content of the bibitem.
Returns a cleaned plaintext citation from the bibitem.
:param bibitem: The text content of the bibitem.
:returns: A cleaned plaintext citation from the bibitem.
"""
script_dir = os.path.dirname(os.path.abspath(__file__))
output = subprocess.check_output(["%s/opendetex/delatex" % (script_dir,),
@ -31,12 +29,11 @@ def clean_bibitem(bibitem): @@ -31,12 +29,11 @@ def clean_bibitem(bibitem):
def parse(bbl):
"""
Parse a *.bbl file to get a clean list of plaintext citations.
Params:
- bbl is either the path to the .bbl file or the content of a bbl file.
Parse a ``*.bbl`` file to get a clean list of plaintext citations.
Returns a list of cleaned plaintext citations.
:param bbl: Either the path to the ``.bbl`` file or the content of a \
``.bbl`` file.
:returns: A list of cleaned plaintext citations.
"""
# Handle path or content
if os.path.isfile(bbl):
@ -59,11 +56,10 @@ def get_dois(bbl_input): @@ -59,11 +56,10 @@ def get_dois(bbl_input):
"""
Get the DOIs of the papers cited in the given ``.bbl`` file.
Params:
- bbl_input is either the path to the .bbl file or the content of a bbl
file.
:param bbl_input: Either the path to the ``.bbl`` file or the content of \
a ``.bbl`` file.
Returns a dict of cleaned plaintext citations and their associated doi.
:returns: A dict of cleaned plaintext citations and their associated doi.
"""
cleaned_citations_with_URLs = parse(bbl_input)
dois = {}

23
reference_fetcher/doi.py

@ -10,6 +10,9 @@ from . import tools @@ -10,6 +10,9 @@ from . import tools
def extract_doi_links(urls):
"""
Try to find a DOI from a given list of URLs.
:param urls: A list of URLs.
:returns: First matching DOI URL, or ``None``.
"""
doi_urls = [url for url in urls if "/doi/" in url]
if len(doi_urls) > 0:
@ -22,6 +25,9 @@ def extract_doi_links(urls): @@ -22,6 +25,9 @@ def extract_doi_links(urls):
def extract_arxiv_links(urls):
"""
Try to find an arXiv link from a given list of URLs.
:param urls: A list of URLs.
:returns: First matching arXiv URL, or ``None``.
"""
arxiv_urls = [url for url in urls if "://arxiv.org" in url]
if len(arxiv_urls) > 0:
@ -35,9 +41,14 @@ def match_doi_or_arxiv(text, only=["DOI", "arXiv"]): @@ -35,9 +41,14 @@ def match_doi_or_arxiv(text, only=["DOI", "arXiv"]):
Search for a valid article ID (DOI or arXiv) in the given text
(regex-based).
Returns a tuple (type, first matching ID) or None if not found.
From : http://en.dogeno.us/2010/02/release-a-python-script-for-organizing-scientific-papers-pyrenamepdf-py/
and https://github.com/minad/bibsync/blob/3fdf121016f6187a2fffc66a73cd33b45a20e55d/lib/bibsync/utils.rb
From \
http://en.dogeno.us/2010/02/release-a-python-script-for-organizing-scientific-papers-pyrenamepdf-py/ \
and \
https://github.com/minad/bibsync/blob/3fdf121016f6187a2fffc66a73cd33b45a20e55d/lib/bibsync/utils.rb.
:param text: Input text on which matching is to be done.
:param only: List of matches to look for (DOI/arXiv).
:returns: A tuple ``(type, first matching ID)`` or ``None`` if not found.
"""
text = text.lower()
# Try to extract DOI
@ -86,10 +97,8 @@ def get_oa_version(doi): @@ -86,10 +97,8 @@ def get_oa_version(doi):
"""
Get an OA version for a given DOI.
Params:
- doi is a DOI or a dx.doi.org link.
Returns the URL of the OA version of the given DOI, or None.
:param doi: A DOI or a dx.doi.org link.
:returns: The URL of the OA version of the given DOI, or ``None``.
"""
# If DOI is a link, truncate it
if "dx.doi.org" in doi:

10
reference_fetcher/tools.py

@ -3,15 +3,17 @@ This file contains various utility functions. @@ -3,15 +3,17 @@ This file contains various utility functions.
"""
def replaceAll(text, dic):
"""Replace all the dic keys by the associated item in text"""
for i, j in dic.items():
def replaceAll(text, replace_dict):
"""
Replace all the ``replace_dict`` keys by their associated item in ``text``.
"""
for i, j in replace_dict.items():
text = text.replace(i, j)
return text
def clean_whitespaces(text):
"""
Remove double whitespaces and trailing . and , from text.
Collapse runs of whitespace and remove trailing "." and "," from text.
"""
return ' '.join(text.strip().rstrip(".,").split())

108
routes/get.py

@ -11,34 +11,39 @@ def fetch_papers(db): @@ -11,34 +11,39 @@ def fetch_papers(db):
"""
Fetch all matching papers.
```
GET /papers
Accept: application/vnd.api+json
```
Filtering is possible using `id=ID`, `doi=DOI`, `arxiv_id=ARXIV_ID` or any
combination of these GET parameters. Other parameters are ignored.
```
{
"data": [
{
"type": "papers",
"id": 1,
"attributes": {
"doi": "10.1126/science.1252319",
"arxiv_id": "1401.2910"
},
"links": {
"self": "/papers/1"
},
"relationships": {
TODO
.. code-block:: bash
GET /papers
Accept: application/vnd.api+json
Filtering is possible using ``id=ID``, ``doi=DOI``, ``arxiv_id=ARXIV_ID`` \
or any combination of these GET parameters. Other parameters are ignored.
.. code-block:: json
{
"data": [
{
"type": "papers",
"id": 1,
"attributes": {
"doi": "10.1126/science.1252319",
"arxiv_id": "1401.2910"
},
"links": {
"self": "/papers/1"
},
"relationships": {
TODO
}
}
}
]
}
```
]
}
:param db: A database session, injected by the ``Bottle`` plugin.
:returns: An ``HTTPResponse``.
"""
filters = {k: bottle.request.params[k]
for k in bottle.request.params
@ -55,31 +60,36 @@ def fetch_by_id(id, db): @@ -55,31 +60,36 @@ def fetch_by_id(id, db):
"""
Fetch a resource identified by its internal id.
```
GET /id/<id>
Accept: application/vnd.api+json
```
```
{
"data": {
{
"type": "papers",
"id": 1,
"attributes": {
"doi": "10.1126/science.1252319",
"arxiv_id": "1401.2910"
},
"links": {
"self": "/papers/1"
},
"relationships": {
TODO
.. code-block:: bash
GET /papers/<id>
Accept: application/vnd.api+json
.. code-block:: json
{
"data": {
{
"type": "papers",
"id": 1,
"attributes": {
"doi": "10.1126/science.1252319",
"arxiv_id": "1401.2910"
},
"links": {
"self": "/papers/1"
},
"relationships": {
TODO
}
}
}
}
}
```
:param id: The id of the requested article.
:param db: A database session, injected by the ``Bottle`` plugin.
:returns: An ``HTTPResponse``.
"""
resource = db.query(database.Paper).filter_by(id=id).first()
if resource:

72
routes/post.py

@ -14,40 +14,44 @@ def create_paper(db): @@ -14,40 +14,44 @@ def create_paper(db):
"""
Create a new resource identified by its DOI or arXiv eprint id.
```
POST /papers
Content-Type: application/vnd.api+json
Accept: application/vnd.api+json
{
"data": {
"doi": "10.1126/science.1252319",
// OR
"arxiv_id": "1401.2910"
.. code-block:: bash
POST /papers
Content-Type: application/vnd.api+json
Accept: application/vnd.api+json
{
"data": {
"doi": "10.1126/science.1252319",
// OR
"arxiv_id": "1401.2910"
}
}
}
```
```
{
"data": {
{
"type": "papers",
"id": 1,
"attributes": {
"doi": "10.1126/science.1252319",
"arxiv_id": "1401.2910"
},
"links": {
"self": "/papers/1"
},
"relationships": {
TODO
.. code-block:: json
{
"data": {
{
"type": "papers",
"id": 1,
"attributes": {
"doi": "10.1126/science.1252319",
"arxiv_id": "1401.2910"
},
"links": {
"self": "/papers/1"
},
"relationships": {
TODO
}
}
}
}
}
```
:param db: A database session, injected by the ``Bottle`` plugin.
:returns: An ``HTTPResponse``.
"""
data = json.loads(bottle.request.body.read().decode("utf-8"))
# Validate the request
@ -83,7 +87,9 @@ def create_by_doi(doi, db): @@ -83,7 +87,9 @@ def create_by_doi(doi, db):
"""
Create a new resource identified by its DOI, if it does not exist.
Return None if insertion failed, the Paper object otherwise.
:param doi: The DOI of the paper.
:param db: A database session.
:returns: ``None`` if insertion failed, the ``Paper`` object otherwise.
"""
paper = database.Paper(doi=doi)
@ -110,7 +116,9 @@ def create_by_arxiv(arxiv, db): @@ -110,7 +116,9 @@ def create_by_arxiv(arxiv, db):
Create a new resource identified by its arXiv eprint ID, if it does not
exist.
Return None if insertion failed, the Paper object otherwise.
:param arxiv: The arXiv eprint ID.
:param db: A database session.
:returns: ``None`` if insertion failed, the ``Paper`` object otherwise.
"""
paper = database.Paper(arxiv_id=arxiv)

5
tools.py

@ -7,7 +7,10 @@ import json @@ -7,7 +7,10 @@ import json
def pretty_json(data):
"""
Return pretty printed JSON-formatted string.
Return pretty-printed JSON-formatted string.
:param data: A JSON-serializable object to be converted.
:returns: A pretty-printed JSON-formatted string.
"""
return json.dumps(data,
sort_keys=True,

Loading…
Cancel
Save