121 lines
3.0 KiB
Python
121 lines
3.0 KiB
Python
"""
This file contains all the DOI-related functions.
"""
|
|
import re

import requests
from requests.exceptions import RequestException

from libbmc import tools
|
|
|
|
# DOI-matching pattern, taken from
# https://stackoverflow.com/questions/27910/finding-a-doi-in-a-document-or-page/10324802#10324802
# The single capture group holds the DOI itself; the surrounding ``\b``
# anchors require it to start and end on a word boundary.
REGEX = re.compile(
    r"\b(10[.][0-9]{4,}(?:[.][0-9]+)*/(?:(?![\"&\'<>])\S)+)\b",
    re.IGNORECASE,
)

# Base dx.doi.org URL for redirections; the ``{doi}`` placeholder is filled
# in with ``str.format``.
DX_URL = "http://dx.doi.org/{doi}"
|
|
|
|
|
|
def is_valid(doi):
    """
    Tell whether a string is, in its entirety, a canonical DOI.

    The DOI pattern is anchored at the start of ``doi``; the string is only
    accepted when the matched text is the whole string.

    :param doi: The DOI to be checked.
    :returns: ``True`` if ``doi`` is a valid canonical DOI, ``False`` \
            otherwise.
    """
    found = REGEX.match(doi)
    if found is None:
        # Nothing DOI-like at the start of the string.
        return False
    # Reject strings that merely start with a DOI (trailing garbage).
    return found.group(0) == doi
|
|
|
|
|
|
def extract_from_text(text):
    """
    Find every canonical DOI occurring in a text.

    :param text: The text to extract DOIs from.
    :returns: A list of found DOIs, after being passed through \
            ``tools.remove_duplicates``.
    """
    # Group 1 of the pattern is the DOI itself.
    candidates = [hit.group(1) for hit in REGEX.finditer(text)]
    return tools.remove_duplicates(candidates)
|
|
|
|
|
|
def to_URL(dois):
    """
    Build dx.doi.org URLs from canonical DOIs.

    :param dois: Either a list of canonical DOIs, or a single canonical DOI.
    :returns: A list of DOI URLs when given a list, a single DOI URL \
            otherwise.
    """
    if not isinstance(dois, list):
        # Anything that is not a list is treated as one single DOI.
        return DX_URL.format(doi=dois)
    return [DX_URL.format(doi=single) for single in dois]
|
|
|
|
|
|
def to_canonical(urls):
    """
    Extract canonical DOIs from DOI URLs.

    Each URL is handed to ``extract_from_text``, so every URL yields the
    *list* of DOIs found in it.

    :param urls: Either a list of DOI URLs, or a single DOI URL.
    :returns: For a list, a list holding one ``extract_from_text`` result \
            per URL; for a single URL, the ``extract_from_text`` result \
            for that URL.
    """
    if not isinstance(urls, list):
        # Anything that is not a list is treated as one single URL.
        return extract_from_text(urls)
    return list(map(extract_from_text, urls))
|
|
|
|
|
|
def get_oa_version(doi):
    """
    Get an OA version for a given DOI.

    .. note::

        Uses beta.dissem.in API.

    The DOI is inserted into the API URL exactly as given; no normalisation
    (e.g. stripping a leading ``http://dx.doi.org/``) is performed here.

    :param doi: A canonical DOI.
    :returns: The URL of the OA version of the given DOI, or ``None`` if \
            the request fails, the response is not valid JSON, its status \
            is not ``"ok"`` or it does not have the expected structure.
    """
    try:
        response = requests.get("http://beta.dissem.in/api/%s" % (doi,))
        response.raise_for_status()
        result = response.json()
        # Explicit check instead of ``assert``: assertions are removed when
        # Python runs with ``-O``, which would silently skip the validation.
        if result["status"] != "ok":
            return None
        return result["paper"]["pdf_url"]
    except (ValueError, KeyError, TypeError, RequestException):
        # ValueError: body is not JSON; KeyError: expected field missing;
        # TypeError: payload (or ``paper``) is not a mapping;
        # RequestException: network error or HTTP error status.
        return None
|
|
|
|
|
|
def get_linked_version(doi):
    """
    Resolve a DOI to the link it points to.

    Issues a ``HEAD`` request on the DOI URL and reads the ``location``
    header of the response.

    :param doi: A canonical DOI.
    :returns: The canonical URL behind the DOI, or ``None`` if the request \
            failed or the response carries no ``location`` header.
    """
    try:
        response = requests.head(to_URL(doi))
    except RequestException:
        return None
    return response.headers.get("location")
|
|
|
|
|
|
def get_bibtex(doi):
    """
    Get a BibTeX entry for a given DOI.

    .. note::

        Adapted from https://gist.github.com/jrsmith3/5513926.

    The DOI URL is requested with an ``accept: application/x-bibtex``
    header, and the body is only returned when the response declares that
    media type.

    :param doi: The canonical DOI to get BibTeX from.
    :returns: A BibTeX string or ``None`` if the request fails or the \
            response is not BibTeX.
    """
    try:
        response = requests.get(to_URL(doi),
                                headers={"accept": "application/x-bibtex"})
        response.raise_for_status()
        # Explicit check instead of ``assert``: assertions are removed when
        # Python runs with ``-O``, which would let any content type through.
        # Only the media type is compared, case-insensitively, so that a
        # header such as ``application/x-bibtex; charset=utf-8`` is accepted.
        content_type = response.headers.get("content-type") or ""
        media_type = content_type.split(";", 1)[0].strip().lower()
        if media_type != "application/x-bibtex":
            return None
        return response.text
    except RequestException:
        return None
|