""" This file contains all the arXiv-specific functions. """ import io import requests import tarfile import xml.etree.ElementTree from . import bbl def sources_from_arxiv(eprint): """ Download sources on arXiv for a given preprint. :param eprint: The arXiv id (e.g. ``1401.2910`` or ``1401.2910v1``). :returns: A ``TarFile`` object of the sources of the arXiv preprint. """ r = requests.get("http://arxiv.org/e-print/%s" % (eprint,)) file_object = io.BytesIO(r.content) return tarfile.open(fileobj=file_object) def bbl_from_arxiv(eprint): """ Get the .bbl files (if any) of a given preprint. :param eprint: The arXiv id (e.g. ``1401.2910`` or ``1401.2910v1``). :returns: A list of the ``.bbl`` files as text (if any) or ``None``. """ tf = sources_from_arxiv(eprint) bbl_files = [i for i in tf.getmembers() if i.name.endswith(".bbl")] bbl_files = [tf.extractfile(member).read().decode(tarfile.ENCODING) for member in bbl_files] return bbl_files def get_cited_dois(eprint): """ Get the .bbl files (if any) of a given preprint. :param eprint: The arXiv id (e.g. ``1401.2910`` or ``1401.2910v1``). :returns: A dict of cleaned plaintext citations and their associated doi. """ bbl_files = bbl_from_arxiv(eprint) dois = {} for bbl_file in bbl_files: dois.update(bbl.get_dois(bbl_file)) return dois def get_arxiv_eprint_from_doi(doi): """ Get the arXiv eprint id for a given DOI. :param doi: The DOI of the resource to look for. :returns: The arXiv eprint id, or ``None`` if not found. """ r = requests.get("http://export.arxiv.org/api/query", params={ "search_query": "doi:%s" % (doi,), "max_results": 1 }) e = xml.etree.ElementTree.fromstring(r.content) for entry in e.iter("{http://www.w3.org/2005/Atom}entry"): id = entry.find("{http://www.w3.org/2005/Atom}id").text return id.replace("http://arxiv.org/abs/", "") return None def get_doi(eprint): """ Get the associated DOI for a given arXiv eprint. :param eprint: The arXiv eprint id. :returns: The DOI if any, or ``None``. """ r = requests.get("http://export.arxiv.org/api/query", params={ "id_list": eprint, "max_results": 1 }) e = xml.etree.ElementTree.fromstring(r.content) for entry in e.iter("{http://www.w3.org/2005/Atom}entry"): doi = entry.find("{http://arxiv.org/schemas/atom}doi") if doi is not None: return doi.text return None