Passing pylint on the module

parent a69e7ef6c1
commit a2ee654eac
@@ -1,15 +1,11 @@
"""
libbmc

The :mod:`libbmc` is a generic Python library to manage bibliography and play
with scientific papers.
"""

# Global list of valid paper identifier types. See README.md.
__valid_identifiers__ = []

# Import order of the modules is important, as they will populate
# `__valid_identifiers__` on load, and the order in this list reflects their
# priority.
from libbmc import bibtex, doi, fetcher, isbn  # noqa
from libbmc import citations, papers, repositories  # noqa

__version__ = "0.1.3.1"

__all__ = [
    "bibtex", "doi", "fetcher", "isbn",
    "citations", "papers", "repositories",
]
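The import-order comment above relies on a registration pattern: each identifier module appends its own name to `__valid_identifiers__` when it is imported, and consumers resolve the matching module dynamically (exactly what `libbmc.papers.identifiers` does further down in this commit). A minimal sketch of that lookup, assuming every registered module exposes an `extract_from_text()` function; the sample string is made up:

import importlib

from libbmc import __valid_identifiers__

SAMPLE = "See http://dx.doi.org/10.1209/0295-5075/111/40005 for details."

for name in __valid_identifiers__:
    # Resolve the registered module and run its text extractor on the sample
    module = importlib.import_module("libbmc.%s" % (name,))
    found = module.extract_from_text(SAMPLE)
    if found:
        print(name, found[0])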
@@ -1,5 +0,0 @@
from libbmc.citations import bbl, bibtex, pdf, plaintext

__all__ = [
    "bbl", "bibtex", "pdf", "plaintext"
]
@@ -73,7 +73,7 @@ def get_plaintext_citations(bbl):
    return cleaned_bbl


def get_cited_DOIs(bbl):
def get_cited_dois(bbl):
    """
    Get the DOIs of the papers cited in a .bbl file.

@@ -85,4 +85,4 @@ def get_cited_DOIs(bbl):
    # Get the plaintext citations from the bbl file
    plaintext_citations = get_plaintext_citations(bbl)
    # Use the plaintext citations parser on these citations
    return plaintext.get_cited_DOIs(plaintext_citations)
    return plaintext.get_cited_dois(plaintext_citations)
@@ -2,15 +2,20 @@
This files contains all the functions to extract DOIs of citations from
BibTeX files.
"""
import bibtexparser
import os


import bibtexparser

from bibtexparser.bparser import BibTexParser
from bibtexparser.customization import convert_to_unicode


from libbmc import tools
from libbmc.citations import plaintext

# TODO: Use beta.dissem.in with formatted citation


def bibentry_as_plaintext(bibentry):
    """
@@ -51,7 +56,7 @@ def get_plaintext_citations(bibtex):
    return bibentries


def get_cited_DOIs(bibtex):
def get_cited_dois(bibtex):
    """
    Get the DOIs of the papers cited in a BibTeX file.

@@ -71,4 +76,4 @@ def get_cited_DOIs(bibtex):
    # Get the plaintext citations from the bibtex file
    plaintext_citations = get_plaintext_citations(bibtex)
    # Use the plaintext citations parser on these citations
    return plaintext.get_cited_DOIs(plaintext_citations)
    return plaintext.get_cited_dois(plaintext_citations)
@@ -3,10 +3,11 @@ This files contains all the functions to extract DOIs of citations from
PDF files.
"""
import os
import requests
import subprocess
import xml.etree.ElementTree as ET

import requests

from requests.exceptions import RequestException

from libbmc import tools
@@ -17,7 +18,7 @@ CERMINE_BASE_URL = "http://cermine.ceon.pl/"
SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__))


def cermine(pdf_file, force_API=False, override_local=None):
def cermine(pdf_file, force_api=False, override_local=None):
    """
    Run `CERMINE <https://github.com/CeON/CERMINE>`_ to extract metadata from \
        the given PDF file, to retrieve citations (and more) from the \
@@ -44,7 +45,7 @@ def cermine(pdf_file, force_API=False, override_local=None):
    the CERMINE API terms.

    :param pdf_file: Path to the PDF file to handle.
    :param force_API: Force the use of the Cermine API \
    :param force_api: Force the use of the Cermine API \
            (and do not try to use a local JAR file). Defaults to ``False``.
    :param override_local: Use this specific JAR file, instead of the one at \
            the default location (``libbmc/external/cermine.jar``).
@@ -55,23 +56,23 @@ def cermine(pdf_file, force_API=False, override_local=None):
        # Check if we want to load the local JAR from a specific path
        local = override_local
        # Else, try to stat the JAR file at the expected local path
        if (local is None) and (not force_API):
        if (local is None) and (not force_api):
            if os.path.isfile(os.path.join(SCRIPT_DIR,
                                           "../external/cermine.jar")):
                local = os.path.join(SCRIPT_DIR,
                                     "../external/cermine.jar")

        # If we want to force the API use, or we could not get a local JAR
        if force_API or (local is None):
        if force_api or (local is None):
            print("Using API")
            with open(pdf_file, "rb") as fh:
                # Query the API
                r = requests.post(
                    CERMINE_BASE_URL + "extract.do",
                    headers={"Content-Type": "application/binary"},
                    files={"file": fh}
                )
                return r.text
                # Query the API
                request = requests.post(
                    CERMINE_BASE_URL + "extract.do",
                    headers={"Content-Type": "application/binary"},
                    files={"file": fh}
                )
                return request.text
        # Else, use the local JAR file
        else:
            return subprocess.check_output([
@@ -86,7 +87,7 @@ def cermine(pdf_file, force_API=False, override_local=None):
        return None


def cermine_dois(pdf_file, force_API=False, override_local=None):
def cermine_dois(pdf_file, force_api=False, override_local=None):
    """
    Run `CERMINE <https://github.com/CeON/CERMINE>`_ to extract DOIs of cited \
        papers from a PDF file.
@@ -116,7 +117,7 @@ def cermine_dois(pdf_file, force_API=False, override_local=None):
    try to match them on Crossref to get DOIs.

    :param pdf_file: Path to the PDF file to handle.
    :param force_API: Force the use of the Cermine API \
    :param force_api: Force the use of the Cermine API \
            (and do not try to use a local JAR file). Defaults to ``False``.
    :param override_local: Use this specific JAR file, instead of the one at \
            the default location (``libbmc/external/cermine.jar``).
@@ -126,7 +127,7 @@ def cermine_dois(pdf_file, force_API=False, override_local=None):
    # * Do not convert to plain text, but use the extra metadata from
    # CERMINE
    # Call CERMINE on the PDF file
    cermine_output = cermine(pdf_file, force_API, override_local)
    cermine_output = cermine(pdf_file, force_api, override_local)
    # Parse the resulting XML
    root = ET.fromstring(cermine_output)
    plaintext_references = [
@@ -136,7 +137,7 @@ def cermine_dois(pdf_file, force_API=False, override_local=None):
        ET.tostring(e, method="text").decode("utf-8").replace(e.text, ""))
        for e in root.iter("mixed-citation")]
    # Call the plaintext methods to fetch DOIs
    return plaintext.get_cited_DOIs(plaintext_references)
    return plaintext.get_cited_dois(plaintext_references)


def grobid(pdf_folder, grobid_home=None, grobid_jar=None):
@@ -156,6 +157,8 @@ def grobid(pdf_folder, grobid_home=None, grobid_jar=None):
    :param grobid_jar: Path to the built Grobid JAR file.
    :returns: ``True``, or ``False`` if an error occurred.
    """
    # TODO: Should be using https://github.com/kermitt2/grobid-example and
    # BibTeX backend.
    if grobid_home is None or grobid_jar is None:
        # User should pass the correct paths
        return False
@@ -234,4 +237,4 @@ def pdfextract_dois(pdf_file):
    root = ET.fromstring(references)
    plaintext_references = [e.text for e in root.iter("reference")]
    # Call the plaintext methods to fetch DOIs
    return plaintext.get_cited_DOIs(plaintext_references)
    return plaintext.get_cited_dois(plaintext_references)
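Both DOI-extraction paths above (`cermine_dois()` and `pdfextract_dois()`) end by handing plain-text references to `plaintext.get_cited_dois()`. A hedged usage sketch of the CERMINE path, with `paper.pdf` as a placeholder file and `force_api=True` to skip the local JAR lookup (which implies accepting the CERMINE API terms mentioned in the docstring):

from libbmc.citations import pdf

# Returns a dict mapping each extracted plaintext citation to a matched DOI
cited_dois = pdf.cermine_dois("paper.pdf", force_api=True)
for citation, doi in cited_dois.items():
    print(doi, "<-", citation[:60])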
@@ -37,7 +37,7 @@ def get_plaintext_citations(file):
    return cleaned_citations


def get_cited_DOIs(file):
def get_cited_dois(file):
    """
    Get the DOIs of the papers cited in a plaintext file. The file should \
    have one citation per line.
@@ -66,29 +66,29 @@ def get_cited_DOIs(file):
    # Try to get the DOI directly from the citation
    for citation in plaintext_citations[:]:
        # Some citations already contain a DOI so try to match it directly
        matched_DOIs = doi.extract_from_text(citation)
        if len(matched_DOIs) > 0:
        matched_dois = doi.extract_from_text(citation)
        if len(matched_dois) > 0:
            # Add the DOI and go on
            dois[citation] = next(iter(matched_DOIs))
            dois[citation] = next(iter(matched_dois))
            continue
        # Same thing for arXiv id
        matched_arXiv = arxiv.extract_from_text(citation)
        if len(matched_arXiv) > 0:
        matched_arxiv = arxiv.extract_from_text(citation)
        if len(matched_arxiv) > 0:
            # Add the associated DOI and go on
            dois[citation] = arxiv.to_DOI(next(iter(matched_arXiv)))
            dois[citation] = arxiv.to_doi(next(iter(matched_arxiv)))
            continue
        # If no match found, stack it for next step
        # Note to remove URLs in the citation as the plaintext citations can
        # contain URLs and they are bad for the CrossRef API.
        crossref_queue.append(tools.remove_URLs(citation))
        crossref_queue.append(tools.remove_urls(citation))

    # Do batch with remaining papers, to prevent from the timeout of CrossRef
    for batch in tools.batch(crossref_queue, CROSSREF_MAX_BATCH_SIZE):
        batch = [i for i in batch]
        try:
            # Fetch results from CrossRef
            r = requests.post(CROSSREF_LINKS_API_URL, json=batch)
            for result in r.json()["results"]:
            request = requests.post(CROSSREF_LINKS_API_URL, json=batch)
            for result in request.json()["results"]:
                # Try to get a DOI
                try:
                    dois[result["text"]] = result["doi"]
@@ -55,7 +55,7 @@ def is_valid(doi):
    False
    """
    match = REGEX.match(doi)
    return ((match is not None) and (match.group(0) == doi))
    return (match is not None) and (match.group(0) == doi)


def extract_from_text(text):
@@ -71,17 +71,17 @@ def extract_from_text(text):
    return tools.remove_duplicates(REGEX.findall(text))


def to_URL(dois):
def to_url(dois):
    """
    Convert a list of canonical DOIs to a list of DOIs URLs.

    :param dois: List of canonical DOIs. Can also be a single canonical DOI.
    :returns: A list of DOIs URLs (resp. a single value).

    >>> to_URL(['10.1209/0295-5075/111/40005'])
    >>> to_url(['10.1209/0295-5075/111/40005'])
    ['http://dx.doi.org/10.1209/0295-5075/111/40005']

    >>> to_URL('10.1209/0295-5075/111/40005')
    >>> to_url('10.1209/0295-5075/111/40005')
    'http://dx.doi.org/10.1209/0295-5075/111/40005'
    """
    if isinstance(dois, list):
@@ -110,13 +110,7 @@ def to_canonical(urls):
    >>> to_canonical(['aaaa']) is None
    True
    """
    try:
        if isinstance(urls, list):
            return [next(iter(extract_from_text(url))) for url in urls]
        else:
            return next(iter(extract_from_text(urls)))
    except StopIteration:
        return None
    return tools.map_or_apply(extract_from_text, urls)


def get_oa_version(doi):
@@ -134,10 +128,10 @@ def get_oa_version(doi):
    'http://arxiv.org/abs/1506.06690'
    """
    try:
        r = requests.get("%s%s" % (DISSEMIN_API, doi))
        r.raise_for_status()
        result = r.json()
        assert(result["status"] == "ok")
        request = requests.get("%s%s" % (DISSEMIN_API, doi))
        request.raise_for_status()
        result = request.json()
        assert result["status"] == "ok"
        return result["paper"]["pdf_url"]
    except (AssertionError, ValueError, KeyError, RequestException):
        return None
@@ -162,10 +156,10 @@ def get_oa_policy(doi):
    True
    """
    try:
        r = requests.get("%s%s" % (DISSEMIN_API, doi))
        r.raise_for_status()
        result = r.json()
        assert(result["status"] == "ok")
        request = requests.get("%s%s" % (DISSEMIN_API, doi))
        request.raise_for_status()
        result = request.json()
        assert result["status"] == "ok"
        return ([i
                 for i in result["paper"]["publications"]
                 if i["doi"] == doi][0])["policy"]
@@ -185,8 +179,8 @@ def get_linked_version(doi):
    'http://stacks.iop.org/0295-5075/111/i=4/a=40005?key=crossref.9ad851948a976ecdf216d4929b0b6f01'
    """
    try:
        r = requests.head(to_URL(doi))
        return r.headers.get("location")
        request = requests.head(to_url(doi))
        return request.headers.get("location")
    except RequestException:
        return None

@@ -206,10 +200,10 @@ def get_bibtex(doi):
    '@article{Verney_2015,\\n\\tdoi = {10.1209/0295-5075/111/40005},\\n\\turl = {http://dx.doi.org/10.1209/0295-5075/111/40005},\\n\\tyear = 2015,\\n\\tmonth = {aug},\\n\\tpublisher = {{IOP} Publishing},\\n\\tvolume = {111},\\n\\tnumber = {4},\\n\\tpages = {40005},\\n\\tauthor = {Lucas Verney and Lev Pitaevskii and Sandro Stringari},\\n\\ttitle = {Hybridization of first and second sound in a weakly interacting Bose gas},\\n\\tjournal = {{EPL}}\\n}'
    """
    try:
        r = requests.get(to_URL(doi),
                         headers={"accept": "application/x-bibtex"})
        r.raise_for_status()
        assert(r.headers.get("content-type") == "application/x-bibtex")
        return r.text
        request = requests.get(to_url(doi),
                               headers={"accept": "application/x-bibtex"})
        request.raise_for_status()
        assert request.headers.get("content-type") == "application/x-bibtex"
        return request.text
    except (RequestException, AssertionError):
        return None
@@ -3,24 +3,89 @@ This file contains functions to download locally some papers, eventually using
a proxy.
"""
import socket
import socks
import sys
import urllib

import socks


# Default socket to use, if no proxy is used
DEFAULT_SOCKET = socket.socket


def download(url, proxies=[None]):
def _download_helper(url):
    """
    Handle the download of an URL, using the proxy currently set in \
        :mod:`socks`.

    :param url: The URL to download.
    :returns: A tuple of the raw content of the downloaded data and its \
            associated content-type. Returns None if it was \
            unable to download the document.
    """
    # Try to fetch the URL using the current proxy
    try:
        request = urllib.request.urlopen(url)
        try:
            size = int(dict(request.info())['content-length'].strip())
        except KeyError:
            try:
                size = int(dict(request.info())['Content-Length'].strip())
            except KeyError:
                size = 0
        # Download the document
        doc = b""
        doc_size = 0
        while True:
            buf = request.read(1024)
            if buf:
                doc += buf
                doc_size += len(buf)
                if size != 0:
                    # Write progress bar on stdout
                    done = int(50 * doc_size / size)
                    sys.stdout.write("\r[%s%s]" %
                                     ('='*done, ' '*(50-done)))
                    sys.stdout.write(" "+str(int(float(done)/52*100))+"%")
                    sys.stdout.flush()
            else:
                break
        # Fetch content type
        contenttype = None
        contenttype_req = None
        try:
            contenttype_req = dict(request.info())['content-type']
        except KeyError:
            try:
                contenttype_req = dict(request.info())['Content-Type']
            except KeyError:
                return None
        if 'pdf' in contenttype_req:
            contenttype = 'pdf'
        elif 'djvu' in contenttype_req:
            contenttype = 'djvu'

        # Check content type and status code are ok
        if request.getcode() != 200 or contenttype is None:
            # Else, try with the next available proxy
            return None

        # Return a tuple of the downloaded content and the content-type
        return (doc, contenttype)
    # If an exception occurred, continue with next available proxy
    except (urllib.error.URLError, socket.error, ValueError):
        return None


def download(url, proxies=None):
    """
    Download a PDF or DJVU document from a url, eventually using proxies.

    :params url: The URL to the PDF/DJVU document to fetch.
    :params proxies: An optional list of proxies to use. Proxies will be \
            used sequentially. Proxies should be a list of proxy strings. \
            Do not forget to include ``None`` in the list if you want to try \
            direct fetching without any proxy.
            Do not forget to include ``""`` (empty string) in the list if \
            you want to try direct fetching without any proxy.

    :returns: A tuple of the raw content of the downloaded data and its \
            associated content-type. Returns ``(None, None)`` if it was \
@@ -28,10 +93,14 @@ def download(url, proxies=[None]):

    >>> download("http://arxiv.org/pdf/1312.4006.pdf") # doctest: +SKIP
    """
    # Handle default argument
    if proxies is None:
        proxies = [""]

    # Loop over all available connections
    for proxy in proxies:
        # Handle no proxy case
        if proxy is None:
        if proxy == "":
            socket.socket = DEFAULT_SOCKET
        # Handle SOCKS proxy
        elif proxy.startswith('socks'):
@@ -55,58 +124,9 @@ def download(url, proxies=[None]):
            socks.set_default_proxy(socks.HTTP, proxy, port)
            socket.socket = socks.socksocket

        # Try to fetch the URL using the current proxy
        try:
            r = urllib.request.urlopen(url)
            try:
                size = int(dict(r.info())['content-length'].strip())
            except KeyError:
                try:
                    size = int(dict(r.info())['Content-Length'].strip())
                except KeyError:
                    size = 0
            # Download the document
            dl = b""
            dl_size = 0
            while True:
                buf = r.read(1024)
                if buf:
                    dl += buf
                    dl_size += len(buf)
                    if size != 0:
                        # Write progress bar on stdout
                        done = int(50 * dl_size / size)
                        sys.stdout.write("\r[%s%s]" %
                                         ('='*done, ' '*(50-done)))
                        sys.stdout.write(" "+str(int(float(done)/52*100))+"%")
                        sys.stdout.flush()
                else:
                    break
            # Fetch content type
            contenttype = False
            contenttype_req = None
            try:
                contenttype_req = dict(r.info())['content-type']
            except KeyError:
                try:
                    contenttype_req = dict(r.info())['Content-Type']
                except KeyError:
                    continue
            if 'pdf' in contenttype_req:
                contenttype = 'pdf'
            elif 'djvu' in contenttype_req:
                contenttype = 'djvu'

            # Check content type and status code are ok
            if r.getcode() != 200 or contenttype is False:
                # Else, try with the next available proxy
                continue

            # Return a tuple of the downloaded content and the content-type
            return (dl, contenttype)
        # If an exception occurred, continue with next available proxy
        except (urllib.error.URLError, socket.error, ValueError):
            continue
        downloaded = _download_helper(url)
        if downloaded is not None:
            return downloaded

    # In case of running out of proxies, return (None, None)
    return (None, None)
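The refactor above keeps the behaviour of `download()` but moves the per-proxy fetch into `_download_helper()`. A short usage sketch based on the docstring; the URL comes from the doctest, and `""` asks for a direct connection (proxy strings could be appended to the list):

from libbmc import fetcher

content, content_type = fetcher.download(
    "http://arxiv.org/pdf/1312.4006.pdf", proxies=[""])
if content_type == "pdf":
    # Only write the file out if a PDF actually came back
    with open("1312.4006.pdf", "wb") as fh:
        fh.write(content)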
@@ -11,11 +11,11 @@ from libbmc import __valid_identifiers__
__valid_identifiers__ += ["isbn"]


def is_valid(isbn):
def is_valid(isbn_id):
    """
    Check that a given string is a valid ISBN.

    :param isbn: the isbn to be checked.
    :param isbn_id: the isbn to be checked.
    :returns: boolean indicating whether the isbn is valid or not.

    >>> is_valid("978-3-16-148410-0")
@@ -43,9 +43,9 @@ def is_valid(isbn):
    True
    """
    return (
        (not isbnlib.notisbn(isbn)) and (
            isbnlib.get_canonical_isbn(isbn) == isbn or
            isbnlib.mask(isbnlib.get_canonical_isbn(isbn)) == isbn)
        (not isbnlib.notisbn(isbn_id)) and (
            isbnlib.get_canonical_isbn(isbn_id) == isbn_id or
            isbnlib.mask(isbnlib.get_canonical_isbn(isbn_id)) == isbn_id)
    )


@@ -64,28 +64,28 @@ def extract_from_text(text):
    return [i for i in isbns if i is not None]


def get_bibtex(isbn):
def get_bibtex(isbn_identifier):
    """
    Get a BibTeX string for the given ISBN.

    :param isbn: ISBN to fetch BibTeX entry for.
    :param isbn_identifier: ISBN to fetch BibTeX entry for.
    :returns: A BibTeX string or ``None`` if could not fetch it.

    >>> get_bibtex('9783161484100')
    '@book{9783161484100,\\n title = {Berkeley, Oakland: Albany, Emeryville, Alameda, Kensington},\\n author = {Peekaboo Maps},\\n isbn = {9783161484100},\\n year = {2009},\\n publisher = {Peek A Boo Maps}\\n}'
    """
    # Try to find the BibTeX using associated DOIs
    bibtex = doi.get_bibtex(to_DOI(isbn))
    bibtex = doi.get_bibtex(to_doi(isbn_identifier))
    if bibtex is None:
        # In some cases, there are no DOIs for a given ISBN. In this case, try
        # to fetch bibtex directly from the ISBN, using a combination of
        # Google Books and worldcat.org results.
        bibtex = isbnlib.registry.bibformatters['bibtex'](
            isbnlib.meta(isbn, 'default'))
            isbnlib.meta(isbn_identifier, 'default'))
    return bibtex


def to_DOI(isbn):
def to_doi(isbn_identifier):
    """
    Make a DOI out of the given ISBN.

@@ -94,16 +94,16 @@ def to_DOI(isbn):
    See https://github.com/xlcnd/isbnlib#note. The returned DOI may not be
    issued yet.

    :param isbn: A valid ISBN string.
    :param isbn_identifier: A valid ISBN string.
    :returns: A DOI as string.

    >>> to_DOI('9783161484100')
    >>> to_doi('9783161484100')
    '10.978.316/1484100'
    """
    return isbnlib.doi(isbn)
    return isbnlib.doi(isbn_identifier)


def from_DOI(doi):
def from_doi(doi_identifier):
    """
    Make an ISBN out of the given DOI.

@@ -119,10 +119,10 @@ def from_DOI(doi):
    issued yet (it is a valid one, but not necessary corresponding to a
    valid book).

    :param doi: A valid canonical DOI.
    :param doi_identifier: A valid canonical DOI.
    :returns: An ISBN string.

    >>> from_DOI('10.978.316/1484100')
    >>> from_doi('10.978.316/1484100')
    '9783161484100'
    """
    return "".join(c for c in doi[2:] if c in "0123456789xX")
    return "".join(c for c in doi_identifier[2:] if c in "0123456789xX")
@@ -1,6 +0,0 @@
from libbmc.papers import identifiers

__all__ = [
    "identifiers",
    "tearpages"
]
@@ -13,8 +13,8 @@ import sys
from libbmc import __valid_identifiers__

# Import all the modules associated to __valid_identifiers__
for type in __valid_identifiers__:
    importlib.import_module("libbmc.%s" % (type,))
for valid_identifier in __valid_identifiers__:
    importlib.import_module("libbmc.%s" % (valid_identifier,))


def find_identifiers(src):
@@ -53,18 +53,19 @@ def find_identifiers(src):

    while totext.poll() is None:
        extract_full = ' '.join([i.decode("utf-8").strip()
                                for i in totext.stdout.readlines()])
                                 for i in totext.stdout.readlines()])
        # Loop over all the valid identifier types
        for type in __valid_identifiers__:
        for identifier in __valid_identifiers__:
            # Dynamically call the ``extract_from_text`` method for the
            # associated module.
            m = sys.modules.get("libbmc.%s" % (type,), None)
            if m is None:
            module = sys.modules.get("libbmc.%s" % (identifier,), None)
            if module is None:
                continue
            found_id = getattr(m, "extract_from_text")(extract_full)
            found_id = getattr(module, "extract_from_text")(extract_full)
            if found_id:
                totext.terminate()
                return (type, found_id[0])  # found_id is a list of found IDs
                # found_id is a list of found IDs
                return (identifier, found_id[0])
    return (None, None)


@@ -80,12 +81,12 @@ def get_bibtex(identifier):
    :returns: A BibTeX string or ``None`` if an error occurred.
    # TODO: Should return a BiBTeX object?
    """
    type, id = identifier
    if type not in __valid_identifiers__:
    identifier_type, identifier_id = identifier
    if identifier_type not in __valid_identifiers__:
        return None

    # Dynamically call the ``get_bibtex`` method from the associated module.
    m = sys.modules.get("libbmc.%s" % (type,), None)
    if m is None:
    module = sys.modules.get("libbmc.%s" % (identifier_type,), None)
    if module is None:
        return None
    return getattr(m, "get_bibtex")(id)
    return getattr(module, "get_bibtex")(identifier_id)
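`get_bibtex()` above takes the same `(identifier_type, identifier_id)` tuple that `find_identifiers()` returns, so the two chain naturally. A minimal sketch, with `some_paper.pdf` as a placeholder for a PDF containing a recognisable identifier:

from libbmc.papers import identifiers

identifier = identifiers.find_identifiers("some_paper.pdf")
if identifier != (None, None):
    # identifier is e.g. ("doi", "10.1209/0295-5075/111/40005")
    print(identifiers.get_bibtex(identifier))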
@@ -21,7 +21,7 @@ BAD_JOURNALS = {
}


def fixPdf(pdfFile, destination):
def fix_pdf(pdf_file, destination):
    """
    Fix malformed pdf files when data are present after '%%EOF'

@@ -33,18 +33,16 @@ def fixPdf(pdfFile, destination):
    :param destination: destination
    """
    tmp = tempfile.NamedTemporaryFile()
    output = open(tmp.name, 'wb')
    with open(pdfFile, "rb") as fh:
        with open(pdfFile, "rb") as fh:
    with open(tmp.name, 'wb') as output:
        with open(pdf_file, "rb") as fh:
            for line in fh:
                output.write(line)
                if b'%%EOF' in line:
                    break
    output.close()
    shutil.copy(tmp.name, destination)


def tearpage_backend(filename, teared_pages=[0]):
def tearpage_backend(filename, teared_pages=None):
    """
    Copy filename to a tempfile, write pages to filename except the teared one.

@@ -56,29 +54,35 @@ def tearpage_backend(filename, teared_pages=[0]):
    :param teared_pages: Numbers of the pages to tear. Default to first page \
            only.
    """
    # Handle default argument
    if teared_pages is None:
        teared_pages = [0]

    # Copy the pdf to a tmp file
    tmp = tempfile.NamedTemporaryFile()
    shutil.copy(filename, tmp.name)
    with tempfile.NamedTemporaryFile() as tmp:
        # Copy the input file to tmp
        shutil.copy(filename, tmp.name)

    # Read the copied pdf
    try:
        input_file = PdfFileReader(open(tmp.name, 'rb'))
    except PdfReadError:
        fixPdf(filename, tmp.name)
        input_file = PdfFileReader(open(tmp.name, 'rb'))
    # Seek for the number of pages
    num_pages = input_file.getNumPages()
        # Read the copied pdf
        # TODO: Use with syntax
        try:
            input_file = PdfFileReader(open(tmp.name, 'rb'))
        except PdfReadError:
            fix_pdf(filename, tmp.name)
            input_file = PdfFileReader(open(tmp.name, 'rb'))
        # Seek for the number of pages
        num_pages = input_file.getNumPages()

    # Write pages excepted the first one
    output_file = PdfFileWriter()
    for i in range(num_pages):
        if i in teared_pages:
            continue
        output_file.addPage(input_file.getPage(i))
        # Write pages excepted the first one
        output_file = PdfFileWriter()
        for i in range(num_pages):
            if i in teared_pages:
                continue
            output_file.addPage(input_file.getPage(i))

    tmp.close()
    outputStream = open(filename, "wb")
    output_file.write(outputStream)
        tmp.close()
        outputStream = open(filename, "wb")
        output_file.write(outputStream)


def tearpage_needed(bibtex):
@@ -89,16 +93,16 @@ def tearpage_needed(bibtex):
    whether tearing is needed.
    :returns: A list of pages to tear.
    """
    for p in BAD_JOURNALS:
        if p in bibtex.get("journal", "").lower():
    for publisher in BAD_JOURNALS:
        if publisher in bibtex.get("journal", "").lower():
            # Bad journal is found, add pages to tear
            return BAD_JOURNALS[p]
            return BAD_JOURNALS[publisher]

    # If no bad journals are found, return an empty list
    return []


def tearpage(filename, bibtex=None, force=False):
def tearpage(filename, bibtex=None, force=None):
    """
    Tear some pages of the file if needed.

@@ -112,7 +116,7 @@ def tearpage(filename, bibtex=None, force=False):
    """
    # Fetch pages to tear
    pages_to_tear = []
    if force is not False:
    if force is not None:
        pages_to_tear = force
    elif bibtex is not None:
        pages_to_tear = tearpage_needed(bibtex)
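With the new signature, `force` is no longer a boolean: leaving it at ``None`` keeps the BibTeX-based detection through `tearpage_needed()`, while passing a list of page numbers tears exactly those pages. A short sketch of both call styles ("paper.pdf" and the BibTeX entry are placeholders; whether the first call tears anything depends on the `BAD_JOURNALS` table):

from libbmc.papers import tearpages

# Let the journal name found in the BibTeX entry decide which pages to tear
tearpages.tearpage("paper.pdf", bibtex={"journal": "Some Journal"})

# Explicitly tear the first page, whatever the BibTeX entry says
tearpages.tearpage("paper.pdf", force=[0])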
@@ -1,5 +0,0 @@
from libbmc.repositories import arxiv, hal

__all__ = [
    "arxiv", "hal"
]
@@ -1,15 +1,18 @@
"""
This file contains all the arXiv-related functions.
"""
import arxiv2bib
import bibtexparser
import io
import re
import requests
import tarfile
import xml.etree.ElementTree

from urllib.error import HTTPError


import arxiv2bib
import bibtexparser
import requests

from requests.exceptions import RequestException


@@ -268,7 +271,7 @@ def is_valid(arxiv_id):
    False
    """
    match = REGEX.match(arxiv_id)
    return ((match is not None) and (match.group(0) == arxiv_id))
    return (match is not None) and (match.group(0) == arxiv_id)


def get_bibtex(arxiv_id):
@@ -320,17 +323,17 @@ def extract_from_text(text):
                                    for i in REGEX.findall(text) if i[0] != ''])


def to_URL(arxiv_ids):
def to_url(arxiv_ids):
    """
    Convert a list of canonical DOIs to a list of DOIs URLs.

    :param dois: List of canonical DOIs.
    :returns: A list of DOIs URLs.

    >>> to_URL('1506.06690')
    >>> to_url('1506.06690')
    'http://arxiv.org/abs/1506.06690'

    >>> to_URL('1506.06690v1')
    >>> to_url('1506.06690v1')
    'http://arxiv.org/abs/1506.06690v1'
    """
    if isinstance(arxiv_ids, list):
@@ -358,16 +361,10 @@ def to_canonical(urls):
    >>> to_canonical('aaa') is None
    True
    """
    try:
        if isinstance(urls, list):
            return [next(iter(extract_from_text(url))) for url in urls]
        else:
            return next(iter(extract_from_text(urls)))
    except StopIteration:
        return None
    return tools.map_or_apply(extract_from_text, urls)


def from_DOI(doi):
def from_doi(doi):
    """
    Get the arXiv eprint id for a given DOI.

@@ -379,29 +376,29 @@ def from_DOI(doi):
    :param doi: The DOI of the resource to look for.
    :returns: The arXiv eprint id, or ``None`` if not found.

    >>> from_DOI('10.1209/0295-5075/111/40005')
    >>> from_doi('10.1209/0295-5075/111/40005')
    # Note: Test do not pass due to an arXiv API bug.
    '1506.06690'
    """
    try:
        r = requests.get("http://export.arxiv.org/api/query",
                         params={
                             "search_query": "doi:%s" % (doi,),
                             "max_results": 1
                         })
        r.raise_for_status()
        request = requests.get("http://export.arxiv.org/api/query",
                               params={
                                   "search_query": "doi:%s" % (doi,),
                                   "max_results": 1
                               })
        request.raise_for_status()
    except RequestException:
        return None
    e = xml.etree.ElementTree.fromstring(r.content)
    for entry in e.iter("{http://www.w3.org/2005/Atom}entry"):
        id = entry.find("{http://www.w3.org/2005/Atom}id").text
        # id is an arXiv full URL. We only want the id which is the last URL
        # component.
        return id.split("/")[-1]
    root = xml.etree.ElementTree.fromstring(request.content)
    for entry in root.iter("{http://www.w3.org/2005/Atom}entry"):
        arxiv_id = entry.find("{http://www.w3.org/2005/Atom}id").text
        # arxiv_id is an arXiv full URL. We only want the id which is the last
        # URL component.
        return arxiv_id.split("/")[-1]
    return None


def to_DOI(arxiv_id):
def to_doi(arxiv_id):
    """
    Get the associated DOI for a given arXiv eprint.

@@ -413,23 +410,23 @@ def to_DOI(arxiv_id):
    :param eprint: The arXiv eprint id.
    :returns: The DOI if any, or ``None``.

    >>> to_DOI('1506.06690v1')
    >>> to_doi('1506.06690v1')
    '10.1209/0295-5075/111/40005'

    >>> to_DOI('1506.06690')
    >>> to_doi('1506.06690')
    '10.1209/0295-5075/111/40005'
    """
    try:
        r = requests.get("http://export.arxiv.org/api/query",
                         params={
                             "id_list": arxiv_id,
                             "max_results": 1
                         })
        r.raise_for_status()
        request = requests.get("http://export.arxiv.org/api/query",
                               params={
                                   "id_list": arxiv_id,
                                   "max_results": 1
                               })
        request.raise_for_status()
    except RequestException:
        return None
    e = xml.etree.ElementTree.fromstring(r.content)
    for entry in e.iter("{http://www.w3.org/2005/Atom}entry"):
    root = xml.etree.ElementTree.fromstring(request.content)
    for entry in root.iter("{http://www.w3.org/2005/Atom}entry"):
        doi = entry.find("{http://arxiv.org/schemas/atom}doi")
        if doi is not None:
            return doi.text
@@ -451,9 +448,9 @@ def get_sources(arxiv_id):
    ``None``.
    """
    try:
        r = requests.get(ARXIV_EPRINT_URL.format(arxiv_id=arxiv_id))
        r.raise_for_status()
        file_object = io.BytesIO(r.content)
        request = requests.get(ARXIV_EPRINT_URL.format(arxiv_id=arxiv_id))
        request.raise_for_status()
        file_object = io.BytesIO(request.content)
        return tarfile.open(fileobj=file_object)
    except (RequestException, AssertionError, tarfile.TarError):
        return None
@@ -473,9 +470,9 @@ def get_bbl(arxiv_id):
    :returns: A list of the full text of the ``.bbl`` files (if any) \
            or ``None``.
    """
    tf = get_sources(arxiv_id)
    bbl_files = [i for i in tf.getmembers() if i.name.endswith(".bbl")]
    bbl_files = [tf.extractfile(member).read().decode(tarfile.ENCODING)
    tar_file = get_sources(arxiv_id)
    bbl_files = [i for i in tar_file.getmembers() if i.name.endswith(".bbl")]
    bbl_files = [tar_file.extractfile(member).read().decode(tarfile.ENCODING)
                 for member in bbl_files]
    return bbl_files

@@ -498,5 +495,5 @@ def get_citations(arxiv_id):
    bbl_files = get_bbl(arxiv_id)
    for bbl_file in bbl_files:
        # Fetch the cited DOIs for each of the bbl files
        dois.update(bbl.get_cited_DOIs(bbl_file))
        dois.update(bbl.get_cited_dois(bbl_file))
    return dois
@@ -33,7 +33,7 @@ def is_valid(hal_id):
    False
    """
    match = REGEX.match(hal_id)
    return ((match is not None) and (match.group(0) == hal_id))
    return (match is not None) and (match.group(0) == hal_id)


def extract_from_text(text):
@@ -9,9 +9,11 @@ from itertools import islice, chain

# Huge URL regex taken from https://gist.github.com/gruber/8891611
URL_REGEX = re.compile(r"(?i)\b((?:https?:(?:/{1,3}|[a-z0-9%])|[a-z0-9.\-]+[.](?:com|net|org|edu|gov|mil|aero|asia|biz|cat|coop|info|int|jobs|mobi|museum|name|post|pro|tel|travel|xxx|ac|ad|ae|af|ag|ai|al|am|an|ao|aq|ar|as|at|au|aw|ax|az|ba|bb|bd|be|bf|bg|bh|bi|bj|bm|bn|bo|br|bs|bt|bv|bw|by|bz|ca|cc|cd|cf|cg|ch|ci|ck|cl|cm|cn|co|cr|cs|cu|cv|cx|cy|cz|dd|de|dj|dk|dm|do|dz|ec|ee|eg|eh|er|es|et|eu|fi|fj|fk|fm|fo|fr|ga|gb|gd|ge|gf|gg|gh|gi|gl|gm|gn|gp|gq|gr|gs|gt|gu|gw|gy|hk|hm|hn|hr|ht|hu|id|ie|il|im|in|io|iq|ir|is|it|je|jm|jo|jp|ke|kg|kh|ki|km|kn|kp|kr|kw|ky|kz|la|lb|lc|li|lk|lr|ls|lt|lu|lv|ly|ma|mc|md|me|mg|mh|mk|ml|mm|mn|mo|mp|mq|mr|ms|mt|mu|mv|mw|mx|my|mz|na|nc|ne|nf|ng|ni|nl|no|np|nr|nu|nz|om|pa|pe|pf|pg|ph|pk|pl|pm|pn|pr|ps|pt|pw|py|qa|re|ro|rs|ru|rw|sa|sb|sc|sd|se|sg|sh|si|sj|Ja|sk|sl|sm|sn|so|sr|ss|st|su|sv|sx|sy|sz|tc|td|tf|tg|th|tj|tk|tl|tm|tn|to|tp|tr|tt|tv|tw|tz|ua|ug|uk|us|uy|uz|va|vc|ve|vg|vi|vn|vu|wf|ws|ye|yt|yu|za|zm|zw)/)(?:[^\s()<>{}\[\]]+|\([^\s()]*?\([^\s()]+\)[^\s()]*?\)|\([^\s]+?\))+(?:\([^\s()]*?\([^\s()]+\)[^\s()]*?\)|\([^\s]+?\)|[^\s`!()\[\]{};:'\".,<>?«»“”‘’])|(?:(?<!@)[a-z0-9]+(?:[.\-][a-z0-9]+)*[.](?:com|net|org|edu|gov|mil|aero|asia|biz|cat|coop|info|int|jobs|mobi|museum|name|post|pro|tel|travel|xxx|ac|ad|ae|af|ag|ai|al|am|an|ao|aq|ar|as|at|au|aw|ax|az|ba|bb|bd|be|bf|bg|bh|bi|bj|bm|bn|bo|br|bs|bt|bv|bw|by|bz|ca|cc|cd|cf|cg|ch|ci|ck|cl|cm|cn|co|cr|cs|cu|cv|cx|cy|cz|dd|de|dj|dk|dm|do|dz|ec|ee|eg|eh|er|es|et|eu|fi|fj|fk|fm|fo|fr|ga|gb|gd|ge|gf|gg|gh|gi|gl|gm|gn|gp|gq|gr|gs|gt|gu|gw|gy|hk|hm|hn|hr|ht|hu|id|ie|il|im|in|io|iq|ir|is|it|je|jm|jo|jp|ke|kg|kh|ki|km|kn|kp|kr|kw|ky|kz|la|lb|lc|li|lk|lr|ls|lt|lu|lv|ly|ma|mc|md|me|mg|mh|mk|ml|mm|mn|mo|mp|mq|mr|ms|mt|mu|mv|mw|mx|my|mz|na|nc|ne|nf|ng|ni|nl|no|np|nr|nu|nz|om|pa|pe|pf|pg|ph|pk|pl|pm|pn|pr|ps|pt|pw|py|qa|re|ro|rs|ru|rw|sa|sb|sc|sd|se|sg|sh|si|sj|Ja|sk|sl|sm|sn|so|sr|ss|st|su|sv|sx|sy|sz|tc|td|tf|tg|th|tj|tk|tl|tm|tn|to|tp|tr|tt|tv|tw|tz|ua|ug|uk|us|uy|uz|va|vc|ve|vg|vi|vn|vu|wf|ws|ye|yt|yu|za|zm|zw)\b/?(?!@)))")
_SLUGIFY_STRIP_RE = re.compile(r'[^\w\s-]')
_SLUGIFY_HYPHENATE_RE = re.compile(r'[\s]+')


def replaceAll(text, replace_dict):
def replace_all(text, replace_dict):
    """
    Replace multiple strings in a text.

@@ -26,7 +28,7 @@ def replaceAll(text, replace_dict):
    substitution.
    :returns: Text after replacements.

    >>> replaceAll("foo bar foo thing", {"foo": "oof", "bar": "rab"})
    >>> replace_all("foo bar foo thing", {"foo": "oof", "bar": "rab"})
    'oof rab oof thing'
    """
    for i, j in replace_dict.items():
@@ -34,6 +36,24 @@ def replaceAll(text, replace_dict):
    return text


def map_or_apply(function, param):
    """
    Map the function on ``param``, or apply it, depending whether ``param`` \
        is a list or an item.

    :param function: The function to apply.
    :param param: The parameter to feed the function with (list or item).
    :returns: The computed value or ``None``.
    """
    try:
        if isinstance(param, list):
            return [next(iter(function(i))) for i in param]
        else:
            return next(iter(function(param)))
    except StopIteration:
        return None
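`map_or_apply()` is the shared helper behind the simplified `to_canonical()` bodies in the `doi` and `arxiv` modules earlier in this commit: it applies an extractor to a single item or maps it over a list, and turns an empty result into ``None``. A small illustration with a throwaway extractor that is not part of the library:

from libbmc import tools

def first_word(text):
    # Toy extractor: returns a list of candidates, possibly empty
    return text.split()

print(tools.map_or_apply(first_word, "hello world"))        # 'hello'
print(tools.map_or_apply(first_word, ["foo bar", "baz"]))   # ['foo', 'baz']
print(tools.map_or_apply(first_word, ""))                   # None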

def clean_whitespaces(text):
    """
    Remove multiple whitespaces from text. Also removes leading and trailing \
@@ -85,13 +105,13 @@ def batch(iterable, size):
    >>> [list(i) for i in batch([1, 2, 3, 4, 5], 2)]
    [[1, 2], [3, 4], [5]]
    """
    it = iter(iterable)
    item = iter(iterable)
    while True:
        bi = islice(it, size)
        yield chain([next(bi)], bi)
        batch_iterator = islice(item, size)
        yield chain([next(batch_iterator)], batch_iterator)


def remove_URLs(text):
def remove_urls(text):
    """
    Remove URLs from a given text (only removes http, https and naked domains \
        URLs).
@@ -99,16 +119,12 @@ def remove_URLs(text):
    :param text: The text to remove URLs from.
    :returns: The text without URLs.

    >>> remove_URLs("foobar http://example.com https://example.com foobar")
    >>> remove_urls("foobar http://example.com https://example.com foobar")
    'foobar foobar'
    """
    return clean_whitespaces(URL_REGEX.sub("", text))


_slugify_strip_re = re.compile(r'[^\w\s-]')
_slugify_hyphenate_re = re.compile(r'[\s]+')


def slugify(value):
    """
    Normalizes string, converts to lowercase, removes non-alpha characters,
@@ -127,5 +143,5 @@ def slugify(value):
    value = unicode_type(value)
    value = (unicodedata.normalize('NFKD', value).
             encode('ascii', 'ignore').decode('ascii'))
    value = unicode_type(_slugify_strip_re.sub('', value).strip())
    return _slugify_hyphenate_re.sub('_', value)
    value = unicode_type(_SLUGIFY_STRIP_RE.sub('', value).strip())
    return _SLUGIFY_HYPHENATE_RE.sub('_', value)