diff --git a/libbmc/papers/identifiers.py b/libbmc/papers/identifiers.py
new file mode 100644
index 0000000..a0c408d
--- /dev/null
+++ b/libbmc/papers/identifiers.py
@@ -0,0 +1,64 @@
+"""
+This file contains various functions to fetch unique identifiers from papers
+(DOIs, arXiv id etc).
+
+Needs pdftotext and/or djvutxt installed on the machine.
+"""
+import subprocess
+
+from libbmc import doi, isbn
+from libbmc.repositories import arxiv, hal
+
+
+def find_identifiers(src):
+    """
+    Search for a valid identifier (DOI, ISBN, arXiv, HAL) in a given file.
+
+    The file is converted to text with ``pdftotext`` (``.pdf``) or ``djvutxt``
+    (``.djvu``); ISBN, DOI, arXiv and HAL extractors are then tried in turn.
+
+    .. note ::
+
+        This function returns the first matching identifier, that is the most
+        likely to be relevant for this file. However, it may fail and return an
+        identifier taken from the references or another paper.
+
+    :param src: Path to the file to scan.
+
+    :returns: a tuple (type, identifier), where ``identifier`` is the value \
+            returned by the matching ``extract_from_text``, or ``None`` if \
+            the file type is unsupported, the converter could not be \
+            started or nothing was found.
+    """
+    if src.endswith(".pdf"):
+        command = ["pdftotext", src, "-"]
+    elif src.endswith(".djvu"):
+        command = ["djvutxt", src]
+    else:
+        # Unsupported file type.
+        return None
+
+    try:
+        totext = subprocess.Popen(command,
+                                  stdout=subprocess.PIPE,
+                                  stderr=subprocess.PIPE)
+    except OSError:
+        # The converter is not installed or could not be started.
+        return None
+
+    # communicate() drains both pipes and waits for exit: no deadlock on a
+    # full stderr pipe, and no output lost if the converter exits quickly.
+    raw_text, _ = totext.communicate()
+    extract_full = ' '.join(line.decode("utf-8").strip()
+                            for line in raw_text.splitlines())
+
+    # Order matters: the first extractor with a match wins.
+    extractors = (("isbn", isbn), ("doi", doi),
+                  ("arxiv", arxiv), ("hal", hal))
+    for id_type, module in extractors:
+        found = module.extract_from_text(extract_full)
+        # Test the extraction result, not the (always truthy) module.
+        if found:
+            return (id_type, found)
+
+    return None
diff --git a/libbmc/repositories/arxiv.py b/libbmc/repositories/arxiv.py
index 7862be5..acf6a6f 100644
--- a/libbmc/repositories/arxiv.py
+++ b/libbmc/repositories/arxiv.py
@@ -189,7 +189,7 @@ def is_valid(arxiv_id):
 
 def get_bibtex(arxiv_id):
     """
-    Get a BibTeX entry for a given DOI.
+    Get a BibTeX entry for a given arXiv ID.
 
     .. note::
 
diff --git a/libbmc/repositories/hal.py b/libbmc/repositories/hal.py
index 4640904..5e36165 100644
--- a/libbmc/repositories/hal.py
+++ b/libbmc/repositories/hal.py
@@ -1 +1,33 @@
-# TODO
+"""
+This file contains all the HAL-related functions.
+
+TODO:
+    * Add functions to homogenize the interface with the arXiv one.
+"""
+import re
+
+from libbmc import tools
+
+# "hal-" plus 8 digits (group 1), then ", version " and a number (group 2).
+REGEX = re.compile(r"(hal-\d{8}), version (\d+)")
+
+
+def is_valid(hal_id):
+    """
+    Tell whether a string is, in its entirety, a well-formed HAL id.
+
+    :param hal_id: The HAL id to be checked.
+    :returns: ``True`` if ``REGEX`` matches the whole string, else ``False``.
+    """
+    found = REGEX.match(hal_id)
+    return False if found is None else found.group(0) == hal_id
+
+
+def extract_from_text(text):
+    """Collect the HAL ids mentioned in a text, without duplicates.
+
+    :param text: The text to extract HAL ids from.
+    :returns: The ``REGEX.findall`` results for ``text``, deduplicated.
+    """
+    matches = REGEX.findall(text)
+    return tools.remove_duplicates(matches)