51 lines
1.1 KiB
Python
51 lines
1.1 KiB
Python
"""
|
|
This file contains all the HAL-related functions.
|
|
|
|
TODO:
|
|
* Add functions to homogenize the interface with the arXiv one.
|
|
"""
|
|
import re
|
|
|
|
from libbmc import tools
|
|
|
|
|
|
# TODO: This is too restrictive
|
|
# Compiled pattern recognising a HAL identifier with an optional version.
# Capture groups:
#   1: the whole identifier, including the version suffix when present
#   2: the bare identifier ("hal-" followed by exactly eight digits)
#   3: the optional version suffix, in either of its two spellings
#   4: the long suffix spelling (", version N"); 5: its number N
#   6: the number N of the short suffix spelling ("vN")
# The pattern carries no anchors, so callers that need a whole-string match
# have to check the extent of the match themselves.
REGEX = re.compile(r"((hal-\d{8})((, version (\d+))|v(\d+))?)")
|
|
|
|
|
|
def is_valid(hal_id):
    """
    Tell whether a string is, in its entirety, a well-formed HAL id.

    :param hal_id: The candidate HAL id.
    :returns: ``True`` if the whole string matches the HAL id pattern, \
            ``False`` otherwise.

    >>> is_valid("hal-01258754, version 1")
    True

    >>> is_valid("hal-01258754")
    True

    >>> is_valid("hal-01258754v2")
    True

    >>> is_valid("foobar")
    False
    """
    found = REGEX.match(hal_id)
    if found is None:
        # Nothing HAL-like at the start of the string.
        return False
    # REGEX is only anchored at the start by ``match``; requiring the matched
    # text to equal the input rejects strings with trailing garbage.
    return found.group(0) == hal_id
|
|
|
|
|
|
def extract_from_text(text):
    """
    Extract HAL ids from a text.

    Every occurrence of the HAL id pattern in ``text`` is collected, then
    duplicates are dropped through ``tools.remove_duplicates`` (the ordering
    of the result is whatever that helper yields).

    :param text: The text to extract HAL ids from.
    :returns: A list of matching HAL ids, including their version suffix \
            when one is present.

    >>> sorted(extract_from_text("hal-01258754 hal-01258754v2 foobar"))
    ['hal-01258754', 'hal-01258754v2']
    """
    # REGEX has several capture groups, so ``findall`` yields one tuple of
    # groups per match; the first group spans the complete identifier. No
    # emptiness filter is needed: a match always contains at least the
    # "hal-" prefix and its eight digits.
    hal_ids = [groups[0] for groups in REGEX.findall(text)]
    return tools.remove_duplicates(hal_ids)
|