Search for Digital Object Identifier as well as DOI in text.

If the paper's own identifier is labelled "Digital Object Identifier"
rather than "DOI", but one or more of its references contains a DOI
link, then the reference's DOI is mistakenly taken as the paper's own.
This change replaces the words "Digital Object Identifier" with "DOI"
in the (lower-cased) text being searched, so that the correct ID is
extracted.
This commit is contained in:
Blair Bonnett 2015-12-07 15:39:57 +13:00
parent 5f8665940d
commit 330c2f2b5f

View File

@ -213,15 +213,14 @@ def findArticleID(src, only=["DOI", "arXiv"]):
extractfull += ' '.join([i.decode(stdout_encoding).strip() for i in totext.stdout.readlines()])
# Try to extract DOI
if "DOI" in only:
extractID = doi_re.search(extractfull.lower().replace('Œ', '-'))
extractlower = extractfull.lower().replace('digital object identifier', 'doi')
extractID = doi_re.search(extractlower.replace('Œ', '-'))
if not extractID:
# PNAS fix
extractID = doi_pnas_re.search(extractfull.
lower().
replace('pnas', '/pnas'))
extractID = doi_pnas_re.search(extractlower.replace('pnas', '/pnas'))
if not extractID:
# JSB fix
extractID = doi_jsb_re.search(extractfull.lower())
extractID = doi_jsb_re.search(extractlower)
if extractID:
extract_type = "DOI"
totext.terminate()