Merge pull request #33 from bcbnz/fixdoisearch

Search for Digital Object Identifier as well as DOI in text.
This commit is contained in:
Lucas Verney 2015-12-07 19:08:20 +01:00
commit c84159068c

View File

@@ -213,15 +213,14 @@ def findArticleID(src, only=["DOI", "arXiv"]):
extractfull += ' '.join([i.decode(stdout_encoding).strip() for i in totext.stdout.readlines()])
# Try to extract DOI
if "DOI" in only:
extractID = doi_re.search(extractfull.lower().replace('Œ', '-'))
extractlower = extractfull.lower().replace('digital object identifier', 'doi')
extractID = doi_re.search(extractlower.replace('Œ', '-'))
if not extractID:
# PNAS fix
extractID = doi_pnas_re.search(extractfull.
lower().
replace('pnas', '/pnas'))
extractID = doi_pnas_re.search(extractlower.replace('pnas', '/pnas'))
if not extractID:
# JSB fix
extractID = doi_jsb_re.search(extractfull.lower())
extractID = doi_jsb_re.search(extractlower)
if extractID:
extract_type = "DOI"
totext.terminate()