From 330c2f2b5f2a1bd78473c9024563fa51b38e7df6 Mon Sep 17 00:00:00 2001 From: Blair Bonnett Date: Mon, 7 Dec 2015 15:39:57 +1300 Subject: [PATCH] Search for Digital Object Identifier as well as DOI in text. If the paper identifier is marked with Digital Object Identifier, but one or more of its references has a DOI link in it, then the reference DOI is taken as the paper one. This change replaces the words Digital Object Identifier with DOI in the text being searched to pull out the correct ID. --- libbmc/fetcher.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/libbmc/fetcher.py b/libbmc/fetcher.py index ef0c929..29fb716 100644 --- a/libbmc/fetcher.py +++ b/libbmc/fetcher.py @@ -213,15 +213,14 @@ def findArticleID(src, only=["DOI", "arXiv"]): extractfull += ' '.join([i.decode(stdout_encoding).strip() for i in totext.stdout.readlines()]) # Try to extract DOI if "DOI" in only: - extractID = doi_re.search(extractfull.lower().replace('Œ', '-')) + extractlower = extractfull.lower().replace('digital object identifier', 'doi') + extractID = doi_re.search(extractlower.replace('Œ', '-')) if not extractID: # PNAS fix - extractID = doi_pnas_re.search(extractfull. - lower(). - replace('pnas', '/pnas')) + extractID = doi_pnas_re.search(extractlower.replace('pnas', '/pnas')) if not extractID: # JSB fix - extractID = doi_jsb_re.search(extractfull.lower()) + extractID = doi_jsb_re.search(extractlower) if extractID: extract_type = "DOI" totext.terminate()