Bugfixes in fetcher.py + function to find HAL id
This commit is contained in:
parent
289c7dece4
commit
787113db66
53
fetcher.py
53
fetcher.py
@ -58,8 +58,11 @@ def findISBN(src):
|
|||||||
stdout=subprocess.PIPE,
|
stdout=subprocess.PIPE,
|
||||||
stderr=subprocess.PIPE,
|
stderr=subprocess.PIPE,
|
||||||
bufsize=1)
|
bufsize=1)
|
||||||
|
else:
|
||||||
|
return False
|
||||||
|
|
||||||
while totext.poll() is None:
|
while totext.poll() is None:
|
||||||
extractfull = totext.stdin.readline()
|
extractfull = totext.stdout.readline()
|
||||||
extractISBN = isbn_re.search(extractfull.lower().replace('Œ',
|
extractISBN = isbn_re.search(extractfull.lower().replace('Œ',
|
||||||
'-'))
|
'-'))
|
||||||
if extractISBN:
|
if extractISBN:
|
||||||
@ -112,9 +115,11 @@ def findDOI(src):
|
|||||||
totext = subprocess.Popen(["djvutxt", src],
|
totext = subprocess.Popen(["djvutxt", src],
|
||||||
stdout=subprocess.PIPE,
|
stdout=subprocess.PIPE,
|
||||||
stderr=subprocess.PIPE)
|
stderr=subprocess.PIPE)
|
||||||
|
else:
|
||||||
|
return False
|
||||||
|
|
||||||
while totext.poll() is None:
|
while totext.poll() is None:
|
||||||
extractfull = totext.stdin.readline()
|
extractfull = totext.stdout.readline()
|
||||||
extractDOI = doi_re.search(extractfull.lower().replace('Œ', '-'))
|
extractDOI = doi_re.search(extractfull.lower().replace('Œ', '-'))
|
||||||
if not extractDOI:
|
if not extractDOI:
|
||||||
# PNAS fix
|
# PNAS fix
|
||||||
@ -182,7 +187,7 @@ arXiv_re = re.compile(r'arXiv:\s*([\w\.\/\-]+)')
|
|||||||
|
|
||||||
|
|
||||||
def findArXivId(src):
|
def findArXivId(src):
|
||||||
"""Search for a valid arXiv id in src.
|
"""Searches for a valid arXiv id in src.
|
||||||
|
|
||||||
Returns the arXiv id or False if not found or an error occurred.
|
Returns the arXiv id or False if not found or an error occurred.
|
||||||
From : https://github.com/minad/bibsync/blob/3fdf121016f6187a2fffc66a73cd33b45a20e55d/lib/bibsync/utils.rb
|
From : https://github.com/minad/bibsync/blob/3fdf121016f6187a2fffc66a73cd33b45a20e55d/lib/bibsync/utils.rb
|
||||||
@ -195,9 +200,11 @@ def findArXivId(src):
|
|||||||
totext = subprocess.Popen(["djvutxt", src],
|
totext = subprocess.Popen(["djvutxt", src],
|
||||||
stdout=subprocess.PIPE,
|
stdout=subprocess.PIPE,
|
||||||
stderr=subprocess.PIPE)
|
stderr=subprocess.PIPE)
|
||||||
|
else:
|
||||||
|
return False
|
||||||
|
|
||||||
while totext.poll() is None:
|
while totext.poll() is None:
|
||||||
extractfull = totext.stdin.readline()
|
extractfull = totext.stdout.readline()
|
||||||
extractID = arXiv_re.search(extractfull)
|
extractID = arXiv_re.search(extractfull)
|
||||||
if extractID:
|
if extractID:
|
||||||
totext.terminate()
|
totext.terminate()
|
||||||
@ -209,7 +216,7 @@ def findArXivId(src):
|
|||||||
tools.warning(err)
|
tools.warning(err)
|
||||||
return False
|
return False
|
||||||
else:
|
else:
|
||||||
return extractID
|
return extractID.group(1)
|
||||||
|
|
||||||
|
|
||||||
def arXiv2Bib(arxiv):
|
def arXiv2Bib(arxiv):
|
||||||
@ -224,3 +231,39 @@ def arXiv2Bib(arxiv):
|
|||||||
else:
|
else:
|
||||||
return bib.bibtex()
|
return bib.bibtex()
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
HAL_re = re.compile(r'(hal-\d{8}), version (\d+)')


def findHALId(src):
    """Searches for a valid HAL id in src.

    src is the path of a ".pdf" or ".djvu" file. Its text is extracted with
    pdftotext (for .pdf) or djvutxt (for .djvu) and scanned line by line
    until the first line matching HAL_re.

    Returns a tuple of the HAL id and the version (both as strings)
    or False if not found or an error occurred. Any other file extension
    returns False without running an external tool.
    """
    if src.endswith(".pdf"):
        totext = subprocess.Popen(["pdftotext", src, "-"],
                                  stdout=subprocess.PIPE,
                                  stderr=subprocess.PIPE)
    elif src.endswith(".djvu"):
        totext = subprocess.Popen(["djvutxt", src],
                                  stdout=subprocess.PIPE,
                                  stderr=subprocess.PIPE)
    else:
        return False

    # Bound up front so that an empty output cannot leave the name undefined.
    extractID = None

    # Read until EOF rather than until the child exits: the converter may
    # terminate while unread text is still buffered in the pipe, and that
    # text must be scanned too.
    for extractfull in iter(totext.stdout.readline, b""):
        # The pipe yields bytes on Python 3; the pattern is a text pattern.
        if not isinstance(extractfull, str):
            extractfull = extractfull.decode("utf-8", "replace")
        extractID = HAL_re.search(extractfull)
        if extractID:
            # No need to convert the rest of the document.
            totext.terminate()
            break

    # communicate() waits for the child and collects its stderr output.
    err = totext.communicate()[1]
    if totext.returncode > 0:
        # Error happened (a terminate() above gives a negative return code,
        # which is not treated as an error).
        tools.warning(err)
        return False

    if extractID is None:
        # Whole text scanned without finding a HAL id.
        return False

    return extractID.group(1), extractID.group(2)
|
||||||
|
Loading…
Reference in New Issue
Block a user