Check output before processing the whole file for ISBN and DOI search
This commit is contained in:
parent
91685bc46b
commit
3d07af0e71
@ -108,8 +108,6 @@ A list of ideas and TODO. Don't hesitate to give feedback on the ones you really
|
||||
10. Refactor
|
||||
11. Use bibtex-parser lib to write bibtex, instead of parsed2BibTex
|
||||
12. Rebuild function
|
||||
15. Check output of subprocesses before they end
|
||||
16. TODO in files
|
||||
20. No DOI for arXiv / HAL
|
||||
30. Parameter to disable remote search
|
||||
40. Open file
|
||||
|
44
fetcher.py
44
fetcher.py
@ -50,19 +50,26 @@ def findISBN(src):
|
||||
if src.endswith(".pdf"):
|
||||
totext = subprocess.Popen(["pdftotext", src, "-"],
|
||||
stdout=subprocess.PIPE,
|
||||
stderr=subprocess.PIPE)
|
||||
stderr=subprocess.PIPE,
|
||||
bufsize=1)
|
||||
elif src.endswith(".djvu"):
|
||||
totext = subprocess.Popen(["djvutxt", src],
|
||||
stdout=subprocess.PIPE,
|
||||
stderr=subprocess.PIPE)
|
||||
extractfull = totext.communicate()
|
||||
# TODO : ^ Return result before processing the whole book ?
|
||||
if extractfull[1] is not "":
|
||||
# Error happened
|
||||
tools.warning(extractfull[1])
|
||||
return False
|
||||
extractfull = extractfull[0]
|
||||
stderr=subprocess.PIPE,
|
||||
bufsize=1)
|
||||
while totext.poll() == None:
|
||||
extractfull = totext.stdin.readline()
|
||||
extractISBN = isbn_re.search(extractfull.lower().replace('Œ', '-'))
|
||||
if extractISBN:
|
||||
totext.terminate()
|
||||
break
|
||||
|
||||
err = totext.communicate()[1]
|
||||
if totext.returncode > 0:
|
||||
# Error happened
|
||||
tools.warning(err)
|
||||
return False
|
||||
|
||||
cleanISBN = False
|
||||
# Clean ISBN is the ISBN number without separators
|
||||
if extractISBN:
|
||||
@ -103,13 +110,9 @@ def findDOI(src):
|
||||
totext = subprocess.Popen(["djvutxt", src],
|
||||
stdout=subprocess.PIPE,
|
||||
stderr=subprocess.PIPE)
|
||||
extractfull = totext.communicate()
|
||||
# TODO : ^ Return result before full conversion ?
|
||||
if extractfull[1] is not "":
|
||||
# Error happened
|
||||
tools.warning(extractfull[1])
|
||||
return False
|
||||
extractfull = extractfull[0]
|
||||
|
||||
while totext.poll() == None:
|
||||
extractfull = totext.stdin.readline()
|
||||
extractDOI = doi_re.search(extractfull.lower().replace('Œ', '-'))
|
||||
if not extractDOI:
|
||||
# PNAS fix
|
||||
@ -118,6 +121,15 @@ def findDOI(src):
|
||||
if not extractDOI:
|
||||
# JSB fix
|
||||
extractDOI = doi_jsb_re.search(extractfull.lower())
|
||||
if extractDOI:
|
||||
totext.terminate()
|
||||
break
|
||||
|
||||
err = totext.communicate()[1]
|
||||
if totext.returncode > 0:
|
||||
# Error happened
|
||||
tools.warning(err)
|
||||
return False
|
||||
|
||||
cleanDOI = False
|
||||
if extractDOI:
|
||||
|
@ -52,7 +52,7 @@ def tearpage(filename):
|
||||
|
||||
# Write pages except the first one
|
||||
output_file = PdfFileWriter()
|
||||
for i in range(0, num_pages):
|
||||
for i in range(1, num_pages):
|
||||
output_file.addPage(input_file.getPage(i))
|
||||
|
||||
tmp.close()
|
||||
|
Loading…
Reference in New Issue
Block a user