Check output before processing the whole file for ISBN and DOI search

This commit is contained in:
Phyks 2014-04-30 00:36:15 +02:00
parent 91685bc46b
commit 3d07af0e71
3 changed files with 35 additions and 25 deletions

View File

@ -108,8 +108,6 @@ A list of ideas and TODO. Don't hesitate to give feedback on the ones you really
10. Refactor
11. Use bibtex-parser lib to write bibtex, instead of parsed2BibTex
12. Rebuild function
15. Check the output of subprocesses before they end
16. TODO in files
20. No DOI for arXiv / HAL
30. Parameter to disable remote search
40. Open file

View File

@ -50,19 +50,26 @@ def findISBN(src):
if src.endswith(".pdf"):
totext = subprocess.Popen(["pdftotext", src, "-"],
stdout=subprocess.PIPE,
stderr=subprocess.PIPE)
stderr=subprocess.PIPE,
bufsize=1)
elif src.endswith(".djvu"):
totext = subprocess.Popen(["djvutxt", src],
stdout=subprocess.PIPE,
stderr=subprocess.PIPE)
extractfull = totext.communicate()
# TODO : ^ Return result before processing the whole book ?
if extractfull[1] is not "":
stderr=subprocess.PIPE,
bufsize=1)
while totext.poll() == None:
extractfull = totext.stdin.readline()
extractISBN = isbn_re.search(extractfull.lower().replace('Œ', '-'))
if extractISBN:
totext.terminate()
break
err = totext.communicate()[1]
if totext.returncode > 0:
# Error happened
tools.warning(extractfull[1])
tools.warning(err)
return False
extractfull = extractfull[0]
extractISBN = isbn_re.search(extractfull.lower().replace('Œ', '-'))
cleanISBN = False
# Clean ISBN is the ISBN number without separators
if extractISBN:
@ -103,21 +110,26 @@ def findDOI(src):
totext = subprocess.Popen(["djvutxt", src],
stdout=subprocess.PIPE,
stderr=subprocess.PIPE)
extractfull = totext.communicate()
# TODO : ^ Return result before full conversion ?
if extractfull[1] is not "":
# Error happened
tools.warning(extractfull[1])
return False
extractfull = extractfull[0]
extractDOI = doi_re.search(extractfull.lower().replace('Œ', '-'))
if not extractDOI:
# PNAS fix
extractDOI = doi_pnas_re.search(extractfull.lower().replace('pnas',
'/pnas'))
while totext.poll() == None:
extractfull = totext.stdin.readline()
extractDOI = doi_re.search(extractfull.lower().replace('Œ', '-'))
if not extractDOI:
# JSB fix
extractDOI = doi_jsb_re.search(extractfull.lower())
# PNAS fix
extractDOI = doi_pnas_re.search(extractfull.lower().replace('pnas',
'/pnas'))
if not extractDOI:
# JSB fix
extractDOI = doi_jsb_re.search(extractfull.lower())
if extractDOI:
totext.terminate()
break
err = totext.communicate()[1]
if totext.returncode > 0:
# Error happened
tools.warning(err)
return False
cleanDOI = False
if extractDOI:

View File

@ -52,7 +52,7 @@ def tearpage(filename):
# Write all pages except the first one
output_file = PdfFileWriter()
for i in range(0, num_pages):
for i in range(1, num_pages):
output_file.addPage(input_file.getPage(i))
tmp.close()