Check output before processing the whole file for ISBN and DOI search

This commit is contained in:
Phyks 2014-04-30 00:36:15 +02:00
parent 91685bc46b
commit 3d07af0e71
3 changed files with 35 additions and 25 deletions

View File

@@ -108,8 +108,6 @@ A list of ideas and TODO. Don't hesitate to give feedback on the ones you really
10. Refactor 10. Refactor
11. Use bibtex-parser lib to write bibtex, instead of parsed2BibTex 11. Use bibtex-parser lib to write bibtex, instead of parsed2BibTex
12. Rebuild function 12. Rebuild function
15. Check output of subprocesses before it ends
16. TODO in files
20. No DOI for arXiv / HAL 20. No DOI for arXiv / HAL
30. Parameter to disable remote search 30. Parameter to disable remote search
40. Open file 40. Open file

View File

@@ -50,19 +50,26 @@ def findISBN(src):
if src.endswith(".pdf"): if src.endswith(".pdf"):
totext = subprocess.Popen(["pdftotext", src, "-"], totext = subprocess.Popen(["pdftotext", src, "-"],
stdout=subprocess.PIPE, stdout=subprocess.PIPE,
stderr=subprocess.PIPE) stderr=subprocess.PIPE,
bufsize=1)
elif src.endswith(".djvu"): elif src.endswith(".djvu"):
totext = subprocess.Popen(["djvutxt", src], totext = subprocess.Popen(["djvutxt", src],
stdout=subprocess.PIPE, stdout=subprocess.PIPE,
stderr=subprocess.PIPE) stderr=subprocess.PIPE,
extractfull = totext.communicate() bufsize=1)
# TODO : ^ Return result before processing the whole book ? while totext.poll() == None:
if extractfull[1] is not "": extractfull = totext.stdin.readline()
extractISBN = isbn_re.search(extractfull.lower().replace('Œ', '-'))
if extractISBN:
totext.terminate()
break
err = totext.communicate()[1]
if totext.returncode > 0:
# Error happened # Error happened
tools.warning(extractfull[1]) tools.warning(err)
return False return False
extractfull = extractfull[0]
extractISBN = isbn_re.search(extractfull.lower().replace('Œ', '-'))
cleanISBN = False cleanISBN = False
# Clean ISBN is the ISBN number without separators # Clean ISBN is the ISBN number without separators
if extractISBN: if extractISBN:
@@ -103,21 +110,26 @@ def findDOI(src):
totext = subprocess.Popen(["djvutxt", src], totext = subprocess.Popen(["djvutxt", src],
stdout=subprocess.PIPE, stdout=subprocess.PIPE,
stderr=subprocess.PIPE) stderr=subprocess.PIPE)
extractfull = totext.communicate()
# TODO : ^ Return result before full conversion ? while totext.poll() == None:
if extractfull[1] is not "": extractfull = totext.stdin.readline()
# Error happened extractDOI = doi_re.search(extractfull.lower().replace('Œ', '-'))
tools.warning(extractfull[1])
return False
extractfull = extractfull[0]
extractDOI = doi_re.search(extractfull.lower().replace('Œ', '-'))
if not extractDOI:
# PNAS fix
extractDOI = doi_pnas_re.search(extractfull.lower().replace('pnas',
'/pnas'))
if not extractDOI: if not extractDOI:
# JSB fix # PNAS fix
extractDOI = doi_jsb_re.search(extractfull.lower()) extractDOI = doi_pnas_re.search(extractfull.lower().replace('pnas',
'/pnas'))
if not extractDOI:
# JSB fix
extractDOI = doi_jsb_re.search(extractfull.lower())
if extractDOI:
totext.terminate()
break
err = totext.communicate()[1]
if totext.returncode > 0:
# Error happened
tools.warning(err)
return False
cleanDOI = False cleanDOI = False
if extractDOI: if extractDOI:

View File

@@ -52,7 +52,7 @@ def tearpage(filename):
# Write pages excepted the first one # Write pages excepted the first one
output_file = PdfFileWriter() output_file = PdfFileWriter()
for i in range(0, num_pages): for i in range(1, num_pages):
output_file.addPage(input_file.getPage(i)) output_file.addPage(input_file.getPage(i))
tmp.close() tmp.close()