Check output before processing the whole file for ISBN and DOI search
This commit is contained in:
parent
91685bc46b
commit
3d07af0e71
@ -108,8 +108,6 @@ A list of ideas and TODO. Don't hesitate to give feedback on the ones you really
|
|||||||
10. Refactor
|
10. Refactor
|
||||||
11. Use bibtex-parser lib to write bibtex, instead of parsed2BibTex
|
11. Use bibtex-parser lib to write bibtex, instead of parsed2BibTex
|
||||||
12. Rebuild function
|
12. Rebuild function
|
||||||
15. Check output of subprocesses before it ends
|
|
||||||
16. TODO in files
|
|
||||||
20. No DOI for arXiv / HAL
|
20. No DOI for arXiv / HAL
|
||||||
30. Parameter to disable remote search
|
30. Parameter to disable remote search
|
||||||
40. Open file
|
40. Open file
|
||||||
|
56
fetcher.py
56
fetcher.py
@ -50,19 +50,26 @@ def findISBN(src):
|
|||||||
if src.endswith(".pdf"):
|
if src.endswith(".pdf"):
|
||||||
totext = subprocess.Popen(["pdftotext", src, "-"],
|
totext = subprocess.Popen(["pdftotext", src, "-"],
|
||||||
stdout=subprocess.PIPE,
|
stdout=subprocess.PIPE,
|
||||||
stderr=subprocess.PIPE)
|
stderr=subprocess.PIPE,
|
||||||
|
bufsize=1)
|
||||||
elif src.endswith(".djvu"):
|
elif src.endswith(".djvu"):
|
||||||
totext = subprocess.Popen(["djvutxt", src],
|
totext = subprocess.Popen(["djvutxt", src],
|
||||||
stdout=subprocess.PIPE,
|
stdout=subprocess.PIPE,
|
||||||
stderr=subprocess.PIPE)
|
stderr=subprocess.PIPE,
|
||||||
extractfull = totext.communicate()
|
bufsize=1)
|
||||||
# TODO : ^ Return result before processing the whole book ?
|
while totext.poll() == None:
|
||||||
if extractfull[1] is not "":
|
extractfull = totext.stdin.readline()
|
||||||
|
extractISBN = isbn_re.search(extractfull.lower().replace('Œ', '-'))
|
||||||
|
if extractISBN:
|
||||||
|
totext.terminate()
|
||||||
|
break
|
||||||
|
|
||||||
|
err = totext.communicate()[1]
|
||||||
|
if totext.returncode > 0:
|
||||||
# Error happened
|
# Error happened
|
||||||
tools.warning(extractfull[1])
|
tools.warning(err)
|
||||||
return False
|
return False
|
||||||
extractfull = extractfull[0]
|
|
||||||
extractISBN = isbn_re.search(extractfull.lower().replace('Œ', '-'))
|
|
||||||
cleanISBN = False
|
cleanISBN = False
|
||||||
# Clean ISBN is the ISBN number without separators
|
# Clean ISBN is the ISBN number without separators
|
||||||
if extractISBN:
|
if extractISBN:
|
||||||
@ -103,21 +110,26 @@ def findDOI(src):
|
|||||||
totext = subprocess.Popen(["djvutxt", src],
|
totext = subprocess.Popen(["djvutxt", src],
|
||||||
stdout=subprocess.PIPE,
|
stdout=subprocess.PIPE,
|
||||||
stderr=subprocess.PIPE)
|
stderr=subprocess.PIPE)
|
||||||
extractfull = totext.communicate()
|
|
||||||
# TODO : ^ Return result before full conversion ?
|
while totext.poll() == None:
|
||||||
if extractfull[1] is not "":
|
extractfull = totext.stdin.readline()
|
||||||
# Error happened
|
extractDOI = doi_re.search(extractfull.lower().replace('Œ', '-'))
|
||||||
tools.warning(extractfull[1])
|
|
||||||
return False
|
|
||||||
extractfull = extractfull[0]
|
|
||||||
extractDOI = doi_re.search(extractfull.lower().replace('Œ', '-'))
|
|
||||||
if not extractDOI:
|
|
||||||
# PNAS fix
|
|
||||||
extractDOI = doi_pnas_re.search(extractfull.lower().replace('pnas',
|
|
||||||
'/pnas'))
|
|
||||||
if not extractDOI:
|
if not extractDOI:
|
||||||
# JSB fix
|
# PNAS fix
|
||||||
extractDOI = doi_jsb_re.search(extractfull.lower())
|
extractDOI = doi_pnas_re.search(extractfull.lower().replace('pnas',
|
||||||
|
'/pnas'))
|
||||||
|
if not extractDOI:
|
||||||
|
# JSB fix
|
||||||
|
extractDOI = doi_jsb_re.search(extractfull.lower())
|
||||||
|
if extractDOI:
|
||||||
|
totext.terminate()
|
||||||
|
break
|
||||||
|
|
||||||
|
err = totext.communicate()[1]
|
||||||
|
if totext.returncode > 0:
|
||||||
|
# Error happened
|
||||||
|
tools.warning(err)
|
||||||
|
return False
|
||||||
|
|
||||||
cleanDOI = False
|
cleanDOI = False
|
||||||
if extractDOI:
|
if extractDOI:
|
||||||
|
@ -52,7 +52,7 @@ def tearpage(filename):
|
|||||||
|
|
||||||
# Write pages except the first one
|
# Write pages except the first one
|
||||||
output_file = PdfFileWriter()
|
output_file = PdfFileWriter()
|
||||||
for i in range(0, num_pages):
|
for i in range(1, num_pages):
|
||||||
output_file.addPage(input_file.getPage(i))
|
output_file.addPage(input_file.getPage(i))
|
||||||
|
|
||||||
tmp.close()
|
tmp.close()
|
||||||
|
Loading…
Reference in New Issue
Block a user