Check output before processing the whole file for ISBN and DOI search

2014-04-30 00:36:15 +02:00 · 2014-04-30 00:36:15 +02:00 · 3d07af0e71
commit 3d07af0e71
parent 91685bc46b
3 changed files with 35 additions and 25 deletions
--- a/README.md
+++ b/README.md
@@ -108,8 +108,6 @@ A list of ideas and TODO. Don't hesitate to give feedback on the ones you really
 10. Refactor
    11. Use bibtex-parser lib to write bibtex, instead of parsed2BibTex
    12. Rebuild function
    15. Check output of subprocesses before it ends
    16. TODO in files
 20. No DOI for arXiv / HAL
 30. Parameter to disable remote search
 40. Open file
--- a/fetcher.py
+++ b/fetcher.py
@@ -50,19 +50,26 @@ def findISBN(src):
    if src.endswith(".pdf"):
        totext = subprocess.Popen(["pdftotext", src, "-"],
                                  stdout=subprocess.PIPE,
-                                  stderr=subprocess.PIPE)
+                                  stderr=subprocess.PIPE,
                                  bufsize=1)
    elif src.endswith(".djvu"):
        totext = subprocess.Popen(["djvutxt", src],
                                  stdout=subprocess.PIPE,
-                                  stderr=subprocess.PIPE)
+                                  stderr=subprocess.PIPE,
-    extractfull = totext.communicate()
+                                  bufsize=1)
-    # TODO : ^ Return result before processing the whole book ?
+    while totext.poll() == None:
-    if extractfull[1] is not "":
+        extractfull = totext.stdin.readline()
        # Error happened
        tools.warning(extractfull[1])
        return False
    extractfull = extractfull[0]
        extractISBN = isbn_re.search(extractfull.lower().replace('&#338;', '-'))
        if extractISBN:
            totext.terminate()
            break
    err = totext.communicate()[1]
    if totext.returncode > 0:
        # Error happened
        tools.warning(err)
        return False
    cleanISBN = False
    # Clean ISBN is the ISBN number without separators
    if extractISBN:
@@ -103,13 +110,9 @@ def findDOI(src):
        totext = subprocess.Popen(["djvutxt", src],
                                  stdout=subprocess.PIPE,
                                  stderr=subprocess.PIPE)
-    extractfull = totext.communicate()
+
-    # TODO : ^ Return result before full conversion ?
+    while totext.poll() == None:
-    if extractfull[1] is not "":
+        extractfull = totext.stdin.readline()
        # Error happened
        tools.warning(extractfull[1])
        return False
    extractfull = extractfull[0]
        extractDOI = doi_re.search(extractfull.lower().replace('&#338;', '-'))
        if not extractDOI:
            # PNAS fix
@@ -118,6 +121,15 @@ def findDOI(src):
            if not extractDOI:
                # JSB fix
                extractDOI = doi_jsb_re.search(extractfull.lower())
        if extractDOI:
            totext.terminate()
            break
    err = totext.communicate()[1]
    if totext.returncode > 0:
        # Error happened
        tools.warning(err)
        return False
    cleanDOI = False
    if extractDOI:
--- a/tearpages.py
+++ b/tearpages.py
@@ -52,7 +52,7 @@ def tearpage(filename):
    # Write pages excepted the first one
    output_file = PdfFileWriter()
-    for i in range(0, num_pages):
+    for i in range(1, num_pages):
        output_file.addPage(input_file.getPage(i))
    tmp.close()