From 3d07af0e7195312b1be49e397f1692210ec16f2c Mon Sep 17 00:00:00 2001
From: Phyks <webmaster@phyks.me>
Date: Wed, 30 Apr 2014 00:36:15 +0200
Subject: [PATCH] Check output before processing the whole file for ISBN and
 DOI search

---
 README.md    |  2 --
 fetcher.py   | 56 +++++++++++++++++++++++++++++++---------------------
 tearpages.py |  2 +-
 3 files changed, 35 insertions(+), 25 deletions(-)

diff --git a/README.md b/README.md
index 4f2053d..aa90dc9 100644
--- a/README.md
+++ b/README.md
@@ -108,8 +108,6 @@ A list of ideas and TODO. Don't hesitate to give feedback on the ones you really
 10. Refactor
     11. Use bibtex-parser lib to write bibtex, instead of parsed2BibTex
     12. Rebuild function
-    15. Check output of subprocesses before it ends
-    16. TODO in files
 20. No DOI for arXiv / HAL
 30. Parameter to disable remote search
 40. Open file
diff --git a/fetcher.py b/fetcher.py
index 297a6c7..3e77bef 100644
--- a/fetcher.py
+++ b/fetcher.py
@@ -50,19 +50,26 @@ def findISBN(src):
     if src.endswith(".pdf"):
         totext = subprocess.Popen(["pdftotext", src, "-"],
                                   stdout=subprocess.PIPE,
-                                  stderr=subprocess.PIPE)
+                                  stderr=subprocess.PIPE,
+                                  bufsize=1)
     elif src.endswith(".djvu"):
         totext = subprocess.Popen(["djvutxt", src],
                                   stdout=subprocess.PIPE,
-                                  stderr=subprocess.PIPE)
-    extractfull = totext.communicate()
-    # TODO : ^ Return result before processing the whole book ?
-    if extractfull[1] is not "":
+                                  stderr=subprocess.PIPE,
+                                  bufsize=1)
+    extractISBN = None  # stays None when no output line matches
+    for extractfull in totext.stdout:  # stdin is not piped (None)
+        extractISBN = isbn_re.search(extractfull.lower().replace('&#338;', '-'))
+        if extractISBN:
+            totext.terminate()  # stop converting once an ISBN is found
+            break
+
+    err = totext.communicate()[1]
+    if totext.returncode > 0:
         # Error happened
-        tools.warning(extractfull[1])
+        tools.warning(err)
         return False
-    extractfull = extractfull[0]
-    extractISBN = isbn_re.search(extractfull.lower().replace('&#338;', '-'))
+
     cleanISBN = False
     # Clean ISBN is the ISBN number without separators
     if extractISBN:
@@ -103,21 +110,26 @@ def findDOI(src):
         totext = subprocess.Popen(["djvutxt", src],
                                   stdout=subprocess.PIPE,
                                   stderr=subprocess.PIPE)
-    extractfull = totext.communicate()
-    # TODO : ^ Return result before full conversion ?
-    if extractfull[1] is not "":
-        # Error happened
-        tools.warning(extractfull[1])
-        return False
-    extractfull = extractfull[0]
-    extractDOI = doi_re.search(extractfull.lower().replace('&#338;', '-'))
-    if not extractDOI:
-        # PNAS fix
-        extractDOI = doi_pnas_re.search(extractfull.lower().replace('pnas',
-                                                                    '/pnas'))
+
+    extractDOI = None  # stays None when no output line matches
+    for extractfull in totext.stdout:  # read converter stdout, not stdin
+        extractDOI = doi_re.search(extractfull.lower().replace('&#338;', '-'))
         if not extractDOI:
-            # JSB fix
-            extractDOI = doi_jsb_re.search(extractfull.lower())
+            # PNAS fix
+            extractDOI = doi_pnas_re.search(extractfull.lower().replace('pnas',
+                                                                        '/pnas'))
+            if not extractDOI:
+                # JSB fix
+                extractDOI = doi_jsb_re.search(extractfull.lower())
+        if extractDOI:
+            totext.terminate()
+            break
+
+    err = totext.communicate()[1]
+    if totext.returncode > 0:
+        # Error happened
+        tools.warning(err)
+        return False
 
     cleanDOI = False
     if extractDOI:
diff --git a/tearpages.py b/tearpages.py
index 37b75c1..6f442d7 100644
--- a/tearpages.py
+++ b/tearpages.py
@@ -52,7 +52,7 @@ def tearpage(filename):
 
     # Write pages excepted the first one
     output_file = PdfFileWriter()
-    for i in range(0, num_pages):
+    for i in range(1, num_pages):
         output_file.addPage(input_file.getPage(i))
 
     tmp.close()