Updated test files

According to https://github.com/Phyks/BMC/issues/7. Also updated the fetcher file to fix two bugs: * Whitespace in ISBN * If the PDF-to-text (or DjVu-to-text) output was not long enough, the end of the file was not processed.
2014-06-29 23:02:44 +02:00 · 2014-06-29 23:02:44 +02:00 · b231b578cc
commit b231b578cc
parent bc15f86057
5 changed files with 16 additions and 8 deletions
--- a/fetcher.py
+++ b/fetcher.py
@ -67,7 +67,7 @@ def download(url):
    return False
-isbn_re = re.compile(r'isbn ((?:[0-9]{3}[ -]?)?[0-9]{1,5}[ -]?[0-9]{1,7}[ -]?[0-9]{1,6}[- ]?[0-9])',
+isbn_re = re.compile(r'isbn[\s]?:?[\s]?((?:[0-9]{3}[ -]?)?[0-9]{1,5}[ -]?[0-9]{1,7}[ -]?[0-9]{1,6}[- ]?[0-9])',
                     re.IGNORECASE)
@ -89,7 +89,7 @@ def findISBN(src):
        return False
    while totext.poll() is None:
-        extractfull = totext.stdout.readline()
+        extractfull = ' '.join([i.strip() for i in totext.stdout.readlines()])
        extractISBN = isbn_re.search(extractfull.lower().replace('&#338;',
                                                                 '-'))
        if extractISBN:
@ -146,7 +146,7 @@ def findDOI(src):
    extractfull = ''
    while totext.poll() is None:
-        extractfull += "".join([i.strip() for i in totext.stdout.readlines()])
+        extractfull += ' '.join([i.strip() for i in totext.stdout.readlines()])
        extractDOI = doi_re.search(extractfull.lower().replace('&#338;', '-'))
        if not extractDOI:
            # PNAS fix
@ -232,7 +232,7 @@ def findArXivId(src):
    extractfull = ''
    while totext.poll() is None:
-        extractfull += totext.stdout.readline().strip()
+        extractfull += ' '.join([i.strip() for i in totext.stdout.readlines()])
        extractID = arXiv_re.search(extractfull)
        if extractID:
            totext.terminate()
@ -291,7 +291,7 @@ def findHALId(src):
        return False
    while totext.poll() is None:
-        extractfull = totext.stdout.readline()
+        extractfull = ' '.join([i.strip() for i in totext.stdout.readlines()])
        extractID = HAL_re.search(extractfull)
        if extractID:
            totext.terminate()
--- a/tests/src/isbn.bib
+++ b/tests/src/isbn.bib
@ -0,0 +1,7 @@
@book{0198507194,
     title = {Bose-Einstein Condensation},
    author = {Lev. P. Pitaevskii and S. Stringari},
      isbn = {0198507194},
      year = {2004},
 publisher = {Clarendon Press}
 }
--- a/tests/src/test_book.djvu
+++ b/tests/src/test_book.djvu
--- a/tests/src/test_book.pdf
+++ b/tests/src/test_book.pdf
--- a/tests/test_fetcher.py
+++ b/tests/test_fetcher.py
@ -34,10 +34,11 @@ class TestFetcher(unittest.TestCase):
        self.assertFalse(download('a'))
    def test_findISBN_DJVU(self):
-        self.assertEqual(findISBN("tests/src/test_book.djvu"), '0198507194')
+        # ISBN is incomplete in this test because my djvu file is bad
        self.assertEqual(findISBN("tests/src/test_book.djvu"), '978295391873')
    def test_findISBN_PDF(self):
-        self.assertEqual(findISBN("tests/src/test_book.pdf"), '9780521846516')
+        self.assertEqual(findISBN("tests/src/test_book.pdf"), '9782953918731')
    def test_findISBN_False(self):
        self.assertFalse(findISBN("tests/src/test.pdf"))
@ -53,7 +54,7 @@ class TestFetcher(unittest.TestCase):
                         "10.1103/physrevlett.112.253201")
    def test_findDOI_DJVU(self):
-        # DOI is incomplete in this text because my djvu file is bad
+        # DOI is incomplete in this test because my djvu file is bad
        self.assertEqual(findDOI("tests/src/test.djvu"),
                         "10.1103/physrevlett.112")