diff --git a/fetcher.py b/fetcher.py index 18fa442..67b9704 100644 --- a/fetcher.py +++ b/fetcher.py @@ -67,7 +67,7 @@ def download(url): return False -isbn_re = re.compile(r'isbn ((?:[0-9]{3}[ -]?)?[0-9]{1,5}[ -]?[0-9]{1,7}[ -]?[0-9]{1,6}[- ]?[0-9])', +isbn_re = re.compile(r'isbn[\s]?:?[\s]?((?:[0-9]{3}[ -]?)?[0-9]{1,5}[ -]?[0-9]{1,7}[ -]?[0-9]{1,6}[- ]?[0-9])', re.IGNORECASE) @@ -89,7 +89,7 @@ def findISBN(src): return False while totext.poll() is None: - extractfull = totext.stdout.readline() + extractfull = ' '.join([i.strip() for i in totext.stdout.readlines()]) extractISBN = isbn_re.search(extractfull.lower().replace('Œ', '-')) if extractISBN: @@ -146,7 +146,7 @@ def findDOI(src): extractfull = '' while totext.poll() is None: - extractfull += "".join([i.strip() for i in totext.stdout.readlines()]) + extractfull += ' '.join([i.strip() for i in totext.stdout.readlines()]) extractDOI = doi_re.search(extractfull.lower().replace('Œ', '-')) if not extractDOI: # PNAS fix @@ -232,7 +232,7 @@ def findArXivId(src): extractfull = '' while totext.poll() is None: - extractfull += totext.stdout.readline().strip() + extractfull += ' '.join([i.strip() for i in totext.stdout.readlines()]) extractID = arXiv_re.search(extractfull) if extractID: totext.terminate() @@ -291,7 +291,7 @@ def findHALId(src): return False while totext.poll() is None: - extractfull = totext.stdout.readline() + extractfull = ' '.join([i.strip() for i in totext.stdout.readlines()]) extractID = HAL_re.search(extractfull) if extractID: totext.terminate() diff --git a/tests/src/isbn.bib b/tests/src/isbn.bib new file mode 100644 index 0000000..6e8c62b --- /dev/null +++ b/tests/src/isbn.bib @@ -0,0 +1,7 @@ +@book{0198507194, + title = {Bose-Einstein Condensation}, + author = {Lev. P. Pitaevskii and S. Stringari}, + isbn = {0198507194}, + year = {2004}, + publisher = {Clarendon Press} +} \ No newline at end of file diff --git a/tests/src/test_book.djvu b/tests/src/test_book.djvu new file mode 100644 index 0000000..94b9465 Binary files /dev/null and b/tests/src/test_book.djvu differ diff --git a/tests/src/test_book.pdf b/tests/src/test_book.pdf new file mode 100644 index 0000000..b723280 Binary files /dev/null and b/tests/src/test_book.pdf differ diff --git a/tests/test_fetcher.py b/tests/test_fetcher.py index 972007a..39d543e 100644 --- a/tests/test_fetcher.py +++ b/tests/test_fetcher.py @@ -34,10 +34,11 @@ class TestFetcher(unittest.TestCase): self.assertFalse(download('a')) def test_findISBN_DJVU(self): - self.assertEqual(findISBN("tests/src/test_book.djvu"), '0198507194') + # ISBN is incomplete in this test because my djvu file is bad + self.assertEqual(findISBN("tests/src/test_book.djvu"), '978295391873') def test_findISBN_PDF(self): - self.assertEqual(findISBN("tests/src/test_book.pdf"), '9780521846516') + self.assertEqual(findISBN("tests/src/test_book.pdf"), '9782953918731') def test_findISBN_False(self): self.assertFalse(findISBN("tests/src/test.pdf")) @@ -53,7 +54,7 @@ class TestFetcher(unittest.TestCase): "10.1103/physrevlett.112.253201") def test_findDOI_DJVU(self): - # DOI is incomplete in this text because my djvu file is bad + # DOI is incomplete in this test because my djvu file is bad self.assertEqual(findDOI("tests/src/test.djvu"), "10.1103/physrevlett.112")