Updated test files

According to https://github.com/Phyks/BMC/issues/7.

Also updated the fetcher file to fix two bugs:
* Whitespace in ISBNs was not handled
* If the PDF-to-text (or DjVu-to-text) conversion was not long enough, the
end of the file was not processed.
This commit is contained in:
Phyks 2014-06-29 23:02:44 +02:00
parent bc15f86057
commit b231b578cc
5 changed files with 16 additions and 8 deletions

View File

@ -67,7 +67,7 @@ def download(url):
return False
isbn_re = re.compile(r'isbn ((?:[0-9]{3}[ -]?)?[0-9]{1,5}[ -]?[0-9]{1,7}[ -]?[0-9]{1,6}[- ]?[0-9])',
isbn_re = re.compile(r'isbn[\s]?:?[\s]?((?:[0-9]{3}[ -]?)?[0-9]{1,5}[ -]?[0-9]{1,7}[ -]?[0-9]{1,6}[- ]?[0-9])',
re.IGNORECASE)
@ -89,7 +89,7 @@ def findISBN(src):
return False
while totext.poll() is None:
extractfull = totext.stdout.readline()
extractfull = ' '.join([i.strip() for i in totext.stdout.readlines()])
extractISBN = isbn_re.search(extractfull.lower().replace('Œ',
'-'))
if extractISBN:
@ -146,7 +146,7 @@ def findDOI(src):
extractfull = ''
while totext.poll() is None:
extractfull += "".join([i.strip() for i in totext.stdout.readlines()])
extractfull += ' '.join([i.strip() for i in totext.stdout.readlines()])
extractDOI = doi_re.search(extractfull.lower().replace('Œ', '-'))
if not extractDOI:
# PNAS fix
@ -232,7 +232,7 @@ def findArXivId(src):
extractfull = ''
while totext.poll() is None:
extractfull += totext.stdout.readline().strip()
extractfull += ' '.join([i.strip() for i in totext.stdout.readlines()])
extractID = arXiv_re.search(extractfull)
if extractID:
totext.terminate()
@ -291,7 +291,7 @@ def findHALId(src):
return False
while totext.poll() is None:
extractfull = totext.stdout.readline()
extractfull = ' '.join([i.strip() for i in totext.stdout.readlines()])
extractID = HAL_re.search(extractfull)
if extractID:
totext.terminate()

7
tests/src/isbn.bib Normal file
View File

@ -0,0 +1,7 @@
@book{0198507194,
title = {Bose-Einstein Condensation},
author = {Lev. P. Pitaevskii and S. Stringari},
isbn = {0198507194},
year = {2004},
publisher = {Clarendon Press}
}

BIN
tests/src/test_book.djvu Normal file

Binary file not shown.

BIN
tests/src/test_book.pdf Normal file

Binary file not shown.

View File

@ -34,10 +34,11 @@ class TestFetcher(unittest.TestCase):
self.assertFalse(download('a'))
def test_findISBN_DJVU(self):
self.assertEqual(findISBN("tests/src/test_book.djvu"), '0198507194')
# ISBN is incomplete in this test because my djvu file is bad
self.assertEqual(findISBN("tests/src/test_book.djvu"), '978295391873')
def test_findISBN_PDF(self):
self.assertEqual(findISBN("tests/src/test_book.pdf"), '9780521846516')
self.assertEqual(findISBN("tests/src/test_book.pdf"), '9782953918731')
def test_findISBN_False(self):
self.assertFalse(findISBN("tests/src/test.pdf"))
@ -53,7 +54,7 @@ class TestFetcher(unittest.TestCase):
"10.1103/physrevlett.112.253201")
def test_findDOI_DJVU(self):
# DOI is incomplete in this text because my djvu file is bad
# DOI is incomplete in this test because my djvu file is bad
self.assertEqual(findDOI("tests/src/test.djvu"),
"10.1103/physrevlett.112")