Updated test files
According to https://github.com/Phyks/BMC/issues/7. Also updated the fetcher file to fix two bugs: * Whitespace in ISBN * If the PDF-to-text (or DjVu-to-text) conversion did not run long enough, the end of the file was not processed.
This commit is contained in:
parent
bc15f86057
commit
b231b578cc
10
fetcher.py
10
fetcher.py
@ -67,7 +67,7 @@ def download(url):
|
||||
return False
|
||||
|
||||
|
||||
isbn_re = re.compile(r'isbn ((?:[0-9]{3}[ -]?)?[0-9]{1,5}[ -]?[0-9]{1,7}[ -]?[0-9]{1,6}[- ]?[0-9])',
|
||||
isbn_re = re.compile(r'isbn[\s]?:?[\s]?((?:[0-9]{3}[ -]?)?[0-9]{1,5}[ -]?[0-9]{1,7}[ -]?[0-9]{1,6}[- ]?[0-9])',
|
||||
re.IGNORECASE)
|
||||
|
||||
|
||||
@ -89,7 +89,7 @@ def findISBN(src):
|
||||
return False
|
||||
|
||||
while totext.poll() is None:
|
||||
extractfull = totext.stdout.readline()
|
||||
extractfull = ' '.join([i.strip() for i in totext.stdout.readlines()])
|
||||
extractISBN = isbn_re.search(extractfull.lower().replace('Œ',
|
||||
'-'))
|
||||
if extractISBN:
|
||||
@ -146,7 +146,7 @@ def findDOI(src):
|
||||
|
||||
extractfull = ''
|
||||
while totext.poll() is None:
|
||||
extractfull += "".join([i.strip() for i in totext.stdout.readlines()])
|
||||
extractfull += ' '.join([i.strip() for i in totext.stdout.readlines()])
|
||||
extractDOI = doi_re.search(extractfull.lower().replace('Œ', '-'))
|
||||
if not extractDOI:
|
||||
# PNAS fix
|
||||
@ -232,7 +232,7 @@ def findArXivId(src):
|
||||
|
||||
extractfull = ''
|
||||
while totext.poll() is None:
|
||||
extractfull += totext.stdout.readline().strip()
|
||||
extractfull += ' '.join([i.strip() for i in totext.stdout.readlines()])
|
||||
extractID = arXiv_re.search(extractfull)
|
||||
if extractID:
|
||||
totext.terminate()
|
||||
@ -291,7 +291,7 @@ def findHALId(src):
|
||||
return False
|
||||
|
||||
while totext.poll() is None:
|
||||
extractfull = totext.stdout.readline()
|
||||
extractfull = ' '.join([i.strip() for i in totext.stdout.readlines()])
|
||||
extractID = HAL_re.search(extractfull)
|
||||
if extractID:
|
||||
totext.terminate()
|
||||
|
7
tests/src/isbn.bib
Normal file
7
tests/src/isbn.bib
Normal file
@ -0,0 +1,7 @@
|
||||
@book{0198507194,
|
||||
title = {Bose-Einstein Condensation},
|
||||
author = {Lev. P. Pitaevskii and S. Stringari},
|
||||
isbn = {0198507194},
|
||||
year = {2004},
|
||||
publisher = {Clarendon Press}
|
||||
}
|
BIN
tests/src/test_book.djvu
Normal file
BIN
tests/src/test_book.djvu
Normal file
Binary file not shown.
BIN
tests/src/test_book.pdf
Normal file
BIN
tests/src/test_book.pdf
Normal file
Binary file not shown.
@ -34,10 +34,11 @@ class TestFetcher(unittest.TestCase):
|
||||
self.assertFalse(download('a'))
|
||||
|
||||
def test_findISBN_DJVU(self):
|
||||
self.assertEqual(findISBN("tests/src/test_book.djvu"), '0198507194')
|
||||
# ISBN is incomplete in this test because my djvu file is bad
|
||||
self.assertEqual(findISBN("tests/src/test_book.djvu"), '978295391873')
|
||||
|
||||
def test_findISBN_PDF(self):
|
||||
self.assertEqual(findISBN("tests/src/test_book.pdf"), '9780521846516')
|
||||
self.assertEqual(findISBN("tests/src/test_book.pdf"), '9782953918731')
|
||||
|
||||
def test_findISBN_False(self):
|
||||
self.assertFalse(findISBN("tests/src/test.pdf"))
|
||||
@ -53,7 +54,7 @@ class TestFetcher(unittest.TestCase):
|
||||
"10.1103/physrevlett.112.253201")
|
||||
|
||||
def test_findDOI_DJVU(self):
|
||||
# DOI is incomplete in this text because my djvu file is bad
|
||||
# DOI is incomplete in this test because my djvu file is bad
|
||||
self.assertEqual(findDOI("tests/src/test.djvu"),
|
||||
"10.1103/physrevlett.112")
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user