Updated test files
See https://github.com/Phyks/BMC/issues/7. Also updated the fetcher file to fix two bugs: (1) whitespace in the ISBN was not handled; (2) if the PDF-to-text (or DjVu-to-text) output was not long enough, the end of the file was not processed.
This commit is contained in:
parent
bc15f86057
commit
b231b578cc
10
fetcher.py
10
fetcher.py
@ -67,7 +67,7 @@ def download(url):
|
|||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
||||||
isbn_re = re.compile(r'isbn ((?:[0-9]{3}[ -]?)?[0-9]{1,5}[ -]?[0-9]{1,7}[ -]?[0-9]{1,6}[- ]?[0-9])',
|
isbn_re = re.compile(r'isbn[\s]?:?[\s]?((?:[0-9]{3}[ -]?)?[0-9]{1,5}[ -]?[0-9]{1,7}[ -]?[0-9]{1,6}[- ]?[0-9])',
|
||||||
re.IGNORECASE)
|
re.IGNORECASE)
|
||||||
|
|
||||||
|
|
||||||
@ -89,7 +89,7 @@ def findISBN(src):
|
|||||||
return False
|
return False
|
||||||
|
|
||||||
while totext.poll() is None:
|
while totext.poll() is None:
|
||||||
extractfull = totext.stdout.readline()
|
extractfull = ' '.join([i.strip() for i in totext.stdout.readlines()])
|
||||||
extractISBN = isbn_re.search(extractfull.lower().replace('Œ',
|
extractISBN = isbn_re.search(extractfull.lower().replace('Œ',
|
||||||
'-'))
|
'-'))
|
||||||
if extractISBN:
|
if extractISBN:
|
||||||
@ -146,7 +146,7 @@ def findDOI(src):
|
|||||||
|
|
||||||
extractfull = ''
|
extractfull = ''
|
||||||
while totext.poll() is None:
|
while totext.poll() is None:
|
||||||
extractfull += "".join([i.strip() for i in totext.stdout.readlines()])
|
extractfull += ' '.join([i.strip() for i in totext.stdout.readlines()])
|
||||||
extractDOI = doi_re.search(extractfull.lower().replace('Œ', '-'))
|
extractDOI = doi_re.search(extractfull.lower().replace('Œ', '-'))
|
||||||
if not extractDOI:
|
if not extractDOI:
|
||||||
# PNAS fix
|
# PNAS fix
|
||||||
@ -232,7 +232,7 @@ def findArXivId(src):
|
|||||||
|
|
||||||
extractfull = ''
|
extractfull = ''
|
||||||
while totext.poll() is None:
|
while totext.poll() is None:
|
||||||
extractfull += totext.stdout.readline().strip()
|
extractfull += ' '.join([i.strip() for i in totext.stdout.readlines()])
|
||||||
extractID = arXiv_re.search(extractfull)
|
extractID = arXiv_re.search(extractfull)
|
||||||
if extractID:
|
if extractID:
|
||||||
totext.terminate()
|
totext.terminate()
|
||||||
@ -291,7 +291,7 @@ def findHALId(src):
|
|||||||
return False
|
return False
|
||||||
|
|
||||||
while totext.poll() is None:
|
while totext.poll() is None:
|
||||||
extractfull = totext.stdout.readline()
|
extractfull = ' '.join([i.strip() for i in totext.stdout.readlines()])
|
||||||
extractID = HAL_re.search(extractfull)
|
extractID = HAL_re.search(extractfull)
|
||||||
if extractID:
|
if extractID:
|
||||||
totext.terminate()
|
totext.terminate()
|
||||||
|
7
tests/src/isbn.bib
Normal file
7
tests/src/isbn.bib
Normal file
@ -0,0 +1,7 @@
|
|||||||
|
@book{0198507194,
|
||||||
|
title = {Bose-Einstein Condensation},
|
||||||
|
author = {Lev. P. Pitaevskii and S. Stringari},
|
||||||
|
isbn = {0198507194},
|
||||||
|
year = {2004},
|
||||||
|
publisher = {Clarendon Press}
|
||||||
|
}
|
BIN
tests/src/test_book.djvu
Normal file
BIN
tests/src/test_book.djvu
Normal file
Binary file not shown.
BIN
tests/src/test_book.pdf
Normal file
BIN
tests/src/test_book.pdf
Normal file
Binary file not shown.
@ -34,10 +34,11 @@ class TestFetcher(unittest.TestCase):
|
|||||||
self.assertFalse(download('a'))
|
self.assertFalse(download('a'))
|
||||||
|
|
||||||
def test_findISBN_DJVU(self):
|
def test_findISBN_DJVU(self):
|
||||||
self.assertEqual(findISBN("tests/src/test_book.djvu"), '0198507194')
|
# ISBN is incomplete in this test because my djvu file is bad
|
||||||
|
self.assertEqual(findISBN("tests/src/test_book.djvu"), '978295391873')
|
||||||
|
|
||||||
def test_findISBN_PDF(self):
|
def test_findISBN_PDF(self):
|
||||||
self.assertEqual(findISBN("tests/src/test_book.pdf"), '9780521846516')
|
self.assertEqual(findISBN("tests/src/test_book.pdf"), '9782953918731')
|
||||||
|
|
||||||
def test_findISBN_False(self):
|
def test_findISBN_False(self):
|
||||||
self.assertFalse(findISBN("tests/src/test.pdf"))
|
self.assertFalse(findISBN("tests/src/test.pdf"))
|
||||||
@ -53,7 +54,7 @@ class TestFetcher(unittest.TestCase):
|
|||||||
"10.1103/physrevlett.112.253201")
|
"10.1103/physrevlett.112.253201")
|
||||||
|
|
||||||
def test_findDOI_DJVU(self):
|
def test_findDOI_DJVU(self):
|
||||||
# DOI is incomplete in this text because my djvu file is bad
|
# DOI is incomplete in this test because my djvu file is bad
|
||||||
self.assertEqual(findDOI("tests/src/test.djvu"),
|
self.assertEqual(findDOI("tests/src/test.djvu"),
|
||||||
"10.1103/physrevlett.112")
|
"10.1103/physrevlett.112")
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user