Merge branch 'fix-roman-numbers' into 'master'
Fix roman numbers See merge request phyks/Flatisfy!18
This commit is contained in:
commit
6bcfb62e8d
@ -22,9 +22,39 @@ class TestTexts(unittest.TestCase):
|
|||||||
"""
|
"""
|
||||||
Checks roman numbers replacement.
|
Checks roman numbers replacement.
|
||||||
"""
|
"""
|
||||||
|
tester = tools.RomanNumbers()
|
||||||
|
self.assertTrue(tester.check_valid("XIV"))
|
||||||
|
self.assertTrue(not tester.check_valid("ABC"))
|
||||||
|
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
"14",
|
"14",
|
||||||
tools.normalize_string("XIV")
|
tester.convert_to_arabic("XIV")
|
||||||
|
)
|
||||||
|
|
||||||
|
self.assertEqual(
|
||||||
|
"1987",
|
||||||
|
tester.convert_to_arabic("MCMLXXXVII")
|
||||||
|
)
|
||||||
|
|
||||||
|
self.assertEqual(
|
||||||
|
"Dans le 15e arrondissement",
|
||||||
|
tester.convert_to_arabic_in_text("Dans le XVe arrondissement")
|
||||||
|
)
|
||||||
|
|
||||||
|
self.assertEqual(
|
||||||
|
"20eme arr.",
|
||||||
|
tester.convert_to_arabic_in_text("XXeme arr.")
|
||||||
|
)
|
||||||
|
|
||||||
|
self.assertEqual(
|
||||||
|
"A AIX EN PROVENCE",
|
||||||
|
tester.convert_to_arabic_in_text("A AIX EN PROVENCE")
|
||||||
|
)
|
||||||
|
|
||||||
|
def test_roman_numbers_in_text(self):
|
||||||
|
self.assertEqual(
|
||||||
|
"dans le 15e arrondissement",
|
||||||
|
tools.normalize_string("Dans le XVe arrondissement")
|
||||||
)
|
)
|
||||||
|
|
||||||
def test_multiple_whitespaces(self):
|
def test_multiple_whitespaces(self):
|
||||||
@ -32,8 +62,8 @@ class TestTexts(unittest.TestCase):
|
|||||||
Checks whitespaces are collapsed.
|
Checks whitespaces are collapsed.
|
||||||
"""
|
"""
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
"avec ascenseur",
|
"avec ascenseur",
|
||||||
tools.normalize_string("avec ascenseur")
|
tools.normalize_string("avec ascenseur")
|
||||||
)
|
)
|
||||||
|
|
||||||
def test_accents(self):
|
def test_accents(self):
|
||||||
@ -41,8 +71,8 @@ class TestTexts(unittest.TestCase):
|
|||||||
Checks accents are replaced.
|
Checks accents are replaced.
|
||||||
"""
|
"""
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
"éèêàüï",
|
"eeeaui",
|
||||||
tools.normalize_string("eeeaui")
|
tools.normalize_string(u"éèêàüï")
|
||||||
)
|
)
|
||||||
|
|
||||||
class TestPhoneNumbers(unittest.TestCase):
|
class TestPhoneNumbers(unittest.TestCase):
|
||||||
|
@ -24,6 +24,42 @@ LOGGER = logging.getLogger(__name__)
|
|||||||
# Constants
|
# Constants
|
||||||
NAVITIA_ENDPOINT = "https://api.navitia.io/v1/coverage/fr-idf/journeys"
|
NAVITIA_ENDPOINT = "https://api.navitia.io/v1/coverage/fr-idf/journeys"
|
||||||
|
|
||||||
|
class RomanNumbers():
    """
    Utilities to check and convert roman numbers.

    Part of the conversions are based on
    https://gist.github.com/riverrun/ac91218bb1678b857c12
    """

    # Roman tokens and their values. Subtractive pairs come first so that
    # they are consumed before the single digits they are made of.
    _TOKENS = (
        ('CM', 900), ('CD', 400), ('XC', 90), ('XL', 40), ('IX', 9), ('IV', 4),
        ('M', 1000), ('D', 500), ('C', 100), ('L', 50), ('X', 10), ('V', 5),
        ('I', 1),
    )

    # Canonical roman numeral (1 to 3999): thousands, hundreds, tens and
    # units, in that order. The leading lookahead rejects the empty string,
    # which every optional group would otherwise accept.
    _VALID_RE = re.compile(
        r'(?=[MDCLXVI])'
        r'M{0,3}(?:CM|CD|D?C{0,3})(?:XC|XL|L?X{0,3})(?:IX|IV|V?I{0,3})\Z'
    )

    # A run of roman digits which starts a word and is followed by an
    # ordinal suffix ("e", "è"), a whitespace or the end of the text.
    # The end-of-text alternative has to live outside of the character
    # class: within brackets, "$" is a literal dollar sign.
    _IN_TEXT_RE = re.compile(r'(?<!\S)[MDCLXVI]+(?=[eè\s]|$)')

    def check_valid(self, roman):
        """
        Check whether a string is a well-formed roman number.

        Only canonical uppercase numerals are accepted: digits must appear
        in decreasing order of magnitude, with the usual subtractive pairs
        (IV, IX, XL, XC, CD, CM) and at most three repetitions of a digit.

        :param roman: The string to check.
        :return: ``True`` if the string is a valid roman number, ``False``
            otherwise (including for the empty string).
        """
        return self._VALID_RE.match(roman) is not None

    def convert_to_arabic(self, roman):
        """
        Convert a roman number to its arabic representation.

        :param roman: The roman number, as an uppercase string.
        :return: The arabic number as a string (e.g. ``"14"`` for
            ``"XIV"``). The input is returned unchanged when it is not a
            valid roman number.
        """
        if not self.check_valid(roman):
            return roman

        total = 0
        for token, value in self._TOKENS:
            total += roman.count(token) * value
            # Blank out what was counted, so that the digits of a
            # subtractive pair are not counted again on their own.
            roman = roman.replace(token, ' ')
        return str(total)

    def convert_to_arabic_in_text(self, text):
        """
        Replace the roman numbers found in a text by arabic numbers.

        A roman number is only replaced when it starts a word and is
        followed by "e", "è", a whitespace or the end of the text. Runs of
        roman digits which are not a valid roman number are left untouched.

        :param text: The text to process.
        :return: The text, with roman numbers converted to arabic numbers.
        """
        return self._IN_TEXT_RE.sub(
            lambda matchobj: self.convert_to_arabic(matchobj.group(0)),
            text
        )
|
||||||
|
|
||||||
def hash_dict(func):
|
def hash_dict(func):
|
||||||
"""
|
"""
|
||||||
@ -162,6 +198,10 @@ def normalize_string(string):
|
|||||||
# Keep some basic punctuation to keep syntactic units
|
# Keep some basic punctuation to keep syntactic units
|
||||||
string = re.sub(r"[^a-zA-Z0-9,;:]", " ", string)
|
string = re.sub(r"[^a-zA-Z0-9,;:]", " ", string)
|
||||||
|
|
||||||
|
# Convert roman numbers to arabic numbers
|
||||||
|
converter = RomanNumbers()
|
||||||
|
string = converter.convert_to_arabic_in_text(string)
|
||||||
|
|
||||||
# Convert to lowercase
|
# Convert to lowercase
|
||||||
string = string.lower()
|
string = string.lower()
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user