Trim whitespaces in string normalization

This commit is contained in:
nicofrand 2018-01-22 12:50:37 +01:00
parent 88087c8628
commit bb78725a05
2 changed files with 12 additions and 0 deletions

View File

@@ -114,6 +114,15 @@ class TestTexts(unittest.TestCase):
tools.normalize_string("avec ascenseur")
)
def test_whitespace_trim(self):
"""
Checks that trailing and beginning whitespaces are trimmed.
"""
self.assertEqual(
"rennes 35000",
tools.normalize_string(" Rennes 35000 ")
)
def test_accents(self):
"""
Checks accents are replaced.

View File

@@ -225,6 +225,9 @@ def normalize_string(string, lowercase=True, convert_arabic_numerals=True):
# Collapse multiple spaces, replace tabulations and newlines by space
string = re.sub(r"\s+", " ", string)
# Trim whitespaces
string = string.strip()
return string