flatisfy/flatisfy/tests.py

295 lines
8.2 KiB
Python
Raw Normal View History

2018-01-14 11:53:59 +01:00
# coding: utf-8
"""
This module contains unit testing functions.
"""
import copy
import json
import logging
import os
import random
import sys
import unittest
2018-01-14 11:53:59 +01:00
from flatisfy import tools
from flatisfy.filters import duplicates
from flatisfy.filters.cache import ImageCache
from flatisfy.constants import BACKENDS_BY_PRECEDENCE
# Module-level logger, named after this module.
LOGGER = logging.getLogger(__name__)
# Directory (with a trailing slash) holding the JSON fixtures loaded by the
# tests; it is the "test_files" folder sitting next to this file.
TESTS_DATA_DIR = "{0}/test_files/".format(
    os.path.dirname(os.path.realpath(__file__))
)
2018-01-14 11:53:59 +01:00
class TestTexts(unittest.TestCase):
    """
    Checks string normalizations.
    """

    def test_roman_numbers(self):
        """
        Checks roman numbers validation and replacement by arabic numbers,
        both on isolated numerals and inside a sentence.
        """
        tester = tools.RomanNumbers()
        self.assertTrue(tester.check_valid("XIV"))
        self.assertFalse(tester.check_valid("ABC"))

        # Isolated numerals.
        self.assertEqual("14", tester.convert_to_arabic("XIV"))
        self.assertEqual("1987", tester.convert_to_arabic("MCMLXXXVII"))

        # Numerals embedded in a text, followed by an ordinal suffix.
        self.assertEqual(
            "Dans le 15e arrondissement",
            tester.convert_to_arabic_in_text("Dans le XVe arrondissement")
        )
        self.assertEqual(
            "20eme arr.",
            tester.convert_to_arabic_in_text("XXeme arr.")
        )

        # Texts which must come back unchanged, although some of their
        # words are made of letters used by roman numerals.
        self.assertEqual(
            "A AIX EN PROVENCE",
            tester.convert_to_arabic_in_text("A AIX EN PROVENCE")
        )
        self.assertEqual(
            "Montigny Le Bretonneux",
            tester.convert_to_arabic_in_text("Montigny Le Bretonneux")
        )

    def test_roman_numbers_in_text(self):
        """
        Checks conversion of roman numbers to arabic ones in string
        normalization.
        """
        self.assertEqual(
            "dans le 15e arrondissement",
            tools.normalize_string("Dans le XVe arrondissement")
        )

    def test_multiple_whitespaces(self):
        """
        Checks whitespaces are collapsed.
        """
        # The input must really contain a run of several spaces, otherwise
        # this test passes without exercising the collapsing at all.
        self.assertEqual(
            "avec ascenseur",
            tools.normalize_string("avec     ascenseur")
        )

    def test_accents(self):
        """
        Checks accents are replaced.
        """
        self.assertEqual(
            "eeeaui",
            tools.normalize_string(u"éèêàüï")
        )
2018-01-14 11:53:59 +01:00
class TestPhoneNumbers(unittest.TestCase):
    """
    Checks that phone numbers written in various ways are all homogeneized
    to the same string.
    """

    def test_prefix(self):
        """
        A number starting with an international prefix.
        """
        homogeneized = duplicates.homogeneize_phone_number("+33605040302")
        self.assertEqual("0605040302", homogeneized)

    def test_dots_separators(self):
        """
        A number whose digit pairs are separated by dots.
        """
        homogeneized = duplicates.homogeneize_phone_number("06.05.04.03.02")
        self.assertEqual("0605040302", homogeneized)

    def test_spaces_separators(self):
        """
        A number whose digit pairs are separated by spaces.
        """
        homogeneized = duplicates.homogeneize_phone_number("06 05 04 03 02")
        self.assertEqual("0605040302", homogeneized)
2018-01-14 11:53:59 +01:00
class TestDuplicates(unittest.TestCase):
    """
    Checks duplicates detection.
    """
    # Score thresholds the tests compare the computed duplicate score with.
    DUPLICATES_MIN_SCORE_WITHOUT_PHOTOS = 14  # pylint: disable=invalid-name
    DUPLICATES_MIN_SCORE_WITH_PHOTOS = 15  # pylint: disable=invalid-name
    # Single image cache instance shared by every test of this class.
    IMAGE_CACHE = ImageCache()  # pylint: disable=invalid-name

    @staticmethod
    def generate_fake_flat():
        """
        Generates a fake flat post.

        :return: A dict with randomly drawn ``id``, ``rooms``, ``area``,
            ``cost`` and ``bedrooms`` values, a fixed ``phone`` and empty
            ``utilities``.
        """
        backend = random.choice(BACKENDS_BY_PRECEDENCE)
        return {
            "id": str(random.randint(100000, 199999)) + "@" + backend,
            "phone": "0607080910",
            "rooms": random.randint(1, 4),
            "utilities": "",
            # Divide by a float so that the decimal is kept on Python 2 as
            # well, where ``int / int`` is an integer division.
            "area": random.randint(200, 1500) / 10.0,
            "cost": random.randint(100000, 300000),
            "bedrooms": random.randint(1, 4)
        }

    @staticmethod
    def load_files(file1, file2):
        """
        Load two flats from JSON files of the tests data directory.

        :param file1: Name of the first file, without its ``.json``
            extension.
        :param file2: Name of the second file, without its ``.json``
            extension.
        :return: A list with the two loaded flats, in the same order.
        """
        flats = []
        for name in (file1, file2):
            path = os.path.join(TESTS_DATA_DIR, name + ".json")
            with open(path, "r") as flat_file:
                flats.append(json.load(flat_file))
        return flats

    def _score(self, flat1, flat2):
        """
        Compute the duplicate score of two flats with the shared image
        cache.
        """
        return duplicates.get_duplicate_score(
            flat1, flat2, TestDuplicates.IMAGE_CACHE
        )

    def test_duplicates(self):
        """
        Two identical flats should be detected as duplicates.
        """
        flat1 = self.generate_fake_flat()
        flat2 = copy.deepcopy(flat1)
        self.assertGreaterEqual(
            self._score(flat1, flat2),
            TestDuplicates.DUPLICATES_MIN_SCORE_WITHOUT_PHOTOS
        )

    def test_different_prices(self):
        """
        Two flats with different prices should not be detected as duplicates.
        """
        flat1 = self.generate_fake_flat()
        flat2 = copy.deepcopy(flat1)
        flat2["cost"] += 1000
        self.assertLess(
            self._score(flat1, flat2),
            TestDuplicates.DUPLICATES_MIN_SCORE_WITHOUT_PHOTOS
        )

    def test_different_rooms(self):
        """
        Two flats with different rooms quantity should not be detected as
        duplicates.
        """
        flat1 = self.generate_fake_flat()
        flat2 = copy.deepcopy(flat1)
        flat2["rooms"] += 1
        self.assertLess(
            self._score(flat1, flat2),
            TestDuplicates.DUPLICATES_MIN_SCORE_WITHOUT_PHOTOS
        )

    def test_different_areas(self):
        """
        Two flats with different areas should not be detected as duplicates.
        """
        flat1 = self.generate_fake_flat()
        flat2 = copy.deepcopy(flat1)
        flat2["area"] += 10
        self.assertLess(
            self._score(flat1, flat2),
            TestDuplicates.DUPLICATES_MIN_SCORE_WITHOUT_PHOTOS
        )

    def test_different_areas_decimals(self):
        """
        Two flats whose areas have the same integer part but different
        decimals should not be detected as duplicates.
        """
        flat1 = self.generate_fake_flat()
        flat2 = copy.deepcopy(flat1)
        flat1["area"] = 50.65
        flat2["area"] = 50.37
        self.assertLess(
            self._score(flat1, flat2),
            TestDuplicates.DUPLICATES_MIN_SCORE_WITHOUT_PHOTOS
        )

    def test_different_phones(self):
        """
        Two flats with different phone numbers should not be detected as
        duplicates.
        """
        flat1 = self.generate_fake_flat()
        flat2 = copy.deepcopy(flat1)
        flat2["phone"] = "0708091011"
        self.assertLess(
            self._score(flat1, flat2),
            TestDuplicates.DUPLICATES_MIN_SCORE_WITHOUT_PHOTOS
        )

    def test_real_duplicates(self):
        """
        Two flats with same price, area and rooms quantity should be detected
        as duplicates.
        """
        # Fixtures loaded from the tests data directory; this test is
        # checked against the "with photos" threshold.
        flats = self.load_files(
            "127028739@seloger",
            "14428129@explorimmo"
        )
        self.assertGreaterEqual(
            self._score(flats[0], flats[1]),
            TestDuplicates.DUPLICATES_MIN_SCORE_WITH_PHOTOS
        )
2018-01-14 11:53:59 +01:00
def run():
    """
    Run all the tests.

    The test cases are run one after the other, each with its own verbose
    text runner. As soon as one of them is not successful, the process exits
    with status code 1 and the remaining test cases are not run.
    """
    LOGGER.info("Running tests…")
    for test_case in (TestTexts, TestPhoneNumbers, TestDuplicates):
        suite = unittest.TestLoader().loadTestsFromTestCase(test_case)
        result = unittest.TextTestRunner(verbosity=2).run(suite)
        # Explicit check rather than ``assert``: assert statements are
        # removed when Python runs with -O, which would hide failures.
        if not result.wasSuccessful():
            sys.exit(1)