235 lines
7.2 KiB
Python
235 lines
7.2 KiB
Python
# coding: utf-8
"""
This module contains unit testing functions.
"""
|
|
|
|
import random
|
|
import logging
|
|
import unittest
|
|
import copy
|
|
import os
|
|
import json
|
|
from flatisfy import tools
|
|
from flatisfy.filters import duplicates
|
|
from flatisfy.filters.cache import ImageCache
|
|
from flatisfy.constants import BACKENDS_BY_PRECEDENCE
|
|
|
|
# Logger named after this module.
LOGGER = logging.getLogger(__name__)
# Directory (next to this file) holding the JSON test fixtures. The trailing
# slash is kept so that a file name can be appended directly.
TESTS_DATA_DIR = "%s/test_files/" % os.path.dirname(os.path.realpath(__file__))
|
|
|
|
class TestTexts(unittest.TestCase):
    """
    Checks the text helpers exposed by ``flatisfy.tools``.
    """

    def test_roman_numbers(self):
        """
        Checks roman numbers replacement.
        """
        tester = tools.RomanNumbers()
        self.assertTrue(tester.check_valid("XIV"))
        self.assertFalse(tester.check_valid("ABC"))

        # Conversion of standalone roman numbers.
        self.assertEqual(
            "14",
            tester.convert_to_arabic("XIV")
        )

        self.assertEqual(
            "1987",
            tester.convert_to_arabic("MCMLXXXVII")
        )

        # Conversion of roman numbers embedded in a sentence.
        self.assertEqual(
            "Dans le 15e arrondissement",
            tester.convert_to_arabic_in_text("Dans le XVe arrondissement")
        )

        self.assertEqual(
            "20eme arr.",
            tester.convert_to_arabic_in_text("XXeme arr.")
        )

        # A text without any roman number is expected to come back unchanged.
        self.assertEqual(
            "A AIX EN PROVENCE",
            tester.convert_to_arabic_in_text("A AIX EN PROVENCE")
        )

    def test_roman_numbers_in_text(self):
        """
        Checks roman numbers are replaced when normalizing a string.
        """
        self.assertEqual(
            "dans le 15e arrondissement",
            tools.normalize_string("Dans le XVe arrondissement")
        )

    def test_multiple_whitespaces(self):
        """
        Checks whitespaces are collapsed.
        """
        # The input must contain several consecutive spaces, otherwise the
        # collapsing is never exercised.
        self.assertEqual(
            "avec ascenseur",
            tools.normalize_string("avec   ascenseur")
        )

    def test_accents(self):
        """
        Checks accents are replaced.
        """
        # Expected value comes first: the accented input should come back
        # as its accent-free counterpart.
        self.assertEqual(
            "eeeaui",
            tools.normalize_string("éèêàüï")
        )
|
|
|
|
class TestPhoneNumbers(unittest.TestCase):
    """
    Checks ``duplicates.homogeneize_phone_number`` on several notations of
    the same phone number.
    """

    def _assert_homogeneized(self, raw_number):
        """
        Asserts that the given notation is homogeneized to ``0605040302``.

        :param raw_number: The phone number string to homogeneize.
        """
        homogeneized = duplicates.homogeneize_phone_number(raw_number)
        self.assertEqual("0605040302", homogeneized)

    def test_prefix(self):
        """
        Checks phone numbers with international prefixes.
        """
        self._assert_homogeneized("+33605040302")

    def test_dots_separators(self):
        """
        Checks phone numbers with dots.
        """
        self._assert_homogeneized("06.05.04.03.02")

    def test_spaces_separators(self):
        """
        Checks phone numbers with spaces.
        """
        self._assert_homogeneized("06 05 04 03 02")
|
|
|
|
class TestDuplicates(unittest.TestCase):
    """
    Checks the scores returned by ``duplicates.get_duplicate_score``.
    """
    # Score from which two flats are considered duplicates by the tests
    # working on generated flats.
    DUPLICATES_MIN_SCORE_WITHOUT_PHOTOS = 14
    # Score from which two flats are considered duplicates by the test
    # working on posts loaded from disk (presumably these carry photos).
    DUPLICATES_MIN_SCORE_WITH_PHOTOS = 15
    # Single image cache instance, shared by every test of this class.
    IMAGE_CACHE = ImageCache()

    def generate_fake_flat(self):
        """
        Generates a fake flat post.

        :return: A dict with a random id, rooms, area, cost and bedrooms, a
            fixed phone number and empty utilities.
        """
        backend = random.choice(BACKENDS_BY_PRECEDENCE)
        return {
            "id": str(random.randint(100000, 199999)) + "@" + backend,
            "phone": "0607080910",
            "rooms": random.randint(1, 4),
            "utilities": "",
            "area": random.randint(200, 1500) / 10,
            "cost": random.randint(100000, 300000),
            "bedrooms": random.randint(1, 4)
        }

    def load_files(self, file1, file2):
        """
        Load two files

        :param file1: Name (without the ``.json`` extension) of the first
            file, looked up in ``TESTS_DATA_DIR``.
        :param file2: Name (without the ``.json`` extension) of the second
            file, looked up in ``TESTS_DATA_DIR``.
        :return: A list with the two flats, in the order of the arguments.
        """
        flats = []
        for name in (file1, file2):
            path = os.path.join(TESTS_DATA_DIR, name + ".json")
            # Explicit encoding: do not depend on the locale to read JSON.
            with open(path, "r", encoding="utf-8") as flat_file:
                flats.append(json.load(flat_file))
        return flats

    def _fake_pair(self):
        """
        Builds a fake flat together with an independent deep copy of it.

        :return: A ``(flat1, flat2)`` tuple of equal but distinct dicts.
        """
        flat1 = self.generate_fake_flat()
        return flat1, copy.deepcopy(flat1)

    def _score(self, flat1, flat2):
        """
        Computes the duplicate score of two flats, using the shared image
        cache.

        :param flat1: First flat dict.
        :param flat2: Second flat dict.
        :return: The value returned by ``duplicates.get_duplicate_score``.
        """
        return duplicates.get_duplicate_score(
            flat1, flat2, TestDuplicates.IMAGE_CACHE
        )

    def test_duplicates(self):
        """
        Two identical flats should be detected as duplicates.
        """
        flat1, flat2 = self._fake_pair()

        self.assertGreaterEqual(
            self._score(flat1, flat2),
            TestDuplicates.DUPLICATES_MIN_SCORE_WITHOUT_PHOTOS
        )

    def test_different_prices(self):
        """
        Two flats with different prices should not be detected as duplicates.
        """
        flat1, flat2 = self._fake_pair()
        flat2["cost"] += 1000

        self.assertLess(
            self._score(flat1, flat2),
            TestDuplicates.DUPLICATES_MIN_SCORE_WITHOUT_PHOTOS
        )

    def test_different_rooms(self):
        """
        Two flats with different rooms quantity should not be detected as
        duplicates.
        """
        flat1, flat2 = self._fake_pair()
        flat2["rooms"] += 1

        self.assertLess(
            self._score(flat1, flat2),
            TestDuplicates.DUPLICATES_MIN_SCORE_WITHOUT_PHOTOS
        )

    def test_different_areas(self):
        """
        Two flats with different areas should not be detected as duplicates.
        """
        flat1, flat2 = self._fake_pair()
        flat2["area"] += 10

        self.assertLess(
            self._score(flat1, flat2),
            TestDuplicates.DUPLICATES_MIN_SCORE_WITHOUT_PHOTOS
        )

    def test_different_areas_decimals(self):
        """
        Two flats which areas integers are equal but decimals are present and
        different should not be detected as duplicates.
        """
        flat1, flat2 = self._fake_pair()
        flat1["area"] = 50.65
        flat2["area"] = 50.37

        self.assertLess(
            self._score(flat1, flat2),
            TestDuplicates.DUPLICATES_MIN_SCORE_WITHOUT_PHOTOS
        )

    def test_different_phones(self):
        """
        Two flats with different phone numbers should not be detected as
        duplicates.
        """
        flat1, flat2 = self._fake_pair()
        flat2["phone"] = "0708091011"

        self.assertLess(
            self._score(flat1, flat2),
            TestDuplicates.DUPLICATES_MIN_SCORE_WITHOUT_PHOTOS
        )

    def test_real_duplicates(self):
        """
        Two flats with same price, area and rooms quantity should be detected
        as duplicates.
        """
        flat1, flat2 = self.load_files(
            "127028739@seloger",
            "14428129@explorimmo"
        )

        self.assertGreaterEqual(
            self._score(flat1, flat2),
            TestDuplicates.DUPLICATES_MIN_SCORE_WITH_PHOTOS
        )
|
|
|
|
def run(config):
    """
    Run all the tests

    Each test case is loaded in its own suite and run with its own verbose
    text runner, in the order ``TestTexts``, ``TestPhoneNumbers``,
    ``TestDuplicates``.

    :param config: A config dict. It is not read by this function.
    :return: ``True`` if every test case ran successfully, ``False`` if at
        least one of them reported a failure or an error.
    """
    LOGGER.info("Running tests…")

    all_passed = True
    for test_case in (TestTexts, TestPhoneNumbers, TestDuplicates):
        suite = unittest.TestLoader().loadTestsFromTestCase(test_case)
        result = unittest.TextTestRunner(verbosity=2).run(suite)
        # Keep running the remaining test cases even after a failure, but
        # remember it so that the caller can act on the overall outcome.
        if not result.wasSuccessful():
            all_passed = False

    return all_passed
|