Add unit tests
This commit is contained in:
parent
48835c0d83
commit
5b85ad6e59
@ -16,6 +16,7 @@ from flatisfy import cmds
|
||||
from flatisfy import data
|
||||
from flatisfy import fetch
|
||||
from flatisfy import tools
|
||||
from flatisfy import tests
|
||||
# pylint: enable=locally-disabled,wrong-import-position
|
||||
|
||||
|
||||
@ -113,6 +114,10 @@ def parse_args(argv=None):
|
||||
parser_serve.add_argument("--port", type=int, help="Port to bind to.")
|
||||
parser_serve.add_argument("--host", help="Host to listen on.")
|
||||
|
||||
# Test subcommand parser
|
||||
subparsers.add_parser("test", parents=[parent_parser],
|
||||
help="Unit testing.")
|
||||
|
||||
return parser.parse_args(argv)
|
||||
|
||||
|
||||
@ -212,6 +217,10 @@ def main():
|
||||
elif args.cmd == "serve":
|
||||
cmds.serve(config)
|
||||
return
|
||||
# Tests command
|
||||
elif args.cmd == "test":
|
||||
tests.run(config)
|
||||
return
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
@ -167,6 +167,103 @@ def detect(flats_list, key="id", merge=True, should_intersect=False):
|
||||
|
||||
return unique_flats_list, duplicate_flats
|
||||
|
||||
def get_duplicate_score(flat1, flat2, photo_cache):
    """
    Compute the duplicate score between two flats. The higher the score, the
    more likely the two flats are to be duplicates.

    Every criterion below is mandatory: as soon as one of them is not met, the
    score is ``0``. The criteria are checked with explicit conditions rather
    than ``assert`` statements, so that they are still enforced when Python
    runs with ``-O`` (which strips assertions).

    :param flat1: First flat dict. ``area``, ``cost``, ``bedrooms``,
        ``utilities``, ``rooms``, ``phone`` and ``id`` are read with direct
        key access; ``text``, ``photos`` and ``flatisfy`` are optional.
    :param flat2: Second flat dict, same expectations as ``flat1``.
    :param photo_cache: Cache object handed over to
        ``find_number_common_photos``.
    :return: The duplicate score, ``0`` if the flats cannot be duplicates.
    """
    n_common_items = 0
    try:
        # They should have the same area, up to one unit
        if not abs(flat1["area"] - flat2["area"]) < 1:
            return 0
        n_common_items += 1

        # They should be at the same price, up to one unit
        if not abs(flat1["cost"] - flat2["cost"]) < 1:
            return 0
        n_common_items += 1

        # They should have the same number of bedrooms if this was
        # fetched for both
        if flat1["bedrooms"] and flat2["bedrooms"]:
            if flat1["bedrooms"] != flat2["bedrooms"]:
                return 0
            n_common_items += 1

        # They should have the same utilities (included or excluded for
        # both of them), if this was fetched for both
        if flat1["utilities"] and flat2["utilities"]:
            if flat1["utilities"] != flat2["utilities"]:
                return 0
            n_common_items += 1

        # They should have the same number of rooms if it was fetched
        # for both of them
        if flat1["rooms"] and flat2["rooms"]:
            if flat1["rooms"] != flat2["rooms"]:
                return 0
            n_common_items += 1

        # They should have the same postal code, if available
        if (
                "flatisfy" in flat1 and "flatisfy" in flat2 and
                flat1["flatisfy"].get("postal_code", None) and
                flat2["flatisfy"].get("postal_code", None)
        ):
            if (
                    flat1["flatisfy"]["postal_code"] !=
                    flat2["flatisfy"]["postal_code"]
            ):
                return 0
            n_common_items += 1

        # TODO: Better text comparison (one included in the other, fuzzymatch)
        # Identical normalized texts only add to the score, different texts
        # do not rule the pair out.
        flat1_text = tools.normalize_string(flat1.get("text", ""))
        flat2_text = tools.normalize_string(flat2.get("text", ""))
        if flat1_text and flat2_text and flat1_text == flat2_text:
            n_common_items += 1

        # They should have the same phone number if it was fetched for
        # both
        flat1_phone = homogeneize_phone_number(flat1["phone"])
        flat2_phone = homogeneize_phone_number(flat2["phone"])
        if flat1_phone and flat2_phone:
            if flat1_phone != flat2_phone:
                return 0
            n_common_items += 10  # Counts much more than the rest

        # They should have photos in common if there are some photos.
        # NOTE(review): the threshold is strictly more than one common
        # photo, whereas the historical comment said "at least one" --
        # confirm which one is intended.
        if flat1.get("photos", []) and flat2.get("photos", []):
            n_common_photos = find_number_common_photos(
                photo_cache,
                flat1["photos"],
                flat2["photos"]
            )
            if not n_common_photos > 1:
                return 0

            min_number_photos = min(len(flat1["photos"]),
                                    len(flat2["photos"]))

            # Either all the photos are the same, or there are at least
            # three common photos.
            if n_common_photos == min_number_photos:
                n_common_items += 15
            else:
                n_common_items += 5 * min(n_common_photos, 3)

        # If the two flats are from the same website and have a
        # different float part, consider they cannot be duplicates. See
        # https://framagit.org/phyks/Flatisfy/issues/100.
        both_are_from_same_backend = (
            flat1["id"].split("@")[-1] == flat2["id"].split("@")[-1]
        )
        both_have_float_part = (
            (flat1["area"] % 1) > 0 and (flat2["area"] % 1) > 0
        )
        both_have_equal_float_part = (
            (flat1["area"] % 1) == (flat2["area"] % 1)
        )
        if (
                both_have_float_part and both_are_from_same_backend and
                not both_have_equal_float_part
        ):
            return 0
    except TypeError:
        # TypeError occurs when an area or a cost is None, which should
        # not be considered as duplicates
        return 0

    return n_common_items
|
||||
|
||||
def deep_detect(flats_list, config):
|
||||
"""
|
||||
@ -192,111 +289,20 @@ def deep_detect(flats_list, config):
|
||||
if flat2["id"] in matching_flats[flat1["id"]]:
|
||||
continue
|
||||
|
||||
n_common_items = 0
|
||||
try:
|
||||
# They should have the same area, up to one unit
|
||||
assert abs(flat1["area"] - flat2["area"]) < 1
|
||||
n_common_items += 1
|
||||
n_common_items = get_duplicate_score(flat1, flat2, photo_cache)
|
||||
|
||||
# They should be at the same price, up to one unit
|
||||
assert abs(flat1["cost"] - flat2["cost"]) < 1
|
||||
n_common_items += 1
|
||||
|
||||
# They should have the same number of bedrooms if this was
|
||||
# fetched for both
|
||||
if flat1["bedrooms"] and flat2["bedrooms"]:
|
||||
assert flat1["bedrooms"] == flat2["bedrooms"]
|
||||
n_common_items += 1
|
||||
|
||||
# They should have the same utilities (included or excluded for
|
||||
# both of them), if this was fetched for both
|
||||
if flat1["utilities"] and flat2["utilities"]:
|
||||
assert flat1["utilities"] == flat2["utilities"]
|
||||
n_common_items += 1
|
||||
|
||||
# They should have the same number of rooms if it was fetched
|
||||
# for both of them
|
||||
if flat1["rooms"] and flat2["rooms"]:
|
||||
assert flat1["rooms"] == flat2["rooms"]
|
||||
n_common_items += 1
|
||||
|
||||
# They should have the same postal code, if available
|
||||
if (
|
||||
"flatisfy" in flat1 and "flatisfy" in flat2 and
|
||||
flat1["flatisfy"].get("postal_code", None) and
|
||||
flat2["flatisfy"].get("postal_code", None)
|
||||
):
|
||||
assert (
|
||||
flat1["flatisfy"]["postal_code"] ==
|
||||
flat2["flatisfy"]["postal_code"]
|
||||
)
|
||||
n_common_items += 1
|
||||
|
||||
# TODO: Compare texts (one is included in another? fuzzymatch?)
|
||||
|
||||
# They should have the same phone number if it was fetched for
|
||||
# both
|
||||
flat1_phone = homogeneize_phone_number(flat1["phone"])
|
||||
flat2_phone = homogeneize_phone_number(flat2["phone"])
|
||||
if flat1_phone and flat2_phone:
|
||||
assert flat1_phone == flat2_phone
|
||||
n_common_items += 10 # Counts much more than the rest
|
||||
|
||||
# They should have at least one photo in common if there
|
||||
# are some photos
|
||||
if flat1["photos"] and flat2["photos"]:
|
||||
n_common_photos = find_number_common_photos(
|
||||
photo_cache,
|
||||
flat1["photos"],
|
||||
flat2["photos"]
|
||||
)
|
||||
assert n_common_photos > 1
|
||||
|
||||
min_number_photos = min(len(flat1["photos"]),
|
||||
len(flat2["photos"]))
|
||||
|
||||
# Either all the photos are the same, or there are at least
|
||||
# three common photos.
|
||||
if n_common_photos == min_number_photos:
|
||||
n_common_items += 15
|
||||
else:
|
||||
n_common_items += 5 * min(n_common_photos, 3)
|
||||
|
||||
# Minimal score to consider they are duplicates
|
||||
assert n_common_items >= config["duplicate_threshold"]
|
||||
|
||||
# If the two flats are from the same website and have a
|
||||
# different float part, consider they cannot be duplicates. See
|
||||
# https://framagit.org/phyks/Flatisfy/issues/100.
|
||||
both_are_from_same_backend = (
|
||||
flat1["id"].split("@")[-1] == flat2["id"].split("@")[-1]
|
||||
# Minimal score to consider they are duplicates
|
||||
if n_common_items >= config["duplicate_threshold"]:
|
||||
# Mark flats as duplicates
|
||||
LOGGER.info(
|
||||
("Found duplicates using deep detection: (%s, %s). "
|
||||
"Score is %d."),
|
||||
flat1["id"],
|
||||
flat2["id"],
|
||||
n_common_items
|
||||
)
|
||||
both_have_float_part = (
|
||||
(flat1["area"] % 1) > 0 and (flat2["area"] % 1) > 0
|
||||
)
|
||||
both_have_different_float_part = (
|
||||
(flat1["area"] % 1) != (flat2["area"] % 1)
|
||||
)
|
||||
if(both_have_float_part and both_are_from_same_backend and
|
||||
both_have_different_float_part):
|
||||
continue
|
||||
except (AssertionError, TypeError):
|
||||
# Skip and consider as not duplicates whenever the conditions
|
||||
# are not met
|
||||
# TypeError occurs when an area or a cost is None, which should
|
||||
# not be considered as duplicates
|
||||
continue
|
||||
|
||||
# Mark flats as duplicates
|
||||
LOGGER.info(
|
||||
("Found duplicates using deep detection: (%s, %s). "
|
||||
"Score is %d."),
|
||||
flat1["id"],
|
||||
flat2["id"],
|
||||
n_common_items
|
||||
)
|
||||
matching_flats[flat1["id"]].append(flat2["id"])
|
||||
matching_flats[flat2["id"]].append(flat1["id"])
|
||||
matching_flats[flat1["id"]].append(flat2["id"])
|
||||
matching_flats[flat2["id"]].append(flat1["id"])
|
||||
|
||||
if photo_cache.total():
|
||||
LOGGER.debug("Photo cache: hits: %d%% / misses: %d%%.",
|
||||
|
73
flatisfy/test_files/127028739@seloger.json
Normal file
73
flatisfy/test_files/127028739@seloger.json
Normal file
@ -0,0 +1,73 @@
|
||||
{
|
||||
"id": "127028739@seloger",
|
||||
"url": "http://www.seloger.com/annonces/achat/appartement/rennes-35/centre/127028739.htm?p=",
|
||||
"title": "Appartement 3 pièces 67m² - Rennes",
|
||||
"area": 67,
|
||||
"cost": 155700,
|
||||
"price_per_meter": 2323.8805970149256,
|
||||
"currency": "€",
|
||||
"utilities": "",
|
||||
"date": "2018-01-12T02:10:00",
|
||||
"location": "17 PLACE MARECHAL JUIN Rennes (35000)",
|
||||
"station": "",
|
||||
"text": "Exclusivité Nexity Dans un immeuble de standing, en étage élevé avec ascenseur, Appartement Type 3 de 67 m² exposé Sud / Ouest, un séjour avec balcon et double exposition vue dégagée. Deux chambres dont une avec balcon, salle de douches, WC séparé, cave et parking en sous-sol.",
|
||||
"phone": null,
|
||||
"photos": [
|
||||
{
|
||||
"id": "0an3yarge9y446j653dewxu0jwy33pmwar47k2qym.jpg",
|
||||
"url": "https://v.seloger.com/s/width/800/visuels/0/a/n/3/0an3yarge9y446j653dewxu0jwy33pmwar47k2qym.jpg",
|
||||
"data": null
|
||||
},
|
||||
{
|
||||
"id": "1qnz6hpffcrd1c71htbooubgb7s57d82ie1v0zyf2.jpg",
|
||||
"url": "https://v.seloger.com/s/width/800/visuels/1/q/n/z/1qnz6hpffcrd1c71htbooubgb7s57d82ie1v0zyf2.jpg",
|
||||
"data": null
|
||||
},
|
||||
{
|
||||
"id": "16bv8yqgytefa1fq57hyk6e0y6ox8t2mh8wj2dgxq.jpg",
|
||||
"url": "https://v.seloger.com/s/width/800/visuels/1/6/b/v/16bv8yqgytefa1fq57hyk6e0y6ox8t2mh8wj2dgxq.jpg",
|
||||
"data": null
|
||||
},
|
||||
{
|
||||
"id": "1o23blwk87ew95e3vcq5ygyk10z2hy82fzo5j6hha.jpg",
|
||||
"url": "https://v.seloger.com/s/width/800/visuels/1/o/2/3/1o23blwk87ew95e3vcq5ygyk10z2hy82fzo5j6hha.jpg",
|
||||
"data": null
|
||||
},
|
||||
{
|
||||
"id": "20vuxbdp160sot4ccryf6g7g4rwxrkhz3b3tmq7zy.jpg",
|
||||
"url": "https://v.seloger.com/s/width/800/visuels/2/0/v/u/20vuxbdp160sot4ccryf6g7g4rwxrkhz3b3tmq7zy.jpg",
|
||||
"data": null
|
||||
},
|
||||
{
|
||||
"id": "00d9bpezie95lqtfmoccqg1ddrld2m64c2mcod5ha.jpg",
|
||||
"url": "https://v.seloger.com/s/width/800/visuels/0/0/d/9/00d9bpezie95lqtfmoccqg1ddrld2m64c2mcod5ha.jpg",
|
||||
"data": null
|
||||
},
|
||||
{
|
||||
"id": "0lhqf881qm2j03hz5581d8ggplp1xwwchb2rtoqgu.jpg",
|
||||
"url": "https://v.seloger.com/s/width/800/visuels/0/l/h/q/0lhqf881qm2j03hz5581d8ggplp1xwwchb2rtoqgu.jpg",
|
||||
"data": null
|
||||
},
|
||||
{
|
||||
"id": "0chwbagbf8tc0qf9sd3wryzl4gm7hkswcnrtnx2bi.jpg",
|
||||
"url": "https://v.seloger.com/s/width/800/visuels/0/c/h/w/0chwbagbf8tc0qf9sd3wryzl4gm7hkswcnrtnx2bi.jpg",
|
||||
"data": null
|
||||
}
|
||||
],
|
||||
"rooms": 3,
|
||||
"bedrooms": 2,
|
||||
"details": {
|
||||
"Vue": "",
|
||||
"Pièces": "3",
|
||||
"Etage": "15",
|
||||
"Reference": "MT0136601",
|
||||
"Chambres": "2",
|
||||
"Cave": "",
|
||||
"Balcon": "5 m²",
|
||||
"Surface": "67 m²",
|
||||
"Ascenseur": "",
|
||||
"Etages": "30",
|
||||
"Parking": "1",
|
||||
"Salle de Séjour": ""
|
||||
}
|
||||
}
|
77
flatisfy/test_files/14428129@explorimmo.json
Normal file
77
flatisfy/test_files/14428129@explorimmo.json
Normal file
@ -0,0 +1,77 @@
|
||||
{
|
||||
"id": "14428129@explorimmo",
|
||||
"url": "http://www.explorimmo.com/annonce-14428129.html",
|
||||
"title": "Vente appartement 3 pièces 67 m2",
|
||||
"area": 67,
|
||||
"cost": 155700,
|
||||
"price_per_meter": 2323.8805970149256,
|
||||
"currency": "EUR",
|
||||
"utilities": "H.C.",
|
||||
"date": "2017-12-05T07:40:00",
|
||||
"location": "17 PLACE MARECHAL JUIN Rennes 35000",
|
||||
"station": null,
|
||||
"text": "Exclusivité Nexity Dans un immeuble de standing, en étage élevé avec\nascenseur, Appartement Type 3 de 67 m² exposé Sud / Ouest, un séjour avec\nbalcon et double exposition vue dégagée. Deux chambres dont une avec balcon,\nsalle de douches, WC séparé, cave et parking en sous-sol.\n\n",
|
||||
"phone": null,
|
||||
"photos": [
|
||||
{
|
||||
"id": "f9b2da6dfa184759aa0c349edb1cd037.jpg",
|
||||
"url": "http://thbr.figarocms.net/images/2qEDBqRV-QNlp4fHVNhSCWlt6rU=/560x420/filters:fill(f6f6f6):quality(80):strip_icc()/f9b2da6dfa184759aa0c349edb1cd037.jpg",
|
||||
"data": null
|
||||
},
|
||||
{
|
||||
"id": "3f2cc9dc429d4e3dbb9f4216f109d224.jpg",
|
||||
"url": "http://thbr.figarocms.net/images/DulZQyZkkwa0ZFBT1nYD9rUD0A4=/560x420/filters:fill(f6f6f6):quality(80):strip_icc()/3f2cc9dc429d4e3dbb9f4216f109d224.jpg",
|
||||
"data": null
|
||||
},
|
||||
{
|
||||
"id": "56ae1db620f44af6b860df10eba55870.jpg",
|
||||
"url": "http://thbr.figarocms.net/images/EpvEffLcFbBT7spEZB2dcOHaZwA=/560x420/filters:fill(f6f6f6):quality(80):strip_icc()/56ae1db620f44af6b860df10eba55870.jpg",
|
||||
"data": null
|
||||
},
|
||||
{
|
||||
"id": "5acdef1f05314fe19111a0c3d92b8fe5.jpg",
|
||||
"url": "http://thbr.figarocms.net/images/wHtDlJMwIrMC3cWXi8ASN4I6Zl4=/560x420/filters:fill(f6f6f6):quality(80):strip_icc()/5acdef1f05314fe19111a0c3d92b8fe5.jpg",
|
||||
"data": null
|
||||
},
|
||||
{
|
||||
"id": "16c686ea91b248129fe60011d61e060b.jpg",
|
||||
"url": "http://thbr.figarocms.net/images/SD5VT1gxRSXSlt3pAz8r_SI3rqw=/560x420/filters:fill(f6f6f6):quality(80):strip_icc()/16c686ea91b248129fe60011d61e060b.jpg",
|
||||
"data": null
|
||||
},
|
||||
{
|
||||
"id": "e6a67d42709d443481da0feb9a7e11a1.jpg",
|
||||
"url": "http://thbr.figarocms.net/images/u8PGKXqC0CL9AyEOI5T9TFeGs-Y=/560x420/filters:fill(f6f6f6):quality(80):strip_icc()/e6a67d42709d443481da0feb9a7e11a1.jpg",
|
||||
"data": null
|
||||
},
|
||||
{
|
||||
"id": "6888cc7bc823402198205e480c8cab6c.jpg",
|
||||
"url": "http://thbr.figarocms.net/images/-3AseFCRaleidG2vsDJpA5BLBa4=/560x420/filters:fill(f6f6f6):quality(80):strip_icc()/6888cc7bc823402198205e480c8cab6c.jpg",
|
||||
"data": null
|
||||
},
|
||||
{
|
||||
"id": "d40dbeea9e424ea2a846f5683746ea9e.jpg",
|
||||
"url": "http://thbr.figarocms.net/images/TMKBtBuucYge-BgCoUGRjxZjdBE=/560x420/filters:fill(f6f6f6):quality(80):strip_icc()/d40dbeea9e424ea2a846f5683746ea9e.jpg",
|
||||
"data": null
|
||||
}
|
||||
],
|
||||
"rooms": 3,
|
||||
"bedrooms": 2,
|
||||
"details": {
|
||||
"available": true,
|
||||
"heatingType": "",
|
||||
"agency": "NEXITY LAMY, 6 avenue Jean Janvier, 35000, Rennes",
|
||||
"bathrooms": 0,
|
||||
"exposure": "Non précisé",
|
||||
"floor": "15",
|
||||
"energy": "C",
|
||||
"bedrooms": 2,
|
||||
"greenhouseGasEmission": null,
|
||||
"isFurnished": false,
|
||||
"rooms": 3,
|
||||
"fees": 0,
|
||||
"creationDate": 1512455998000,
|
||||
"agencyFees": 0,
|
||||
"availabilityDate": null,
|
||||
"guarantee": 0
|
||||
}
|
||||
}
|
204
flatisfy/tests.py
Normal file
204
flatisfy/tests.py
Normal file
@ -0,0 +1,204 @@
|
||||
# coding: utf-8
|
||||
"""
|
||||
This module contains unit testing functions.
|
||||
"""
|
||||
|
||||
import random
|
||||
import logging
|
||||
import unittest
|
||||
import copy
|
||||
import os
|
||||
import json
|
||||
from flatisfy import tools
|
||||
from flatisfy.filters import duplicates
|
||||
from flatisfy.filters.cache import ImageCache
|
||||
from flatisfy.constants import BACKENDS_BY_PRECEDENCE
|
||||
|
||||
# Module-level logger, named after this module.
LOGGER = logging.getLogger(__name__)
|
||||
# Directory holding the JSON fixtures, resolved next to this file (symlinks
# resolved). It ends with a slash so that a file name can be appended to it.
TESTS_DATA_DIR = os.path.dirname(os.path.realpath(__file__)) + "/test_files/"
|
||||
|
||||
class TestTexts(unittest.TestCase):
    """
    Checks on the text normalization done by ``tools.normalize_string``.
    """
    def test_roman_numbers(self):
        """
        Checks roman numbers replacement.
        """
        self.assertEqual(
            "14",
            tools.normalize_string("XIV")
        )

    def test_multiple_whitespaces(self):
        """
        Checks whitespaces are collapsed.
        """
        # The input must actually contain a run of several spaces, otherwise
        # nothing is collapsed and the test is vacuous.
        self.assertEqual(
            "avec ascenseur",
            tools.normalize_string("avec   ascenseur")
        )

    def test_accents(self):
        """
        Checks accents are replaced.
        """
        # The accented text is the input, its unaccented form is the
        # expected output.
        self.assertEqual(
            "eeeaui",
            tools.normalize_string(u"éèêàüï")
        )
|
||||
|
||||
class TestPhoneNumbers(unittest.TestCase):
    """
    Checks on ``duplicates.homogeneize_phone_number``: each supported
    notation should be reduced to the same ten-digit form.
    """
    def test_prefix(self):
        """
        Checks phone numbers with international prefixes.
        """
        homogeneized = duplicates.homogeneize_phone_number("+33605040302")
        self.assertEqual("0605040302", homogeneized)

    def test_dots_separators(self):
        """
        Checks phone numbers with dots.
        """
        homogeneized = duplicates.homogeneize_phone_number("06.05.04.03.02")
        self.assertEqual("0605040302", homogeneized)

    def test_spaces_separators(self):
        """
        Checks phone numbers with spaces.
        """
        homogeneized = duplicates.homogeneize_phone_number("06 05 04 03 02")
        self.assertEqual("0605040302", homogeneized)
|
||||
|
||||
class TestDuplicates(unittest.TestCase):
    """
    Checks on the duplicate score computed by
    ``duplicates.get_duplicate_score``.
    """
    # Minimal scores used by these tests to decide two flats are duplicates,
    # when no photos are compared and when photos are compared.
    DUPLICATES_MIN_SCORE_WITHOUT_PHOTOS = 14
    DUPLICATES_MIN_SCORE_WITH_PHOTOS = 15
    # Single image cache shared by every test of this class.
    IMAGE_CACHE = ImageCache()

    def generate_fake_flat(self):
        """
        Generates a fake flat post.

        The backend, id, rooms, area, cost and bedrooms are drawn at random;
        the phone number is fixed and no text nor photos are set.

        :return: A flat dict.
        """
        backend = random.choice(BACKENDS_BY_PRECEDENCE)
        return {
            "id": str(random.randint(100000, 199999)) + "@" + backend,
            "phone": "0607080910",
            "rooms": random.randint(1, 4),
            "utilities": "",
            "area": random.randint(200, 1500) / 10,
            "cost": random.randint(100000, 300000),
            "bedrooms": random.randint(1, 4)
        }

    def load_files(self, file1, file2):
        """
        Load two files

        :param file1: Name of the first fixture in the tests data directory,
            without the ``.json`` extension.
        :param file2: Name of the second fixture, same convention.
        :return: A list with two flats
        """
        flats = []
        for name in (file1, file2):
            path = os.path.join(TESTS_DATA_DIR, name + ".json")
            # Fixtures contain non-ASCII characters: read raw bytes and
            # decode them explicitly as UTF-8 so that loading does not
            # depend on the locale default encoding.
            with open(path, "rb") as flat_file:
                flats.append(json.loads(flat_file.read().decode("utf-8")))
        return flats

    def test_duplicates(self):
        """
        Two identical flats should be detected as duplicates.
        """
        flat1 = self.generate_fake_flat()
        flat2 = copy.deepcopy(flat1)
        score = duplicates.get_duplicate_score(
            flat1, flat2, TestDuplicates.IMAGE_CACHE
        )
        self.assertGreaterEqual(
            score, TestDuplicates.DUPLICATES_MIN_SCORE_WITHOUT_PHOTOS
        )

    def test_different_prices(self):
        """
        Two flats with different prices should not be detected as duplicates.
        """
        flat1 = self.generate_fake_flat()
        flat2 = copy.deepcopy(flat1)
        flat2["cost"] += 1000

        score = duplicates.get_duplicate_score(
            flat1, flat2, TestDuplicates.IMAGE_CACHE
        )
        self.assertLess(
            score, TestDuplicates.DUPLICATES_MIN_SCORE_WITHOUT_PHOTOS
        )

    def test_different_rooms(self):
        """
        Two flats with different rooms quantity should not be detected as
        duplicates.
        """
        flat1 = self.generate_fake_flat()
        flat2 = copy.deepcopy(flat1)
        flat2["rooms"] += 1

        score = duplicates.get_duplicate_score(
            flat1, flat2, TestDuplicates.IMAGE_CACHE
        )
        self.assertLess(
            score, TestDuplicates.DUPLICATES_MIN_SCORE_WITHOUT_PHOTOS
        )

    def test_different_areas(self):
        """
        Two flats with different areas should not be detected as duplicates.
        """
        flat1 = self.generate_fake_flat()
        flat2 = copy.deepcopy(flat1)
        flat2["area"] += 10

        score = duplicates.get_duplicate_score(
            flat1, flat2, TestDuplicates.IMAGE_CACHE
        )
        self.assertLess(
            score, TestDuplicates.DUPLICATES_MIN_SCORE_WITHOUT_PHOTOS
        )

    def test_different_areas_decimals(self):
        """
        Two flats which areas integers are equal but decimals are present and
        different should not be detected as duplicates.
        """
        flat1 = self.generate_fake_flat()
        # The copy keeps the same id, hence the same backend as flat1.
        flat2 = copy.deepcopy(flat1)
        flat1["area"] = 50.65
        flat2["area"] = 50.37

        score = duplicates.get_duplicate_score(
            flat1, flat2, TestDuplicates.IMAGE_CACHE
        )
        self.assertLess(
            score, TestDuplicates.DUPLICATES_MIN_SCORE_WITHOUT_PHOTOS
        )

    def test_different_phones(self):
        """
        Two flats with different phone numbers should not be detected as
        duplicates.
        """
        flat1 = self.generate_fake_flat()
        flat2 = copy.deepcopy(flat1)
        flat2["phone"] = "0708091011"

        score = duplicates.get_duplicate_score(
            flat1, flat2, TestDuplicates.IMAGE_CACHE
        )
        self.assertLess(
            score, TestDuplicates.DUPLICATES_MIN_SCORE_WITHOUT_PHOTOS
        )

    def test_real_duplicates(self):
        """
        Two flats with same price, area and rooms quantity should be detected
        as duplicates.

        The two flats are loaded from the JSON fixtures of the tests data
        directory.
        """
        flats = self.load_files(
            "127028739@seloger",
            "14428129@explorimmo"
        )

        score = duplicates.get_duplicate_score(
            flats[0], flats[1], TestDuplicates.IMAGE_CACHE
        )
        self.assertGreaterEqual(
            score, TestDuplicates.DUPLICATES_MIN_SCORE_WITH_PHOTOS
        )
|
||||
|
||||
def run(config):
    """
    Run all the tests

    Every test case is run, even when a previous one failed, so that the
    whole report is printed.

    :param config: A config dict. It is not read by the tests at the moment.
    :raises SystemExit: With exit status ``1`` when at least one test failed
        or errored, so that the failure is visible to the calling shell.
    """
    LOGGER.info("Running tests…")
    all_passed = True
    for test_case in (TestTexts, TestPhoneNumbers, TestDuplicates):
        suite = unittest.TestLoader().loadTestsFromTestCase(test_case)
        result = unittest.TextTestRunner(verbosity=2).run(suite)
        # Keep running the remaining test cases, only remember the failure.
        all_passed = result.wasSuccessful() and all_passed

    if not all_passed:
        raise SystemExit(1)
|
Loading…
Reference in New Issue
Block a user