Make CI fail if a test fails + a bit of linting
This commit is contained in:
parent
6bcfb62e8d
commit
d482d9b6fa
@ -71,6 +71,14 @@ flatisfy.fetch module
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
flatisfy.tests module
|
||||
---------------------
|
||||
|
||||
.. automodule:: flatisfy.tests
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
flatisfy.tools module
|
||||
---------------------
|
||||
|
||||
|
@ -219,7 +219,7 @@ def main():
|
||||
return
|
||||
# Tests command
|
||||
elif args.cmd == "test":
|
||||
tests.run(config)
|
||||
tests.run()
|
||||
return
|
||||
|
||||
|
||||
|
@ -3,21 +3,20 @@
|
||||
Preprocessing functions to convert input opendata files into SQLAlchemy objects
|
||||
ready to be stored in the database.
|
||||
"""
|
||||
import csv
|
||||
import io
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
|
||||
if sys.version_info >= (3,0):
|
||||
from flatisfy.models.postal_code import PostalCode
|
||||
from flatisfy.models.public_transport import PublicTransport
|
||||
|
||||
if sys.version_info >= (3, 0):
|
||||
import csv
|
||||
else:
|
||||
from backports import csv
|
||||
|
||||
from flatisfy.models.postal_code import PostalCode
|
||||
from flatisfy.models.public_transport import PublicTransport
|
||||
|
||||
|
||||
LOGGER = logging.getLogger(__name__)
|
||||
MODULE_DIR = os.path.dirname(os.path.realpath(__file__))
|
||||
|
@ -167,7 +167,17 @@ def detect(flats_list, key="id", merge=True, should_intersect=False):
|
||||
|
||||
return unique_flats_list, duplicate_flats
|
||||
|
||||
|
||||
def get_duplicate_score(flat1, flat2, photo_cache):
|
||||
"""
|
||||
Compute the duplicate score between two flats. The higher the score, the
|
||||
more likely the two flats are to be duplicates.
|
||||
|
||||
:param flat1: First flat dict.
|
||||
:param flat2: Second flat dict.
|
||||
:param photo_cache: An instance of ``ImageCache`` to use to cache images.
|
||||
:return: The duplicate score as ``int``.
|
||||
"""
|
||||
n_common_items = 0
|
||||
try:
|
||||
# They should have the same area, up to one unit
|
||||
@ -265,6 +275,7 @@ def get_duplicate_score(flat1, flat2, photo_cache):
|
||||
|
||||
return n_common_items
|
||||
|
||||
|
||||
def deep_detect(flats_list, config):
|
||||
"""
|
||||
Deeper detection of duplicates based on any available data.
|
||||
|
@ -5,8 +5,8 @@ This modules defines an SQLAlchemy ORM model for a flat.
|
||||
# pylint: disable=locally-disabled,invalid-name,too-few-public-methods
|
||||
from __future__ import absolute_import, print_function, unicode_literals
|
||||
|
||||
import enum
|
||||
import logging
|
||||
import enum
|
||||
|
||||
import arrow
|
||||
|
||||
|
@ -2,13 +2,14 @@
|
||||
"""
|
||||
This module contains unit testing functions.
|
||||
"""
|
||||
|
||||
import random
|
||||
import logging
|
||||
import unittest
|
||||
import copy
|
||||
import os
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import random
|
||||
import sys
|
||||
import unittest
|
||||
|
||||
from flatisfy import tools
|
||||
from flatisfy.filters import duplicates
|
||||
from flatisfy.filters.cache import ImageCache
|
||||
@ -17,7 +18,11 @@ from flatisfy.constants import BACKENDS_BY_PRECEDENCE
|
||||
LOGGER = logging.getLogger(__name__)
|
||||
TESTS_DATA_DIR = os.path.dirname(os.path.realpath(__file__)) + "/test_files/"
|
||||
|
||||
|
||||
class TestTexts(unittest.TestCase):
|
||||
"""
|
||||
Checks string normalizations.
|
||||
"""
|
||||
def test_roman_numbers(self):
|
||||
"""
|
||||
Checks roman numbers replacement.
|
||||
@ -52,6 +57,10 @@ class TestTexts(unittest.TestCase):
|
||||
)
|
||||
|
||||
def test_roman_numbers_in_text(self):
|
||||
"""
|
||||
Checks conversion of roman numbers to arabic ones in string
|
||||
normalization.
|
||||
"""
|
||||
self.assertEqual(
|
||||
"dans le 15e arrondissement",
|
||||
tools.normalize_string("Dans le XVe arrondissement")
|
||||
@ -75,7 +84,11 @@ class TestTexts(unittest.TestCase):
|
||||
tools.normalize_string(u"éèêàüï")
|
||||
)
|
||||
|
||||
|
||||
class TestPhoneNumbers(unittest.TestCase):
|
||||
"""
|
||||
Checks phone numbers normalizations.
|
||||
"""
|
||||
def test_prefix(self):
|
||||
"""
|
||||
Checks phone numbers with international prefixes.
|
||||
@ -103,16 +116,23 @@ class TestPhoneNumbers(unittest.TestCase):
|
||||
duplicates.homogeneize_phone_number("06 05 04 03 02")
|
||||
)
|
||||
|
||||
class TestDuplicates(unittest.TestCase):
|
||||
DUPLICATES_MIN_SCORE_WITHOUT_PHOTOS = 14
|
||||
DUPLICATES_MIN_SCORE_WITH_PHOTOS = 15
|
||||
IMAGE_CACHE = ImageCache()
|
||||
|
||||
def generate_fake_flat(self):
|
||||
class TestDuplicates(unittest.TestCase):
|
||||
"""
|
||||
Checks duplicates detection.
|
||||
"""
|
||||
DUPLICATES_MIN_SCORE_WITHOUT_PHOTOS = 14 # pylint: disable=invalid-name
|
||||
DUPLICATES_MIN_SCORE_WITH_PHOTOS = 15 # pylint: disable=invalid-name
|
||||
IMAGE_CACHE = ImageCache() # pylint: disable=invalid-name
|
||||
|
||||
@staticmethod
|
||||
def generate_fake_flat():
|
||||
"""
|
||||
Generates a fake flat post.
|
||||
"""
|
||||
backend = BACKENDS_BY_PRECEDENCE[random.randint(0, len(BACKENDS_BY_PRECEDENCE) - 1)]
|
||||
backend = BACKENDS_BY_PRECEDENCE[
|
||||
random.randint(0, len(BACKENDS_BY_PRECEDENCE) - 1)
|
||||
]
|
||||
return {
|
||||
"id": str(random.randint(100000, 199999)) + "@" + backend,
|
||||
"phone": "0607080910",
|
||||
@ -123,7 +143,8 @@ class TestDuplicates(unittest.TestCase):
|
||||
"bedrooms": random.randint(1, 4)
|
||||
}
|
||||
|
||||
def load_files(self, file1, file2):
|
||||
@staticmethod
|
||||
def load_files(file1, file2):
|
||||
"""
|
||||
Load two files
|
||||
|
||||
@ -143,8 +164,12 @@ class TestDuplicates(unittest.TestCase):
|
||||
"""
|
||||
flat1 = self.generate_fake_flat()
|
||||
flat2 = copy.deepcopy(flat1)
|
||||
score = duplicates.get_duplicate_score(flat1, flat2, TestDuplicates.IMAGE_CACHE)
|
||||
self.assertTrue(score >= TestDuplicates.DUPLICATES_MIN_SCORE_WITHOUT_PHOTOS)
|
||||
score = duplicates.get_duplicate_score(
|
||||
flat1, flat2, TestDuplicates.IMAGE_CACHE
|
||||
)
|
||||
self.assertTrue(
|
||||
score >= TestDuplicates.DUPLICATES_MIN_SCORE_WITHOUT_PHOTOS
|
||||
)
|
||||
|
||||
def test_different_prices(self):
|
||||
"""
|
||||
@ -154,8 +179,12 @@ class TestDuplicates(unittest.TestCase):
|
||||
flat2 = copy.deepcopy(flat1)
|
||||
flat2["cost"] += 1000
|
||||
|
||||
score = duplicates.get_duplicate_score(flat1, flat2, TestDuplicates.IMAGE_CACHE)
|
||||
self.assertTrue(score < TestDuplicates.DUPLICATES_MIN_SCORE_WITHOUT_PHOTOS)
|
||||
score = duplicates.get_duplicate_score(
|
||||
flat1, flat2, TestDuplicates.IMAGE_CACHE
|
||||
)
|
||||
self.assertTrue(
|
||||
score < TestDuplicates.DUPLICATES_MIN_SCORE_WITHOUT_PHOTOS
|
||||
)
|
||||
|
||||
def test_different_rooms(self):
|
||||
"""
|
||||
@ -166,8 +195,12 @@ class TestDuplicates(unittest.TestCase):
|
||||
flat2 = copy.deepcopy(flat1)
|
||||
flat2["rooms"] += 1
|
||||
|
||||
score = duplicates.get_duplicate_score(flat1, flat2, TestDuplicates.IMAGE_CACHE)
|
||||
self.assertTrue(score < TestDuplicates.DUPLICATES_MIN_SCORE_WITHOUT_PHOTOS)
|
||||
score = duplicates.get_duplicate_score(
|
||||
flat1, flat2, TestDuplicates.IMAGE_CACHE
|
||||
)
|
||||
self.assertTrue(
|
||||
score < TestDuplicates.DUPLICATES_MIN_SCORE_WITHOUT_PHOTOS
|
||||
)
|
||||
|
||||
def test_different_areas(self):
|
||||
"""
|
||||
@ -177,8 +210,12 @@ class TestDuplicates(unittest.TestCase):
|
||||
flat2 = copy.deepcopy(flat1)
|
||||
flat2["area"] += 10
|
||||
|
||||
score = duplicates.get_duplicate_score(flat1, flat2, TestDuplicates.IMAGE_CACHE)
|
||||
self.assertTrue(score < TestDuplicates.DUPLICATES_MIN_SCORE_WITHOUT_PHOTOS)
|
||||
score = duplicates.get_duplicate_score(
|
||||
flat1, flat2, TestDuplicates.IMAGE_CACHE
|
||||
)
|
||||
self.assertTrue(
|
||||
score < TestDuplicates.DUPLICATES_MIN_SCORE_WITHOUT_PHOTOS
|
||||
)
|
||||
|
||||
def test_different_areas_decimals(self):
|
||||
"""
|
||||
@ -190,45 +227,63 @@ class TestDuplicates(unittest.TestCase):
|
||||
flat1["area"] = 50.65
|
||||
flat2["area"] = 50.37
|
||||
|
||||
score = duplicates.get_duplicate_score(flat1, flat2, TestDuplicates.IMAGE_CACHE)
|
||||
self.assertTrue(score < TestDuplicates.DUPLICATES_MIN_SCORE_WITHOUT_PHOTOS)
|
||||
score = duplicates.get_duplicate_score(
|
||||
flat1, flat2, TestDuplicates.IMAGE_CACHE
|
||||
)
|
||||
self.assertTrue(
|
||||
score < TestDuplicates.DUPLICATES_MIN_SCORE_WITHOUT_PHOTOS
|
||||
)
|
||||
|
||||
def test_different_phones(self):
|
||||
"""
|
||||
Two flats with different phone numbers should not be detected as duplicates.
|
||||
Two flats with different phone numbers should not be detected as
|
||||
duplicates.
|
||||
"""
|
||||
flat1 = self.generate_fake_flat()
|
||||
flat2 = copy.deepcopy(flat1)
|
||||
flat2["phone"] = "0708091011"
|
||||
|
||||
score = duplicates.get_duplicate_score(flat1, flat2, TestDuplicates.IMAGE_CACHE)
|
||||
self.assertTrue(score < TestDuplicates.DUPLICATES_MIN_SCORE_WITHOUT_PHOTOS)
|
||||
score = duplicates.get_duplicate_score(
|
||||
flat1, flat2, TestDuplicates.IMAGE_CACHE
|
||||
)
|
||||
self.assertTrue(
|
||||
score < TestDuplicates.DUPLICATES_MIN_SCORE_WITHOUT_PHOTOS
|
||||
)
|
||||
|
||||
def test_real_duplicates(self):
|
||||
"""
|
||||
Two flats with same price, area and rooms quantity should be detected as
|
||||
duplicates.
|
||||
Two flats with same price, area and rooms quantity should be detected
|
||||
as duplicates.
|
||||
"""
|
||||
flats = self.load_files(
|
||||
"127028739@seloger",
|
||||
"14428129@explorimmo"
|
||||
)
|
||||
|
||||
score = duplicates.get_duplicate_score(flats[0], flats[1], TestDuplicates.IMAGE_CACHE)
|
||||
self.assertTrue(score >= TestDuplicates.DUPLICATES_MIN_SCORE_WITH_PHOTOS)
|
||||
score = duplicates.get_duplicate_score(
|
||||
flats[0], flats[1], TestDuplicates.IMAGE_CACHE
|
||||
)
|
||||
self.assertTrue(
|
||||
score >= TestDuplicates.DUPLICATES_MIN_SCORE_WITH_PHOTOS
|
||||
)
|
||||
|
||||
def run(config):
|
||||
|
||||
def run():
|
||||
"""
|
||||
Run all the tests
|
||||
|
||||
:param config: A config dict.
|
||||
"""
|
||||
LOGGER.info("Running tests…")
|
||||
try:
|
||||
suite = unittest.TestLoader().loadTestsFromTestCase(TestTexts)
|
||||
unittest.TextTestRunner(verbosity=2).run(suite)
|
||||
result = unittest.TextTestRunner(verbosity=2).run(suite)
|
||||
assert result.wasSuccessful()
|
||||
|
||||
suite = unittest.TestLoader().loadTestsFromTestCase(TestPhoneNumbers)
|
||||
unittest.TextTestRunner(verbosity=2).run(suite)
|
||||
result = unittest.TextTestRunner(verbosity=2).run(suite)
|
||||
assert result.wasSuccessful()
|
||||
|
||||
suite = unittest.TestLoader().loadTestsFromTestCase(TestDuplicates)
|
||||
unittest.TextTestRunner(verbosity=2).run(suite)
|
||||
result = unittest.TextTestRunner(verbosity=2).run(suite)
|
||||
assert result.wasSuccessful()
|
||||
except AssertionError:
|
||||
sys.exit(1)
|
||||
|
@ -24,14 +24,22 @@ LOGGER = logging.getLogger(__name__)
|
||||
# Constants
|
||||
NAVITIA_ENDPOINT = "https://api.navitia.io/v1/coverage/fr-idf/journeys"
|
||||
|
||||
class RomanNumbers():
|
||||
|
||||
class RomanNumbers(object):
|
||||
"""
|
||||
Utilities to check and convert roman numbers.
|
||||
Part of the convertions are based on
|
||||
|
||||
Part of the conversions is based on
|
||||
https://gist.github.com/riverrun/ac91218bb1678b857c12
|
||||
"""
|
||||
@staticmethod
|
||||
def check_valid(roman):
|
||||
"""
|
||||
Check whether a roman literal is a valid roman literal.
|
||||
|
||||
def check_valid(self, roman):
|
||||
:param roman: A roman literal, as string.
|
||||
:returns: ``True`` if it is a valid roman literal, ``False`` otherwise.
|
||||
"""
|
||||
if not re.match('^[MDCLXVI]+$', roman):
|
||||
return False
|
||||
|
||||
@ -42,25 +50,57 @@ class RomanNumbers():
|
||||
# TODO: check M does not appear after any other, etc.
|
||||
return True
|
||||
|
||||
def convert_to_arabic(self, roman):
|
||||
if not self.check_valid(roman):
|
||||
@staticmethod
|
||||
def convert_to_arabic(roman):
|
||||
"""
|
||||
Convert a roman literal to an arabic one.
|
||||
|
||||
:param roman: A roman number, as string.
|
||||
:returns: The corresponding arabic one, as string.
|
||||
"""
|
||||
if not RomanNumbers.check_valid(roman):
|
||||
return roman
|
||||
|
||||
keys = ['IV', 'IX', 'XL', 'XC', 'CD', 'CM', 'I', 'V', 'X', 'L', 'C', 'D', 'M']
|
||||
to_arabic = {'IV': '4', 'IX': '9', 'XL': '40', 'XC': '90', 'CD': '400', 'CM': '900',
|
||||
'I': '1', 'V': '5', 'X': '10', 'L': '50', 'C': '100', 'D': '500', 'M': '1000'}
|
||||
keys = [
|
||||
'IV', 'IX', 'XL', 'XC', 'CD', 'CM', 'I', 'V',
|
||||
'X', 'L', 'C', 'D', 'M'
|
||||
]
|
||||
to_arabic = {
|
||||
'IV': '4',
|
||||
'IX': '9',
|
||||
'XL': '40',
|
||||
'XC': '90',
|
||||
'CD': '400',
|
||||
'CM': '900',
|
||||
'I': '1',
|
||||
'V': '5',
|
||||
'X': '10',
|
||||
'L': '50',
|
||||
'C': '100',
|
||||
'D': '500',
|
||||
'M': '1000'
|
||||
}
|
||||
for key in keys:
|
||||
if key in roman:
|
||||
roman = roman.replace(key, ' {}'.format(to_arabic.get(key)))
|
||||
return str(sum(int(num) for num in roman.split()))
|
||||
|
||||
def convert_to_arabic_in_text(self, text):
|
||||
@staticmethod
|
||||
def convert_to_arabic_in_text(text):
|
||||
"""
|
||||
Convert roman literals to arabic ones in a text.
|
||||
|
||||
:param text: Some text to convert roman literals from.
|
||||
:returns: The corresponding text with roman literals converted to
|
||||
arabic.
|
||||
"""
|
||||
return re.sub(
|
||||
'(?<![\S])+([MDCLXVI]+)(?=[eè\s$])',
|
||||
lambda matchobj: self.convert_to_arabic(matchobj.group(0)),
|
||||
r'(?<![\S])+([MDCLXVI]+)(?=[eè\s$])',
|
||||
lambda matchobj: RomanNumbers.convert_to_arabic(matchobj.group(0)),
|
||||
text
|
||||
)
|
||||
|
||||
|
||||
def hash_dict(func):
|
||||
"""
|
||||
Decorator to use on functions accepting dict parameters, to transform them
|
||||
@ -190,7 +230,6 @@ def normalize_string(string):
|
||||
>>> normalize_string("tétéà 14ème-XIV, foobar")
|
||||
'tetea 14eme xiv, foobar'
|
||||
"""
|
||||
# TODO: Convert romanian numerals to decimal
|
||||
# ASCIIfy the string
|
||||
string = unidecode.unidecode(string)
|
||||
|
||||
|
@ -8,7 +8,6 @@ from __future__ import (
|
||||
|
||||
import functools
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
|
||||
import bottle
|
||||
|
@ -21,7 +21,7 @@ from flatisfy.models.postal_code import PostalCode
|
||||
FILTER_RE = re.compile(r"filter\[([A-z0-9_]+)\]")
|
||||
|
||||
|
||||
def JSONError(error_code, error_str):
|
||||
def JSONError(error_code, error_str): # pylint: disable=invalid-name
|
||||
"""
|
||||
Return an HTTP error with a JSON payload.
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user