Make CI fail if a test fails + a bit of linting

This commit is contained in:
Lucas Verney 2018-01-18 14:48:28 +01:00
parent 6bcfb62e8d
commit d482d9b6fa
9 changed files with 171 additions and 60 deletions

View File

@ -71,6 +71,14 @@ flatisfy.fetch module
:undoc-members: :undoc-members:
:show-inheritance: :show-inheritance:
flatisfy.tests module
---------------------
.. automodule:: flatisfy.tests
:members:
:undoc-members:
:show-inheritance:
flatisfy.tools module flatisfy.tools module
--------------------- ---------------------

View File

@ -219,7 +219,7 @@ def main():
return return
# Tests command # Tests command
elif args.cmd == "test": elif args.cmd == "test":
tests.run(config) tests.run()
return return

View File

@ -3,21 +3,20 @@
Preprocessing functions to convert input opendata files into SQLAlchemy objects Preprocessing functions to convert input opendata files into SQLAlchemy objects
ready to be stored in the database. ready to be stored in the database.
""" """
import csv
import io import io
import json import json
import logging import logging
import os import os
import sys import sys
if sys.version_info >= (3,0): from flatisfy.models.postal_code import PostalCode
from flatisfy.models.public_transport import PublicTransport
if sys.version_info >= (3, 0):
import csv import csv
else: else:
from backports import csv from backports import csv
from flatisfy.models.postal_code import PostalCode
from flatisfy.models.public_transport import PublicTransport
LOGGER = logging.getLogger(__name__) LOGGER = logging.getLogger(__name__)
MODULE_DIR = os.path.dirname(os.path.realpath(__file__)) MODULE_DIR = os.path.dirname(os.path.realpath(__file__))

View File

@ -167,7 +167,17 @@ def detect(flats_list, key="id", merge=True, should_intersect=False):
return unique_flats_list, duplicate_flats return unique_flats_list, duplicate_flats
def get_duplicate_score(flat1, flat2, photo_cache): def get_duplicate_score(flat1, flat2, photo_cache):
"""
Compute the duplicate score between two flats. The higher the score, the
more likely the two flats are to be duplicates.
:param flat1: First flat dict.
:param flat2: Second flat dict.
:param photo_cache: An instance of ``ImageCache`` to use to cache images.
:return: The duplicate score as ``int``.
"""
n_common_items = 0 n_common_items = 0
try: try:
# They should have the same area, up to one unit # They should have the same area, up to one unit
@ -265,6 +275,7 @@ def get_duplicate_score(flat1, flat2, photo_cache):
return n_common_items return n_common_items
def deep_detect(flats_list, config): def deep_detect(flats_list, config):
""" """
Deeper detection of duplicates based on any available data. Deeper detection of duplicates based on any available data.

View File

@ -5,8 +5,8 @@ This modules defines an SQLAlchemy ORM model for a flat.
# pylint: disable=locally-disabled,invalid-name,too-few-public-methods # pylint: disable=locally-disabled,invalid-name,too-few-public-methods
from __future__ import absolute_import, print_function, unicode_literals from __future__ import absolute_import, print_function, unicode_literals
import enum
import logging import logging
import enum
import arrow import arrow

View File

@ -2,13 +2,14 @@
""" """
This module contains unit testing functions. This module contains unit testing functions.
""" """
import random
import logging
import unittest
import copy import copy
import os
import json import json
import logging
import os
import random
import sys
import unittest
from flatisfy import tools from flatisfy import tools
from flatisfy.filters import duplicates from flatisfy.filters import duplicates
from flatisfy.filters.cache import ImageCache from flatisfy.filters.cache import ImageCache
@ -17,7 +18,11 @@ from flatisfy.constants import BACKENDS_BY_PRECEDENCE
LOGGER = logging.getLogger(__name__) LOGGER = logging.getLogger(__name__)
TESTS_DATA_DIR = os.path.dirname(os.path.realpath(__file__)) + "/test_files/" TESTS_DATA_DIR = os.path.dirname(os.path.realpath(__file__)) + "/test_files/"
class TestTexts(unittest.TestCase): class TestTexts(unittest.TestCase):
"""
Checks string normalizations.
"""
def test_roman_numbers(self): def test_roman_numbers(self):
""" """
Checks roman numbers replacement. Checks roman numbers replacement.
@ -52,6 +57,10 @@ class TestTexts(unittest.TestCase):
) )
def test_roman_numbers_in_text(self): def test_roman_numbers_in_text(self):
"""
Checks conversion of roman numbers to arabic ones in string
normalization.
"""
self.assertEqual( self.assertEqual(
"dans le 15e arrondissement", "dans le 15e arrondissement",
tools.normalize_string("Dans le XVe arrondissement") tools.normalize_string("Dans le XVe arrondissement")
@ -75,7 +84,11 @@ class TestTexts(unittest.TestCase):
tools.normalize_string(u"éèêàüï") tools.normalize_string(u"éèêàüï")
) )
class TestPhoneNumbers(unittest.TestCase): class TestPhoneNumbers(unittest.TestCase):
"""
Checks phone numbers normalizations.
"""
def test_prefix(self): def test_prefix(self):
""" """
Checks phone numbers with international prefixes. Checks phone numbers with international prefixes.
@ -103,16 +116,23 @@ class TestPhoneNumbers(unittest.TestCase):
duplicates.homogeneize_phone_number("06 05 04 03 02") duplicates.homogeneize_phone_number("06 05 04 03 02")
) )
class TestDuplicates(unittest.TestCase):
DUPLICATES_MIN_SCORE_WITHOUT_PHOTOS = 14
DUPLICATES_MIN_SCORE_WITH_PHOTOS = 15
IMAGE_CACHE = ImageCache()
def generate_fake_flat(self): class TestDuplicates(unittest.TestCase):
"""
Checks duplicates detection.
"""
DUPLICATES_MIN_SCORE_WITHOUT_PHOTOS = 14 # pylint: disable=invalid-name
DUPLICATES_MIN_SCORE_WITH_PHOTOS = 15 # pylint: disable=invalid-name
IMAGE_CACHE = ImageCache() # pylint: disable=invalid-name
@staticmethod
def generate_fake_flat():
""" """
Generates a fake flat post. Generates a fake flat post.
""" """
backend = BACKENDS_BY_PRECEDENCE[random.randint(0, len(BACKENDS_BY_PRECEDENCE) - 1)] backend = BACKENDS_BY_PRECEDENCE[
random.randint(0, len(BACKENDS_BY_PRECEDENCE) - 1)
]
return { return {
"id": str(random.randint(100000, 199999)) + "@" + backend, "id": str(random.randint(100000, 199999)) + "@" + backend,
"phone": "0607080910", "phone": "0607080910",
@ -123,7 +143,8 @@ class TestDuplicates(unittest.TestCase):
"bedrooms": random.randint(1, 4) "bedrooms": random.randint(1, 4)
} }
def load_files(self, file1, file2): @staticmethod
def load_files(file1, file2):
""" """
Load two files Load two files
@ -143,8 +164,12 @@ class TestDuplicates(unittest.TestCase):
""" """
flat1 = self.generate_fake_flat() flat1 = self.generate_fake_flat()
flat2 = copy.deepcopy(flat1) flat2 = copy.deepcopy(flat1)
score = duplicates.get_duplicate_score(flat1, flat2, TestDuplicates.IMAGE_CACHE) score = duplicates.get_duplicate_score(
self.assertTrue(score >= TestDuplicates.DUPLICATES_MIN_SCORE_WITHOUT_PHOTOS) flat1, flat2, TestDuplicates.IMAGE_CACHE
)
self.assertTrue(
score >= TestDuplicates.DUPLICATES_MIN_SCORE_WITHOUT_PHOTOS
)
def test_different_prices(self): def test_different_prices(self):
""" """
@ -154,8 +179,12 @@ class TestDuplicates(unittest.TestCase):
flat2 = copy.deepcopy(flat1) flat2 = copy.deepcopy(flat1)
flat2["cost"] += 1000 flat2["cost"] += 1000
score = duplicates.get_duplicate_score(flat1, flat2, TestDuplicates.IMAGE_CACHE) score = duplicates.get_duplicate_score(
self.assertTrue(score < TestDuplicates.DUPLICATES_MIN_SCORE_WITHOUT_PHOTOS) flat1, flat2, TestDuplicates.IMAGE_CACHE
)
self.assertTrue(
score < TestDuplicates.DUPLICATES_MIN_SCORE_WITHOUT_PHOTOS
)
def test_different_rooms(self): def test_different_rooms(self):
""" """
@ -166,8 +195,12 @@ class TestDuplicates(unittest.TestCase):
flat2 = copy.deepcopy(flat1) flat2 = copy.deepcopy(flat1)
flat2["rooms"] += 1 flat2["rooms"] += 1
score = duplicates.get_duplicate_score(flat1, flat2, TestDuplicates.IMAGE_CACHE) score = duplicates.get_duplicate_score(
self.assertTrue(score < TestDuplicates.DUPLICATES_MIN_SCORE_WITHOUT_PHOTOS) flat1, flat2, TestDuplicates.IMAGE_CACHE
)
self.assertTrue(
score < TestDuplicates.DUPLICATES_MIN_SCORE_WITHOUT_PHOTOS
)
def test_different_areas(self): def test_different_areas(self):
""" """
@ -177,8 +210,12 @@ class TestDuplicates(unittest.TestCase):
flat2 = copy.deepcopy(flat1) flat2 = copy.deepcopy(flat1)
flat2["area"] += 10 flat2["area"] += 10
score = duplicates.get_duplicate_score(flat1, flat2, TestDuplicates.IMAGE_CACHE) score = duplicates.get_duplicate_score(
self.assertTrue(score < TestDuplicates.DUPLICATES_MIN_SCORE_WITHOUT_PHOTOS) flat1, flat2, TestDuplicates.IMAGE_CACHE
)
self.assertTrue(
score < TestDuplicates.DUPLICATES_MIN_SCORE_WITHOUT_PHOTOS
)
def test_different_areas_decimals(self): def test_different_areas_decimals(self):
""" """
@ -190,45 +227,63 @@ class TestDuplicates(unittest.TestCase):
flat1["area"] = 50.65 flat1["area"] = 50.65
flat2["area"] = 50.37 flat2["area"] = 50.37
score = duplicates.get_duplicate_score(flat1, flat2, TestDuplicates.IMAGE_CACHE) score = duplicates.get_duplicate_score(
self.assertTrue(score < TestDuplicates.DUPLICATES_MIN_SCORE_WITHOUT_PHOTOS) flat1, flat2, TestDuplicates.IMAGE_CACHE
)
self.assertTrue(
score < TestDuplicates.DUPLICATES_MIN_SCORE_WITHOUT_PHOTOS
)
def test_different_phones(self): def test_different_phones(self):
""" """
Two flats with different phone numbers should not be detected as duplicates. Two flats with different phone numbers should not be detected as
duplicates.
""" """
flat1 = self.generate_fake_flat() flat1 = self.generate_fake_flat()
flat2 = copy.deepcopy(flat1) flat2 = copy.deepcopy(flat1)
flat2["phone"] = "0708091011" flat2["phone"] = "0708091011"
score = duplicates.get_duplicate_score(flat1, flat2, TestDuplicates.IMAGE_CACHE) score = duplicates.get_duplicate_score(
self.assertTrue(score < TestDuplicates.DUPLICATES_MIN_SCORE_WITHOUT_PHOTOS) flat1, flat2, TestDuplicates.IMAGE_CACHE
)
self.assertTrue(
score < TestDuplicates.DUPLICATES_MIN_SCORE_WITHOUT_PHOTOS
)
def test_real_duplicates(self): def test_real_duplicates(self):
""" """
Two flats with same price, area and rooms quantity should be detected as Two flats with same price, area and rooms quantity should be detected
duplicates. as duplicates.
""" """
flats = self.load_files( flats = self.load_files(
"127028739@seloger", "127028739@seloger",
"14428129@explorimmo" "14428129@explorimmo"
) )
score = duplicates.get_duplicate_score(flats[0], flats[1], TestDuplicates.IMAGE_CACHE) score = duplicates.get_duplicate_score(
self.assertTrue(score >= TestDuplicates.DUPLICATES_MIN_SCORE_WITH_PHOTOS) flats[0], flats[1], TestDuplicates.IMAGE_CACHE
)
self.assertTrue(
score >= TestDuplicates.DUPLICATES_MIN_SCORE_WITH_PHOTOS
)
def run(config):
def run():
""" """
Run all the tests Run all the tests
:param config: A config dict.
""" """
LOGGER.info("Running tests…") LOGGER.info("Running tests…")
try:
suite = unittest.TestLoader().loadTestsFromTestCase(TestTexts) suite = unittest.TestLoader().loadTestsFromTestCase(TestTexts)
unittest.TextTestRunner(verbosity=2).run(suite) result = unittest.TextTestRunner(verbosity=2).run(suite)
assert result.wasSuccessful()
suite = unittest.TestLoader().loadTestsFromTestCase(TestPhoneNumbers) suite = unittest.TestLoader().loadTestsFromTestCase(TestPhoneNumbers)
unittest.TextTestRunner(verbosity=2).run(suite) result = unittest.TextTestRunner(verbosity=2).run(suite)
assert result.wasSuccessful()
suite = unittest.TestLoader().loadTestsFromTestCase(TestDuplicates) suite = unittest.TestLoader().loadTestsFromTestCase(TestDuplicates)
unittest.TextTestRunner(verbosity=2).run(suite) result = unittest.TextTestRunner(verbosity=2).run(suite)
assert result.wasSuccessful()
except AssertionError:
sys.exit(1)

View File

@ -24,14 +24,22 @@ LOGGER = logging.getLogger(__name__)
# Constants # Constants
NAVITIA_ENDPOINT = "https://api.navitia.io/v1/coverage/fr-idf/journeys" NAVITIA_ENDPOINT = "https://api.navitia.io/v1/coverage/fr-idf/journeys"
class RomanNumbers():
class RomanNumbers(object):
""" """
Utilities to check and convert roman numbers. Utilities to check and convert roman numbers.
Part of the convertions are based on
Part of the conversions is based on
https://gist.github.com/riverrun/ac91218bb1678b857c12 https://gist.github.com/riverrun/ac91218bb1678b857c12
""" """
@staticmethod
def check_valid(roman):
"""
Check whether a roman literal is a valid roman literal.
def check_valid(self, roman): :param roman: A roman literal, as string.
:returns: ``True`` if it is a valid roman literal, ``False`` otherwise.
"""
if not re.match('^[MDCLXVI]+$', roman): if not re.match('^[MDCLXVI]+$', roman):
return False return False
@ -42,25 +50,57 @@ class RomanNumbers():
# TODO: check M does not appear after any other, etc. # TODO: check M does not appear after any other, etc.
return True return True
def convert_to_arabic(self, roman): @staticmethod
if not self.check_valid(roman): def convert_to_arabic(roman):
"""
Convert a roman literal to an arabic one.
:param roman: A roman number, as string.
:returns: The corresponding arabic one, as string.
"""
if not RomanNumbers.check_valid(roman):
return roman return roman
keys = ['IV', 'IX', 'XL', 'XC', 'CD', 'CM', 'I', 'V', 'X', 'L', 'C', 'D', 'M'] keys = [
to_arabic = {'IV': '4', 'IX': '9', 'XL': '40', 'XC': '90', 'CD': '400', 'CM': '900', 'IV', 'IX', 'XL', 'XC', 'CD', 'CM', 'I', 'V',
'I': '1', 'V': '5', 'X': '10', 'L': '50', 'C': '100', 'D': '500', 'M': '1000'} 'X', 'L', 'C', 'D', 'M'
]
to_arabic = {
'IV': '4',
'IX': '9',
'XL': '40',
'XC': '90',
'CD': '400',
'CM': '900',
'I': '1',
'V': '5',
'X': '10',
'L': '50',
'C': '100',
'D': '500',
'M': '1000'
}
for key in keys: for key in keys:
if key in roman: if key in roman:
roman = roman.replace(key, ' {}'.format(to_arabic.get(key))) roman = roman.replace(key, ' {}'.format(to_arabic.get(key)))
return str(sum(int(num) for num in roman.split())) return str(sum(int(num) for num in roman.split()))
def convert_to_arabic_in_text(self, text): @staticmethod
def convert_to_arabic_in_text(text):
"""
Convert roman literals to arabic ones in a text.
:param text: Some text to convert roman literals from.
:returns: The corresponding text with roman literals converted to
arabic.
"""
return re.sub( return re.sub(
'(?<![\S])+([MDCLXVI]+)(?=[eè\s$])', r'(?<![\S])+([MDCLXVI]+)(?=[eè\s$])',
lambda matchobj: self.convert_to_arabic(matchobj.group(0)), lambda matchobj: RomanNumbers.convert_to_arabic(matchobj.group(0)),
text text
) )
def hash_dict(func): def hash_dict(func):
""" """
Decorator to use on functions accepting dict parameters, to transform them Decorator to use on functions accepting dict parameters, to transform them
@ -190,7 +230,6 @@ def normalize_string(string):
>>> normalize_string("tétéà 14ème-XIV, foobar") >>> normalize_string("tétéà 14ème-XIV, foobar")
'tetea 14eme xiv, foobar' 'tetea 14eme xiv, foobar'
""" """
# TODO: Convert romanian numerals to decimal
# ASCIIfy the string # ASCIIfy the string
string = unidecode.unidecode(string) string = unidecode.unidecode(string)

View File

@ -8,7 +8,6 @@ from __future__ import (
import functools import functools
import json import json
import logging
import os import os
import bottle import bottle

View File

@ -21,7 +21,7 @@ from flatisfy.models.postal_code import PostalCode
FILTER_RE = re.compile(r"filter\[([A-z0-9_]+)\]") FILTER_RE = re.compile(r"filter\[([A-z0-9_]+)\]")
def JSONError(error_code, error_str): def JSONError(error_code, error_str): # pylint: disable=invalid-name
""" """
Return an HTTP error with a JSON payload. Return an HTTP error with a JSON payload.