Make CI fail if a test fails + a bit of linting

This commit is contained in:
Lucas Verney 2018-01-18 14:48:28 +01:00
parent 6bcfb62e8d
commit d482d9b6fa
9 changed files with 171 additions and 60 deletions

View File

@ -71,6 +71,14 @@ flatisfy.fetch module
:undoc-members:
:show-inheritance:
flatisfy.tests module
---------------------
.. automodule:: flatisfy.tests
:members:
:undoc-members:
:show-inheritance:
flatisfy.tools module
---------------------

View File

@ -219,7 +219,7 @@ def main():
return
# Tests command
elif args.cmd == "test":
tests.run(config)
tests.run()
return

View File

@ -3,21 +3,20 @@
Preprocessing functions to convert input opendata files into SQLAlchemy objects
ready to be stored in the database.
"""
import csv
import io
import json
import logging
import os
import sys
from flatisfy.models.postal_code import PostalCode
from flatisfy.models.public_transport import PublicTransport
if sys.version_info >= (3, 0):
import csv
else:
from backports import csv
from flatisfy.models.postal_code import PostalCode
from flatisfy.models.public_transport import PublicTransport
LOGGER = logging.getLogger(__name__)
MODULE_DIR = os.path.dirname(os.path.realpath(__file__))

View File

@ -167,7 +167,17 @@ def detect(flats_list, key="id", merge=True, should_intersect=False):
return unique_flats_list, duplicate_flats
def get_duplicate_score(flat1, flat2, photo_cache):
"""
Compute the duplicate score between two flats. The higher the score, the
more likely the two flats are to be duplicates.
:param flat1: First flat dict.
:param flat2: Second flat dict.
:param photo_cache: An instance of ``ImageCache`` to use to cache images.
:return: The duplicate score as ``int``.
"""
n_common_items = 0
try:
# They should have the same area, up to one unit
@ -265,6 +275,7 @@ def get_duplicate_score(flat1, flat2, photo_cache):
return n_common_items
def deep_detect(flats_list, config):
"""
Deeper detection of duplicates based on any available data.

View File

@ -5,8 +5,8 @@ This modules defines an SQLAlchemy ORM model for a flat.
# pylint: disable=locally-disabled,invalid-name,too-few-public-methods
from __future__ import absolute_import, print_function, unicode_literals
import enum
import logging
import enum
import arrow

View File

@ -2,13 +2,14 @@
"""
This module contains unit testing functions.
"""
import random
import logging
import unittest
import copy
import os
import json
import logging
import os
import random
import sys
import unittest
from flatisfy import tools
from flatisfy.filters import duplicates
from flatisfy.filters.cache import ImageCache
@ -17,7 +18,11 @@ from flatisfy.constants import BACKENDS_BY_PRECEDENCE
LOGGER = logging.getLogger(__name__)
TESTS_DATA_DIR = os.path.dirname(os.path.realpath(__file__)) + "/test_files/"
class TestTexts(unittest.TestCase):
"""
Checks string normalizations.
"""
def test_roman_numbers(self):
"""
Checks roman numbers replacement.
@ -52,6 +57,10 @@ class TestTexts(unittest.TestCase):
)
def test_roman_numbers_in_text(self):
"""
Checks conversion of roman numbers to arabic ones in string
normalization.
"""
self.assertEqual(
"dans le 15e arrondissement",
tools.normalize_string("Dans le XVe arrondissement")
@ -75,7 +84,11 @@ class TestTexts(unittest.TestCase):
tools.normalize_string(u"éèêàüï")
)
class TestPhoneNumbers(unittest.TestCase):
"""
Checks phone numbers normalizations.
"""
def test_prefix(self):
"""
Checks phone numbers with international prefixes.
@ -103,16 +116,23 @@ class TestPhoneNumbers(unittest.TestCase):
duplicates.homogeneize_phone_number("06 05 04 03 02")
)
class TestDuplicates(unittest.TestCase):
DUPLICATES_MIN_SCORE_WITHOUT_PHOTOS = 14
DUPLICATES_MIN_SCORE_WITH_PHOTOS = 15
IMAGE_CACHE = ImageCache()
def generate_fake_flat(self):
class TestDuplicates(unittest.TestCase):
"""
Checks duplicates detection.
"""
DUPLICATES_MIN_SCORE_WITHOUT_PHOTOS = 14 # pylint: disable=invalid-name
DUPLICATES_MIN_SCORE_WITH_PHOTOS = 15 # pylint: disable=invalid-name
IMAGE_CACHE = ImageCache() # pylint: disable=invalid-name
@staticmethod
def generate_fake_flat():
"""
Generates a fake flat post.
"""
backend = BACKENDS_BY_PRECEDENCE[random.randint(0, len(BACKENDS_BY_PRECEDENCE) - 1)]
backend = BACKENDS_BY_PRECEDENCE[
random.randint(0, len(BACKENDS_BY_PRECEDENCE) - 1)
]
return {
"id": str(random.randint(100000, 199999)) + "@" + backend,
"phone": "0607080910",
@ -123,7 +143,8 @@ class TestDuplicates(unittest.TestCase):
"bedrooms": random.randint(1, 4)
}
def load_files(self, file1, file2):
@staticmethod
def load_files(file1, file2):
"""
Load two files
@ -143,8 +164,12 @@ class TestDuplicates(unittest.TestCase):
"""
flat1 = self.generate_fake_flat()
flat2 = copy.deepcopy(flat1)
score = duplicates.get_duplicate_score(flat1, flat2, TestDuplicates.IMAGE_CACHE)
self.assertTrue(score >= TestDuplicates.DUPLICATES_MIN_SCORE_WITHOUT_PHOTOS)
score = duplicates.get_duplicate_score(
flat1, flat2, TestDuplicates.IMAGE_CACHE
)
self.assertTrue(
score >= TestDuplicates.DUPLICATES_MIN_SCORE_WITHOUT_PHOTOS
)
def test_different_prices(self):
"""
@ -154,8 +179,12 @@ class TestDuplicates(unittest.TestCase):
flat2 = copy.deepcopy(flat1)
flat2["cost"] += 1000
score = duplicates.get_duplicate_score(flat1, flat2, TestDuplicates.IMAGE_CACHE)
self.assertTrue(score < TestDuplicates.DUPLICATES_MIN_SCORE_WITHOUT_PHOTOS)
score = duplicates.get_duplicate_score(
flat1, flat2, TestDuplicates.IMAGE_CACHE
)
self.assertTrue(
score < TestDuplicates.DUPLICATES_MIN_SCORE_WITHOUT_PHOTOS
)
def test_different_rooms(self):
"""
@ -166,8 +195,12 @@ class TestDuplicates(unittest.TestCase):
flat2 = copy.deepcopy(flat1)
flat2["rooms"] += 1
score = duplicates.get_duplicate_score(flat1, flat2, TestDuplicates.IMAGE_CACHE)
self.assertTrue(score < TestDuplicates.DUPLICATES_MIN_SCORE_WITHOUT_PHOTOS)
score = duplicates.get_duplicate_score(
flat1, flat2, TestDuplicates.IMAGE_CACHE
)
self.assertTrue(
score < TestDuplicates.DUPLICATES_MIN_SCORE_WITHOUT_PHOTOS
)
def test_different_areas(self):
"""
@ -177,8 +210,12 @@ class TestDuplicates(unittest.TestCase):
flat2 = copy.deepcopy(flat1)
flat2["area"] += 10
score = duplicates.get_duplicate_score(flat1, flat2, TestDuplicates.IMAGE_CACHE)
self.assertTrue(score < TestDuplicates.DUPLICATES_MIN_SCORE_WITHOUT_PHOTOS)
score = duplicates.get_duplicate_score(
flat1, flat2, TestDuplicates.IMAGE_CACHE
)
self.assertTrue(
score < TestDuplicates.DUPLICATES_MIN_SCORE_WITHOUT_PHOTOS
)
def test_different_areas_decimals(self):
"""
@ -190,45 +227,63 @@ class TestDuplicates(unittest.TestCase):
flat1["area"] = 50.65
flat2["area"] = 50.37
score = duplicates.get_duplicate_score(flat1, flat2, TestDuplicates.IMAGE_CACHE)
self.assertTrue(score < TestDuplicates.DUPLICATES_MIN_SCORE_WITHOUT_PHOTOS)
score = duplicates.get_duplicate_score(
flat1, flat2, TestDuplicates.IMAGE_CACHE
)
self.assertTrue(
score < TestDuplicates.DUPLICATES_MIN_SCORE_WITHOUT_PHOTOS
)
def test_different_phones(self):
"""
Two flats with different phone numbers should not be detected as duplicates.
Two flats with different phone numbers should not be detected as
duplicates.
"""
flat1 = self.generate_fake_flat()
flat2 = copy.deepcopy(flat1)
flat2["phone"] = "0708091011"
score = duplicates.get_duplicate_score(flat1, flat2, TestDuplicates.IMAGE_CACHE)
self.assertTrue(score < TestDuplicates.DUPLICATES_MIN_SCORE_WITHOUT_PHOTOS)
score = duplicates.get_duplicate_score(
flat1, flat2, TestDuplicates.IMAGE_CACHE
)
self.assertTrue(
score < TestDuplicates.DUPLICATES_MIN_SCORE_WITHOUT_PHOTOS
)
def test_real_duplicates(self):
"""
Two flats with same price, area and rooms quantity should be detected as
duplicates.
Two flats with same price, area and rooms quantity should be detected
as duplicates.
"""
flats = self.load_files(
"127028739@seloger",
"14428129@explorimmo"
)
score = duplicates.get_duplicate_score(flats[0], flats[1], TestDuplicates.IMAGE_CACHE)
self.assertTrue(score >= TestDuplicates.DUPLICATES_MIN_SCORE_WITH_PHOTOS)
score = duplicates.get_duplicate_score(
flats[0], flats[1], TestDuplicates.IMAGE_CACHE
)
self.assertTrue(
score >= TestDuplicates.DUPLICATES_MIN_SCORE_WITH_PHOTOS
)
def run(config):
def run():
"""
Run all the tests
:param config: A config dict.
"""
LOGGER.info("Running tests…")
try:
suite = unittest.TestLoader().loadTestsFromTestCase(TestTexts)
unittest.TextTestRunner(verbosity=2).run(suite)
result = unittest.TextTestRunner(verbosity=2).run(suite)
assert result.wasSuccessful()
suite = unittest.TestLoader().loadTestsFromTestCase(TestPhoneNumbers)
unittest.TextTestRunner(verbosity=2).run(suite)
result = unittest.TextTestRunner(verbosity=2).run(suite)
assert result.wasSuccessful()
suite = unittest.TestLoader().loadTestsFromTestCase(TestDuplicates)
unittest.TextTestRunner(verbosity=2).run(suite)
result = unittest.TextTestRunner(verbosity=2).run(suite)
assert result.wasSuccessful()
except AssertionError:
sys.exit(1)

View File

@ -24,14 +24,22 @@ LOGGER = logging.getLogger(__name__)
# Constants
NAVITIA_ENDPOINT = "https://api.navitia.io/v1/coverage/fr-idf/journeys"
class RomanNumbers():
class RomanNumbers(object):
"""
Utilities to check and convert roman numbers.
Part of the convertions are based on
Part of the conversions is based on
https://gist.github.com/riverrun/ac91218bb1678b857c12
"""
@staticmethod
def check_valid(roman):
"""
Check whether a roman literal is a valid roman literal.
def check_valid(self, roman):
:param roman: A roman literal, as string.
:returns: ``True`` if it is a valid roman literal, ``False`` otherwise.
"""
if not re.match('^[MDCLXVI]+$', roman):
return False
@ -42,25 +50,57 @@ class RomanNumbers():
# TODO: check M does not appear after any other, etc.
return True
def convert_to_arabic(self, roman):
if not self.check_valid(roman):
@staticmethod
def convert_to_arabic(roman):
"""
Convert a roman literal to an arabic one.
:param roman: A roman number, as string.
:returns: The corresponding arabic one, as string.
"""
if not RomanNumbers.check_valid(roman):
return roman
keys = ['IV', 'IX', 'XL', 'XC', 'CD', 'CM', 'I', 'V', 'X', 'L', 'C', 'D', 'M']
to_arabic = {'IV': '4', 'IX': '9', 'XL': '40', 'XC': '90', 'CD': '400', 'CM': '900',
'I': '1', 'V': '5', 'X': '10', 'L': '50', 'C': '100', 'D': '500', 'M': '1000'}
keys = [
'IV', 'IX', 'XL', 'XC', 'CD', 'CM', 'I', 'V',
'X', 'L', 'C', 'D', 'M'
]
to_arabic = {
'IV': '4',
'IX': '9',
'XL': '40',
'XC': '90',
'CD': '400',
'CM': '900',
'I': '1',
'V': '5',
'X': '10',
'L': '50',
'C': '100',
'D': '500',
'M': '1000'
}
for key in keys:
if key in roman:
roman = roman.replace(key, ' {}'.format(to_arabic.get(key)))
return str(sum(int(num) for num in roman.split()))
def convert_to_arabic_in_text(self, text):
@staticmethod
def convert_to_arabic_in_text(text):
"""
Convert roman literals to arabic ones in a text.
:param text: Some text to convert roman literals from.
:returns: The corresponding text with roman literals converted to
arabic.
"""
return re.sub(
'(?<![\S])+([MDCLXVI]+)(?=[eè\s$])',
lambda matchobj: self.convert_to_arabic(matchobj.group(0)),
r'(?<![\S])+([MDCLXVI]+)(?=[eè\s$])',
lambda matchobj: RomanNumbers.convert_to_arabic(matchobj.group(0)),
text
)
def hash_dict(func):
"""
Decorator to use on functions accepting dict parameters, to transform them
@ -190,7 +230,6 @@ def normalize_string(string):
>>> normalize_string("tétéà 14ème-XIV, foobar")
'tetea 14eme xiv, foobar'
"""
# TODO: Convert romanian numerals to decimal
# ASCIIfy the string
string = unidecode.unidecode(string)

View File

@ -8,7 +8,6 @@ from __future__ import (
import functools
import json
import logging
import os
import bottle

View File

@ -21,7 +21,7 @@ from flatisfy.models.postal_code import PostalCode
FILTER_RE = re.compile(r"filter\[([A-z0-9_]+)\]")
def JSONError(error_code, error_str):
def JSONError(error_code, error_str): # pylint: disable=invalid-name
"""
Return an HTTP error with a JSON payload.