diff --git a/doc/flatisfy.rst b/doc/flatisfy.rst index 88821ae..2ed4307 100644 --- a/doc/flatisfy.rst +++ b/doc/flatisfy.rst @@ -71,6 +71,14 @@ flatisfy.fetch module :undoc-members: :show-inheritance: +flatisfy.tests module +--------------------- + +.. automodule:: flatisfy.tests + :members: + :undoc-members: + :show-inheritance: + flatisfy.tools module --------------------- diff --git a/flatisfy/__main__.py b/flatisfy/__main__.py index 5c3bced..f80c4e9 100644 --- a/flatisfy/__main__.py +++ b/flatisfy/__main__.py @@ -219,7 +219,7 @@ def main(): return # Tests command elif args.cmd == "test": - tests.run(config) + tests.run() return diff --git a/flatisfy/data_files/__init__.py b/flatisfy/data_files/__init__.py index 66ccc36..f85d25b 100644 --- a/flatisfy/data_files/__init__.py +++ b/flatisfy/data_files/__init__.py @@ -3,21 +3,20 @@ Preprocessing functions to convert input opendata files into SQLAlchemy objects ready to be stored in the database. """ -import csv import io import json import logging import os import sys -if sys.version_info >= (3,0): +from flatisfy.models.postal_code import PostalCode +from flatisfy.models.public_transport import PublicTransport + +if sys.version_info >= (3, 0): import csv else: from backports import csv -from flatisfy.models.postal_code import PostalCode -from flatisfy.models.public_transport import PublicTransport - LOGGER = logging.getLogger(__name__) MODULE_DIR = os.path.dirname(os.path.realpath(__file__)) diff --git a/flatisfy/filters/duplicates.py b/flatisfy/filters/duplicates.py index b4a7467..4e98ad1 100644 --- a/flatisfy/filters/duplicates.py +++ b/flatisfy/filters/duplicates.py @@ -167,7 +167,17 @@ def detect(flats_list, key="id", merge=True, should_intersect=False): return unique_flats_list, duplicate_flats + def get_duplicate_score(flat1, flat2, photo_cache): + """ + Compute the duplicate score between two flats. The higher the score, the + more likely the two flats to be duplicates. + + :param flat1: First flat dict. + :param flat2: Second flat dict. + :param photo_cache: An instance of ``ImageCache`` to use to cache images. + :return: The duplicate score as ``int``. + """ n_common_items = 0 try: # They should have the same area, up to one unit @@ -265,6 +275,7 @@ def get_duplicate_score(flat1, flat2, photo_cache): return n_common_items + def deep_detect(flats_list, config): """ Deeper detection of duplicates based on any available data. diff --git a/flatisfy/models/flat.py b/flatisfy/models/flat.py index 340e7a6..9d2285b 100644 --- a/flatisfy/models/flat.py +++ b/flatisfy/models/flat.py @@ -5,8 +5,8 @@ This modules defines an SQLAlchemy ORM model for a flat. # pylint: disable=locally-disabled,invalid-name,too-few-public-methods from __future__ import absolute_import, print_function, unicode_literals -import enum import logging +import enum import arrow diff --git a/flatisfy/tests.py b/flatisfy/tests.py index 42554a0..76e8e27 100644 --- a/flatisfy/tests.py +++ b/flatisfy/tests.py @@ -2,13 +2,14 @@ """ This module contains unit testing functions. """ - -import random -import logging -import unittest import copy -import os import json +import logging +import os +import random +import sys +import unittest + from flatisfy import tools from flatisfy.filters import duplicates from flatisfy.filters.cache import ImageCache @@ -17,7 +18,11 @@ from flatisfy.constants import BACKENDS_BY_PRECEDENCE LOGGER = logging.getLogger(__name__) TESTS_DATA_DIR = os.path.dirname(os.path.realpath(__file__)) + "/test_files/" + class TestTexts(unittest.TestCase): + """ + Checks string normalizations. + """ def test_roman_numbers(self): """ Checks roman numbers replacement. @@ -52,6 +57,10 @@ class TestTexts(unittest.TestCase): ) def test_roman_numbers_in_text(self): + """ + Checks conversion of roman numbers to arabic ones in string + normalization. + """ self.assertEqual( "dans le 15e arrondissement", tools.normalize_string("Dans le XVe arrondissement") @@ -75,7 +84,11 @@ class TestTexts(unittest.TestCase): tools.normalize_string(u"éèêàüï") ) + class TestPhoneNumbers(unittest.TestCase): + """ + Checks phone numbers normalizations. + """ def test_prefix(self): """ Checks phone numbers with international prefixes. @@ -103,16 +116,23 @@ class TestPhoneNumbers(unittest.TestCase): duplicates.homogeneize_phone_number("06 05 04 03 02") ) -class TestDuplicates(unittest.TestCase): - DUPLICATES_MIN_SCORE_WITHOUT_PHOTOS = 14 - DUPLICATES_MIN_SCORE_WITH_PHOTOS = 15 - IMAGE_CACHE = ImageCache() - def generate_fake_flat(self): +class TestDuplicates(unittest.TestCase): + """ + Checks duplicates detection. + """ + DUPLICATES_MIN_SCORE_WITHOUT_PHOTOS = 14 # pylint: disable=invalid-name + DUPLICATES_MIN_SCORE_WITH_PHOTOS = 15 # pylint: disable=invalid-name + IMAGE_CACHE = ImageCache() # pylint: disable=invalid-name + + @staticmethod + def generate_fake_flat(): """ Generates a fake flat post. """ - backend = BACKENDS_BY_PRECEDENCE[random.randint(0, len(BACKENDS_BY_PRECEDENCE) - 1)] + backend = BACKENDS_BY_PRECEDENCE[ + random.randint(0, len(BACKENDS_BY_PRECEDENCE) - 1) + ] return { "id": str(random.randint(100000, 199999)) + "@" + backend, "phone": "0607080910", @@ -123,7 +143,8 @@ class TestDuplicates(unittest.TestCase): "bedrooms": random.randint(1, 4) } - def load_files(self, file1, file2): + @staticmethod + def load_files(file1, file2): """ Load two files @@ -143,8 +164,12 @@ class TestDuplicates(unittest.TestCase): """ flat1 = self.generate_fake_flat() flat2 = copy.deepcopy(flat1) - score = duplicates.get_duplicate_score(flat1, flat2, TestDuplicates.IMAGE_CACHE) - self.assertTrue(score >= TestDuplicates.DUPLICATES_MIN_SCORE_WITHOUT_PHOTOS) + score = duplicates.get_duplicate_score( + flat1, flat2, TestDuplicates.IMAGE_CACHE + ) + self.assertTrue( + score >= TestDuplicates.DUPLICATES_MIN_SCORE_WITHOUT_PHOTOS + ) def test_different_prices(self): """ @@ -154,8 +179,12 @@ class TestDuplicates(unittest.TestCase): flat2 = copy.deepcopy(flat1) flat2["cost"] += 1000 - score = duplicates.get_duplicate_score(flat1, flat2, TestDuplicates.IMAGE_CACHE) - self.assertTrue(score < TestDuplicates.DUPLICATES_MIN_SCORE_WITHOUT_PHOTOS) + score = duplicates.get_duplicate_score( + flat1, flat2, TestDuplicates.IMAGE_CACHE + ) + self.assertTrue( + score < TestDuplicates.DUPLICATES_MIN_SCORE_WITHOUT_PHOTOS + ) def test_different_rooms(self): """ @@ -166,8 +195,12 @@ class TestDuplicates(unittest.TestCase): flat2 = copy.deepcopy(flat1) flat2["rooms"] += 1 - score = duplicates.get_duplicate_score(flat1, flat2, TestDuplicates.IMAGE_CACHE) - self.assertTrue(score < TestDuplicates.DUPLICATES_MIN_SCORE_WITHOUT_PHOTOS) + score = duplicates.get_duplicate_score( + flat1, flat2, TestDuplicates.IMAGE_CACHE + ) + self.assertTrue( + score < TestDuplicates.DUPLICATES_MIN_SCORE_WITHOUT_PHOTOS + ) def test_different_areas(self): """ @@ -177,8 +210,12 @@ class TestDuplicates(unittest.TestCase): flat2 = copy.deepcopy(flat1) flat2["area"] += 10 - score = duplicates.get_duplicate_score(flat1, flat2, TestDuplicates.IMAGE_CACHE) - self.assertTrue(score < TestDuplicates.DUPLICATES_MIN_SCORE_WITHOUT_PHOTOS) + score = duplicates.get_duplicate_score( + flat1, flat2, TestDuplicates.IMAGE_CACHE + ) + self.assertTrue( + score < TestDuplicates.DUPLICATES_MIN_SCORE_WITHOUT_PHOTOS + ) def test_different_areas_decimals(self): """ @@ -190,45 +227,63 @@ class TestDuplicates(unittest.TestCase): flat1["area"] = 50.65 flat2["area"] = 50.37 - score = duplicates.get_duplicate_score(flat1, flat2, TestDuplicates.IMAGE_CACHE) - self.assertTrue(score < TestDuplicates.DUPLICATES_MIN_SCORE_WITHOUT_PHOTOS) + score = duplicates.get_duplicate_score( + flat1, flat2, TestDuplicates.IMAGE_CACHE + ) + self.assertTrue( + score < TestDuplicates.DUPLICATES_MIN_SCORE_WITHOUT_PHOTOS + ) def test_different_phones(self): """ - Two flats with different phone numbers should not be detected as duplicates. + Two flats with different phone numbers should not be detected as + duplicates. """ flat1 = self.generate_fake_flat() flat2 = copy.deepcopy(flat1) flat2["phone"] = "0708091011" - score = duplicates.get_duplicate_score(flat1, flat2, TestDuplicates.IMAGE_CACHE) - self.assertTrue(score < TestDuplicates.DUPLICATES_MIN_SCORE_WITHOUT_PHOTOS) + score = duplicates.get_duplicate_score( + flat1, flat2, TestDuplicates.IMAGE_CACHE + ) + self.assertTrue( + score < TestDuplicates.DUPLICATES_MIN_SCORE_WITHOUT_PHOTOS + ) def test_real_duplicates(self): """ - Two flats with same price, area and rooms quantity should be detected as - duplicates. + Two flats with same price, area and rooms quantity should be detected + as duplicates. """ flats = self.load_files( "127028739@seloger", "14428129@explorimmo" ) - score = duplicates.get_duplicate_score(flats[0], flats[1], TestDuplicates.IMAGE_CACHE) - self.assertTrue(score >= TestDuplicates.DUPLICATES_MIN_SCORE_WITH_PHOTOS) + score = duplicates.get_duplicate_score( + flats[0], flats[1], TestDuplicates.IMAGE_CACHE + ) + self.assertTrue( + score >= TestDuplicates.DUPLICATES_MIN_SCORE_WITH_PHOTOS + ) -def run(config): + +def run(): """ Run all the tests - - :param config: A config dict. """ LOGGER.info("Running tests…") - suite = unittest.TestLoader().loadTestsFromTestCase(TestTexts) - unittest.TextTestRunner(verbosity=2).run(suite) + try: + suite = unittest.TestLoader().loadTestsFromTestCase(TestTexts) + result = unittest.TextTestRunner(verbosity=2).run(suite) + assert result.wasSuccessful() - suite = unittest.TestLoader().loadTestsFromTestCase(TestPhoneNumbers) - unittest.TextTestRunner(verbosity=2).run(suite) + suite = unittest.TestLoader().loadTestsFromTestCase(TestPhoneNumbers) + result = unittest.TextTestRunner(verbosity=2).run(suite) + assert result.wasSuccessful() - suite = unittest.TestLoader().loadTestsFromTestCase(TestDuplicates) - unittest.TextTestRunner(verbosity=2).run(suite) + suite = unittest.TestLoader().loadTestsFromTestCase(TestDuplicates) + result = unittest.TextTestRunner(verbosity=2).run(suite) + assert result.wasSuccessful() + except AssertionError: + sys.exit(1) diff --git a/flatisfy/tools.py b/flatisfy/tools.py index c33ceaa..266bd78 100644 --- a/flatisfy/tools.py +++ b/flatisfy/tools.py @@ -24,14 +24,22 @@ LOGGER = logging.getLogger(__name__) # Constants NAVITIA_ENDPOINT = "https://api.navitia.io/v1/coverage/fr-idf/journeys" -class RomanNumbers(): + +class RomanNumbers(object): """ Utilities to check and convert roman numbers. - Part of the convertions are based on + + Part of the conversions is based on https://gist.github.com/riverrun/ac91218bb1678b857c12 """ - - def check_valid(self, roman): + @staticmethod + def check_valid(roman): + """ + Check whether a roman literal is a valid roman literal. + + :param roman: A roman literal, as string. + :returns: ``True`` if it is a valid roman literal, ``False`` otherwise. + """ if not re.match('^[MDCLXVI]+$', roman): return False @@ -42,25 +50,57 @@ class RomanNumbers(): # TODO: check M does not appear after any other, etc. return True - def convert_to_arabic(self, roman): - if not self.check_valid(roman): + @staticmethod + def convert_to_arabic(roman): + """ + Convert a roman literal to arabic one. + + :param roman: A roman number, as string. + :returns: The corresponding arabic one, as string. + """ + if not RomanNumbers.check_valid(roman): return roman - keys = ['IV', 'IX', 'XL', 'XC', 'CD', 'CM', 'I', 'V', 'X', 'L', 'C', 'D', 'M'] - to_arabic = {'IV': '4', 'IX': '9', 'XL': '40', 'XC': '90', 'CD': '400', 'CM': '900', - 'I': '1', 'V': '5', 'X': '10', 'L': '50', 'C': '100', 'D': '500', 'M': '1000'} + keys = [ + 'IV', 'IX', 'XL', 'XC', 'CD', 'CM', 'I', 'V', + 'X', 'L', 'C', 'D', 'M' + ] + to_arabic = { + 'IV': '4', + 'IX': '9', + 'XL': '40', + 'XC': '90', + 'CD': '400', + 'CM': '900', + 'I': '1', + 'V': '5', + 'X': '10', + 'L': '50', + 'C': '100', + 'D': '500', + 'M': '1000' + } for key in keys: if key in roman: roman = roman.replace(key, ' {}'.format(to_arabic.get(key))) return str(sum(int(num) for num in roman.split())) - def convert_to_arabic_in_text(self, text): + @staticmethod + def convert_to_arabic_in_text(text): + """ + Convert roman literals to arabic one in a text. + + :param text: Some text to convert roman literals from. + :returns: The corresponding text with roman literals converted to + arabic. + """ return re.sub( - '(?>> normalize_string("tétéà 14ème-XIV, foobar") 'tetea 14eme xiv, foobar' """ - # TODO: Convert romanian numerals to decimal # ASCIIfy the string string = unidecode.unidecode(string) diff --git a/flatisfy/web/app.py b/flatisfy/web/app.py index 581174e..fa25725 100644 --- a/flatisfy/web/app.py +++ b/flatisfy/web/app.py @@ -8,7 +8,6 @@ from __future__ import ( import functools import json -import logging import os import bottle diff --git a/flatisfy/web/routes/api.py b/flatisfy/web/routes/api.py index 5086ad2..d107cd5 100644 --- a/flatisfy/web/routes/api.py +++ b/flatisfy/web/routes/api.py @@ -21,7 +21,7 @@ from flatisfy.models.postal_code import PostalCode FILTER_RE = re.compile(r"filter\[([A-z0-9_]+)\]") -def JSONError(error_code, error_str): +def JSONError(error_code, error_str): # pylint: disable=invalid-name """ Return an HTTP error with a JSON payload.