From 46457b014a13a8a71c04fee529a8def54ff32db1 Mon Sep 17 00:00:00 2001 From: "Phyks (Lucas Verney)" Date: Wed, 10 Jan 2018 20:01:22 +0100 Subject: [PATCH] Avoid too broad ignoring of flats when looking for only some postal codes in a city covered by multiple postal codes. Fix #110. --- flatisfy/filters/metadata.py | 45 ++++++++++++++++++++++++++---------- 1 file changed, 33 insertions(+), 12 deletions(-) diff --git a/flatisfy/filters/metadata.py b/flatisfy/filters/metadata.py index c188ce0..2f79199 100644 --- a/flatisfy/filters/metadata.py +++ b/flatisfy/filters/metadata.py @@ -54,7 +54,8 @@ def fuzzy_match(query, choices, limit=3, threshold=75): :param query: The string to match. :param choices: The list of strings to match with. - :param limit: The maximum number of items to return. + :param limit: The maximum number of items to return. Set to ``None`` to + return all values above threshold. :param threshold: The score threshold to use. :return: Tuples of matching items and associated confidence. 
@@ -102,7 +103,9 @@ def fuzzy_match(query, choices, limit=3, threshold=75): ], key=lambda x: x[1], reverse=True - )[:limit] + ) + if limit: + matches = matches[:limit] # Update confidence if matches: @@ -173,20 +176,38 @@ def guess_postal_code(flats_list, constraint, config, distance_threshold=20000): postal_code = None # If not found, try to find a city - cities = {x.name: x for x in opendata["postal_codes"]} if not postal_code: - matched_city = fuzzy_match( + # Find all fuzzy-matching cities + matched_cities = fuzzy_match( location, - cities.keys(), - limit=1 + [x.name for x in opendata["postal_codes"]], + limit=None ) - if matched_city: - # Store the matching postal code - matched_city = matched_city[0] - matched_city_name = matched_city[0] - postal_code = ( - cities[matched_city_name].postal_code + if matched_cities: + # Find associated postal codes + matched_postal_codes = [] + for matched_city_name, _ in matched_cities: + postal_code_objects_for_city = [ + x for x in opendata["postal_codes"] + if x.name == matched_city_name + ] + matched_postal_codes.extend( + pc.postal_code + for pc in postal_code_objects_for_city + ) + # Try to match them with postal codes in config constraint + matched_postal_codes_in_config = ( + set(matched_postal_codes) & set(constraint["postal_codes"]) ) + if matched_postal_codes_in_config: + # If there are some matched postal codes which are also in + # config, use them preferentially. This avoids incorrectly + # ignoring some flats in cities with multiple postal + # codes, see #110. + postal_code = next(iter(matched_postal_codes_in_config)) + else: + # Otherwise, simply take any matched postal code. + postal_code = matched_postal_codes[0] LOGGER.info( ("Found postal code in location field through city lookup " "for flat %s: %s."),