Avoid ignoring flats too broadly when only some of the postal codes of a city covered by multiple postal codes are searched for. Fix #110.

This commit is contained in:
Lucas Verney 2018-01-10 20:01:22 +01:00
parent 45c4eca775
commit 46457b014a

View File

@ -54,7 +54,8 @@ def fuzzy_match(query, choices, limit=3, threshold=75):
:param query: The string to match. :param query: The string to match.
:param choices: The list of strings to match with. :param choices: The list of strings to match with.
:param limit: The maximum number of items to return. :param limit: The maximum number of items to return. Set to ``None`` to
return all values above threshold.
:param threshold: The score threshold to use. :param threshold: The score threshold to use.
:return: Tuples of matching items and associated confidence. :return: Tuples of matching items and associated confidence.
@ -102,7 +103,9 @@ def fuzzy_match(query, choices, limit=3, threshold=75):
], ],
key=lambda x: x[1], key=lambda x: x[1],
reverse=True reverse=True
)[:limit] )
if limit:
matches = matches[:limit]
# Update confidence # Update confidence
if matches: if matches:
@ -173,20 +176,38 @@ def guess_postal_code(flats_list, constraint, config, distance_threshold=20000):
postal_code = None postal_code = None
# If not found, try to find a city # If not found, try to find a city
cities = {x.name: x for x in opendata["postal_codes"]}
if not postal_code: if not postal_code:
matched_city = fuzzy_match( # Find all fuzzy-matching cities
matched_cities = fuzzy_match(
location, location,
cities.keys(), [x.name for x in opendata["postal_codes"]],
limit=1 limit=None
) )
if matched_city: if matched_cities:
# Store the matching postal code # Find associated postal codes
matched_city = matched_city[0] matched_postal_codes = []
matched_city_name = matched_city[0] for matched_city_name, _ in matched_cities:
postal_code = ( postal_code_objects_for_city = [
cities[matched_city_name].postal_code x for x in opendata["postal_codes"]
if x.name == matched_city_name
]
matched_postal_codes.extend(
pc.postal_code
for pc in postal_code_objects_for_city
) )
# Try to match them with postal codes in config constraint
matched_postal_codes_in_config = (
set(matched_postal_codes) & set(constraint["postal_codes"])
)
if matched_postal_codes_in_config:
# If there are some matched postal codes which are also in
# config, use them preferentially. This avoids incorrectly
# ignoring some flats in cities with multiple postal
# codes, see #110.
postal_code = next(iter(matched_postal_codes_in_config))
else:
# Otherwise, simply take any matched postal code.
postal_code = matched_postal_codes[0]
LOGGER.info( LOGGER.info(
("Found postal code in location field through city lookup " ("Found postal code in location field through city lookup "
"for flat %s: %s."), "for flat %s: %s."),