Avoid ignoring flats too broadly when searching for only some of the postal codes of a city that is covered by multiple postal codes. Fixes #110.
This commit is contained in:
parent
45c4eca775
commit
46457b014a
@ -54,7 +54,8 @@ def fuzzy_match(query, choices, limit=3, threshold=75):
|
|||||||
|
|
||||||
:param query: The string to match.
|
:param query: The string to match.
|
||||||
:param choices: The list of strings to match with.
|
:param choices: The list of strings to match with.
|
||||||
:param limit: The maximum number of items to return.
|
:param limit: The maximum number of items to return. Set to ``None`` to
|
||||||
|
return all values above threshold.
|
||||||
:param threshold: The score threshold to use.
|
:param threshold: The score threshold to use.
|
||||||
|
|
||||||
:return: Tuples of matching items and associated confidence.
|
:return: Tuples of matching items and associated confidence.
|
||||||
@ -102,7 +103,9 @@ def fuzzy_match(query, choices, limit=3, threshold=75):
|
|||||||
],
|
],
|
||||||
key=lambda x: x[1],
|
key=lambda x: x[1],
|
||||||
reverse=True
|
reverse=True
|
||||||
)[:limit]
|
)
|
||||||
|
if limit:
|
||||||
|
matches = matches[:limit]
|
||||||
|
|
||||||
# Update confidence
|
# Update confidence
|
||||||
if matches:
|
if matches:
|
||||||
@ -173,20 +176,38 @@ def guess_postal_code(flats_list, constraint, config, distance_threshold=20000):
|
|||||||
postal_code = None
|
postal_code = None
|
||||||
|
|
||||||
# If not found, try to find a city
|
# If not found, try to find a city
|
||||||
cities = {x.name: x for x in opendata["postal_codes"]}
|
|
||||||
if not postal_code:
|
if not postal_code:
|
||||||
matched_city = fuzzy_match(
|
# Find all fuzzy-matching cities
|
||||||
|
matched_cities = fuzzy_match(
|
||||||
location,
|
location,
|
||||||
cities.keys(),
|
[x.name for x in opendata["postal_codes"]],
|
||||||
limit=1
|
limit=None
|
||||||
)
|
)
|
||||||
if matched_city:
|
if matched_cities:
|
||||||
# Store the matching postal code
|
# Find associated postal codes
|
||||||
matched_city = matched_city[0]
|
matched_postal_codes = []
|
||||||
matched_city_name = matched_city[0]
|
for matched_city_name, _ in matched_cities:
|
||||||
postal_code = (
|
postal_code_objects_for_city = [
|
||||||
cities[matched_city_name].postal_code
|
x for x in opendata["postal_codes"]
|
||||||
|
if x.name == matched_city_name
|
||||||
|
]
|
||||||
|
matched_postal_codes.extend(
|
||||||
|
pc.postal_code
|
||||||
|
for pc in postal_code_objects_for_city
|
||||||
)
|
)
|
||||||
|
# Try to match them with postal codes in config constraint
|
||||||
|
matched_postal_codes_in_config = (
|
||||||
|
set(matched_postal_codes) & set(constraint["postal_codes"])
|
||||||
|
)
|
||||||
|
if matched_postal_codes_in_config:
|
||||||
|
# If there are some matched postal codes which are also in
|
||||||
|
# config, use them preferentially. This avoids ignoring
|
||||||
|
# incorrectly some flats in cities with multiple postal
|
||||||
|
# codes, see #110.
|
||||||
|
postal_code = next(iter(matched_postal_codes_in_config))
|
||||||
|
else:
|
||||||
|
# Otherwise, simply take any matched postal code.
|
||||||
|
postal_code = matched_postal_codes[0]
|
||||||
LOGGER.info(
|
LOGGER.info(
|
||||||
("Found postal code in location field through city lookup "
|
("Found postal code in location field through city lookup "
|
||||||
"for flat %s: %s."),
|
"for flat %s: %s."),
|
||||||
|
Loading…
Reference in New Issue
Block a user