Add INSEE filtering
This commit is contained in:
parent
b3e316cf5b
commit
9f328259a7
@ -32,6 +32,7 @@ DEFAULT_CONFIG = {
|
||||
"house_types": [], # List of house types, must be in APART, HOUSE,
|
||||
# PARKING, LAND, OTHER or UNKNOWN
|
||||
"postal_codes": [], # List of postal codes
|
||||
"insees": [], # List of postal codes
|
||||
"area": (None, None), # (min, max) in m^2
|
||||
"cost": (None, None), # (min, max) in currency unit
|
||||
"rooms": (None, None), # (min, max)
|
||||
@ -202,13 +203,22 @@ def validate_config(config, check_with_data):
|
||||
assert "postal_codes" in constraint
|
||||
assert constraint["postal_codes"]
|
||||
assert all(isinstance(x, str) for x in constraint["postal_codes"])
|
||||
if "insee_codes" in constraint:
|
||||
assert constraint["insee_codes"]
|
||||
assert all(isinstance(x, str) for x in constraint["insee_codes"])
|
||||
|
||||
if check_with_data:
|
||||
# Ensure data is built into db
|
||||
data.preprocess_data(config, force=False)
|
||||
# Check postal codes
|
||||
opendata_postal_codes = [x.postal_code for x in data.load_data(PostalCode, constraint, config)]
|
||||
opendata = data.load_data(PostalCode, constraint, config)
|
||||
opendata_postal_codes = [x.postal_code for x in opendata]
|
||||
opendata_insee_codes = [x.insee_code for x in opendata]
|
||||
for postal_code in constraint["postal_codes"]:
|
||||
assert postal_code in opendata_postal_codes # noqa: E501
|
||||
if "insee_codes" in constraint:
|
||||
for insee in constraint["insee_codes"]:
|
||||
assert insee in opendata_insee_codes # noqa: E501
|
||||
|
||||
assert "area" in constraint
|
||||
_check_constraints_bounds(constraint["area"])
|
||||
|
@ -151,7 +151,7 @@ def _preprocess_laposte():
|
||||
try:
|
||||
area = french_postal_codes_to_quarter(fields["code_postal"])
|
||||
if area is None:
|
||||
LOGGER.info(
|
||||
LOGGER.debug(
|
||||
"No matching area found for postal code %s, skipping it.",
|
||||
fields["code_postal"],
|
||||
)
|
||||
@ -167,6 +167,7 @@ def _preprocess_laposte():
|
||||
PostalCode(
|
||||
area=area,
|
||||
postal_code=fields["code_postal"],
|
||||
insee_code=fields["code_commune_insee"],
|
||||
name=name,
|
||||
lat=fields["coordonnees_gps"][0],
|
||||
lng=fields["coordonnees_gps"][1],
|
||||
|
@ -37,7 +37,22 @@ def refine_with_housing_criteria(flats_list, constraint):
|
||||
# Check postal code
|
||||
postal_code = flat["flatisfy"].get("postal_code", None)
|
||||
if postal_code and postal_code not in constraint["postal_codes"]:
|
||||
LOGGER.info("Postal code %s for flat %s is out of range.", postal_code, flat["id"])
|
||||
LOGGER.info(
|
||||
"Postal code %s for flat %s is out of range (%s).",
|
||||
postal_code,
|
||||
flat["id"],
|
||||
", ".join(constraint["postal_codes"]),
|
||||
)
|
||||
is_ok[i] = is_ok[i] and False
|
||||
# Check insee code
|
||||
insee_code = flat["flatisfy"].get("insee_code", None)
|
||||
if insee_code and "insee_codes" in constraint and insee_code not in constraint["insee_codes"]:
|
||||
LOGGER.info(
|
||||
"insee code %s for flat %s is out of range (%s).",
|
||||
insee_code,
|
||||
flat["id"],
|
||||
", ".join(constraint["insee_codes"]),
|
||||
)
|
||||
is_ok[i] = is_ok[i] and False
|
||||
|
||||
# Check time_to
|
||||
|
@ -88,8 +88,8 @@ def fuzzy_match(query, choices, limit=3, threshold=75):
|
||||
[('denfert rochereau', 100), ('saint-jacques', 76)]
|
||||
"""
|
||||
# TODO: Is there a better confidence measure?
|
||||
normalized_query = tools.normalize_string(query)
|
||||
normalized_choices = [tools.normalize_string(choice) for choice in choices]
|
||||
normalized_query = tools.normalize_string(query).replace("saint", "st")
|
||||
normalized_choices = [tools.normalize_string(choice).replace("saint", "st") for choice in choices]
|
||||
|
||||
# Remove duplicates in the choices list
|
||||
unique_normalized_choices = tools.uniqify(normalized_choices)
|
||||
@ -116,10 +116,11 @@ def fuzzy_match(query, choices, limit=3, threshold=75):
|
||||
return matches
|
||||
|
||||
|
||||
def guess_location_position(location, cities, constraint):
|
||||
def guess_location_position(location, cities, constraint, must_match):
|
||||
# try to find a city
|
||||
# Find all fuzzy-matching cities
|
||||
postal_code = None
|
||||
insee_code = None
|
||||
position = None
|
||||
|
||||
matched_cities = fuzzy_match(location, [x.name for x in cities], limit=None)
|
||||
@ -128,6 +129,7 @@ def guess_location_position(location, cities, constraint):
|
||||
matched_postal_codes = []
|
||||
for matched_city_name, _ in matched_cities:
|
||||
postal_code_objects_for_city = [x for x in cities if x.name == matched_city_name]
|
||||
insee_code = [pc.insee_code for pc in postal_code_objects_for_city][0]
|
||||
matched_postal_codes.extend(pc.postal_code for pc in postal_code_objects_for_city)
|
||||
# Try to match them with postal codes in config constraint
|
||||
matched_postal_codes_in_config = set(matched_postal_codes) & set(constraint["postal_codes"])
|
||||
@ -154,7 +156,15 @@ def guess_location_position(location, cities, constraint):
|
||||
LOGGER.debug(("Found position %s using city %s."), position, matched_city_name)
|
||||
break
|
||||
|
||||
return (postal_code, position)
|
||||
if not postal_code and must_match:
|
||||
postal_code = cities[0].postal_code
|
||||
position = {
|
||||
"lat": cities[0].lat,
|
||||
"lng": cities[0].lng,
|
||||
}
|
||||
insee_code = cities[0].insee_code
|
||||
|
||||
return (postal_code, insee_code, position)
|
||||
|
||||
|
||||
def guess_postal_code(flats_list, constraint, config, distance_threshold=20000):
|
||||
@ -189,6 +199,7 @@ def guess_postal_code(flats_list, constraint, config, distance_threshold=20000):
|
||||
continue
|
||||
|
||||
postal_code = None
|
||||
insee_code = None
|
||||
position = None
|
||||
|
||||
# Try to find a postal code directly
|
||||
@ -209,11 +220,12 @@ def guess_postal_code(flats_list, constraint, config, distance_threshold=20000):
|
||||
postal_code = None
|
||||
|
||||
# Then fetch position (and postal_code if it couldn't be found earlier)
|
||||
cities = opendata["postal_codes"]
|
||||
if postal_code:
|
||||
cities = [x for x in opendata["postal_codes"] if x.postal_code == postal_code]
|
||||
(_, position) = guess_location_position(location, cities, constraint)
|
||||
else:
|
||||
(postal_code, position) = guess_location_position(location, opendata["postal_codes"], constraint)
|
||||
cities = [x for x in cities if x.postal_code == postal_code]
|
||||
(postal_code, insee_code, position) = guess_location_position(
|
||||
location, cities, constraint, postal_code is not None
|
||||
)
|
||||
|
||||
# Check that postal code is not too far from the ones listed in config,
|
||||
# limit bad fuzzy matching
|
||||
@ -257,6 +269,9 @@ def guess_postal_code(flats_list, constraint, config, distance_threshold=20000):
|
||||
else:
|
||||
LOGGER.info("No postal code found for flat %s.", flat["id"])
|
||||
|
||||
if insee_code:
|
||||
flat["flatisfy"]["insee_code"] = insee_code
|
||||
|
||||
if position:
|
||||
flat["flatisfy"]["position"] = position
|
||||
|
||||
|
@ -27,6 +27,7 @@ class PostalCode(BASE):
|
||||
# following ISO 3166-2.
|
||||
area = Column(String, index=True)
|
||||
postal_code = Column(String, index=True)
|
||||
insee_code = Column(String, index=True)
|
||||
name = Column(String, index=True)
|
||||
lat = Column(Float)
|
||||
lng = Column(Float)
|
||||
|
24
migrations/versions/9e58c66f1ac1_add_flat_insee_column.py
Normal file
24
migrations/versions/9e58c66f1ac1_add_flat_insee_column.py
Normal file
@ -0,0 +1,24 @@
|
||||
"""Add flat INSEE column
|
||||
|
||||
Revision ID: 9e58c66f1ac1
|
||||
Revises: d21933db9ad8
|
||||
Create Date: 2021-02-08 16:31:18.961186
|
||||
|
||||
"""
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision = "9e58c66f1ac1"
|
||||
down_revision = "d21933db9ad8"
|
||||
branch_labels = None
|
||||
depends_on = None
|
||||
|
||||
|
||||
def upgrade():
|
||||
op.add_column("postal_codes", sa.Column("insee_code", sa.String()))
|
||||
|
||||
|
||||
def downgrade():
|
||||
op.drop_column("postal_codes", "insee_code")
|
Loading…
Reference in New Issue
Block a user