Add INSEE filtering
This commit is contained in:
parent
b3e316cf5b
commit
9f328259a7
@ -32,6 +32,7 @@ DEFAULT_CONFIG = {
|
|||||||
"house_types": [], # List of house types, must be in APART, HOUSE,
|
"house_types": [], # List of house types, must be in APART, HOUSE,
|
||||||
# PARKING, LAND, OTHER or UNKNOWN
|
# PARKING, LAND, OTHER or UNKNOWN
|
||||||
"postal_codes": [], # List of postal codes
|
"postal_codes": [], # List of postal codes
|
||||||
|
"insees": [], # List of INSEE codes
|
||||||
"area": (None, None), # (min, max) in m^2
|
"area": (None, None), # (min, max) in m^2
|
||||||
"cost": (None, None), # (min, max) in currency unit
|
"cost": (None, None), # (min, max) in currency unit
|
||||||
"rooms": (None, None), # (min, max)
|
"rooms": (None, None), # (min, max)
|
||||||
@ -202,13 +203,22 @@ def validate_config(config, check_with_data):
|
|||||||
assert "postal_codes" in constraint
|
assert "postal_codes" in constraint
|
||||||
assert constraint["postal_codes"]
|
assert constraint["postal_codes"]
|
||||||
assert all(isinstance(x, str) for x in constraint["postal_codes"])
|
assert all(isinstance(x, str) for x in constraint["postal_codes"])
|
||||||
|
if "insee_codes" in constraint:
|
||||||
|
assert constraint["insee_codes"]
|
||||||
|
assert all(isinstance(x, str) for x in constraint["insee_codes"])
|
||||||
|
|
||||||
if check_with_data:
|
if check_with_data:
|
||||||
# Ensure data is built into db
|
# Ensure data is built into db
|
||||||
data.preprocess_data(config, force=False)
|
data.preprocess_data(config, force=False)
|
||||||
# Check postal codes
|
# Check postal codes
|
||||||
opendata_postal_codes = [x.postal_code for x in data.load_data(PostalCode, constraint, config)]
|
opendata = data.load_data(PostalCode, constraint, config)
|
||||||
|
opendata_postal_codes = [x.postal_code for x in opendata]
|
||||||
|
opendata_insee_codes = [x.insee_code for x in opendata]
|
||||||
for postal_code in constraint["postal_codes"]:
|
for postal_code in constraint["postal_codes"]:
|
||||||
assert postal_code in opendata_postal_codes # noqa: E501
|
assert postal_code in opendata_postal_codes # noqa: E501
|
||||||
|
if "insee_codes" in constraint:
|
||||||
|
for insee in constraint["insee_codes"]:
|
||||||
|
assert insee in opendata_insee_codes # noqa: E501
|
||||||
|
|
||||||
assert "area" in constraint
|
assert "area" in constraint
|
||||||
_check_constraints_bounds(constraint["area"])
|
_check_constraints_bounds(constraint["area"])
|
||||||
|
@ -151,7 +151,7 @@ def _preprocess_laposte():
|
|||||||
try:
|
try:
|
||||||
area = french_postal_codes_to_quarter(fields["code_postal"])
|
area = french_postal_codes_to_quarter(fields["code_postal"])
|
||||||
if area is None:
|
if area is None:
|
||||||
LOGGER.info(
|
LOGGER.debug(
|
||||||
"No matching area found for postal code %s, skipping it.",
|
"No matching area found for postal code %s, skipping it.",
|
||||||
fields["code_postal"],
|
fields["code_postal"],
|
||||||
)
|
)
|
||||||
@ -167,6 +167,7 @@ def _preprocess_laposte():
|
|||||||
PostalCode(
|
PostalCode(
|
||||||
area=area,
|
area=area,
|
||||||
postal_code=fields["code_postal"],
|
postal_code=fields["code_postal"],
|
||||||
|
insee_code=fields["code_commune_insee"],
|
||||||
name=name,
|
name=name,
|
||||||
lat=fields["coordonnees_gps"][0],
|
lat=fields["coordonnees_gps"][0],
|
||||||
lng=fields["coordonnees_gps"][1],
|
lng=fields["coordonnees_gps"][1],
|
||||||
|
@ -37,7 +37,22 @@ def refine_with_housing_criteria(flats_list, constraint):
|
|||||||
# Check postal code
|
# Check postal code
|
||||||
postal_code = flat["flatisfy"].get("postal_code", None)
|
postal_code = flat["flatisfy"].get("postal_code", None)
|
||||||
if postal_code and postal_code not in constraint["postal_codes"]:
|
if postal_code and postal_code not in constraint["postal_codes"]:
|
||||||
LOGGER.info("Postal code %s for flat %s is out of range.", postal_code, flat["id"])
|
LOGGER.info(
|
||||||
|
"Postal code %s for flat %s is out of range (%s).",
|
||||||
|
postal_code,
|
||||||
|
flat["id"],
|
||||||
|
", ".join(constraint["postal_codes"]),
|
||||||
|
)
|
||||||
|
is_ok[i] = is_ok[i] and False
|
||||||
|
# Check insee code
|
||||||
|
insee_code = flat["flatisfy"].get("insee_code", None)
|
||||||
|
if insee_code and "insee_codes" in constraint and insee_code not in constraint["insee_codes"]:
|
||||||
|
LOGGER.info(
|
||||||
|
"insee code %s for flat %s is out of range (%s).",
|
||||||
|
insee_code,
|
||||||
|
flat["id"],
|
||||||
|
", ".join(constraint["insee_codes"]),
|
||||||
|
)
|
||||||
is_ok[i] = is_ok[i] and False
|
is_ok[i] = is_ok[i] and False
|
||||||
|
|
||||||
# Check time_to
|
# Check time_to
|
||||||
|
@ -88,8 +88,8 @@ def fuzzy_match(query, choices, limit=3, threshold=75):
|
|||||||
[('denfert rochereau', 100), ('saint-jacques', 76)]
|
[('denfert rochereau', 100), ('saint-jacques', 76)]
|
||||||
"""
|
"""
|
||||||
# TODO: Is there a better confidence measure?
|
# TODO: Is there a better confidence measure?
|
||||||
normalized_query = tools.normalize_string(query)
|
normalized_query = tools.normalize_string(query).replace("saint", "st")
|
||||||
normalized_choices = [tools.normalize_string(choice) for choice in choices]
|
normalized_choices = [tools.normalize_string(choice).replace("saint", "st") for choice in choices]
|
||||||
|
|
||||||
# Remove duplicates in the choices list
|
# Remove duplicates in the choices list
|
||||||
unique_normalized_choices = tools.uniqify(normalized_choices)
|
unique_normalized_choices = tools.uniqify(normalized_choices)
|
||||||
@ -116,10 +116,11 @@ def fuzzy_match(query, choices, limit=3, threshold=75):
|
|||||||
return matches
|
return matches
|
||||||
|
|
||||||
|
|
||||||
def guess_location_position(location, cities, constraint):
|
def guess_location_position(location, cities, constraint, must_match):
|
||||||
# try to find a city
|
# try to find a city
|
||||||
# Find all fuzzy-matching cities
|
# Find all fuzzy-matching cities
|
||||||
postal_code = None
|
postal_code = None
|
||||||
|
insee_code = None
|
||||||
position = None
|
position = None
|
||||||
|
|
||||||
matched_cities = fuzzy_match(location, [x.name for x in cities], limit=None)
|
matched_cities = fuzzy_match(location, [x.name for x in cities], limit=None)
|
||||||
@ -128,6 +129,7 @@ def guess_location_position(location, cities, constraint):
|
|||||||
matched_postal_codes = []
|
matched_postal_codes = []
|
||||||
for matched_city_name, _ in matched_cities:
|
for matched_city_name, _ in matched_cities:
|
||||||
postal_code_objects_for_city = [x for x in cities if x.name == matched_city_name]
|
postal_code_objects_for_city = [x for x in cities if x.name == matched_city_name]
|
||||||
|
insee_code = [pc.insee_code for pc in postal_code_objects_for_city][0]
|
||||||
matched_postal_codes.extend(pc.postal_code for pc in postal_code_objects_for_city)
|
matched_postal_codes.extend(pc.postal_code for pc in postal_code_objects_for_city)
|
||||||
# Try to match them with postal codes in config constraint
|
# Try to match them with postal codes in config constraint
|
||||||
matched_postal_codes_in_config = set(matched_postal_codes) & set(constraint["postal_codes"])
|
matched_postal_codes_in_config = set(matched_postal_codes) & set(constraint["postal_codes"])
|
||||||
@ -154,7 +156,15 @@ def guess_location_position(location, cities, constraint):
|
|||||||
LOGGER.debug(("Found position %s using city %s."), position, matched_city_name)
|
LOGGER.debug(("Found position %s using city %s."), position, matched_city_name)
|
||||||
break
|
break
|
||||||
|
|
||||||
return (postal_code, position)
|
if not postal_code and must_match:
|
||||||
|
postal_code = cities[0].postal_code
|
||||||
|
position = {
|
||||||
|
"lat": cities[0].lat,
|
||||||
|
"lng": cities[0].lng,
|
||||||
|
}
|
||||||
|
insee_code = cities[0].insee_code
|
||||||
|
|
||||||
|
return (postal_code, insee_code, position)
|
||||||
|
|
||||||
|
|
||||||
def guess_postal_code(flats_list, constraint, config, distance_threshold=20000):
|
def guess_postal_code(flats_list, constraint, config, distance_threshold=20000):
|
||||||
@ -189,6 +199,7 @@ def guess_postal_code(flats_list, constraint, config, distance_threshold=20000):
|
|||||||
continue
|
continue
|
||||||
|
|
||||||
postal_code = None
|
postal_code = None
|
||||||
|
insee_code = None
|
||||||
position = None
|
position = None
|
||||||
|
|
||||||
# Try to find a postal code directly
|
# Try to find a postal code directly
|
||||||
@ -209,11 +220,12 @@ def guess_postal_code(flats_list, constraint, config, distance_threshold=20000):
|
|||||||
postal_code = None
|
postal_code = None
|
||||||
|
|
||||||
# Then fetch position (and postal_code if it couldn't be found earlier)
|
# Then fetch position (and postal_code if it couldn't be found earlier)
|
||||||
|
cities = opendata["postal_codes"]
|
||||||
if postal_code:
|
if postal_code:
|
||||||
cities = [x for x in opendata["postal_codes"] if x.postal_code == postal_code]
|
cities = [x for x in cities if x.postal_code == postal_code]
|
||||||
(_, position) = guess_location_position(location, cities, constraint)
|
(postal_code, insee_code, position) = guess_location_position(
|
||||||
else:
|
location, cities, constraint, postal_code is not None
|
||||||
(postal_code, position) = guess_location_position(location, opendata["postal_codes"], constraint)
|
)
|
||||||
|
|
||||||
# Check that postal code is not too far from the ones listed in config,
|
# Check that postal code is not too far from the ones listed in config,
|
||||||
# limit bad fuzzy matching
|
# limit bad fuzzy matching
|
||||||
@ -257,6 +269,9 @@ def guess_postal_code(flats_list, constraint, config, distance_threshold=20000):
|
|||||||
else:
|
else:
|
||||||
LOGGER.info("No postal code found for flat %s.", flat["id"])
|
LOGGER.info("No postal code found for flat %s.", flat["id"])
|
||||||
|
|
||||||
|
if insee_code:
|
||||||
|
flat["flatisfy"]["insee_code"] = insee_code
|
||||||
|
|
||||||
if position:
|
if position:
|
||||||
flat["flatisfy"]["position"] = position
|
flat["flatisfy"]["position"] = position
|
||||||
|
|
||||||
|
@ -27,6 +27,7 @@ class PostalCode(BASE):
|
|||||||
# following ISO 3166-2.
|
# following ISO 3166-2.
|
||||||
area = Column(String, index=True)
|
area = Column(String, index=True)
|
||||||
postal_code = Column(String, index=True)
|
postal_code = Column(String, index=True)
|
||||||
|
insee_code = Column(String, index=True)
|
||||||
name = Column(String, index=True)
|
name = Column(String, index=True)
|
||||||
lat = Column(Float)
|
lat = Column(Float)
|
||||||
lng = Column(Float)
|
lng = Column(Float)
|
||||||
|
24
migrations/versions/9e58c66f1ac1_add_flat_insee_column.py
Normal file
24
migrations/versions/9e58c66f1ac1_add_flat_insee_column.py
Normal file
@ -0,0 +1,24 @@
|
|||||||
|
"""Add flat INSEE column
|
||||||
|
|
||||||
|
Revision ID: 9e58c66f1ac1
|
||||||
|
Revises: d21933db9ad8
|
||||||
|
Create Date: 2021-02-08 16:31:18.961186
|
||||||
|
|
||||||
|
"""
|
||||||
|
from alembic import op
|
||||||
|
import sqlalchemy as sa
|
||||||
|
|
||||||
|
|
||||||
|
# revision identifiers, used by Alembic.
|
||||||
|
revision = "9e58c66f1ac1"
|
||||||
|
down_revision = "d21933db9ad8"
|
||||||
|
branch_labels = None
|
||||||
|
depends_on = None
|
||||||
|
|
||||||
|
|
||||||
|
def upgrade():
|
||||||
|
op.add_column("postal_codes", sa.Column("insee_code", sa.String()))
|
||||||
|
|
||||||
|
|
||||||
|
def downgrade():
|
||||||
|
op.drop_column("postal_codes", "insee_code")
|
Loading…
Reference in New Issue
Block a user