Browse Source

Add INSEE filtering

Gautier P 10 months ago
parent
commit
9f328259a7

+ 11
- 1
flatisfy/config.py View File

@@ -32,6 +32,7 @@ DEFAULT_CONFIG = {
32 32
             "house_types": [],  # List of house types, must be in APART, HOUSE,
33 33
             # PARKING, LAND, OTHER or UNKNOWN
34 34
             "postal_codes": [],  # List of postal codes
35
+            "insees": [],  # List of INSEE codes (NOTE(review): validation and filters read "insee_codes", not "insees")
35 36
             "area": (None, None),  # (min, max) in m^2
36 37
             "cost": (None, None),  # (min, max) in currency unit
37 38
             "rooms": (None, None),  # (min, max)
@@ -202,13 +203,22 @@ def validate_config(config, check_with_data):
202 203
             assert "postal_codes" in constraint
203 204
             assert constraint["postal_codes"]
204 205
             assert all(isinstance(x, str) for x in constraint["postal_codes"])
206
+            if "insee_codes" in constraint:
207
+                assert constraint["insee_codes"]
208
+                assert all(isinstance(x, str) for x in constraint["insee_codes"])
209
+
205 210
             if check_with_data:
206 211
                 # Ensure data is built into db
207 212
                 data.preprocess_data(config, force=False)
208 213
                 # Check postal codes and INSEE codes
209
-                opendata_postal_codes = [x.postal_code for x in data.load_data(PostalCode, constraint, config)]
214
+                opendata = data.load_data(PostalCode, constraint, config)
215
+                opendata_postal_codes = [x.postal_code for x in opendata]
216
+                opendata_insee_codes = [x.insee_code for x in opendata]
210 217
                 for postal_code in constraint["postal_codes"]:
211 218
                     assert postal_code in opendata_postal_codes  # noqa: E501
219
+                if "insee_codes" in constraint:
220
+                    for insee in constraint["insee_codes"]:
221
+                        assert insee in opendata_insee_codes  # noqa: E501
212 222
 
213 223
             assert "area" in constraint
214 224
             _check_constraints_bounds(constraint["area"])

+ 2
- 1
flatisfy/data_files/__init__.py View File

@@ -151,7 +151,7 @@ def _preprocess_laposte():
151 151
         try:
152 152
             area = french_postal_codes_to_quarter(fields["code_postal"])
153 153
             if area is None:
154
-                LOGGER.info(
154
+                LOGGER.debug(
155 155
                     "No matching area found for postal code %s, skipping it.",
156 156
                     fields["code_postal"],
157 157
                 )
@@ -167,6 +167,7 @@ def _preprocess_laposte():
167 167
                 PostalCode(
168 168
                     area=area,
169 169
                     postal_code=fields["code_postal"],
170
+                    insee_code=fields["code_commune_insee"],
170 171
                     name=name,
171 172
                     lat=fields["coordonnees_gps"][0],
172 173
                     lng=fields["coordonnees_gps"][1],

+ 16
- 1
flatisfy/filters/__init__.py View File

@@ -37,7 +37,22 @@ def refine_with_housing_criteria(flats_list, constraint):
37 37
         # Check postal code
38 38
         postal_code = flat["flatisfy"].get("postal_code", None)
39 39
         if postal_code and postal_code not in constraint["postal_codes"]:
40
-            LOGGER.info("Postal code %s for flat %s is out of range.", postal_code, flat["id"])
40
+            LOGGER.info(
41
+                "Postal code %s for flat %s is out of range (%s).",
42
+                postal_code,
43
+                flat["id"],
44
+                ", ".join(constraint["postal_codes"]),
45
+            )
46
+            is_ok[i] = is_ok[i] and False
47
+        # Check insee code
48
+        insee_code = flat["flatisfy"].get("insee_code", None)
49
+        if insee_code and "insee_codes" in constraint and insee_code not in constraint["insee_codes"]:
50
+            LOGGER.info(
51
+                "insee code %s for flat %s is out of range (%s).",
52
+                insee_code,
53
+                flat["id"],
54
+                ", ".join(constraint["insee_codes"]),
55
+            )
41 56
             is_ok[i] = is_ok[i] and False
42 57
 
43 58
         # Check time_to

+ 23
- 8
flatisfy/filters/metadata.py View File

@@ -88,8 +88,8 @@ def fuzzy_match(query, choices, limit=3, threshold=75):
88 88
         [('denfert rochereau', 100), ('saint-jacques', 76)]
89 89
     """
90 90
     # TODO: Is there a better confidence measure?
91
-    normalized_query = tools.normalize_string(query)
92
-    normalized_choices = [tools.normalize_string(choice) for choice in choices]
91
+    normalized_query = tools.normalize_string(query).replace("saint", "st")
92
+    normalized_choices = [tools.normalize_string(choice).replace("saint", "st") for choice in choices]
93 93
 
94 94
     # Remove duplicates in the choices list
95 95
     unique_normalized_choices = tools.uniqify(normalized_choices)
@@ -116,10 +116,11 @@ def fuzzy_match(query, choices, limit=3, threshold=75):
116 116
     return matches
117 117
 
118 118
 
119
-def guess_location_position(location, cities, constraint):
119
+def guess_location_position(location, cities, constraint, must_match):
120 120
     # try to find a city
121 121
     # Find all fuzzy-matching cities
122 122
     postal_code = None
123
+    insee_code = None
123 124
     position = None
124 125
 
125 126
     matched_cities = fuzzy_match(location, [x.name for x in cities], limit=None)
@@ -128,6 +129,7 @@ def guess_location_position(location, cities, constraint):
128 129
         matched_postal_codes = []
129 130
         for matched_city_name, _ in matched_cities:
130 131
             postal_code_objects_for_city = [x for x in cities if x.name == matched_city_name]
132
+            insee_code = [pc.insee_code for pc in postal_code_objects_for_city][0]
131 133
             matched_postal_codes.extend(pc.postal_code for pc in postal_code_objects_for_city)
132 134
         # Try to match them with postal codes in config constraint
133 135
         matched_postal_codes_in_config = set(matched_postal_codes) & set(constraint["postal_codes"])
@@ -154,7 +156,15 @@ def guess_location_position(location, cities, constraint):
154 156
                 LOGGER.debug(("Found position %s using city %s."), position, matched_city_name)
155 157
                 break
156 158
 
157
-    return (postal_code, position)
159
+    if not postal_code and must_match:
160
+        postal_code = cities[0].postal_code
161
+        position = {
162
+            "lat": cities[0].lat,
163
+            "lng": cities[0].lng,
164
+        }
165
+        insee_code = cities[0].insee_code
166
+
167
+    return (postal_code, insee_code, position)
158 168
 
159 169
 
160 170
 def guess_postal_code(flats_list, constraint, config, distance_threshold=20000):
@@ -189,6 +199,7 @@ def guess_postal_code(flats_list, constraint, config, distance_threshold=20000):
189 199
             continue
190 200
 
191 201
         postal_code = None
202
+        insee_code = None
192 203
         position = None
193 204
 
194 205
         # Try to find a postal code directly
@@ -209,11 +220,12 @@ def guess_postal_code(flats_list, constraint, config, distance_threshold=20000):
209 220
             postal_code = None
210 221
 
211 222
         # Then fetch position (and postal_code if it couldn't be found earlier)
223
+        cities = opendata["postal_codes"]
212 224
         if postal_code:
213
-            cities = [x for x in opendata["postal_codes"] if x.postal_code == postal_code]
214
-            (_, position) = guess_location_position(location, cities, constraint)
215
-        else:
216
-            (postal_code, position) = guess_location_position(location, opendata["postal_codes"], constraint)
225
+            cities = [x for x in cities if x.postal_code == postal_code]
226
+        (postal_code, insee_code, position) = guess_location_position(
227
+            location, cities, constraint, postal_code is not None
228
+        )
217 229
 
218 230
         # Check that postal code is not too far from the ones listed in config,
219 231
         # limit bad fuzzy matching
@@ -257,6 +269,9 @@ def guess_postal_code(flats_list, constraint, config, distance_threshold=20000):
257 269
         else:
258 270
             LOGGER.info("No postal code found for flat %s.", flat["id"])
259 271
 
272
+        if insee_code:
273
+            flat["flatisfy"]["insee_code"] = insee_code
274
+
260 275
         if position:
261 276
             flat["flatisfy"]["position"] = position
262 277
 

+ 1
- 0
flatisfy/models/postal_code.py View File

@@ -27,6 +27,7 @@ class PostalCode(BASE):
27 27
     # following ISO 3166-2.
28 28
     area = Column(String, index=True)
29 29
     postal_code = Column(String, index=True)
30
+    insee_code = Column(String, index=True)
30 31
     name = Column(String, index=True)
31 32
     lat = Column(Float)
32 33
     lng = Column(Float)

+ 24
- 0
migrations/versions/9e58c66f1ac1_add_flat_insee_column.py View File

@@ -0,0 +1,24 @@
1
+"""Add INSEE code column to postal codes
2
+
3
+Revision ID: 9e58c66f1ac1
4
+Revises: d21933db9ad8
5
+Create Date: 2021-02-08 16:31:18.961186
6
+
7
+"""
8
+from alembic import op
9
+import sqlalchemy as sa
10
+
11
+
12
+# revision identifiers, used by Alembic.
13
+revision = "9e58c66f1ac1"
14
+down_revision = "d21933db9ad8"
15
+branch_labels = None
16
+depends_on = None
17
+
18
+
19
+def upgrade():
20
+    op.add_column("postal_codes", sa.Column("insee_code", sa.String()))
21
+
22
+
23
+def downgrade():
24
+    op.drop_column("postal_codes", "insee_code")