def guess_location_position(location, cities, constraint):
    """
    Guess a postal code and a GPS position from a free-text location.

    The location is fuzzy-matched against the names of the given cities.
    Among the matching cities, one whose postal code is listed in the
    constraint is preferred; otherwise the best-ranked match is used.

    :param location: Free-text location string passed to ``fuzzy_match``.
    :param cities: Iterable of objects exposing ``name``, ``postal_code``,
        ``lat`` and ``lng`` attributes.
    :param constraint: Mapping with a ``"postal_codes"`` entry listing the
        postal codes to prefer.
    :return: A ``(postal_code, position)`` tuple, where ``position`` is a
        ``{"lat": ..., "lng": ...}`` dict. Both items are ``None`` when no
        city could be matched.
    """
    # Find all fuzzy-matching cities
    matched_cities = fuzzy_match(
        location,
        [city.name for city in cities],
        limit=None
    )
    if not matched_cities:
        return (None, None)

    # Index cities by name once, instead of rescanning the whole list for
    # every match.
    cities_by_name = {}
    for city in cities:
        cities_by_name.setdefault(city.name, []).append(city)

    # Candidate cities, kept in fuzzy-match order.
    candidates = [
        city
        for matched_city_name, _ in matched_cities
        for city in cities_by_name.get(matched_city_name, [])
    ]
    if not candidates:
        # No matched name maps back to a known city: nothing to guess from.
        return (None, None)

    # If some matched postal codes are also in config, use them
    # preferentially. This avoids incorrectly ignoring some flats in cities
    # with multiple postal codes, see #110. Taking the first one in match
    # order (rather than an arbitrary set element) keeps the result
    # deterministic across runs. Otherwise, simply take the first match.
    postal_codes_in_config = set(constraint["postal_codes"])
    chosen_city = next(
        (
            city for city in candidates
            if city.postal_code in postal_codes_in_config
        ),
        candidates[0]
    )

    # Take the city position
    position = {"lat": chosen_city.lat, "lng": chosen_city.lng}
    LOGGER.info(
        "Found position %s using city %s.",
        position, chosen_city.name
    )

    return (chosen_city.postal_code, position)
to match them with postal codes in config constraint - matched_postal_codes_in_config = ( - set(matched_postal_codes) & set(constraint["postal_codes"]) - ) - if matched_postal_codes_in_config: - # If there are some matched postal codes which are also in - # config, use them preferentially. This avoid ignoring - # incorrectly some flats in cities with multiple postal - # codes, see #110. - postal_code = next(iter(matched_postal_codes_in_config)) - else: - # Otherwise, simply take any matched postal code. - postal_code = matched_postal_codes[0] - LOGGER.info( - ("Found postal code in location field through city lookup " - "for flat %s: %s."), - flat["id"], postal_code - ) + # Then fetch position (and postal_code is couldn't be found earlier) + if postal_code: + cities = [x for x in opendata["postal_codes"] if x.postal_code == postal_code] + (_, position) = guess_location_position(location, cities, constraint) + else: + (postal_code, position) = guess_location_position(location, opendata["postal_codes"], constraint) # Check that postal code is not too far from the ones listed in config, # limit bad fuzzy matching @@ -241,16 +263,19 @@ def guess_postal_code(flats_list, constraint, config, distance_threshold=20000): if distance > distance_threshold: LOGGER.info( - ("Postal code %s found for flat %s is off-constraints " + ("Postal code %s found for flat %s @ %s is off-constraints " "(distance is %dm > %dm). Let's consider it is an " "artifact match and keep the post without this postal " - "code."), + "code. 
(%s)"), postal_code, flat["id"], + location, int(distance), - int(distance_threshold) + int(distance_threshold), + flat ) postal_code = None + position = None # Store it if postal_code: @@ -264,6 +289,9 @@ def guess_postal_code(flats_list, constraint, config, distance_threshold=20000): else: LOGGER.info("No postal code found for flat %s.", flat["id"]) + if position: + flat["flatisfy"]["position"] = position + return flats_list diff --git a/flatisfy/models/flat.py b/flatisfy/models/flat.py index fe4c0d0..64913e5 100644 --- a/flatisfy/models/flat.py +++ b/flatisfy/models/flat.py @@ -91,6 +91,7 @@ class Flat(BASE): flatisfy_postal_code = Column(String) flatisfy_time_to = Column(MagicJSON) flatisfy_constraint = Column(String) + flatisfy_position = Column(MagicJSON) # Status status = Column(Enum(FlatStatus), default=FlatStatus.new) @@ -183,6 +184,9 @@ class Flat(BASE): flat_dict["flatisfy_postal_code"] = ( flat_dict["flatisfy"].get("postal_code", None) ) + flat_dict["flatisfy_position"] = ( + flat_dict["flatisfy"].get("position", None) + ) flat_dict["flatisfy_time_to"] = ( flat_dict["flatisfy"].get("time_to", {}) ) diff --git a/flatisfy/web/js_src/tools/index.js b/flatisfy/web/js_src/tools/index.js index 38fb58e..ef16047 100644 --- a/flatisfy/web/js_src/tools/index.js +++ b/flatisfy/web/js_src/tools/index.js @@ -1,8 +1,10 @@ export function findFlatGPS (flat) { let gps - // Try to push a marker based on stations - if (flat.flatisfy_stations && flat.flatisfy_stations.length > 0) { + if (flat.flatisfy_position) { + gps = [flat.flatisfy_position.lat, flat.flatisfy_position.lng] + } else if (flat.flatisfy_stations && flat.flatisfy_stations.length > 0) { + // Try to push a marker based on stations gps = [0.0, 0.0] flat.flatisfy_stations.forEach(station => { gps = [gps[0] + station.gps[0], gps[1] + station.gps[1]] diff --git a/migrations/versions/d21933db9ad8_add_flat_position_column.py b/migrations/versions/d21933db9ad8_add_flat_position_column.py new file mode 100644 index 
"""Add flat position column

Revision ID: d21933db9ad8
Revises: 8155b83242eb
Create Date: 2021-02-08 16:26:37.190842

"""
import json

from alembic import op
import sqlalchemy as sa
import sqlalchemy.types as types


class StringyJSON(types.TypeDecorator):
    """
    Stores and retrieves JSON as TEXT for SQLite.

    From
    https://avacariu.me/articles/2016/compiling-json-as-text-for-sqlite-with-sqlalchemy.

    .. note ::

        The associated field is immutable. That is, changes to the data
        (typically, changing the value of a dict field) will not trigger an
        update on the SQL side upon ``commit`` as the reference to the object
        will not have been updated. One should force the update by forcing an
        update of the reference (by performing a ``copy`` operation on the dict
        for instance).
    """

    impl = types.TEXT

    def process_bind_param(self, value, dialect):
        """
        Process the bound param, serialize the object to JSON before saving
        into database.
        """
        if value is not None:
            value = json.dumps(value)
        return value

    def process_result_value(self, value, dialect):
        """
        Process the value fetched from the database, deserialize the JSON
        string before returning the object.
        """
        if value is not None:
            value = json.loads(value)
        return value


# TypeEngine.with_variant says "use StringyJSON instead when
# connecting to 'sqlite'"
# pylint: disable=locally-disabled,invalid-name
MagicJSON = types.JSON().with_variant(StringyJSON, "sqlite")

# revision identifiers, used by Alembic.
revision = "d21933db9ad8"
down_revision = "8155b83242eb"
branch_labels = None
depends_on = None


def upgrade():
    """
    Add the ``flatisfy_position`` JSON column to the ``flats`` table.
    """
    # No default is declared: a boolean is not a meaningful position value,
    # and a Python-side ``default`` is not rendered in the ADD COLUMN
    # statement anyway.
    op.add_column("flats", sa.Column("flatisfy_position", MagicJSON))


def downgrade():
    """
    Drop the ``flatisfy_position`` column from the ``flats`` table.
    """
    op.drop_column("flats", "flatisfy_position")