From 82a88590b645565dd23fff7fdab7051d4ae87383 Mon Sep 17 00:00:00 2001 From: "Phyks (Lucas Verney)" Date: Thu, 15 Jun 2017 15:48:16 +0200 Subject: [PATCH] Rewrite data files management, to make it more modular This is to prepare for the integration of Tcl data, see #58. --- CONTRIBUTING.md | 17 +++ README.md | 3 +- flatisfy/__main__.py | 25 +++- flatisfy/cmds.py | 6 + flatisfy/config.py | 6 + flatisfy/data.py | 191 +++++++--------------------- flatisfy/data_files/__init__.py | 155 ++++++++++++++++++++++ flatisfy/database/__init__.py | 2 +- flatisfy/filters/metadata.py | 74 ++++++----- flatisfy/models/postal_code.py | 37 ++++++ flatisfy/models/public_transport.py | 35 +++++ flatisfy/web/routes/api.py | 37 ++++-- 12 files changed, 390 insertions(+), 198 deletions(-) create mode 100644 flatisfy/data_files/__init__.py create mode 100644 flatisfy/models/postal_code.py create mode 100644 flatisfy/models/public_transport.py diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index ab6a079..a4b07d9 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -56,3 +56,20 @@ the list of available backends in and update the list of `BACKEND_PRECEDENCES` for deduplication in [flatisfy/filters/duplicates.py#L24-31](https://git.phyks.me/Phyks/flatisfy/blob/master/flatisfy/filters/duplicates.py#L24-31). Thats' all! + + +## Adding new data files + +If you want to add new data files, especially for public transportation stops +(to cover more cities), please follow these steps: + +1. Download and put the **original** file in `flatisfy/data_files`. Please, + use the original data file to ease tracking licenses and be able to still + have a working pipeline, by letting the user download it and place it in + the right place, in case of license conflict. +2. Mention the added data file and its license in `README.md`, in the + dedicated section. +3. Write a preprocessing function in `flatisfy/data_files/__init__.py`. You + can have a look at the existing functions for a model. + +Thanks! 
diff --git a/README.md b/README.md index 0ff8a4d..54b0b62 100644 --- a/README.md +++ b/README.md @@ -73,7 +73,8 @@ which covers Paris. If you want to run the script using some other location, you might have to change these files by matching datasets. * [LaPoste Hexasmal](https://datanova.legroupe.laposte.fr/explore/dataset/laposte_hexasmal/?disjunctive.code_commune_insee&disjunctive.nom_de_la_commune&disjunctive.code_postal&disjunctive.libell_d_acheminement&disjunctive.ligne_5) for the list of cities and postal codes in France. -* [RATP stations](https://data.ratp.fr/explore/dataset/positions-geographiques-des-stations-du-reseau-ratp/table/?disjunctive.stop_name&disjunctive.code_postal&disjunctive.departement) for the list of subway stations with their positions in Paris and nearby areas. +* [RATP (Paris) stations](https://data.ratp.fr/explore/dataset/positions-geographiques-des-stations-du-reseau-ratp/table/?disjunctive.stop_name&disjunctive.code_postal&disjunctive.departement) for the list of subway/tram/bus stations with their positions in Paris and nearby areas. +* [Tcl (Lyon) stations](https://download.data.grandlyon.com/wfs/rdata?SERVICE=WFS&VERSION=2.0.0&outputformat=GEOJSON&maxfeatures=4601&request=GetFeature&typename=tcl_sytral.tclarret&SRSNAME=urn:ogc:def:crs:EPSG::4326) for the list of subway/tram/bus stations with their positions in Lyon and nearby areas. Both datasets are licensed under the Open Data Commons Open Database License (ODbL): https://opendatacommons.org/licenses/odbl/. 
diff --git a/flatisfy/__main__.py b/flatisfy/__main__.py index 207ef8a..873c633 100644 --- a/flatisfy/__main__.py +++ b/flatisfy/__main__.py @@ -143,14 +143,24 @@ def main(): "you run Flatisfy.") sys.exit(1) + # Purge command + if args.cmd == "purge": + cmds.purge_db(config) + return + # Build data files try: + force = False + if args.cmd == "build-data": + force = True + + data.preprocess_data(config, force=force) + LOGGER.info("Done building data!") + if args.cmd == "build-data": - data.preprocess_data(config, force=True) sys.exit(0) - else: - data.preprocess_data(config) - except flatisfy.exceptions.DataBuildError: + except flatisfy.exceptions.DataBuildError as exc: + LOGGER.error("%s", exc) sys.exit(1) # Fetch command @@ -165,6 +175,7 @@ def main(): print( tools.pretty_json(flats_list) ) + return # Filter command elif args.cmd == "filter": # Load and filter flats list @@ -183,15 +194,15 @@ def main(): ) else: cmds.import_and_filter(config, load_from_db=True) + return # Import command elif args.cmd == "import": cmds.import_and_filter(config, load_from_db=False) - # Purge command - elif args.cmd == "purge": - cmds.purge_db(config) + return # Serve command elif args.cmd == "serve": cmds.serve(config) + return if __name__ == "__main__": diff --git a/flatisfy/cmds.py b/flatisfy/cmds.py index 2108fed..f5cbd92 100644 --- a/flatisfy/cmds.py +++ b/flatisfy/cmds.py @@ -10,6 +10,8 @@ import logging import flatisfy.filters from flatisfy import database from flatisfy.models import flat as flat_model +from flatisfy.models import postal_code as postal_code_model +from flatisfy.models import public_transport as public_transport_model from flatisfy import fetch from flatisfy import tools from flatisfy.filters import metadata @@ -157,6 +159,10 @@ def purge_db(config): # Use (slower) deletion by object, to ensure whoosh index is # updated session.delete(flat) + LOGGER.info("Purge all postal codes from the database.") + session.query(postal_code_model.PostalCode).delete() + 
LOGGER.info("Purge all public transportations from the database.") + session.query(public_transport_model.PublicTransport).delete() def serve(config): diff --git a/flatisfy/config.py b/flatisfy/config.py index 090b735..03d36f2 100644 --- a/flatisfy/config.py +++ b/flatisfy/config.py @@ -134,6 +134,7 @@ def validate_config(config): assert config["max_entries"] is None or (isinstance(config["max_entries"], int) and config["max_entries"] > 0) # noqa: E501 assert config["data_directory"] is None or isinstance(config["data_directory"], str) # noqa: E501 + assert os.path.isdir(config["data_directory"]) assert isinstance(config["search_index"], str) assert config["modules_path"] is None or isinstance(config["modules_path"], str) # noqa: E501 @@ -206,6 +207,11 @@ def load_config(args=None): LOGGER.debug("Using default XDG data directory: %s.", config_data["data_directory"]) + if not os.path.isdir(config_data["data_directory"]): + LOGGER.info("Creating data directory according to config: %s", + config_data["data_directory"]) + os.mkdir(config_data["data_directory"]) + if config_data["database"] is None: config_data["database"] = "sqlite:///" + os.path.join( config_data["data_directory"], diff --git a/flatisfy/data.py b/flatisfy/data.py index 4024dec..b860b13 100644 --- a/flatisfy/data.py +++ b/flatisfy/data.py @@ -5,17 +5,16 @@ the source opendata files. 
""" from __future__ import absolute_import, print_function, unicode_literals -import collections -import json import logging -import os - import flatisfy.exceptions +from flatisfy import database +from flatisfy import data_files +from flatisfy.models.postal_code import PostalCode +from flatisfy.models.public_transport import PublicTransport LOGGER = logging.getLogger(__name__) -MODULE_DIR = os.path.dirname(os.path.realpath(__file__)) # Try to load lru_cache try: @@ -24,7 +23,8 @@ except ImportError: try: from functools32 import lru_cache except ImportError: - lru_cache = lambda maxsize=None: lambda func: func + def lru_cache(maxsize=None): + return lambda func: func LOGGER.warning( "`functools.lru_cache` is not available on your system. Consider " "installing `functools32` Python module if using Python2 for " @@ -32,156 +32,59 @@ except ImportError: ) -def _preprocess_ratp(output_dir): - """ - Build RATP file from the RATP data. - - :param output_dir: Directory in which the output file should reside. - :return: ``True`` on successful build, ``False`` otherwise. - """ - ratp_data_raw = [] - # Load opendata file - try: - with open(os.path.join(MODULE_DIR, "data_files/ratp.json"), "r") as fh: - ratp_data_raw = json.load(fh) - except (IOError, ValueError): - LOGGER.error("Invalid raw RATP opendata file.") - return False - - # Process it - ratp_data = collections.defaultdict(list) - for item in ratp_data_raw: - stop_name = item["fields"]["stop_name"].lower() - ratp_data[stop_name].append({ - "gps": item["fields"]["coord"], - "name": item["fields"]["stop_name"] - }) - - # Output it - with open(os.path.join(output_dir, "ratp.json"), "w") as fh: - json.dump(ratp_data, fh) - - return True - - -def _preprocess_laposte(output_dir): - """ - Build JSON files from the postal codes data. - - :param output_dir: Directory in which the output file should reside. - :return: ``True`` on successful build, ``False`` otherwise. 
- """ - raw_laposte_data = [] - # Load opendata file - try: - with open( - os.path.join(MODULE_DIR, "data_files/laposte.json"), "r" - ) as fh: - raw_laposte_data = json.load(fh) - except (IOError, ValueError): - LOGGER.error("Invalid raw LaPoste opendata file.") - return False - - # Build postal codes to other infos file - postal_codes_data = {} - for item in raw_laposte_data: - try: - postal_codes_data[item["fields"]["code_postal"]] = { - "gps": item["fields"]["coordonnees_gps"], - "nom": item["fields"]["nom_de_la_commune"].title() - } - except KeyError: - LOGGER.info("Missing data for postal code %s, skipping it.", - item["fields"]["code_postal"]) - with open(os.path.join(output_dir, "postal_codes.json"), "w") as fh: - json.dump(postal_codes_data, fh) - - # Build city name to postal codes and other infos file - cities_data = {} - for item in raw_laposte_data: - try: - cities_data[item["fields"]["nom_de_la_commune"].title()] = { - "gps": item["fields"]["coordonnees_gps"], - "postal_code": item["fields"]["code_postal"] - } - except KeyError: - LOGGER.info("Missing data for city %s, skipping it.", - item["fields"]["nom_de_la_commune"]) - with open(os.path.join(output_dir, "cities.json"), "w") as fh: - json.dump(cities_data, fh) - - return True - - -DATA_FILES = { - "ratp.json": { - "preprocess": _preprocess_ratp, - "output": ["ratp.json"] - }, - "laposte.json": { - "preprocess": _preprocess_laposte, - "output": ["cities.json", "postal_codes.json"] - }, -} - - def preprocess_data(config, force=False): """ - Ensures that all the necessary data files have been built from the raw + Ensures that all the necessary data have been inserted in db from the raw opendata files. :params config: A config dictionary. :params force: Whether to force rebuild or not. 
""" - LOGGER.debug("Data directory is %s.", config["data_directory"]) - opendata_directory = os.path.join(config["data_directory"], "opendata") - try: - LOGGER.info("Ensuring the data directory exists.") - os.makedirs(opendata_directory) - LOGGER.debug("Created opendata directory at %s.", opendata_directory) - except OSError: - LOGGER.debug("Opendata directory already existed, doing nothing.") - - # Build all the necessary data files - for data_file in DATA_FILES: - # Check if already built - is_built = all( - os.path.isfile( - os.path.join(opendata_directory, output) - ) for output in DATA_FILES[data_file]["output"] + # Check if a build is required + get_session = database.init_db(config["database"], config["search_index"]) + with get_session() as session: + is_built = ( + session.query(PublicTransport).count() > 0 and + session.query(PostalCode).count > 0 ) - if not is_built or force: - # Build if needed - LOGGER.info("Building from {} data.".format(data_file)) - if not DATA_FILES[data_file]["preprocess"](opendata_directory): - raise flatisfy.exceptions.DataBuildError( - "Error with {} data.".format(data_file) - ) + if is_built and not force: + # No need to rebuild the database, skip + return + # Otherwise, purge all existing data + session.query(PublicTransport).delete() + session.query(PostalCode).delete() + + # Build all opendata files + for preprocess in data_files.PREPROCESSING_FUNCTIONS: + data_objects = preprocess() + if not data_objects: + raise flatisfy.exceptions.DataBuildError( + "Error with %s." % preprocess.__name__ + ) + with get_session() as session: + session.add_all(data_objects) @lru_cache(maxsize=5) -def load_data(data_type, config): +def load_data(model, config): """ - Load a given built data file. This function is memoized. + Load data of the specified model from the database. Only load data for the + specific areas of the postal codes in config. - :param data_type: A valid data identifier. + :param model: SQLAlchemy model to load. 
:param config: A config dictionary. - :return: The loaded data. ``None`` if the query is incorrect. + :returns: A list of loaded SQLAlchemy objects from the db """ - opendata_directory = os.path.join(config["data_directory"], "opendata") - datafile_path = os.path.join(opendata_directory, "%s.json" % data_type) - data = {} - try: - with open(datafile_path, "r") as fh: - data = json.load(fh) - except IOError: - LOGGER.error("No such data file: %s.", datafile_path) - return None - except ValueError: - LOGGER.error("Invalid JSON data file: %s.", datafile_path) - return None - - if not data: - LOGGER.warning("Loading empty data for %s.", data_type) - - return data + get_session = database.init_db(config["database"], config["search_index"]) + results = [] + with get_session() as session: + for postal_code in config["constraints"]["postal_codes"]: + area = data_files.french_postal_codes_to_iso_3166(postal_code) + results.extend( + session.query(model) + .filter(model.area == area).all() + ) + # Expunge loaded data from the session to be able to use them + # afterwards + session.expunge_all() + return results diff --git a/flatisfy/data_files/__init__.py new file mode 100644 index 0000000..c9b9f6d --- /dev/null +++ b/flatisfy/data_files/__init__.py @@ -0,0 +1,155 @@ +# coding : utf-8 +""" +Preprocessing functions to convert input opendata files into SQLAlchemy objects +ready to be stored in the database. +""" +import json +import logging +import os + +from flatisfy.models.postal_code import PostalCode +from flatisfy.models.public_transport import PublicTransport + + +LOGGER = logging.getLogger(__name__) +MODULE_DIR = os.path.dirname(os.path.realpath(__file__)) + + +def french_postal_codes_to_iso_3166(postal_code): + """ + Convert a French postal code to the main subdivision in France this postal + code belongs to (ISO 3166-2 code). + + :param postal_code: The postal code to convert. 
+ :returns: The ISO 3166-2 code of the subdivision or ``None``. + """ + # Mapping between areas (main subdivisions in French, ISO 3166-2) and + # French departements + # Taken from Wikipedia data. + AREA_TO_DEPARTEMENT = { + "FR-ARA": ["01", "03", "07", "15", "26", "38", "42", "43", "63", "69", + "73", "74"], + "FR-BFC": ["21", "25", "39", "58", "70", "71", "89", "90"], + "FR-BRE": ["22", "29", "35", "44", "56"], + "FR-CVL": ["18", "28", "36", "37", "41", "45"], + "FR-COR": ["20"], + "FR-GES": ["08", "10", "51", "52", "54", "55", "57", "67", "68", "88"], + "FR-HDF": ["02", "59", "60", "62", "80"], + "FR-IDF": ["75", "77", "78", "91", "92", "93", "94", "95"], + "FR-NOR": ["14", "27", "50", "61", "76"], + "FR-NAQ": ["16", "17", "19", "23", "24", "33", "40", "47", "64", "79", + "86", "87"], + "FR-OCC": ["09", "11", "12", "30", "31", "32", "34", "46", "48", "65", + "66", "81", "82"], + "FR-PDL": ["44", "49", "53", "72", "85"], + "FR-PAC": ["04", "05", "06", "13", "83", "84"] + } + + departement = postal_code[:2] + return next( + ( + i + for i in AREA_TO_DEPARTEMENT + if departement in AREA_TO_DEPARTEMENT[i] + ), + None + ) + + + +def _preprocess_laposte(): + """ + Build SQLAlchemy objects from the postal codes data. + + :return: A list of ``PostalCode`` objects to be inserted in database. 
+ """ + data_file = "laposte.json" + LOGGER.info("Building from %s data.", data_file) + + raw_laposte_data = [] + # Load opendata file + try: + with open( + os.path.join(MODULE_DIR, data_file), "r" + ) as fh: + raw_laposte_data = json.load(fh) + except (IOError, ValueError): + LOGGER.error("Invalid raw LaPoste opendata file.") + return [] + + # Build postal codes to other infos file + postal_codes_data = [] + for item in raw_laposte_data: + fields = item["fields"] + try: + area = french_postal_codes_to_iso_3166(fields["code_postal"]) + if area is None: + LOGGER.info( + "No matching area found for postal code %s, skipping it.", + fields["code_postal"] + ) + continue + + postal_codes_data.append(PostalCode( + area=area, + postal_code=fields["code_postal"], + name=fields["nom_de_la_commune"].title(), + lat=fields["coordonnees_gps"][0], + lng=fields["coordonnees_gps"][1] + )) + except KeyError: + LOGGER.info("Missing data for postal code %s, skipping it.", + fields["code_postal"]) + + return postal_codes_data + + +def _preprocess_ratp(): + """ + Build SQLAlchemy objects from the RATP data (public transport in Paris, + France). + + :return: A list of ``PublicTransport`` objects to be inserted in database. + """ + data_file = "ratp.json" + LOGGER.info("Building from %s data.", data_file) + + ratp_data_raw = [] + # Load opendata file + try: + with open(os.path.join(MODULE_DIR, data_file), "r") as fh: + ratp_data_raw = json.load(fh) + except (IOError, ValueError): + LOGGER.error("Invalid raw RATP opendata file.") + return [] + + # Process it + ratp_data = [] + for item in ratp_data_raw: + fields = item["fields"] + ratp_data.append(PublicTransport( + name=fields["stop_name"], + area="FR-IDF", + lat=fields["coord"][0], + lng=fields["coord"][1] + )) + return ratp_data + + +def _preprocess_tcl(): + """ + Build SQLAlchemy objects from the Tcl data (public transport in Lyon, + France). + + :return: A list of ``PublicTransport`` objects to be inserted in database. 
+ """ + # TODO: Tcl + return [] + + +# List of all the available preprocessing functions. Order can be important. +PREPROCESSING_FUNCTIONS = [ + _preprocess_laposte, + _preprocess_ratp, + #_preprocess_tcl +] diff --git a/flatisfy/database/__init__.py b/flatisfy/database/__init__.py index 5c9704d..f627717 100644 --- a/flatisfy/database/__init__.py +++ b/flatisfy/database/__init__.py @@ -11,7 +11,7 @@ from contextlib import contextmanager from sqlalchemy import event, create_engine from sqlalchemy.engine import Engine from sqlalchemy.orm import sessionmaker -from sqlalchemy.exc import SQLAlchemyError +from sqlalchemy.exc import OperationalError, SQLAlchemyError import flatisfy.models.flat # noqa: F401 from flatisfy.database.base import BASE diff --git a/flatisfy/filters/metadata.py b/flatisfy/filters/metadata.py index 76d7b83..0613d11 100644 --- a/flatisfy/filters/metadata.py +++ b/flatisfy/filters/metadata.py @@ -12,6 +12,8 @@ import re from flatisfy import data from flatisfy import tools +from flatisfy.models.postal_code import PostalCode +from flatisfy.models.public_transport import PublicTransport LOGGER = logging.getLogger(__name__) @@ -130,8 +132,7 @@ def guess_postal_code(flats_list, config, distance_threshold=20000): :return: An updated list of flats dict with guessed postal code. 
""" opendata = { - "cities": data.load_data("cities", config), - "postal_codes": data.load_data("postal_codes", config) + "postal_codes": data.load_data(PostalCode, config) } for flat in flats_list: @@ -155,7 +156,8 @@ def guess_postal_code(flats_list, config, distance_threshold=20000): postal_code = postal_code.group(0) # Check the postal code is within the db - assert postal_code in opendata["postal_codes"] + assert postal_code in [x.postal_code + for x in opendata["postal_codes"]] LOGGER.info( "Found postal code in location field for flat %s: %s.", @@ -165,10 +167,11 @@ def guess_postal_code(flats_list, config, distance_threshold=20000): postal_code = None # If not found, try to find a city + cities = {x.name: x for x in opendata["postal_codes"]} if not postal_code: matched_city = fuzzy_match( location, - opendata["cities"].keys(), + cities.keys(), limit=1 ) if matched_city: @@ -176,7 +179,7 @@ def guess_postal_code(flats_list, config, distance_threshold=20000): matched_city = matched_city[0] matched_city_name = matched_city[0] postal_code = ( - opendata["cities"][matched_city_name]["postal_code"] + cities[matched_city_name].postal_code ) LOGGER.info( ("Found postal code in location field through city lookup " @@ -189,8 +192,16 @@ def guess_postal_code(flats_list, config, distance_threshold=20000): if postal_code and distance_threshold: distance = min( tools.distance( - opendata["postal_codes"][postal_code]["gps"], - opendata["postal_codes"][constraint]["gps"], + next( + (x.lat, x.lng) + for x in opendata["postal_codes"] + if x.postal_code == postal_code + ), + next( + (x.lat, x.lng) + for x in opendata["postal_codes"] + if x.postal_code == constraint + ) ) for constraint in config["constraints"]["postal_codes"] ) @@ -229,9 +240,10 @@ def guess_stations(flats_list, config, distance_threshold=1500): :return: An updated list of flats dict with guessed nearby stations. 
""" + # TODO: opendata["stations"] opendata = { - "postal_codes": data.load_data("postal_codes", config), - "stations": data.load_data("ratp", config) + "postal_codes": data.load_data(PostalCode, config), + "stations": data.load_data(PublicTransport, config) } for flat in flats_list: @@ -247,7 +259,7 @@ def guess_stations(flats_list, config, distance_threshold=1500): matched_stations = fuzzy_match( flat_station, - opendata["stations"].keys(), + [x.name for x in opendata["stations"]], limit=10, threshold=50 ) @@ -259,24 +271,32 @@ def guess_stations(flats_list, config, distance_threshold=1500): if postal_code: # If there is a postal code, check that the matched station is # closed to it - postal_code_gps = opendata["postal_codes"][postal_code]["gps"] + postal_code_gps = next( + (x.lat, x.lng) + for x in opendata["postal_codes"] + if x.postal_code == postal_code + ) for station in matched_stations: - # opendata["stations"] is a dict mapping station names to list - # of coordinates, for efficiency. Note that multiple stations - # with the same name exist in a city, hence the list of - # coordinates. - for station_data in opendata["stations"][station[0]]: - distance = tools.distance(station_data["gps"], - postal_code_gps) + # Note that multiple stations with the same name exist in a + # city, hence the list of stations objects for a given matching + # station name. 
+ stations_objects = [ + x for x in opendata["stations"] if x.name == station[0] + ] + for station_data in stations_objects: + distance = tools.distance( + (station_data.lat, station_data.lng), + postal_code_gps + ) if distance < distance_threshold: # If at least one of the coordinates for a given # station is close enough, that's ok and we can add # the station good_matched_stations.append({ "key": station[0], - "name": station_data["name"], + "name": station_data.name, "confidence": station[1], - "gps": station_data["gps"] + "gps": (station_data.lat, station_data.lng) }) break LOGGER.debug( @@ -285,21 +305,9 @@ def guess_stations(flats_list, config, distance_threshold=1500): ) else: LOGGER.info( - ("No postal code for flat %s, keeping all the matched " - "stations with half confidence."), + "No postal code for flat %s, skipping stations detection.", flat["id"] ) - # Otherwise, we keep every matching station but with half - # confidence - good_matched_stations = [ - { - "name": station[0], - "confidence": station[1] * 0.5, - "gps": station_gps - } - for station in matched_stations - for station_gps in opendata["stations"][station[0]] - ] # Store matched stations and the associated confidence LOGGER.info( diff --git a/flatisfy/models/postal_code.py b/flatisfy/models/postal_code.py new file mode 100644 index 0000000..10619c9 --- /dev/null +++ b/flatisfy/models/postal_code.py @@ -0,0 +1,37 @@ +# coding: utf-8 +""" +This modules defines an SQLAlchemy ORM model for a postal code opendata. +""" +# pylint: disable=locally-disabled,invalid-name,too-few-public-methods +from __future__ import absolute_import, print_function, unicode_literals + +import logging + +from sqlalchemy import ( + Column, Float, Integer, String, UniqueConstraint +) + +from flatisfy.database.base import BASE + + +LOGGER = logging.getLogger(__name__) + + +class PostalCode(BASE): + """ + SQLAlchemy ORM model to store a postal code opendata. 
+ """ + __tablename__ = "postal_codes" + + id = Column(Integer, primary_key=True) + # Area is an identifier to prevent loading unnecessary stops. For now it is + # following ISO 3166-2. + area = Column(String, index=True) + postal_code = Column(String, index=True) + name = Column(String, index=True) + lat = Column(Float) + lng = Column(Float) + UniqueConstraint("postal_code", "name") + + def __repr__(self): + return "" % self.id diff --git a/flatisfy/models/public_transport.py b/flatisfy/models/public_transport.py new file mode 100644 index 0000000..d1b584d --- /dev/null +++ b/flatisfy/models/public_transport.py @@ -0,0 +1,35 @@ +# coding: utf-8 +""" +This modules defines an SQLAlchemy ORM model for public transport opendata. +""" +# pylint: disable=locally-disabled,invalid-name,too-few-public-methods +from __future__ import absolute_import, print_function, unicode_literals + +import logging + +from sqlalchemy import ( + Column, Float, Integer, String +) + +from flatisfy.database.base import BASE + + +LOGGER = logging.getLogger(__name__) + + +class PublicTransport(BASE): + """ + SQLAlchemy ORM model to store public transport opendata. + """ + __tablename__ = "public_transports" + + id = Column(Integer, primary_key=True) + # Area is an identifier to prevent loading unnecessary stops. For now it is + # following ISO 3166-2. + area = Column(String, index=True) + name = Column(String) + lat = Column(Float) + lng = Column(Float) + + def __repr__(self): + return "" % self.id diff --git a/flatisfy/web/routes/api.py b/flatisfy/web/routes/api.py index e4e1018..297e2a0 100644 --- a/flatisfy/web/routes/api.py +++ b/flatisfy/web/routes/api.py @@ -12,6 +12,7 @@ import bottle import flatisfy.data from flatisfy.models import flat as flat_model +from flatisfy.models.postal_code import PostalCode # TODO: Flat post-processing code should be factorized @@ -38,7 +39,7 @@ def flats_v1(config, db): :return: The available flats objects in a JSON ``data`` dict. 
""" - postal_codes = flatisfy.data.load_data("postal_codes", config) + postal_codes = flatisfy.data.load_data(PostalCode, config) flats = [ flat.json_api_repr() @@ -47,11 +48,15 @@ def flats_v1(config, db): for flat in flats: if flat["flatisfy_postal_code"]: - postal_code_data = postal_codes[flat["flatisfy_postal_code"]] + postal_code_data = next( + x + for x in postal_codes + if x.postal_code == flat["flatisfy_postal_code"] + ) flat["flatisfy_postal_code"] = { "postal_code": flat["flatisfy_postal_code"], - "name": postal_code_data["nom"], - "gps": postal_code_data["gps"] + "name": postal_code_data["name"], + "gps": (postal_code_data["lat"], postal_code_data["lng"]) } else: flat["flatisfy_postal_code"] = {} @@ -94,7 +99,7 @@ def flat_v1(flat_id, config, db): :return: The flat object in a JSON ``data`` dict. """ - postal_codes = flatisfy.data.load_data("postal_codes", config) + postal_codes = flatisfy.data.load_data(PostalCode, config) flat = db.query(flat_model.Flat).filter_by(id=flat_id).first() @@ -104,11 +109,15 @@ def flat_v1(flat_id, config, db): flat = flat.json_api_repr() if flat["flatisfy_postal_code"]: - postal_code_data = postal_codes[flat["flatisfy_postal_code"]] + postal_code_data = next( + x + for x in postal_codes + if x.postal_code == flat["flatisfy_postal_code"] + ) flat["flatisfy_postal_code"] = { "postal_code": flat["flatisfy_postal_code"], - "name": postal_code_data["nom"], - "gps": postal_code_data["gps"] + "name": postal_code_data["name"], + "gps": (postal_code_data["lat"], postal_code_data["lng"]) } else: flat["flatisfy_postal_code"] = {} @@ -231,7 +240,7 @@ def search_v1(db, config): :return: The matching flat objects in a JSON ``data`` dict. 
""" - postal_codes = flatisfy.data.load_data("postal_codes", config) + postal_codes = flatisfy.data.load_data(PostalCode, config) try: query = json.load(bottle.request.body)["query"] @@ -246,11 +255,15 @@ def search_v1(db, config): for flat in flats: if flat["flatisfy_postal_code"]: - postal_code_data = postal_codes[flat["flatisfy_postal_code"]] + postal_code_data = next( + x + for x in postal_codes + if x.postal_code == flat["flatisfy_postal_code"] + ) flat["flatisfy_postal_code"] = { "postal_code": flat["flatisfy_postal_code"], - "name": postal_code_data["nom"], - "gps": postal_code_data["gps"] + "name": postal_code_data["name"], + "gps": (postal_code_data["lat"], postal_code_data["lng"]) } else: flat["flatisfy_postal_code"] = {}