Rewrite data files management to make it more modular

This is to prepare for the integration of Tcl data, see #58.
Lucas Verney 2017-06-15 15:48:16 +02:00
parent e04e8a0eab
commit 82a88590b6
12 changed files with 390 additions and 198 deletions

View File

@@ -56,3 +56,20 @@ the list of available backends in
 and update the list of `BACKEND_PRECEDENCES` for deduplication in
 [flatisfy/filters/duplicates.py#L24-31](https://git.phyks.me/Phyks/flatisfy/blob/master/flatisfy/filters/duplicates.py#L24-31).
 That's all!
+
+## Adding new data files
+
+If you want to add new data files, especially for public transportation stops
+(to cover more cities), please follow these steps:
+
+1. Download the **original** file and put it in `flatisfy/data_files`. Using
+   the original data file makes licenses easier to track, and keeps the
+   pipeline working in case of a license conflict, since the user can then
+   download the file and put it in the right place themselves.
+2. Mention the added data file and its license in `README.md`, in the
+   dedicated section.
+3. Write a preprocessing function in `flatisfy/data_files/__init__.py`. You
+   can have a look at the existing functions for a model; a sketch is also
+   given below.
+
+Thanks!
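For illustration, here is a minimal sketch of such a preprocessing function. The `stops.json` file name and its fields are hypothetical; it reuses the module's existing imports and names (`LOGGER`, `MODULE_DIR`, `PublicTransport`), and `_preprocess_ratp` in `flatisfy/data_files/__init__.py` is a real example to copy from:

    def _preprocess_example():
        """
        Build SQLAlchemy objects from a hypothetical stops.json opendata file.

        :return: A list of ``PublicTransport`` objects to be inserted in database.
        """
        data_file = "stops.json"  # hypothetical input file
        LOGGER.info("Building from %s data.", data_file)
        try:
            with open(os.path.join(MODULE_DIR, data_file), "r") as fh:
                raw_data = json.load(fh)
        except (IOError, ValueError):
            LOGGER.error("Invalid raw opendata file: %s.", data_file)
            return []
        return [
            PublicTransport(
                name=item["name"],  # hypothetical field names
                area="FR-IDF",  # ISO 3166-2 code of the covered area
                lat=item["lat"],
                lng=item["lng"]
            )
            for item in raw_data
        ]

The function must then be registered in `PREPROCESSING_FUNCTIONS` in the same module so that `preprocess_data` picks it up.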

View File

@@ -73,7 +73,8 @@ which covers Paris. If you want to run the script using some other location,
 you might have to change these files by matching datasets.

 * [LaPoste Hexasmal](https://datanova.legroupe.laposte.fr/explore/dataset/laposte_hexasmal/?disjunctive.code_commune_insee&disjunctive.nom_de_la_commune&disjunctive.code_postal&disjunctive.libell_d_acheminement&disjunctive.ligne_5) for the list of cities and postal codes in France.
-* [RATP stations](https://data.ratp.fr/explore/dataset/positions-geographiques-des-stations-du-reseau-ratp/table/?disjunctive.stop_name&disjunctive.code_postal&disjunctive.departement) for the list of subway stations with their positions in Paris and nearby areas.
+* [RATP (Paris) stations](https://data.ratp.fr/explore/dataset/positions-geographiques-des-stations-du-reseau-ratp/table/?disjunctive.stop_name&disjunctive.code_postal&disjunctive.departement) for the list of subway/tram/bus stations with their positions in Paris and nearby areas.
+* [Tcl (Lyon) stations](https://download.data.grandlyon.com/wfs/rdata?SERVICE=WFS&VERSION=2.0.0&outputformat=GEOJSON&maxfeatures=4601&request=GetFeature&typename=tcl_sytral.tclarret&SRSNAME=urn:ogc:def:crs:EPSG::4326) for the list of subway/tram/bus stations with their positions in Lyon and nearby areas.

 These datasets are licensed under the Open Data Commons Open Database License
 (ODbL): https://opendatacommons.org/licenses/odbl/.

View File

@@ -143,14 +143,24 @@ def main():
                      "you run Flatisfy.")
         sys.exit(1)

+    # Purge command
+    if args.cmd == "purge":
+        cmds.purge_db(config)
+        return
+
     # Build data files
     try:
+        force = False
+        if args.cmd == "build-data":
+            force = True
+        data.preprocess_data(config, force=force)
+        LOGGER.info("Done building data!")
         if args.cmd == "build-data":
-            data.preprocess_data(config, force=True)
             sys.exit(0)
-        else:
-            data.preprocess_data(config)
-    except flatisfy.exceptions.DataBuildError:
+    except flatisfy.exceptions.DataBuildError as exc:
+        LOGGER.error("%s", exc)
         sys.exit(1)

     # Fetch command
@@ -165,6 +175,7 @@ def main():
         print(
             tools.pretty_json(flats_list)
         )
+        return

     # Filter command
     elif args.cmd == "filter":
         # Load and filter flats list
@@ -183,15 +194,15 @@ def main():
             )
         else:
             cmds.import_and_filter(config, load_from_db=True)
+        return

     # Import command
     elif args.cmd == "import":
         cmds.import_and_filter(config, load_from_db=False)
-    # Purge command
-    elif args.cmd == "purge":
-        cmds.purge_db(config)
+        return

     # Serve command
     elif args.cmd == "serve":
         cmds.serve(config)
+        return


 if __name__ == "__main__":

View File

@@ -10,6 +10,8 @@ import logging
 import flatisfy.filters
 from flatisfy import database
 from flatisfy.models import flat as flat_model
+from flatisfy.models import postal_code as postal_code_model
+from flatisfy.models import public_transport as public_transport_model
 from flatisfy import fetch
 from flatisfy import tools
 from flatisfy.filters import metadata
@@ -157,6 +159,10 @@ def purge_db(config):
             # Use (slower) deletion by object, to ensure whoosh index is
             # updated
             session.delete(flat)
+        LOGGER.info("Purging all postal codes from the database.")
+        session.query(postal_code_model.PostalCode).delete()
+        LOGGER.info("Purging all public transport stops from the database.")
+        session.query(public_transport_model.PublicTransport).delete()


 def serve(config):
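Since `purge` now also clears the opendata tables, a full data rebuild can be forced from the CLI. A sketch, assuming the usual `--config` flag (the config file name is illustrative):

    python -m flatisfy purge --config config.json
    python -m flatisfy build-data --config config.json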

View File

@@ -134,6 +134,7 @@ def validate_config(config):
     assert config["max_entries"] is None or (isinstance(config["max_entries"], int) and config["max_entries"] > 0)  # noqa: E501

     assert config["data_directory"] is None or isinstance(config["data_directory"], str)  # noqa: E501
+    assert os.path.isdir(config["data_directory"])
     assert isinstance(config["search_index"], str)
     assert config["modules_path"] is None or isinstance(config["modules_path"], str)  # noqa: E501
@@ -206,6 +207,11 @@ def load_config(args=None):
         LOGGER.debug("Using default XDG data directory: %s.",
                      config_data["data_directory"])

+    if not os.path.isdir(config_data["data_directory"]):
+        LOGGER.info("Creating data directory according to config: %s",
+                    config_data["data_directory"])
+        os.mkdir(config_data["data_directory"])
+
     if config_data["database"] is None:
         config_data["database"] = "sqlite:///" + os.path.join(
             config_data["data_directory"],

View File

@@ -5,17 +5,16 @@ the source opendata files.
 """
 from __future__ import absolute_import, print_function, unicode_literals

-import collections
-import json
 import logging
-import os

 import flatisfy.exceptions
+from flatisfy import database
+from flatisfy import data_files
+from flatisfy.models.postal_code import PostalCode
+from flatisfy.models.public_transport import PublicTransport

 LOGGER = logging.getLogger(__name__)
-MODULE_DIR = os.path.dirname(os.path.realpath(__file__))

 # Try to load lru_cache
 try:
@@ -24,7 +23,8 @@ except ImportError:
     try:
         from functools32 import lru_cache
     except ImportError:
-        lru_cache = lambda maxsize=None: lambda func: func
+        def lru_cache(maxsize=None):
+            return lambda func: func
         LOGGER.warning(
             "`functools.lru_cache` is not available on your system. Consider "
             "installing `functools32` Python module if using Python2 for "
@@ -32,156 +32,59 @@ except ImportError:
     )


-def _preprocess_ratp(output_dir):
-    """
-    Build RATP file from the RATP data.
-
-    :param output_dir: Directory in which the output file should reside.
-    :return: ``True`` on successful build, ``False`` otherwise.
-    """
-    ratp_data_raw = []
-    # Load opendata file
-    try:
-        with open(os.path.join(MODULE_DIR, "data_files/ratp.json"), "r") as fh:
-            ratp_data_raw = json.load(fh)
-    except (IOError, ValueError):
-        LOGGER.error("Invalid raw RATP opendata file.")
-        return False
-
-    # Process it
-    ratp_data = collections.defaultdict(list)
-    for item in ratp_data_raw:
-        stop_name = item["fields"]["stop_name"].lower()
-        ratp_data[stop_name].append({
-            "gps": item["fields"]["coord"],
-            "name": item["fields"]["stop_name"]
-        })
-
-    # Output it
-    with open(os.path.join(output_dir, "ratp.json"), "w") as fh:
-        json.dump(ratp_data, fh)
-
-    return True
-
-
-def _preprocess_laposte(output_dir):
-    """
-    Build JSON files from the postal codes data.
-
-    :param output_dir: Directory in which the output file should reside.
-    :return: ``True`` on successful build, ``False`` otherwise.
-    """
-    raw_laposte_data = []
-    # Load opendata file
-    try:
-        with open(
-            os.path.join(MODULE_DIR, "data_files/laposte.json"), "r"
-        ) as fh:
-            raw_laposte_data = json.load(fh)
-    except (IOError, ValueError):
-        LOGGER.error("Invalid raw LaPoste opendata file.")
-        return False
-
-    # Build postal codes to other infos file
-    postal_codes_data = {}
-    for item in raw_laposte_data:
-        try:
-            postal_codes_data[item["fields"]["code_postal"]] = {
-                "gps": item["fields"]["coordonnees_gps"],
-                "nom": item["fields"]["nom_de_la_commune"].title()
-            }
-        except KeyError:
-            LOGGER.info("Missing data for postal code %s, skipping it.",
-                        item["fields"]["code_postal"])
-    with open(os.path.join(output_dir, "postal_codes.json"), "w") as fh:
-        json.dump(postal_codes_data, fh)
-
-    # Build city name to postal codes and other infos file
-    cities_data = {}
-    for item in raw_laposte_data:
-        try:
-            cities_data[item["fields"]["nom_de_la_commune"].title()] = {
-                "gps": item["fields"]["coordonnees_gps"],
-                "postal_code": item["fields"]["code_postal"]
-            }
-        except KeyError:
-            LOGGER.info("Missing data for city %s, skipping it.",
-                        item["fields"]["nom_de_la_commune"])
-    with open(os.path.join(output_dir, "cities.json"), "w") as fh:
-        json.dump(cities_data, fh)
-
-    return True
-
-
-DATA_FILES = {
-    "ratp.json": {
-        "preprocess": _preprocess_ratp,
-        "output": ["ratp.json"]
-    },
-    "laposte.json": {
-        "preprocess": _preprocess_laposte,
-        "output": ["cities.json", "postal_codes.json"]
-    },
-}
-
-
 def preprocess_data(config, force=False):
     """
-    Ensures that all the necessary data files have been built from the raw
+    Ensures that all the necessary data have been inserted in the database
     opendata files.

     :params config: A config dictionary.
     :params force: Whether to force rebuild or not.
     """
-    LOGGER.debug("Data directory is %s.", config["data_directory"])
-    opendata_directory = os.path.join(config["data_directory"], "opendata")
-    try:
-        LOGGER.info("Ensuring the data directory exists.")
-        os.makedirs(opendata_directory)
-        LOGGER.debug("Created opendata directory at %s.", opendata_directory)
-    except OSError:
-        LOGGER.debug("Opendata directory already existed, doing nothing.")
-
-    # Build all the necessary data files
-    for data_file in DATA_FILES:
-        # Check if already built
-        is_built = all(
-            os.path.isfile(
-                os.path.join(opendata_directory, output)
-            ) for output in DATA_FILES[data_file]["output"]
-        )
-        if not is_built or force:
-            # Build if needed
-            LOGGER.info("Building from {} data.".format(data_file))
-            if not DATA_FILES[data_file]["preprocess"](opendata_directory):
-                raise flatisfy.exceptions.DataBuildError(
-                    "Error with {} data.".format(data_file)
-                )
+    # Check if a build is required
+    get_session = database.init_db(config["database"], config["search_index"])
+    with get_session() as session:
+        is_built = (
+            session.query(PublicTransport).count() > 0 and
+            session.query(PostalCode).count() > 0
+        )
+        if is_built and not force:
+            # No need to rebuild the database, skip
+            return
+        # Otherwise, purge all existing data
+        session.query(PublicTransport).delete()
+        session.query(PostalCode).delete()
+
+    # Build all opendata files
+    for preprocess in data_files.PREPROCESSING_FUNCTIONS:
+        data_objects = preprocess()
+        if not data_objects:
+            raise flatisfy.exceptions.DataBuildError(
+                "Error with %s." % preprocess.__name__
+            )
+        with get_session() as session:
+            session.add_all(data_objects)


 @lru_cache(maxsize=5)
-def load_data(data_type, config):
+def load_data(model, config):
     """
-    Load a given built data file. This function is memoized.
+    Load data of the specified model from the database. Only load data for the
+    specific areas of the postal codes in config.

-    :param data_type: A valid data identifier.
+    :param model: SQLAlchemy model to load.
     :param config: A config dictionary.
-    :return: The loaded data. ``None`` if the query is incorrect.
+    :returns: A list of loaded SQLAlchemy objects from the db.
     """
-    opendata_directory = os.path.join(config["data_directory"], "opendata")
-    datafile_path = os.path.join(opendata_directory, "%s.json" % data_type)
-    data = {}
-    try:
-        with open(datafile_path, "r") as fh:
-            data = json.load(fh)
-    except IOError:
-        LOGGER.error("No such data file: %s.", datafile_path)
-        return None
-    except ValueError:
-        LOGGER.error("Invalid JSON data file: %s.", datafile_path)
-        return None
-
-    if not data:
-        LOGGER.warning("Loading empty data for %s.", data_type)
-
-    return data
+    get_session = database.init_db(config["database"], config["search_index"])
+    results = []
+    with get_session() as session:
+        for postal_code in config["constraints"]["postal_codes"]:
+            area = data_files.french_postal_codes_to_iso_3166(postal_code)
+            results.extend(
+                session.query(model)
+                .filter(model.area == area).all()
+            )
+        # Expunge loaded data from the session to be able to use them
+        # afterwards
+        session.expunge_all()
+    return results
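As a usage sketch, loading the postal codes for the areas spanned by the configured constraints now goes through the database (the config values here are illustrative):

    from flatisfy import data
    from flatisfy.models.postal_code import PostalCode

    config = {
        "database": "sqlite:///flatisfy.db",  # illustrative values
        "search_index": "/tmp/flatisfy_index",
        "constraints": {"postal_codes": ["75014"]},
    }
    # Returns PostalCode objects for the whole FR-IDF area
    postal_codes = data.load_data(PostalCode, config)

Note that `functools.lru_cache` hashes its arguments, so the memoized `load_data` can only be called with a hashable config object; a plain `dict` raises a `TypeError` at call time.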

View File

@@ -0,0 +1,155 @@
+# coding: utf-8
+"""
+Preprocessing functions to convert input opendata files into SQLAlchemy objects
+ready to be stored in the database.
+"""
+import json
+import logging
+import os
+
+from flatisfy.models.postal_code import PostalCode
+from flatisfy.models.public_transport import PublicTransport
+
+LOGGER = logging.getLogger(__name__)
+MODULE_DIR = os.path.dirname(os.path.realpath(__file__))
+
+
+def french_postal_codes_to_iso_3166(postal_code):
+    """
+    Convert a French postal code to the ISO 3166-2 code of the main French
+    subdivision this postal code belongs to.
+
+    :param postal_code: The postal code to convert.
+    :returns: The ISO 3166-2 code of the subdivision or ``None``.
+    """
+    # Mapping between areas (main French subdivisions, ISO 3166-2) and
+    # French departements.
+    # Taken from Wikipedia data.
+    AREA_TO_DEPARTEMENT = {
+        "FR-ARA": ["01", "03", "07", "15", "26", "38", "42", "43", "63", "69",
+                   "73", "74"],
+        "FR-BFC": ["21", "25", "39", "58", "70", "71", "89", "90"],
+        "FR-BRE": ["22", "29", "35", "44", "56"],
+        "FR-CVL": ["18", "28", "36", "37", "41", "45"],
+        "FR-COR": ["20"],
+        "FR-GES": ["08", "10", "51", "52", "54", "55", "57", "67", "68", "88"],
+        "FR-HDF": ["02", "59", "60", "62", "80"],
+        "FR-IDF": ["75", "77", "78", "91", "92", "93", "94", "95"],
+        "FR-NOR": ["14", "27", "50", "61", "76"],
+        "FR-NAQ": ["16", "17", "19", "23", "24", "33", "40", "47", "64", "79",
+                   "86", "87"],
+        "FR-OCC": ["09", "11", "12", "30", "31", "32", "34", "46", "48", "65",
+                   "66", "81", "82"],
+        "FR-PDL": ["44", "49", "53", "72", "85"],
+        "FR-PAC": ["04", "05", "06", "13", "83", "84"]
+    }
+
+    departement = postal_code[:2]
+    return next(
+        (
+            i
+            for i in AREA_TO_DEPARTEMENT
+            if departement in AREA_TO_DEPARTEMENT[i]
+        ),
+        None
+    )
+
+
+def _preprocess_laposte():
+    """
+    Build SQLAlchemy objects from the postal codes data.
+
+    :return: A list of ``PostalCode`` objects to be inserted in database.
+    """
+    data_file = "laposte.json"
+    LOGGER.info("Building from %s data.", data_file)
+
+    raw_laposte_data = []
+    # Load opendata file
+    try:
+        with open(
+            os.path.join(MODULE_DIR, data_file), "r"
+        ) as fh:
+            raw_laposte_data = json.load(fh)
+    except (IOError, ValueError):
+        LOGGER.error("Invalid raw LaPoste opendata file.")
+        return []
+
+    # Build the list of postal codes with their attached infos
+    postal_codes_data = []
+    for item in raw_laposte_data:
+        fields = item["fields"]
+        try:
+            area = french_postal_codes_to_iso_3166(fields["code_postal"])
+            if area is None:
+                LOGGER.info(
+                    "No matching area found for postal code %s, skipping it.",
+                    fields["code_postal"]
+                )
+                continue
+            postal_codes_data.append(PostalCode(
+                area=area,
+                postal_code=fields["code_postal"],
+                name=fields["nom_de_la_commune"].title(),
+                lat=fields["coordonnees_gps"][0],
+                lng=fields["coordonnees_gps"][1]
+            ))
+        except KeyError:
+            LOGGER.info("Missing data for postal code %s, skipping it.",
+                        fields["code_postal"])
+    return postal_codes_data
+
+
+def _preprocess_ratp():
+    """
+    Build SQLAlchemy objects from the RATP data (public transport in Paris,
+    France).
+
+    :return: A list of ``PublicTransport`` objects to be inserted in database.
+    """
+    data_file = "ratp.json"
+    LOGGER.info("Building from %s data.", data_file)
+
+    ratp_data_raw = []
+    # Load opendata file
+    try:
+        with open(os.path.join(MODULE_DIR, data_file), "r") as fh:
+            ratp_data_raw = json.load(fh)
+    except (IOError, ValueError):
+        LOGGER.error("Invalid raw RATP opendata file.")
+        return []
+
+    # Process it
+    ratp_data = []
+    for item in ratp_data_raw:
+        fields = item["fields"]
+        ratp_data.append(PublicTransport(
+            name=fields["stop_name"],
+            area="FR-IDF",
+            lat=fields["coord"][0],
+            lng=fields["coord"][1]
+        ))
+    return ratp_data
+
+
+def _preprocess_tcl():
+    """
+    Build SQLAlchemy objects from the Tcl data (public transport in Lyon,
+    France).
+
+    :return: A list of ``PublicTransport`` objects to be inserted in database.
+    """
+    # TODO: Tcl
+    return []
+
+
+# List of all the available preprocessing functions. Order can be important.
+PREPROCESSING_FUNCTIONS = [
+    _preprocess_laposte,
+    _preprocess_ratp,
+    # _preprocess_tcl
+]
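Since this commit prepares the Tcl integration (#58), here is a hedged sketch of what `_preprocess_tcl` might look like, assuming the GeoJSON export linked in `README.md` (a `features` list with GeoJSON `[lng, lat]` coordinates; the local `tcl.json` file name and the `nom` property name are assumptions):

    def _preprocess_tcl():
        data_file = "tcl.json"  # assumed local copy of the GeoJSON export
        LOGGER.info("Building from %s data.", data_file)
        try:
            with open(os.path.join(MODULE_DIR, data_file), "r") as fh:
                tcl_data_raw = json.load(fh)
        except (IOError, ValueError):
            LOGGER.error("Invalid raw Tcl opendata file.")
            return []
        return [
            PublicTransport(
                name=feature["properties"]["nom"],  # assumed property name
                area="FR-ARA",  # Lyon is in departement 69, hence FR-ARA
                lat=feature["geometry"]["coordinates"][1],  # GeoJSON is [lng, lat]
                lng=feature["geometry"]["coordinates"][0]
            )
            for feature in tcl_data_raw["features"]
        ]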

View File

@@ -11,7 +11,7 @@ from contextlib import contextmanager
 from sqlalchemy import event, create_engine
 from sqlalchemy.engine import Engine
 from sqlalchemy.orm import sessionmaker
-from sqlalchemy.exc import SQLAlchemyError
+from sqlalchemy.exc import OperationalError, SQLAlchemyError

 import flatisfy.models.flat  # noqa: F401
 from flatisfy.database.base import BASE

View File

@@ -12,6 +12,8 @@ import re
 from flatisfy import data
 from flatisfy import tools
+from flatisfy.models.postal_code import PostalCode
+from flatisfy.models.public_transport import PublicTransport

 LOGGER = logging.getLogger(__name__)
@@ -130,8 +132,7 @@ def guess_postal_code(flats_list, config, distance_threshold=20000):
     :return: An updated list of flats dict with guessed postal code.
     """
     opendata = {
-        "cities": data.load_data("cities", config),
-        "postal_codes": data.load_data("postal_codes", config)
+        "postal_codes": data.load_data(PostalCode, config)
     }

     for flat in flats_list:
@@ -155,7 +156,8 @@ def guess_postal_code(flats_list, config, distance_threshold=20000):
                 postal_code = postal_code.group(0)

                 # Check the postal code is within the db
-                assert postal_code in opendata["postal_codes"]
+                assert postal_code in [x.postal_code
+                                       for x in opendata["postal_codes"]]

                 LOGGER.info(
                     "Found postal code in location field for flat %s: %s.",
@@ -165,10 +167,11 @@ def guess_postal_code(flats_list, config, distance_threshold=20000):
                 postal_code = None

         # If not found, try to find a city
+        cities = {x.name: x for x in opendata["postal_codes"]}
         if not postal_code:
             matched_city = fuzzy_match(
                 location,
-                opendata["cities"].keys(),
+                cities.keys(),
                 limit=1
             )
             if matched_city:
@@ -176,7 +179,7 @@ def guess_postal_code(flats_list, config, distance_threshold=20000):
                 matched_city = matched_city[0]
                 matched_city_name = matched_city[0]
                 postal_code = (
-                    opendata["cities"][matched_city_name]["postal_code"]
+                    cities[matched_city_name].postal_code
                 )
                 LOGGER.info(
                     ("Found postal code in location field through city lookup "
@@ -189,8 +192,16 @@ def guess_postal_code(flats_list, config, distance_threshold=20000):
         if postal_code and distance_threshold:
             distance = min(
                 tools.distance(
-                    opendata["postal_codes"][postal_code]["gps"],
-                    opendata["postal_codes"][constraint]["gps"],
+                    next(
+                        (x.lat, x.lng)
+                        for x in opendata["postal_codes"]
+                        if x.postal_code == postal_code
+                    ),
+                    next(
+                        (x.lat, x.lng)
+                        for x in opendata["postal_codes"]
+                        if x.postal_code == constraint
+                    )
                 )
                 for constraint in config["constraints"]["postal_codes"]
             )
@@ -229,9 +240,10 @@ def guess_stations(flats_list, config, distance_threshold=1500):
     :return: An updated list of flats dict with guessed nearby stations.
     """
+    # TODO: opendata["stations"]
     opendata = {
-        "postal_codes": data.load_data("postal_codes", config),
-        "stations": data.load_data("ratp", config)
+        "postal_codes": data.load_data(PostalCode, config),
+        "stations": data.load_data(PublicTransport, config)
     }

     for flat in flats_list:
@@ -247,7 +259,7 @@ def guess_stations(flats_list, config, distance_threshold=1500):
         matched_stations = fuzzy_match(
             flat_station,
-            opendata["stations"].keys(),
+            [x.name for x in opendata["stations"]],
             limit=10,
             threshold=50
         )
@@ -259,24 +271,32 @@ def guess_stations(flats_list, config, distance_threshold=1500):
             if postal_code:
                 # If there is a postal code, check that the matched station is
                 # close to it
-                postal_code_gps = opendata["postal_codes"][postal_code]["gps"]
+                postal_code_gps = next(
+                    (x.lat, x.lng)
+                    for x in opendata["postal_codes"]
+                    if x.postal_code == postal_code
+                )
                 for station in matched_stations:
-                    # opendata["stations"] is a dict mapping station names to list
-                    # of coordinates, for efficiency. Note that multiple stations
-                    # with the same name exist in a city, hence the list of
-                    # coordinates.
-                    for station_data in opendata["stations"][station[0]]:
-                        distance = tools.distance(station_data["gps"],
-                                                  postal_code_gps)
+                    # Note that multiple stations with the same name exist in a
+                    # city, hence the list of stations objects for a given matching
+                    # station name.
+                    stations_objects = [
+                        x for x in opendata["stations"] if x.name == station[0]
+                    ]
+                    for station_data in stations_objects:
+                        distance = tools.distance(
+                            (station_data.lat, station_data.lng),
+                            postal_code_gps
+                        )
                         if distance < distance_threshold:
                             # If at least one of the coordinates for a given
                             # station is close enough, that's ok and we can add
                             # the station
                             good_matched_stations.append({
                                 "key": station[0],
-                                "name": station_data["name"],
+                                "name": station_data.name,
                                 "confidence": station[1],
-                                "gps": station_data["gps"]
+                                "gps": (station_data.lat, station_data.lng)
                             })
                             break
                 LOGGER.debug(
@@ -285,21 +305,9 @@ def guess_stations(flats_list, config, distance_threshold=1500):
                 )
             else:
                 LOGGER.info(
-                    ("No postal code for flat %s, keeping all the matched "
-                     "stations with half confidence."),
+                    "No postal code for flat %s, skipping stations detection.",
                     flat["id"]
                 )
-                # Otherwise, we keep every matching station but with half
-                # confidence
-                good_matched_stations = [
-                    {
-                        "name": station[0],
-                        "confidence": station[1] * 0.5,
-                        "gps": station_gps
-                    }
-                    for station in matched_stations
-                    for station_gps in opendata["stations"][station[0]]
-                ]

             # Store matched stations and the associated confidence
             LOGGER.info(
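A side note on the lookups above: each `next(...)` scans the whole `opendata["postal_codes"]` list. If profiling ever shows this to be hot, a dict built once per call would avoid the repeated scans (a sketch, not part of this commit):

    postal_codes_map = {x.postal_code: x for x in opendata["postal_codes"]}
    entry = postal_codes_map[postal_code]
    postal_code_gps = (entry.lat, entry.lng)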

View File

@@ -0,0 +1,37 @@
+# coding: utf-8
+"""
+This module defines an SQLAlchemy ORM model for postal code opendata.
+"""
+# pylint: disable=locally-disabled,invalid-name,too-few-public-methods
+from __future__ import absolute_import, print_function, unicode_literals
+
+import logging
+
+from sqlalchemy import (
+    Column, Float, Integer, String, UniqueConstraint
+)
+
+from flatisfy.database.base import BASE
+
+LOGGER = logging.getLogger(__name__)
+
+
+class PostalCode(BASE):
+    """
+    SQLAlchemy ORM model to store postal code opendata.
+    """
+    __tablename__ = "postal_codes"
+
+    id = Column(Integer, primary_key=True)
+    # Area is an identifier to prevent loading unnecessary stops. For now it
+    # follows ISO 3166-2.
+    area = Column(String, index=True)
+    postal_code = Column(String, index=True)
+    name = Column(String, index=True)
+    lat = Column(Float)
+    lng = Column(Float)
+    # A bare UniqueConstraint call is a no-op; it must be declared in
+    # __table_args__ to take effect.
+    __table_args__ = (
+        UniqueConstraint("postal_code", "name"),
+    )
+
+    def __repr__(self):
+        return "<PostalCode(id=%s)>" % self.id

View File

@@ -0,0 +1,35 @@
+# coding: utf-8
+"""
+This module defines an SQLAlchemy ORM model for public transport opendata.
+"""
+# pylint: disable=locally-disabled,invalid-name,too-few-public-methods
+from __future__ import absolute_import, print_function, unicode_literals
+
+import logging
+
+from sqlalchemy import (
+    Column, Float, Integer, String
+)
+
+from flatisfy.database.base import BASE
+
+LOGGER = logging.getLogger(__name__)
+
+
+class PublicTransport(BASE):
+    """
+    SQLAlchemy ORM model to store public transport opendata.
+    """
+    __tablename__ = "public_transports"
+
+    id = Column(Integer, primary_key=True)
+    # Area is an identifier to prevent loading unnecessary stops. For now it
+    # follows ISO 3166-2.
+    area = Column(String, index=True)
+    name = Column(String)
+    lat = Column(Float)
+    lng = Column(Float)
+
+    def __repr__(self):
+        return "<PublicTransport(id=%s)>" % self.id

View File

@@ -12,6 +12,7 @@ import bottle
 import flatisfy.data
 from flatisfy.models import flat as flat_model
+from flatisfy.models.postal_code import PostalCode


 # TODO: Flat post-processing code should be factorized
@@ -38,7 +39,7 @@ def flats_v1(config, db):
     :return: The available flats objects in a JSON ``data`` dict.
     """
-    postal_codes = flatisfy.data.load_data("postal_codes", config)
+    postal_codes = flatisfy.data.load_data(PostalCode, config)

     flats = [
         flat.json_api_repr()
@@ -47,11 +48,15 @@ def flats_v1(config, db):
     for flat in flats:
         if flat["flatisfy_postal_code"]:
-            postal_code_data = postal_codes[flat["flatisfy_postal_code"]]
+            postal_code_data = next(
+                x
+                for x in postal_codes
+                if x.postal_code == flat["flatisfy_postal_code"]
+            )
             flat["flatisfy_postal_code"] = {
                 "postal_code": flat["flatisfy_postal_code"],
-                "name": postal_code_data["nom"],
-                "gps": postal_code_data["gps"]
+                "name": postal_code_data.name,
+                "gps": (postal_code_data.lat, postal_code_data.lng)
             }
         else:
             flat["flatisfy_postal_code"] = {}
@@ -94,7 +99,7 @@ def flat_v1(flat_id, config, db):
     :return: The flat object in a JSON ``data`` dict.
     """
-    postal_codes = flatisfy.data.load_data("postal_codes", config)
+    postal_codes = flatisfy.data.load_data(PostalCode, config)

     flat = db.query(flat_model.Flat).filter_by(id=flat_id).first()
@@ -104,11 +109,15 @@ def flat_v1(flat_id, config, db):
     flat = flat.json_api_repr()

     if flat["flatisfy_postal_code"]:
-        postal_code_data = postal_codes[flat["flatisfy_postal_code"]]
+        postal_code_data = next(
+            x
+            for x in postal_codes
+            if x.postal_code == flat["flatisfy_postal_code"]
+        )
         flat["flatisfy_postal_code"] = {
             "postal_code": flat["flatisfy_postal_code"],
-            "name": postal_code_data["nom"],
-            "gps": postal_code_data["gps"]
+            "name": postal_code_data.name,
+            "gps": (postal_code_data.lat, postal_code_data.lng)
         }
     else:
         flat["flatisfy_postal_code"] = {}
@@ -231,7 +240,7 @@ def search_v1(db, config):
     :return: The matching flat objects in a JSON ``data`` dict.
     """
-    postal_codes = flatisfy.data.load_data("postal_codes", config)
+    postal_codes = flatisfy.data.load_data(PostalCode, config)

     try:
         query = json.load(bottle.request.body)["query"]
@@ -246,11 +255,15 @@ def search_v1(db, config):
     for flat in flats:
         if flat["flatisfy_postal_code"]:
-            postal_code_data = postal_codes[flat["flatisfy_postal_code"]]
+            postal_code_data = next(
+                x
+                for x in postal_codes
+                if x.postal_code == flat["flatisfy_postal_code"]
+            )
             flat["flatisfy_postal_code"] = {
                 "postal_code": flat["flatisfy_postal_code"],
-                "name": postal_code_data["nom"],
-                "gps": postal_code_data["gps"]
+                "name": postal_code_data.name,
+                "gps": (postal_code_data.lat, postal_code_data.lng)
             }
         else:
             flat["flatisfy_postal_code"] = {}