Rewrite data files management to make it more modular

This is to prepare for the integration of Tcl data, see #58.
Lucas Verney 2017-06-15 15:48:16 +02:00
parent e04e8a0eab
commit 82a88590b6
12 changed files with 390 additions and 198 deletions

View File

@@ -56,3 +56,20 @@ the list of available backends in
 and update the list of `BACKEND_PRECEDENCES` for deduplication in
 [flatisfy/filters/duplicates.py#L24-31](https://git.phyks.me/Phyks/flatisfy/blob/master/flatisfy/filters/duplicates.py#L24-31).
 That's all!
+
+## Adding new data files
+
+If you want to add new data files, especially for public transportation stops
+(to cover more cities), please follow these steps:
+
+1. Download the **original** file and put it in `flatisfy/data_files`. Using
+   the original data file eases license tracking and keeps the pipeline
+   working in case of a license conflict, since the user can then download
+   the file themselves and put it in the right place.
+2. Mention the added data file and its license in `README.md`, in the
+   dedicated section.
+3. Write a preprocessing function in `flatisfy/data_files/__init__.py` (see
+   the sketch after this diff). You can have a look at the existing functions
+   for a model.
+
+Thanks!
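For step 3, here is a minimal sketch of such a preprocessing function, modeled on the `_preprocess_ratp` function added by this commit. The `my_city.json` file name, its field layout and the `FR-XXX` area code are placeholders to adapt to the actual opendata file:

```python
# Hypothetical example for flatisfy/data_files/__init__.py.
import json
import logging
import os

from flatisfy.models.public_transport import PublicTransport

LOGGER = logging.getLogger(__name__)
MODULE_DIR = os.path.dirname(os.path.realpath(__file__))


def _preprocess_my_city():
    """
    Build SQLAlchemy objects from the hypothetical `my_city.json` file.

    :return: A list of ``PublicTransport`` objects to insert in database,
        or an empty list on error (an empty list aborts the data build).
    """
    try:
        with open(os.path.join(MODULE_DIR, "my_city.json"), "r") as fh:
            raw_data = json.load(fh)
    except (IOError, ValueError):
        LOGGER.error("Invalid raw my_city opendata file.")
        return []
    return [
        PublicTransport(
            name=item["fields"]["stop_name"],
            area="FR-XXX",  # ISO 3166-2 code of the covered subdivision
            lat=item["fields"]["coord"][0],
            lng=item["fields"]["coord"][1]
        )
        for item in raw_data
    ]
```

Then register the function in the `PREPROCESSING_FUNCTIONS` list at the bottom of the module so that `preprocess_data` picks it up.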

View File

@@ -73,7 +73,8 @@ which covers Paris. If you want to run the script using some other location,
 you might have to change these files by matching datasets.
 
 * [LaPoste Hexasmal](https://datanova.legroupe.laposte.fr/explore/dataset/laposte_hexasmal/?disjunctive.code_commune_insee&disjunctive.nom_de_la_commune&disjunctive.code_postal&disjunctive.libell_d_acheminement&disjunctive.ligne_5) for the list of cities and postal codes in France.
-* [RATP stations](https://data.ratp.fr/explore/dataset/positions-geographiques-des-stations-du-reseau-ratp/table/?disjunctive.stop_name&disjunctive.code_postal&disjunctive.departement) for the list of subway stations with their positions in Paris and nearby areas.
+* [RATP (Paris) stations](https://data.ratp.fr/explore/dataset/positions-geographiques-des-stations-du-reseau-ratp/table/?disjunctive.stop_name&disjunctive.code_postal&disjunctive.departement) for the list of subway/tram/bus stations with their positions in Paris and nearby areas.
+* [Tcl (Lyon) stations](https://download.data.grandlyon.com/wfs/rdata?SERVICE=WFS&VERSION=2.0.0&outputformat=GEOJSON&maxfeatures=4601&request=GetFeature&typename=tcl_sytral.tclarret&SRSNAME=urn:ogc:def:crs:EPSG::4326) for the list of subway/tram/bus stations with their positions in Lyon and nearby areas.
 
 These datasets are licensed under the Open Data Commons Open Database License
 (ODbL): https://opendatacommons.org/licenses/odbl/.

View File

@@ -143,14 +143,24 @@ def main():
                      "you run Flatisfy.")
         sys.exit(1)
 
+    # Purge command
+    if args.cmd == "purge":
+        cmds.purge_db(config)
+        return
+
     # Build data files
     try:
+        force = False
+        if args.cmd == "build-data":
+            force = True
+        data.preprocess_data(config, force=force)
+        LOGGER.info("Done building data!")
         if args.cmd == "build-data":
-            data.preprocess_data(config, force=True)
             sys.exit(0)
-        else:
-            data.preprocess_data(config)
-    except flatisfy.exceptions.DataBuildError:
+    except flatisfy.exceptions.DataBuildError as exc:
+        LOGGER.error("%s", exc)
         sys.exit(1)
 
     # Fetch command
@@ -165,6 +175,7 @@ def main():
         print(
             tools.pretty_json(flats_list)
         )
+        return
 
     # Filter command
     elif args.cmd == "filter":
         # Load and filter flats list
@@ -183,15 +194,15 @@ def main():
             )
         else:
             cmds.import_and_filter(config, load_from_db=True)
+        return
 
     # Import command
     elif args.cmd == "import":
         cmds.import_and_filter(config, load_from_db=False)
-    # Purge command
-    elif args.cmd == "purge":
-        cmds.purge_db(config)
+        return
 
     # Serve command
     elif args.cmd == "serve":
         cmds.serve(config)
+        return
 
 
 if __name__ == "__main__":

View File

@@ -10,6 +10,8 @@ import logging
 import flatisfy.filters
 from flatisfy import database
 from flatisfy.models import flat as flat_model
+from flatisfy.models import postal_code as postal_code_model
+from flatisfy.models import public_transport as public_transport_model
 from flatisfy import fetch
 from flatisfy import tools
 from flatisfy.filters import metadata
@@ -157,6 +159,10 @@ def purge_db(config):
             # Use (slower) deletion by object, to ensure whoosh index is
             # updated
             session.delete(flat)
+        LOGGER.info("Purging all postal codes from the database.")
+        session.query(postal_code_model.PostalCode).delete()
+        LOGGER.info("Purging all public transport stops from the database.")
+        session.query(public_transport_model.PublicTransport).delete()
 
 
 def serve(config):
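The two deletion styles in `purge_db` differ on purpose. A sketch of the trade-off, using the names from this file and a session from `database.init_db`:

```python
with get_session() as session:
    # Per-object deletion runs through the ORM, so hooks tied to Flat
    # objects (here, the whoosh search index update) stay in sync, at the
    # cost of one DELETE per row.
    for flat in session.query(flat_model.Flat).all():
        session.delete(flat)
    # Query.delete() issues a single bulk DELETE and skips per-object ORM
    # events; that is fine for the opendata tables, which have no index
    # to keep in sync.
    session.query(postal_code_model.PostalCode).delete()
    session.query(public_transport_model.PublicTransport).delete()
```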

View File

@@ -134,6 +134,7 @@ def validate_config(config):
     assert config["max_entries"] is None or (isinstance(config["max_entries"], int) and config["max_entries"] > 0)  # noqa: E501
 
     assert config["data_directory"] is None or isinstance(config["data_directory"], str)  # noqa: E501
+    assert os.path.isdir(config["data_directory"])
     assert isinstance(config["search_index"], str)
     assert config["modules_path"] is None or isinstance(config["modules_path"], str)  # noqa: E501
@@ -206,6 +207,11 @@ def load_config(args=None):
         LOGGER.debug("Using default XDG data directory: %s.",
                      config_data["data_directory"])
 
+    if not os.path.isdir(config_data["data_directory"]):
+        LOGGER.info("Creating data directory according to config: %s",
+                    config_data["data_directory"])
+        os.mkdir(config_data["data_directory"])
+
     if config_data["database"] is None:
         config_data["database"] = "sqlite:///" + os.path.join(
             config_data["data_directory"],

View File

@@ -5,17 +5,16 @@ the source opendata files.
 """
 from __future__ import absolute_import, print_function, unicode_literals
 
-import collections
-import json
 import logging
-import os
 
 import flatisfy.exceptions
+
+from flatisfy import database
+from flatisfy import data_files
+from flatisfy.models.postal_code import PostalCode
+from flatisfy.models.public_transport import PublicTransport
 
 LOGGER = logging.getLogger(__name__)
-MODULE_DIR = os.path.dirname(os.path.realpath(__file__))
 
 # Try to load lru_cache
 try:
@@ -24,7 +23,8 @@ except ImportError:
     try:
         from functools32 import lru_cache
     except ImportError:
-        lru_cache = lambda maxsize=None: lambda func: func
+        def lru_cache(maxsize=None):
+            return lambda func: func
         LOGGER.warning(
             "`functools.lru_cache` is not available on your system. Consider "
             "installing `functools32` Python module if using Python2 for "
@@ -32,156 +32,59 @@ except ImportError:
         )
 
 
-def _preprocess_ratp(output_dir):
-    """
-    Build RATP file from the RATP data.
-
-    :param output_dir: Directory in which the output file should reside.
-    :return: ``True`` on successful build, ``False`` otherwise.
-    """
-    ratp_data_raw = []
-    # Load opendata file
-    try:
-        with open(os.path.join(MODULE_DIR, "data_files/ratp.json"), "r") as fh:
-            ratp_data_raw = json.load(fh)
-    except (IOError, ValueError):
-        LOGGER.error("Invalid raw RATP opendata file.")
-        return False
-
-    # Process it
-    ratp_data = collections.defaultdict(list)
-    for item in ratp_data_raw:
-        stop_name = item["fields"]["stop_name"].lower()
-        ratp_data[stop_name].append({
-            "gps": item["fields"]["coord"],
-            "name": item["fields"]["stop_name"]
-        })
-
-    # Output it
-    with open(os.path.join(output_dir, "ratp.json"), "w") as fh:
-        json.dump(ratp_data, fh)
-
-    return True
-
-
-def _preprocess_laposte(output_dir):
-    """
-    Build JSON files from the postal codes data.
-
-    :param output_dir: Directory in which the output file should reside.
-    :return: ``True`` on successful build, ``False`` otherwise.
-    """
-    raw_laposte_data = []
-    # Load opendata file
-    try:
-        with open(
-            os.path.join(MODULE_DIR, "data_files/laposte.json"), "r"
-        ) as fh:
-            raw_laposte_data = json.load(fh)
-    except (IOError, ValueError):
-        LOGGER.error("Invalid raw LaPoste opendata file.")
-        return False
-
-    # Build postal codes to other infos file
-    postal_codes_data = {}
-    for item in raw_laposte_data:
-        try:
-            postal_codes_data[item["fields"]["code_postal"]] = {
-                "gps": item["fields"]["coordonnees_gps"],
-                "nom": item["fields"]["nom_de_la_commune"].title()
-            }
-        except KeyError:
-            LOGGER.info("Missing data for postal code %s, skipping it.",
-                        item["fields"]["code_postal"])
-    with open(os.path.join(output_dir, "postal_codes.json"), "w") as fh:
-        json.dump(postal_codes_data, fh)
-
-    # Build city name to postal codes and other infos file
-    cities_data = {}
-    for item in raw_laposte_data:
-        try:
-            cities_data[item["fields"]["nom_de_la_commune"].title()] = {
-                "gps": item["fields"]["coordonnees_gps"],
-                "postal_code": item["fields"]["code_postal"]
-            }
-        except KeyError:
-            LOGGER.info("Missing data for city %s, skipping it.",
-                        item["fields"]["nom_de_la_commune"])
-    with open(os.path.join(output_dir, "cities.json"), "w") as fh:
-        json.dump(cities_data, fh)
-
-    return True
-
-
-DATA_FILES = {
-    "ratp.json": {
-        "preprocess": _preprocess_ratp,
-        "output": ["ratp.json"]
-    },
-    "laposte.json": {
-        "preprocess": _preprocess_laposte,
-        "output": ["cities.json", "postal_codes.json"]
-    },
-}
-
-
 def preprocess_data(config, force=False):
     """
-    Ensures that all the necessary data files have been built from the raw
+    Ensures that all the necessary data have been inserted in the database
-    opendata files.
+    from the raw opendata files.
 
     :params config: A config dictionary.
     :params force: Whether to force rebuild or not.
     """
-    LOGGER.debug("Data directory is %s.", config["data_directory"])
-    opendata_directory = os.path.join(config["data_directory"], "opendata")
-    try:
-        LOGGER.info("Ensuring the data directory exists.")
-        os.makedirs(opendata_directory)
-        LOGGER.debug("Created opendata directory at %s.", opendata_directory)
-    except OSError:
-        LOGGER.debug("Opendata directory already existed, doing nothing.")
+    # Check if a build is required
+    get_session = database.init_db(config["database"], config["search_index"])
+    with get_session() as session:
+        is_built = (
+            session.query(PublicTransport).count() > 0 and
+            session.query(PostalCode).count() > 0
+        )
+        if is_built and not force:
+            # No need to rebuild the database, skip
+            return
+        # Otherwise, purge all existing data
+        session.query(PublicTransport).delete()
+        session.query(PostalCode).delete()
 
-    # Build all the necessary data files
-    for data_file in DATA_FILES:
-        # Check if already built
-        is_built = all(
-            os.path.isfile(
-                os.path.join(opendata_directory, output)
-            ) for output in DATA_FILES[data_file]["output"]
-        )
-        if not is_built or force:
-            # Build if needed
-            LOGGER.info("Building from {} data.".format(data_file))
-            if not DATA_FILES[data_file]["preprocess"](opendata_directory):
-                raise flatisfy.exceptions.DataBuildError(
-                    "Error with {} data.".format(data_file)
-                )
+    # Build all opendata files
+    for preprocess in data_files.PREPROCESSING_FUNCTIONS:
+        data_objects = preprocess()
+        if not data_objects:
+            raise flatisfy.exceptions.DataBuildError(
+                "Error with %s." % preprocess.__name__
+            )
+        with get_session() as session:
+            session.add_all(data_objects)
 
 
 @lru_cache(maxsize=5)
-def load_data(data_type, config):
+def load_data(model, config):
     """
-    Load a given built data file. This function is memoized.
+    Load data of the specified model from the database. Only load data for
+    the specific areas of the postal codes in config.
 
-    :param data_type: A valid data identifier.
+    :param model: SQLAlchemy model to load.
     :param config: A config dictionary.
-    :return: The loaded data. ``None`` if the query is incorrect.
+    :returns: A list of loaded SQLAlchemy objects from the db.
     """
-    opendata_directory = os.path.join(config["data_directory"], "opendata")
-    datafile_path = os.path.join(opendata_directory, "%s.json" % data_type)
-    data = {}
-    try:
-        with open(datafile_path, "r") as fh:
-            data = json.load(fh)
-    except IOError:
-        LOGGER.error("No such data file: %s.", datafile_path)
-        return None
-    except ValueError:
-        LOGGER.error("Invalid JSON data file: %s.", datafile_path)
-        return None
-
-    if not data:
-        LOGGER.warning("Loading empty data for %s.", data_type)
-
-    return data
+    get_session = database.init_db(config["database"], config["search_index"])
+    results = []
+    with get_session() as session:
+        for postal_code in config["constraints"]["postal_codes"]:
+            area = data_files.french_postal_codes_to_iso_3166(postal_code)
+            results.extend(
+                session.query(model)
+                .filter(model.area == area).all()
+            )
+        # Expunge loaded data from the session to be able to use them
+        # afterwards
+        session.expunge_all()
+    return results
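A hedged usage sketch of the new pipeline, mirroring the call sites elsewhere in this commit: `preprocess_data` seeds the two tables once (unless `force` is set), and `load_data` then returns detached model objects restricted to the configured areas. The inline config literal is illustrative; a real one comes from `flatisfy.config.load_config`, and note that `load_data` is memoized with `lru_cache`.

```python
from flatisfy import data
from flatisfy.models.public_transport import PublicTransport

# Illustrative config with placeholder URI and index path.
config = {
    "database": "sqlite:///flatisfy.db",
    "search_index": "/tmp/flatisfy_index",
    "constraints": {"postal_codes": ["69001", "69002"]}
}

data.preprocess_data(config)  # no-op when both tables are already populated

# Only rows whose `area` matches the configured postal codes come back;
# FR-ARA here, since departement 69 is Lyon.
for stop in data.load_data(PublicTransport, config):
    print(stop.name, (stop.lat, stop.lng))
```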

View File

@@ -0,0 +1,155 @@
# coding: utf-8
"""
Preprocessing functions to convert input opendata files into SQLAlchemy objects
ready to be stored in the database.
"""
import json
import logging
import os
from flatisfy.models.postal_code import PostalCode
from flatisfy.models.public_transport import PublicTransport
LOGGER = logging.getLogger(__name__)
MODULE_DIR = os.path.dirname(os.path.realpath(__file__))
def french_postal_codes_to_iso_3166(postal_code):
"""
Convert a French postal code to the main subdivision in French this postal
code belongs to (ISO 3166-2 code).
:param postal_code: The postal code to convert.
:returns: The ISO 3166-2 code of the subdivision or ``None``.
"""
# Mapping between areas (main subdivisions in French, ISO 3166-2) and
# French departements
# Taken from Wikipedia data.
AREA_TO_DEPARTEMENT = {
"FR-ARA": ["01", "03", "07", "15", "26", "38", "42", "43", "63", "69",
"73", "74"],
"FR-BFC": ["21", "25", "39", "58", "70", "71", "89", "90"],
"FR-BRE": ["22", "29", "35", "44", "56"],
"FR-CVL": ["18", "28", "36", "37", "41", "45"],
"FR-COR": ["20"],
"FR-GES": ["08", "10", "51", "52", "54", "55", "57", "67", "68", "88"],
"FR-HDF": ["02", "59", "60", "62", "80"],
"FR-IDF": ["75", "77", "78", "91", "92", "93", "94", "95"],
"FR-NOR": ["14", "27", "50", "61", "76"],
"FR-NAQ": ["16", "17", "19", "23", "24", "33", "40", "47", "64", "79",
"86", "87"],
"FR-OCC": ["09", "11", "12", "30", "31", "32", "34", "46", "48", "65",
"66", "81", "82"],
"FR-PDL": ["44", "49", "53", "72", "85"],
"FR-PAC": ["04", "05", "06", "13", "83", "84"]
}
departement = postal_code[:2]
return next(
(
i
for i in AREA_TO_DEPARTEMENT
if departement in AREA_TO_DEPARTEMENT[i]
),
None
)
def _preprocess_laposte():
"""
Build SQLAlchemy objects from the postal codes data.
:return: A list of ``PostalCode`` objects to be inserted in database.
"""
data_file = "laposte.json"
LOGGER.info("Building from %s data.", data_file)
raw_laposte_data = []
# Load opendata file
try:
with open(
os.path.join(MODULE_DIR, data_file), "r"
) as fh:
raw_laposte_data = json.load(fh)
except (IOError, ValueError):
LOGGER.error("Invalid raw LaPoste opendata file.")
return []
    # Build the list of PostalCode objects
postal_codes_data = []
for item in raw_laposte_data:
fields = item["fields"]
try:
area = french_postal_codes_to_iso_3166(fields["code_postal"])
if area is None:
LOGGER.info(
"No matching area found for postal code %s, skipping it.",
fields["code_postal"]
)
continue
postal_codes_data.append(PostalCode(
area=area,
postal_code=fields["code_postal"],
name=fields["nom_de_la_commune"].title(),
lat=fields["coordonnees_gps"][0],
lng=fields["coordonnees_gps"][1]
))
except KeyError:
LOGGER.info("Missing data for postal code %s, skipping it.",
fields["code_postal"])
return postal_codes_data
def _preprocess_ratp():
"""
Build SQLAlchemy objects from the RATP data (public transport in Paris,
France).
:return: A list of ``PublicTransport`` objects to be inserted in database.
"""
data_file = "ratp.json"
LOGGER.info("Building from %s data.", data_file)
ratp_data_raw = []
# Load opendata file
try:
with open(os.path.join(MODULE_DIR, data_file), "r") as fh:
ratp_data_raw = json.load(fh)
except (IOError, ValueError):
LOGGER.error("Invalid raw RATP opendata file.")
return []
# Process it
ratp_data = []
for item in ratp_data_raw:
fields = item["fields"]
ratp_data.append(PublicTransport(
name=fields["stop_name"],
area="FR-IDF",
lat=fields["coord"][0],
lng=fields["coord"][1]
))
return ratp_data
def _preprocess_tcl():
"""
Build SQLAlchemy objects from the Tcl data (public transport in Lyon,
France).
:return: A list of ``PublicTransport`` objects to be inserted in database.
"""
# TODO: Tcl
return []
# List of all the available preprocessing functions. Order can be important.
PREPROCESSING_FUNCTIONS = [
_preprocess_laposte,
_preprocess_ratp,
    # _preprocess_tcl
]
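A few illustrative calls to the new helper; the expected values follow from the mapping above:

```python
from flatisfy.data_files import french_postal_codes_to_iso_3166

print(french_postal_codes_to_iso_3166("75010"))  # "FR-IDF" (Paris)
print(french_postal_codes_to_iso_3166("69001"))  # "FR-ARA" (Lyon)
print(french_postal_codes_to_iso_3166("97400"))  # None: overseas departements
                                                 # are not in the mapping
```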

View File

@@ -11,7 +11,7 @@ from contextlib import contextmanager
 from sqlalchemy import event, create_engine
 from sqlalchemy.engine import Engine
 from sqlalchemy.orm import sessionmaker
-from sqlalchemy.exc import SQLAlchemyError
+from sqlalchemy.exc import OperationalError, SQLAlchemyError
 
 import flatisfy.models.flat  # noqa: F401
 from flatisfy.database.base import BASE

View File

@@ -12,6 +12,8 @@ import re
 
 from flatisfy import data
 from flatisfy import tools
+from flatisfy.models.postal_code import PostalCode
+from flatisfy.models.public_transport import PublicTransport
 
 LOGGER = logging.getLogger(__name__)
@@ -130,8 +132,7 @@ def guess_postal_code(flats_list, config, distance_threshold=20000):
     :return: An updated list of flats dict with guessed postal code.
     """
     opendata = {
-        "cities": data.load_data("cities", config),
-        "postal_codes": data.load_data("postal_codes", config)
+        "postal_codes": data.load_data(PostalCode, config)
     }
 
     for flat in flats_list:
@@ -155,7 +156,8 @@ def guess_postal_code(flats_list, config, distance_threshold=20000):
             postal_code = postal_code.group(0)
 
             # Check the postal code is within the db
-            assert postal_code in opendata["postal_codes"]
+            assert postal_code in [x.postal_code
+                                   for x in opendata["postal_codes"]]
 
             LOGGER.info(
                 "Found postal code in location field for flat %s: %s.",
@@ -165,10 +167,11 @@ def guess_postal_code(flats_list, config, distance_threshold=20000):
             postal_code = None
 
         # If not found, try to find a city
+        cities = {x.name: x for x in opendata["postal_codes"]}
         if not postal_code:
             matched_city = fuzzy_match(
                 location,
-                opendata["cities"].keys(),
+                cities.keys(),
                 limit=1
             )
             if matched_city:
@@ -176,7 +179,7 @@ def guess_postal_code(flats_list, config, distance_threshold=20000):
                 matched_city = matched_city[0]
                 matched_city_name = matched_city[0]
                 postal_code = (
-                    opendata["cities"][matched_city_name]["postal_code"]
+                    cities[matched_city_name].postal_code
                 )
                 LOGGER.info(
                     ("Found postal code in location field through city lookup "
@@ -189,8 +192,16 @@ def guess_postal_code(flats_list, config, distance_threshold=20000):
         if postal_code and distance_threshold:
             distance = min(
                 tools.distance(
-                    opendata["postal_codes"][postal_code]["gps"],
-                    opendata["postal_codes"][constraint]["gps"],
+                    next(
+                        (x.lat, x.lng)
+                        for x in opendata["postal_codes"]
+                        if x.postal_code == postal_code
+                    ),
+                    next(
+                        (x.lat, x.lng)
+                        for x in opendata["postal_codes"]
+                        if x.postal_code == constraint
+                    )
                 )
                 for constraint in config["constraints"]["postal_codes"]
             )
@@ -229,9 +240,10 @@ def guess_stations(flats_list, config, distance_threshold=1500):
     :return: An updated list of flats dict with guessed nearby stations.
     """
+    # TODO: opendata["stations"]
     opendata = {
-        "postal_codes": data.load_data("postal_codes", config),
-        "stations": data.load_data("ratp", config)
+        "postal_codes": data.load_data(PostalCode, config),
+        "stations": data.load_data(PublicTransport, config)
     }
 
     for flat in flats_list:
@@ -247,7 +259,7 @@ def guess_stations(flats_list, config, distance_threshold=1500):
         matched_stations = fuzzy_match(
             flat_station,
-            opendata["stations"].keys(),
+            [x.name for x in opendata["stations"]],
             limit=10,
             threshold=50
         )
@@ -259,24 +271,32 @@
         if postal_code:
             # If there is a postal code, check that the matched station is
             # close to it
-            postal_code_gps = opendata["postal_codes"][postal_code]["gps"]
+            postal_code_gps = next(
+                (x.lat, x.lng)
+                for x in opendata["postal_codes"]
+                if x.postal_code == postal_code
+            )
             for station in matched_stations:
-                # opendata["stations"] is a dict mapping station names to list
-                # of coordinates, for efficiency. Note that multiple stations
-                # with the same name exist in a city, hence the list of
-                # coordinates.
-                for station_data in opendata["stations"][station[0]]:
-                    distance = tools.distance(station_data["gps"],
-                                              postal_code_gps)
+                # Note that multiple stations with the same name exist in a
+                # city, hence the list of station objects for a given matching
+                # station name.
+                stations_objects = [
+                    x for x in opendata["stations"] if x.name == station[0]
+                ]
+                for station_data in stations_objects:
+                    distance = tools.distance(
+                        (station_data.lat, station_data.lng),
+                        postal_code_gps
+                    )
                     if distance < distance_threshold:
                         # If at least one of the coordinates for a given
                         # station is close enough, that's ok and we can add
                         # the station
                         good_matched_stations.append({
                             "key": station[0],
-                            "name": station_data["name"],
+                            "name": station_data.name,
                             "confidence": station[1],
-                            "gps": station_data["gps"]
+                            "gps": (station_data.lat, station_data.lng)
                         })
                         break
             LOGGER.debug(
@@ -285,21 +305,9 @@ def guess_stations(flats_list, config, distance_threshold=1500):
             )
         else:
             LOGGER.info(
-                ("No postal code for flat %s, keeping all the matched "
-                 "stations with half confidence."),
+                "No postal code for flat %s, skipping stations detection.",
                 flat["id"]
             )
-            # Otherwise, we keep every matching station but with half
-            # confidence
-            good_matched_stations = [
-                {
-                    "name": station[0],
-                    "confidence": station[1] * 0.5,
-                    "gps": station_gps
-                }
-                for station in matched_stations
-                for station_gps in opendata["stations"][station[0]]
-            ]
 
         # Store matched stations and the associated confidence
         LOGGER.info(
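The recurring pattern in this file is worth isolating: postal codes are no longer dict keys, so GPS coordinates are found by scanning the loaded objects. A minimal sketch, assuming `postal_codes` is the list returned by `data.load_data(PostalCode, config)`:

```python
def postal_code_gps(postal_codes, code):
    """
    Return the (lat, lng) of the first PostalCode matching ``code``.

    A bare next() raises StopIteration when nothing matches; the callers
    above only reach this point once the postal code has been checked
    against the database.
    """
    return next(
        (x.lat, x.lng)
        for x in postal_codes
        if x.postal_code == code
    )
```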

View File

@@ -0,0 +1,37 @@
# coding: utf-8
"""
This module defines an SQLAlchemy ORM model for postal code opendata.
"""
# pylint: disable=locally-disabled,invalid-name,too-few-public-methods
from __future__ import absolute_import, print_function, unicode_literals
import logging
from sqlalchemy import (
Column, Float, Integer, String, UniqueConstraint
)
from flatisfy.database.base import BASE
LOGGER = logging.getLogger(__name__)
class PostalCode(BASE):
"""
SQLAlchemy ORM model to store a postal code opendata.
"""
__tablename__ = "postal_codes"
id = Column(Integer, primary_key=True)
    # Area is an identifier to prevent loading unnecessary postal codes. For
    # now it follows ISO 3166-2.
area = Column(String, index=True)
postal_code = Column(String, index=True)
name = Column(String, index=True)
lat = Column(Float)
lng = Column(Float)
    __table_args__ = (UniqueConstraint("postal_code", "name"),)
def __repr__(self):
return "<PostalCode(id=%s)>" % self.id

View File

@@ -0,0 +1,35 @@
# coding: utf-8
"""
This module defines an SQLAlchemy ORM model for public transport opendata.
"""
# pylint: disable=locally-disabled,invalid-name,too-few-public-methods
from __future__ import absolute_import, print_function, unicode_literals
import logging
from sqlalchemy import (
Column, Float, Integer, String
)
from flatisfy.database.base import BASE
LOGGER = logging.getLogger(__name__)
class PublicTransport(BASE):
"""
SQLAlchemy ORM model to store public transport opendata.
"""
__tablename__ = "public_transports"
id = Column(Integer, primary_key=True)
    # Area is an identifier to prevent loading unnecessary stops. For now it
    # follows ISO 3166-2.
area = Column(String, index=True)
name = Column(String)
lat = Column(Float)
lng = Column(Float)
def __repr__(self):
return "<PublicTransport(id=%s)>" % self.id

View File

@@ -12,6 +12,7 @@ import bottle
 
 import flatisfy.data
 from flatisfy.models import flat as flat_model
+from flatisfy.models.postal_code import PostalCode
 
 # TODO: Flat post-processing code should be factorized
@@ -38,7 +39,7 @@ def flats_v1(config, db):
     :return: The available flats objects in a JSON ``data`` dict.
     """
-    postal_codes = flatisfy.data.load_data("postal_codes", config)
+    postal_codes = flatisfy.data.load_data(PostalCode, config)
 
     flats = [
         flat.json_api_repr()
@@ -47,11 +48,15 @@ def flats_v1(config, db):
     for flat in flats:
         if flat["flatisfy_postal_code"]:
-            postal_code_data = postal_codes[flat["flatisfy_postal_code"]]
+            postal_code_data = next(
+                x
+                for x in postal_codes
+                if x.postal_code == flat["flatisfy_postal_code"]
+            )
             flat["flatisfy_postal_code"] = {
                 "postal_code": flat["flatisfy_postal_code"],
-                "name": postal_code_data["nom"],
-                "gps": postal_code_data["gps"]
+                "name": postal_code_data.name,
+                "gps": (postal_code_data.lat, postal_code_data.lng)
             }
         else:
             flat["flatisfy_postal_code"] = {}
@@ -94,7 +99,7 @@ def flat_v1(flat_id, config, db):
     :return: The flat object in a JSON ``data`` dict.
     """
-    postal_codes = flatisfy.data.load_data("postal_codes", config)
+    postal_codes = flatisfy.data.load_data(PostalCode, config)
 
     flat = db.query(flat_model.Flat).filter_by(id=flat_id).first()
@@ -104,11 +109,15 @@ def flat_v1(flat_id, config, db):
     flat = flat.json_api_repr()
 
     if flat["flatisfy_postal_code"]:
-        postal_code_data = postal_codes[flat["flatisfy_postal_code"]]
+        postal_code_data = next(
+            x
+            for x in postal_codes
+            if x.postal_code == flat["flatisfy_postal_code"]
+        )
         flat["flatisfy_postal_code"] = {
             "postal_code": flat["flatisfy_postal_code"],
-            "name": postal_code_data["nom"],
-            "gps": postal_code_data["gps"]
+            "name": postal_code_data.name,
+            "gps": (postal_code_data.lat, postal_code_data.lng)
         }
     else:
         flat["flatisfy_postal_code"] = {}
@@ -231,7 +240,7 @@ def search_v1(db, config):
     :return: The matching flat objects in a JSON ``data`` dict.
     """
-    postal_codes = flatisfy.data.load_data("postal_codes", config)
+    postal_codes = flatisfy.data.load_data(PostalCode, config)
 
     try:
         query = json.load(bottle.request.body)["query"]
@@ -246,11 +255,15 @@ def search_v1(db, config):
     for flat in flats:
         if flat["flatisfy_postal_code"]:
-            postal_code_data = postal_codes[flat["flatisfy_postal_code"]]
+            postal_code_data = next(
+                x
+                for x in postal_codes
+                if x.postal_code == flat["flatisfy_postal_code"]
+            )
             flat["flatisfy_postal_code"] = {
                 "postal_code": flat["flatisfy_postal_code"],
-                "name": postal_code_data["nom"],
-                "gps": postal_code_data["gps"]
+                "name": postal_code_data.name,
+                "gps": (postal_code_data.lat, postal_code_data.lng)
             }
         else:
             flat["flatisfy_postal_code"] = {}