# coding: utf-8
"""
This module contains all the code related to building necessary data files from
the source opendata files.
"""
from __future__ import absolute_import, print_function, unicode_literals

import logging

import flatisfy.exceptions

from flatisfy import database
from flatisfy import data_files
from flatisfy.models.postal_code import PostalCode
from flatisfy.models.public_transport import PublicTransport
from flatisfy.tools import hash_dict
LOGGER = logging.getLogger(__name__)


# Prefer the stdlib ``lru_cache``; fall back to the ``functools32`` backport
# on Python 2, and finally to a no-op decorator if neither is available.
try:
    from functools import lru_cache
except ImportError:
    try:
        from functools32 import lru_cache
    except ImportError:
        def lru_cache(maxsize=None):  # pylint: disable=unused-argument
            """
            Identity implementation of ``lru_cache`` for fallback.

            Returns a decorator handing back the function unchanged, so
            callers keep the same API whether caching is available or not.
            """
            def decorator(func):
                return func
            return decorator

        LOGGER.warning(
            "`functools.lru_cache` is not available on your system. Consider "
            "installing `functools32` Python module if using Python2 for "
            "better performances."
        )
|
|
|
|
|
2017-04-03 17:29:29 +02:00
|
|
|
|
|
|
|
def preprocess_data(config, force=False):
    """
    Ensure that all the necessary data have been inserted in db from the raw
    opendata files.

    :param config: A config dictionary.
    :param force: Whether to force rebuild or not.
    :return bool: Whether data have been built or not.
    """
    get_session = database.init_db(config["database"], config["search_index"])

    # First, figure out whether a (re)build is required at all.
    with get_session() as session:
        is_built = (
            session.query(PublicTransport).count() > 0
            and session.query(PostalCode).count() > 0
        )
        if is_built and not force:
            # Everything is already in db, nothing to do.
            return False
        # A build is needed: wipe any stale rows before reinserting.
        session.query(PublicTransport).delete()
        session.query(PostalCode).delete()

    # Run every preprocessing step on the opendata files and persist what
    # each one produces.
    LOGGER.info("Rebuilding data...")
    for build_func in data_files.PREPROCESSING_FUNCTIONS:
        built_objects = build_func()
        if not built_objects:
            # An empty result means the step failed to produce anything.
            raise flatisfy.exceptions.DataBuildError(
                "Error with %s." % build_func.__name__
            )
        with get_session() as session:
            session.add_all(built_objects)
    LOGGER.info("Done building data!")
    return True
|
2017-06-14 15:29:33 +02:00
|
|
|
|
|
|
|
|
2017-09-24 23:10:07 +02:00
|
|
|
@hash_dict
@lru_cache(maxsize=5)
def load_data(model, constraint, config):
    """
    Load data of the specified model from the database, restricted to the
    areas covered by the postal codes of the given constraint.

    :param model: SQLAlchemy model to load.
    :param constraint: A constraint from configuration to limit the spatial
        extension of the loaded data.
    :param config: A config dictionary.
    :returns: A list of loaded SQLAlchemy objects from the db
    """
    get_session = database.init_db(config["database"], config["search_index"])
    loaded = []
    with get_session() as session:
        # Map every constrained postal code to its area, deduplicating the
        # areas so each one is queried only once.
        areas = {
            data_files.french_postal_codes_to_quarter(postal_code)
            for postal_code in constraint["postal_codes"]
        }
        for area in areas:
            matches = session.query(model).filter(model.area == area).all()
            loaded.extend(matches)
        # Detach the loaded objects from the session so they remain usable
        # after the session is closed.
        session.expunge_all()
    return loaded
|