Phyks (Lucas Verney)
b187a106e4
There was a bug in `data.py` which made it load the same data as many times as the number of postal codes in constraints. This is now fixed.
101 lines
3.3 KiB
Python
101 lines
3.3 KiB
Python
# coding: utf-8
|
|
"""
|
|
This module contains all the code related to building necessary data files from
|
|
the source opendata files.
|
|
"""
|
|
from __future__ import absolute_import, print_function, unicode_literals
|
|
|
|
import logging
|
|
|
|
import flatisfy.exceptions
|
|
|
|
from flatisfy import database
|
|
from flatisfy import data_files
|
|
from flatisfy.models.postal_code import PostalCode
|
|
from flatisfy.models.public_transport import PublicTransport
|
|
|
|
LOGGER = logging.getLogger(__name__)
|
|
|
|
# Import ``lru_cache``, preferring the Python 3 stdlib, then the
# ``functools32`` backport, and finally degrading to a no-op decorator
# factory so that decorated functions keep working (just uncached).
try:
    from functools import lru_cache
except ImportError:
    try:
        from functools32 import lru_cache
    except ImportError:
        def lru_cache(maxsize=None):
            """
            No-op stand-in for :func:`functools.lru_cache`.

            Returns a decorator that hands the function back unchanged,
            so caching is silently disabled.
            """
            def _passthrough(func):
                return func
            return _passthrough

        LOGGER.warning(
            "`functools.lru_cache` is not available on your system. Consider "
            "installing `functools32` Python module if using Python2 for "
            "better performances."
        )
|
|
|
|
|
|
def preprocess_data(config, force=False):
    """
    Ensure that all the necessary data have been inserted in db from the raw
    opendata files.

    :param config: A config dictionary.
    :param force: Whether to force rebuild or not.
    :raises flatisfy.exceptions.DataBuildError: If a preprocessing function
        returned no data.
    """
    # Check if a build is required
    get_session = database.init_db(config["database"], config["search_index"])

    with get_session() as session:
        # BUG FIX: ``count`` was previously missing its call parentheses
        # (``.count > 0``), so the comparison was made against the bound
        # method object instead of the row count.
        is_built = (
            session.query(PublicTransport).count() > 0 and
            session.query(PostalCode).count() > 0
        )
        if is_built and not force:
            # No need to rebuild the database, skip
            return
        # Otherwise, purge all existing data
        session.query(PublicTransport).delete()
        session.query(PostalCode).delete()

    # Build all opendata files
    for preprocess in data_files.PREPROCESSING_FUNCTIONS:
        data_objects = preprocess()
        if not data_objects:
            raise flatisfy.exceptions.DataBuildError(
                "Error with %s." % preprocess.__name__
            )
        with get_session() as session:
            session.add_all(data_objects)
|
|
|
|
|
|
def load_data(model, constraint, config):
    """
    Load data of the specified model from the database. Only load data for
    the specific areas of the postal codes in config.

    :param model: SQLAlchemy model to load.
    :param constraint: A constraint from configuration to limit the spatial
        extension of the loaded data.
    :param config: A config dictionary.
    :returns: A list of loaded SQLAlchemy objects from the db.

    .. note ::
        The previous ``@lru_cache(maxsize=5)`` decorator was removed:
        ``constraint`` and ``config`` are dicts, which are unhashable, so
        building the cache key raised ``TypeError`` on every call before
        the body could even run.
    """
    get_session = database.init_db(config["database"], config["search_index"])
    results = []
    with get_session() as session:
        # Get the unique areas to fetch from, using postal codes. The set
        # ensures each area is queried only once, even when several postal
        # codes map to the same area.
        areas = {
            data_files.french_postal_codes_to_iso_3166(postal_code)
            for postal_code in constraint["postal_codes"]
        }
        # Load data for each area
        for area in areas:
            results.extend(
                session.query(model)
                .filter(model.area == area).all()
            )
        # Expunge loaded data from the session to be able to use them
        # afterwards
        session.expunge_all()
    return results
|