flatisfy/flatisfy/data.py

# coding: utf-8
"""
This module contains all the code related to building necessary data files
from the source opendata files.
"""
from __future__ import absolute_import, print_function, unicode_literals

import logging

import flatisfy.exceptions

from flatisfy import database
from flatisfy import data_files
from flatisfy.models.postal_code import PostalCode
from flatisfy.models.public_transport import PublicTransport


LOGGER = logging.getLogger(__name__)

# Try to load lru_cache
try:
    from functools import lru_cache
except ImportError:
    try:
        from functools32 import lru_cache
    except ImportError:
        def lru_cache(maxsize=None):
            """
            Identity implementation of ``lru_cache`` for fallback.
            """
            return lambda func: func
        LOGGER.warning(
            "`functools.lru_cache` is not available on your system. Consider "
            "installing the `functools32` Python module if using Python 2 "
            "for better performance."
        )
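# Note that with the identity fallback above, ``@lru_cache(maxsize=5)`` can
# still be used as a decorator below: ``lru_cache(maxsize=5)`` returns
# ``lambda func: func``, so the decorated function is simply left uncached.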


def preprocess_data(config, force=False):
    """
    Ensure that all the necessary data have been inserted in the database
    from the raw opendata files.

    :param config: A config dictionary.
    :param force: Whether to force a rebuild or not.
    """
    # Check if a build is required
    get_session = database.init_db(config["database"], config["search_index"])
    with get_session() as session:
        is_built = (
            session.query(PublicTransport).count() > 0 and
            session.query(PostalCode).count() > 0
        )
        if is_built and not force:
            # No need to rebuild the database, skip
            return
        # Otherwise, purge all existing data
        session.query(PublicTransport).delete()
        session.query(PostalCode).delete()

    # Build all opendata files
    for preprocess in data_files.PREPROCESSING_FUNCTIONS:
        data_objects = preprocess()
        if not data_objects:
            raise flatisfy.exceptions.DataBuildError(
                "Error with %s." % preprocess.__name__
            )
        with get_session() as session:
            session.add_all(data_objects)
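
# Example call (a minimal sketch; the values below are made-up, only the
# "database" URI and "search_index" path keys are read by this module):
#
#     config = {
#         "database": "sqlite:///flatisfy.db",
#         "search_index": "/tmp/flatisfy_index",
#     }
#     preprocess_data(config, force=True)  # Purge and rebuild opendata tables.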


@lru_cache(maxsize=5)
def _load_data_cached(model, areas, database_uri, search_index):
    """
    Cached backend for ``load_data``. ``lru_cache`` requires all arguments to
    be hashable, hence a tuple of areas and the unpacked config values here
    instead of the raw ``constraint`` and ``config`` dicts, which are not
    hashable and would make the cached call raise a ``TypeError``.

    :param model: SQLAlchemy model to load.
    :param areas: A tuple of areas to load data for.
    :param database_uri: URI of the database, from the config.
    :param search_index: Path to the search index, from the config.
    :returns: A list of loaded SQLAlchemy objects from the db.
    """
    get_session = database.init_db(database_uri, search_index)
    results = []
    with get_session() as session:
        # Load data for each area
        for area in areas:
            results.extend(
                session.query(model)
                .filter(model.area == area)
                .all()
            )
        # Expunge loaded data from the session to be able to use them
        # afterwards
        session.expunge_all()
    return results


def load_data(model, constraint, config):
    """
    Load data of the specified model from the database. Only load data for
    the specific areas of the postal codes in the given constraint.

    :param model: SQLAlchemy model to load.
    :param constraint: A constraint from configuration to limit the spatial
        extension of the loaded data.
    :param config: A config dictionary.
    :returns: A list of loaded SQLAlchemy objects from the db.
    """
    # Get the areas to fetch from, using postal codes. Deduplicate them so
    # that each area is only loaded once, and sort them into a tuple so that
    # the same set of postal codes always hits the same cache entry.
    areas = tuple(sorted(set(
        data_files.french_postal_codes_to_iso_3166(postal_code)
        for postal_code in constraint["postal_codes"]
    )))
    return _load_data_cached(
        model, areas, config["database"], config["search_index"]
    )
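
# Example call (a minimal sketch; the postal codes are made-up values and the
# constraint only needs the "postal_codes" key read above):
#
#     constraint = {"postal_codes": ["75013", "75014"]}
#     postal_codes = load_data(PostalCode, constraint, config)
#     stops = load_data(PublicTransport, constraint, config)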