flatisfy/flatisfy/data.py

# coding: utf-8
"""
This module contains all the code related to building necessary data files from
the source opendata files.
"""
from __future__ import absolute_import, print_function, unicode_literals

import logging

import flatisfy.exceptions

from flatisfy import database
from flatisfy import data_files
from flatisfy.models.postal_code import PostalCode
from flatisfy.models.public_transport import PublicTransport
from flatisfy.tools import hash_dict

LOGGER = logging.getLogger(__name__)

# Try to load lru_cache
try:
    from functools import lru_cache
except ImportError:
    try:
        from functools32 import lru_cache
    except ImportError:
        def lru_cache(maxsize=None):  # pylint: disable=unused-argument
            """
            Identity implementation of ``lru_cache`` for fallback.
            """
            return lambda func: func

        LOGGER.warning(
            "`functools.lru_cache` is not available on your system. Consider "
            "installing the `functools32` Python module if using Python 2 for "
            "better performance."
        )
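
# A minimal sketch (illustrative only) of what the fallback means in practice:
# the identity ``lru_cache`` accepts ``maxsize`` but performs no caching, so a
# decorated function still works, it is simply recomputed on every call.
#
#     @lru_cache(maxsize=5)
#     def expensive(key):
#         ...  # memoized with functools/functools32, recomputed with fallback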


def preprocess_data(config, force=False):
    """
    Ensure that all the necessary data have been inserted in the database
    from the raw opendata files.

    :param config: A config dictionary.
    :param force: Whether to force a rebuild or not.
    :return bool: Whether data have been built or not.
    """
    # Check if a build is required
    get_session = database.init_db(config["database"], config["search_index"])

    with get_session() as session:
        is_built = (
            session.query(PublicTransport).count() > 0
            and session.query(PostalCode).count() > 0
        )
        if is_built and not force:
            # No need to rebuild the database, skip
            return False
        # Otherwise, purge all existing data
        session.query(PublicTransport).delete()
        session.query(PostalCode).delete()

    # Build all opendata files
    LOGGER.info("Rebuilding data...")
    for preprocess in data_files.PREPROCESSING_FUNCTIONS:
        data_objects = preprocess()
        if not data_objects:
            raise flatisfy.exceptions.DataBuildError(
                "Error with %s." % preprocess.__name__
            )
        with get_session() as session:
            session.add_all(data_objects)
    LOGGER.info("Done building data!")
    return True
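

# Example usage (a sketch; assumes ``config`` is a loaded flatisfy config dict
# providing the "database" and "search_index" keys used above):
#
#     if preprocess_data(config, force=False):
#         LOGGER.info("Opendata tables were rebuilt.")
#     else:
#         LOGGER.info("Opendata tables already built, skipping.")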


@hash_dict
@lru_cache(maxsize=5)
def load_data(model, constraint, config):
    """
    Load data of the specified model from the database. Only load data for
    the specific areas of the postal codes in config.

    :param model: SQLAlchemy model to load.
    :param constraint: A constraint from the configuration, used to limit the
        spatial extent of the loaded data.
    :param config: A config dictionary.
    :returns: A list of loaded SQLAlchemy objects from the database.
    """
    get_session = database.init_db(config["database"], config["search_index"])
    results = []
    with get_session() as session:
        areas = []
        # Get areas to fetch from, using postal codes
        for postal_code in constraint["postal_codes"]:
            areas.append(data_files.french_postal_codes_to_quarter(postal_code))
        # Load data for each (deduplicated) area
        areas = list(set(areas))
        for area in areas:
            results.extend(
                session.query(model).filter(model.area == area).all()
            )
        # Expunge the loaded data from the session, so that the objects can be
        # used after the session is closed
        session.expunge_all()
    return results
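

# Example usage (a sketch; the ``constraint`` shape mirrors the
# ``constraint["postal_codes"]`` access above, and the postal codes are
# hypothetical):
#
#     constraint = {"postal_codes": ["75014", "75015"]}
#     postal_codes = load_data(PostalCode, constraint, config)
#     stations = load_data(PublicTransport, constraint, config)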