You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
100 lines
3.4 KiB
100 lines
3.4 KiB
# coding: utf-8 |
|
""" |
|
This module contains all the code related to building necessary data files from |
|
the source opendata files. |
|
""" |
|
from __future__ import absolute_import, print_function, unicode_literals |
|
|
|
import logging |
|
|
|
import flatisfy.exceptions |
|
|
|
from flatisfy import database |
|
from flatisfy import data_files |
|
from flatisfy.models.postal_code import PostalCode |
|
from flatisfy.models.public_transport import PublicTransport |
|
from flatisfy.tools import hash_dict |
|
|
|
LOGGER = logging.getLogger(__name__) |
|
|
|
# Resolve ``lru_cache``: prefer the stdlib, then the ``functools32``
# backport (Python 2), and finally fall back to a no-op stand-in.
try:
    from functools import lru_cache
except ImportError:
    try:
        from functools32 import lru_cache
    except ImportError:

        def lru_cache(maxsize=None):  # pylint: disable=unused-argument
            """
            No-op stand-in for ``lru_cache`` used when no real
            implementation is available: decorated functions are
            returned unchanged (no memoization is performed).
            """

            def _identity_decorator(func):
                # Hand the function back untouched.
                return func

            return _identity_decorator

        LOGGER.warning(
            "`functools.lru_cache` is not available on your system. Consider "
            "installing `functools32` Python module if using Python2 for "
            "better performances."
        )
|
|
|
|
|
def preprocess_data(config, force=False):
    """
    Ensures that all the necessary data have been inserted in db from the raw
    opendata files.

    :params config: A config dictionary.
    :params force: Whether to force rebuild or not.
    :return bool: Whether data have been built or not.
    """
    get_session = database.init_db(config["database"], config["search_index"])

    # Figure out whether the opendata tables are already populated; the
    # ``and`` short-circuits, so the second count only runs if needed.
    with get_session() as session:
        already_built = (
            session.query(PublicTransport).count() > 0
            and session.query(PostalCode).count() > 0
        )
        if already_built and not force:
            # Nothing to do: data is present and no rebuild was requested.
            return False
        # A (re)build is required: purge any stale rows first.
        session.query(PublicTransport).delete()
        session.query(PostalCode).delete()

    # Run every registered preprocessing function and persist its output,
    # one session per function so each batch is committed independently.
    LOGGER.info("Rebuilding data...")
    for build_func in data_files.PREPROCESSING_FUNCTIONS:
        built_objects = build_func()
        if not built_objects:
            # An empty/falsy result means the build step failed.
            raise flatisfy.exceptions.DataBuildError("Error with %s." % build_func.__name__)
        with get_session() as session:
            session.add_all(built_objects)
    LOGGER.info("Done building data!")
    return True
|
|
|
|
|
@hash_dict
@lru_cache(maxsize=5)
def load_data(model, constraint, config):
    """
    Load data of the specified model from the database. Only load data for the
    specific areas of the postal codes in config.

    :param model: SQLAlchemy model to load.
    :param constraint: A constraint from configuration to limit the spatial
        extension of the loaded data.
    :param config: A config dictionary.
    :returns: A list of loaded SQLAlchemy objects from the db
    """
    get_session = database.init_db(config["database"], config["search_index"])
    loaded = []
    with get_session() as session:
        # Translate each configured postal code into its quarter/area,
        # deduplicating via a set.
        quarters = {
            data_files.french_postal_codes_to_quarter(code)
            for code in constraint["postal_codes"]
        }
        # Fetch the rows belonging to every area of interest.
        for quarter in quarters:
            loaded.extend(
                session.query(model).filter(model.area == quarter).all()
            )
        # Detach the loaded objects from the session so they remain usable
        # once the session is closed.
        session.expunge_all()
    return loaded
|
|
|