Towards a more modular system for data files
Also use `lru_cache` to memoize the data file loading function, which speeds everything up a bit.
This commit is contained in:
parent
b3ae71a8be
commit
3469035f4a
@ -8,6 +8,8 @@ import argparse
|
|||||||
import logging
|
import logging
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
|
logging.basicConfig()
|
||||||
|
|
||||||
import flatisfy.config
|
import flatisfy.config
|
||||||
from flatisfy import cmds
|
from flatisfy import cmds
|
||||||
from flatisfy import data
|
from flatisfy import data
|
||||||
@ -118,14 +120,14 @@ def main():
|
|||||||
|
|
||||||
# Set logger
|
# Set logger
|
||||||
if args.vv:
|
if args.vv:
|
||||||
logging.basicConfig(level=logging.DEBUG)
|
logging.getLogger('').setLevel(logging.DEBUG)
|
||||||
logging.getLogger('sqlalchemy.engine').setLevel(logging.DEBUG)
|
logging.getLogger('sqlalchemy.engine').setLevel(logging.DEBUG)
|
||||||
elif args.verbose:
|
elif args.verbose:
|
||||||
logging.basicConfig(level=logging.INFO)
|
logging.getLogger('').setLevel(logging.INFO)
|
||||||
# sqlalchemy INFO level is way too loud, just stick with WARNING
|
# sqlalchemy INFO level is way too loud, just stick with WARNING
|
||||||
logging.getLogger('sqlalchemy.engine').setLevel(logging.WARNING)
|
logging.getLogger('sqlalchemy.engine').setLevel(logging.WARNING)
|
||||||
else:
|
else:
|
||||||
logging.basicConfig(level=logging.WARNING)
|
logging.getLogger('').setLevel(logging.WARNING)
|
||||||
logging.getLogger('sqlalchemy.engine').setLevel(logging.WARNING)
|
logging.getLogger('sqlalchemy.engine').setLevel(logging.WARNING)
|
||||||
|
|
||||||
# Init-config command
|
# Init-config command
|
||||||
|
@ -10,12 +10,27 @@ import json
|
|||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
|
|
||||||
|
|
||||||
import flatisfy.exceptions
|
import flatisfy.exceptions
|
||||||
|
|
||||||
|
|
||||||
LOGGER = logging.getLogger(__name__)
|
LOGGER = logging.getLogger(__name__)
|
||||||
MODULE_DIR = os.path.dirname(os.path.realpath(__file__))
|
MODULE_DIR = os.path.dirname(os.path.realpath(__file__))
|
||||||
|
|
||||||
|
# Try to load lru_cache: first from the standard library, then from the
# `functools32` backport, and finally fall back to a no-op stand-in so that
# decorated functions keep working (without memoization).
#
# NOTE: the real `lru_cache` builds its cache key from the call arguments, so
# every argument of a decorated function must be hashable. The no-op fallback
# has no such requirement.
try:
    from functools import lru_cache
except ImportError:
    try:
        from functools32 import lru_cache
    except ImportError:
        def lru_cache(maxsize=None, typed=False):
            """
            No-op replacement for ``functools.lru_cache``.

            :param maxsize: Ignored. Accepted for signature compatibility.
            :param typed: Ignored. Accepted for signature compatibility.
            :return: A decorator returning the decorated function unchanged.
            """
            def _passthrough(func):
                return func
            return _passthrough

        LOGGER.warning(
            "`functools.lru_cache` is not available on your system. Consider "
            "installing `functools32` Python module if using Python2 for "
            "better performances."
        )
|
||||||
|
|
||||||
|
|
||||||
def _preprocess_ratp(output_dir):
|
def _preprocess_ratp(output_dir):
|
||||||
"""
|
"""
|
||||||
@ -98,6 +113,18 @@ def _preprocess_laposte(output_dir):
|
|||||||
return True
|
return True
|
||||||
|
|
||||||
|
|
||||||
|
# Registry of the data sources to build. Each entry maps a source identifier
# to:
#   - "preprocess": the function called with the opendata directory to build
#     the files,
#   - "output": the file names expected in the opendata directory once built.
DATA_FILES = {
    "ratp.json": dict(
        preprocess=_preprocess_ratp,
        output=["ratp.json"],
    ),
    "laposte.json": dict(
        preprocess=_preprocess_laposte,
        output=["cities.json", "postal_codes.json"],
    ),
}
|
||||||
|
|
||||||
|
|
||||||
def preprocess_data(config, force=False):
|
def preprocess_data(config, force=False):
|
||||||
"""
|
"""
|
||||||
Ensures that all the necessary data files have been built from the raw
|
Ensures that all the necessary data files have been built from the raw
|
||||||
@ -115,38 +142,32 @@ def preprocess_data(config, force=False):
|
|||||||
except OSError:
|
except OSError:
|
||||||
LOGGER.debug("Opendata directory already existed, doing nothing.")
|
LOGGER.debug("Opendata directory already existed, doing nothing.")
|
||||||
|
|
||||||
is_built_ratp = os.path.isfile(
|
# Build all the necessary data files
|
||||||
os.path.join(opendata_directory, "ratp.json")
|
for data_file in DATA_FILES:
|
||||||
|
# Check if already built
|
||||||
|
is_built = all(
|
||||||
|
os.path.isfile(
|
||||||
|
os.path.join(opendata_directory, output)
|
||||||
|
) for output in DATA_FILES[data_file]["output"]
|
||||||
)
|
)
|
||||||
if not is_built_ratp or force:
|
if not is_built or force:
|
||||||
LOGGER.info("Building from RATP data.")
|
# Build if needed
|
||||||
if not _preprocess_ratp(opendata_directory):
|
LOGGER.info("Building from {} data.".format(data_file))
|
||||||
raise flatisfy.exceptions.DataBuildError("Error with RATP data.")
|
if not DATA_FILES[data_file]["preprocess"](opendata_directory):
|
||||||
|
|
||||||
is_built_laposte = (
|
|
||||||
os.path.isfile(os.path.join(opendata_directory, "cities.json")) and
|
|
||||||
os.path.isfile(os.path.join(opendata_directory, "postal_codes.json"))
|
|
||||||
)
|
|
||||||
if not is_built_laposte or force:
|
|
||||||
LOGGER.info("Building from LaPoste data.")
|
|
||||||
if not _preprocess_laposte(opendata_directory):
|
|
||||||
raise flatisfy.exceptions.DataBuildError(
|
raise flatisfy.exceptions.DataBuildError(
|
||||||
"Error with LaPoste data."
|
"Error with {} data.".format(data_file)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@lru_cache(maxsize=5)
|
||||||
def load_data(data_type, config):
|
def load_data(data_type, config):
|
||||||
"""
|
"""
|
||||||
Load a given built data file.
|
Load a given built data file. This function is memoized.
|
||||||
|
|
||||||
:param data_type: A valid data identifier.
|
:param data_type: A valid data identifier.
|
||||||
:param config: A config dictionary.
|
:param config: A config dictionary.
|
||||||
:return: The loaded data. ``None`` if the query is incorrect.
|
:return: The loaded data. ``None`` if the query is incorrect.
|
||||||
"""
|
"""
|
||||||
if data_type not in ["postal_codes", "cities", "ratp"]:
|
|
||||||
LOGGER.error("Invalid request. No %s data file.", data_type)
|
|
||||||
return None
|
|
||||||
|
|
||||||
opendata_directory = os.path.join(config["data_directory"], "opendata")
|
opendata_directory = os.path.join(config["data_directory"], "opendata")
|
||||||
datafile_path = os.path.join(opendata_directory, "%s.json" % data_type)
|
datafile_path = os.path.join(opendata_directory, "%s.json" % data_type)
|
||||||
data = {}
|
data = {}
|
||||||
|
Loading…
Reference in New Issue
Block a user