You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
354 lines
14 KiB
354 lines
14 KiB
# coding: utf-8 |
|
""" |
|
This module handles the configuration management for Flatisfy. |
|
|
|
It loads the default configuration, then overloads it with the provided config |
|
file and then overloads it with command-line options. |
|
""" |
|
from __future__ import absolute_import, print_function, unicode_literals |
|
from builtins import str |
|
|
|
import json |
|
import logging |
|
import os |
|
import sys |
|
import traceback |
|
|
|
import appdirs |
|
from weboob.capabilities.housing import POSTS_TYPES, HOUSE_TYPES |
|
|
|
from flatisfy import data |
|
from flatisfy import tools |
|
from flatisfy.constants import TimeToModes |
|
from flatisfy.models.postal_code import PostalCode |
|
|
|
|
|
# Default configuration.
# ``load_config`` starts from a copy of this dict, overlays the JSON config
# file on top of it and finally applies the command-line options.
DEFAULT_CONFIG = {
    # Constraints to match. Maps a constraint name to its definition.
    "constraints": {
        "default": {
            "type": None,  # RENT, SALE, SHARING
            "house_types": [],  # List of house types, must be in APART, HOUSE,
            # PARKING, LAND, OTHER or UNKNOWN
            "postal_codes": [],  # List of postal codes
            "insees": [],  # Presumably a list of INSEE codes.
            # NOTE(review): ``validate_config`` reads the "insee_codes" key,
            # not "insees" -- confirm which key name is intended.
            "area": (None, None),  # (min, max) in m^2
            "cost": (None, None),  # (min, max) in currency unit
            "rooms": (None, None),  # (min, max)
            "bedrooms": (None, None),  # (min, max)
            "minimum_nb_photos": None,  # min number of photos
            "description_should_contain": [],  # list of terms
            "description_should_not_contain": [
                "vendu",
                "Vendu",
                "VENDU",
                "recherche",
            ],
            "time_to": {}  # Dict mapping names to {"gps": [lat, lng],
            #                         "time": (min, max),
            #                         "mode": Valid mode }
            # Time is in seconds
        }
    },
    # Whether or not to store personal data from housing posts (phone number
    # etc)
    "store_personal_data": False,
    # Max distance between a housing and a found station, to avoid
    # false-positive
    "max_distance_housing_station": 1500,
    # Score to consider two flats as being duplicates
    "duplicate_threshold": 15,
    # Score to consider two images as being duplicates through hash comparison
    "duplicate_image_hash_threshold": 10,
    # Whether images should be downloaded and served locally
    "serve_images_locally": True,
    # Navitia API key
    "navitia_api_key": None,
    # Mapbox API key
    "mapbox_api_key": None,
    # Number of filtering passes to run (``validate_config`` accepts 0 to 3)
    "passes": 3,
    # Maximum number of entries to fetch (``None`` or a positive integer)
    "max_entries": None,
    # Directory in which data will be put. ``None`` is XDG default location.
    "data_directory": None,
    # Path to the modules directory containing all Weboob modules. ``None`` if
    # ``weboob_modules`` package is pip-installed, and you want to use
    # ``pkgresource`` to automatically find it.
    "modules_path": None,
    # SQLAlchemy URI to the database to use. ``None`` makes ``load_config``
    # point it at a ``flatisfy.db`` SQLite file inside ``data_directory``.
    "database": None,
    # Path to the Whoosh search index file. Use ``None`` to put it in
    # ``data_directory``.
    "search_index": None,
    # Web app port
    "port": 8080,
    # Web app host to listen on
    "host": "127.0.0.1",
    # Web server to use to serve the webapp (see Bottle deployment doc)
    "webserver": None,
    # List of Weboob backends to use (default to any backend available)
    "backends": None,
    # Should email notifications be sent?
    "send_email": False,
    "smtp_server": "localhost",
    "smtp_port": 25,
    "smtp_username": None,
    "smtp_password": None,
    "smtp_from": "noreply@flatisfy.org",
    "smtp_to": [],
    "notification_lang": "en",
    # The web site url, to be used in email notifications. (doesn't matter
    # whether the trailing slash is present or not, ``load_config`` appends
    # one when it is missing)
    "website_url": "http://127.0.0.1:8080",
    "ignore_station": False,
}

# Module-level logger, named after this module.
LOGGER = logging.getLogger(__name__)
|
|
|
|
|
def validate_config(config, check_with_data):
    """
    Check that the config passed as argument is a valid configuration.

    Every check is a plain ``assert``. When one of them fails (or a required
    key is missing), the source text of the offending line is extracted from
    the traceback and returned, so that the caller can log which check was
    not satisfied.

    Note that the checks rely on ``assert`` statements, which Python strips
    when running with ``-O``: in that mode most of the validation is skipped.

    :param config: A config dictionary to check. It is expected to be fully
        resolved (``data_directory`` and ``search_index`` must be strings).
    :param check_with_data: Whether we should use the available OpenData to
        check the config values.
    :return: ``True`` if the configuration is valid. Otherwise, a string
        holding the source line of the check that failed.
    """

    def _check_constraints_bounds(bounds):
        """
        Check the bounds for numeric constraints.

        ``bounds`` must be a ``[min, max]`` pair (list or tuple), each item
        being either ``None`` or a non-negative number, with ``max > min``
        when both are set.
        """
        # Tuples are accepted as well, since DEFAULT_CONFIG declares its
        # bounds as tuples (JSON-loaded configs provide lists).
        assert isinstance(bounds, (list, tuple))
        assert len(bounds) == 2
        assert all(x is None or (isinstance(x, (float, int)) and x >= 0) for x in bounds)
        if bounds[0] is not None and bounds[1] is not None:
            assert bounds[1] > bounds[0]

    try:
        # Note: The traceback fetching code only handle single line asserts.
        # Then, we disable line-too-long pylint check and E501 flake8 checks
        # and use long lines whenever needed, in order to have the full assert
        # message in the log output.
        # pylint: disable=locally-disabled,line-too-long

        assert config["passes"] in [0, 1, 2, 3]
        assert config["max_entries"] is None or (isinstance(config["max_entries"], int) and config["max_entries"] > 0)  # noqa: E501

        # The data directory must be resolved at this point: checking the
        # type first avoids an uncaught TypeError from os.path.isdir(None).
        assert isinstance(config["data_directory"], str)  # noqa: E501
        assert os.path.isdir(config["data_directory"])
        assert isinstance(config["search_index"], str)
        assert config["modules_path"] is None or isinstance(config["modules_path"], str)  # noqa: E501

        assert config["database"] is None or isinstance(config["database"], str)  # noqa: E501

        assert isinstance(config["port"], int)
        assert isinstance(config["host"], str)
        assert config["webserver"] is None or isinstance(config["webserver"], str)  # noqa: E501
        assert config["backends"] is None or isinstance(config["backends"], list)  # noqa: E501

        assert isinstance(config["send_email"], bool)
        assert config["smtp_server"] is None or isinstance(config["smtp_server"], str)  # noqa: E501
        assert config["smtp_port"] is None or isinstance(config["smtp_port"], int)  # noqa: E501
        assert config["smtp_username"] is None or isinstance(config["smtp_username"], str)  # noqa: E501
        assert config["smtp_password"] is None or isinstance(config["smtp_password"], str)  # noqa: E501
        assert config["smtp_to"] is None or isinstance(config["smtp_to"], list)
        assert config["notification_lang"] is None or isinstance(config["notification_lang"], str)  # noqa: E501

        assert isinstance(config["store_personal_data"], bool)
        assert isinstance(config["max_distance_housing_station"], (int, float))
        assert isinstance(config["duplicate_threshold"], int)
        assert isinstance(config["duplicate_image_hash_threshold"], int)

        # API keys
        assert config["navitia_api_key"] is None or isinstance(config["navitia_api_key"], str)  # noqa: E501
        assert config["mapbox_api_key"] is None or isinstance(config["mapbox_api_key"], str)  # noqa: E501

        assert config["ignore_station"] is None or isinstance(config["ignore_station"], bool)  # noqa: E501

        # Ensure constraints are ok
        assert config["constraints"]
        # Report a malformed "constraints" value instead of crashing on
        # ``.values()`` below.
        assert isinstance(config["constraints"], dict)
        for constraint in config["constraints"].values():
            assert isinstance(constraint, dict)

            assert "type" in constraint
            assert isinstance(constraint["type"], str)
            assert constraint["type"].upper() in POSTS_TYPES.__members__

            assert "minimum_nb_photos" in constraint
            if constraint["minimum_nb_photos"]:
                assert isinstance(constraint["minimum_nb_photos"], int)
                assert constraint["minimum_nb_photos"] >= 0

            assert "description_should_contain" in constraint
            assert isinstance(constraint["description_should_contain"], list)
            for term in constraint["description_should_contain"]:
                assert isinstance(term, str)

            assert "description_should_not_contain" in constraint
            assert isinstance(constraint["description_should_not_contain"], list)
            for term in constraint["description_should_not_contain"]:
                assert isinstance(term, str)

            assert "house_types" in constraint
            assert constraint["house_types"]
            for house_type in constraint["house_types"]:
                # Type check first, so that a non-string entry is reported
                # rather than raising AttributeError on ``.upper()``.
                assert isinstance(house_type, str)
                assert house_type.upper() in HOUSE_TYPES.__members__

            assert "postal_codes" in constraint
            assert constraint["postal_codes"]
            assert all(isinstance(x, str) for x in constraint["postal_codes"])
            # INSEE codes are optional, but must be non-empty when given.
            if "insee_codes" in constraint:
                assert constraint["insee_codes"]
                assert all(isinstance(x, str) for x in constraint["insee_codes"])

            if check_with_data:
                # Ensure data is built into db
                data.preprocess_data(config, force=False)
                # Check postal codes
                opendata = data.load_data(PostalCode, constraint, config)
                # Sets give constant-time membership tests below.
                opendata_postal_codes = {x.postal_code for x in opendata}
                opendata_insee_codes = {x.insee_code for x in opendata}
                for postal_code in constraint["postal_codes"]:
                    assert postal_code in opendata_postal_codes  # noqa: E501
                if "insee_codes" in constraint:
                    for insee in constraint["insee_codes"]:
                        assert insee in opendata_insee_codes  # noqa: E501

            assert "area" in constraint
            _check_constraints_bounds(constraint["area"])

            assert "cost" in constraint
            _check_constraints_bounds(constraint["cost"])

            assert "rooms" in constraint
            _check_constraints_bounds(constraint["rooms"])

            assert "bedrooms" in constraint
            _check_constraints_bounds(constraint["bedrooms"])

            assert "time_to" in constraint
            assert isinstance(constraint["time_to"], dict)
            for name, item in constraint["time_to"].items():
                assert isinstance(name, str)
                assert isinstance(item, dict)
                assert "gps" in item
                assert isinstance(item["gps"], list)
                assert len(item["gps"]) == 2
                assert "time" in item
                _check_constraints_bounds(item["time"])
                if "mode" in item:
                    # Lookup only: an unknown mode raises KeyError, which is
                    # handled below like any other failed check.
                    TimeToModes[item["mode"]]

        return True
    except (AssertionError, KeyError):
        # Return the source text of the innermost frame's line, i.e. the
        # check that failed.
        _, _, exc_traceback = sys.exc_info()
        return traceback.extract_tb(exc_traceback)[-1][-1]
|
|
|
|
|
def load_config(args=None, check_with_data=True):
    """
    Load the configuration from file.

    The configuration is built in three layers: the default configuration,
    then the JSON config file given by ``args.config`` (if any), then the
    command-line options found on ``args``. Unset paths (data directory,
    database, search index) are resolved, the data directory is created if
    needed, and the result is validated.

    :param args: An argparse args structure. Attributes read when present:
        ``config``, ``passes``, ``max_entries``, ``port``, ``host``,
        ``data_dir`` and ``constraints`` (comma-separated constraint names).
    :param check_with_data: Whether we should use the available OpenData to
        check the config values. Defaults to ``True``.
    :return: The loaded config dict, or ``None`` if the resulting
        configuration is invalid (the failed check is logged).
    """
    # Local import: only needed here, keeps the module import block as is.
    import copy

    def _cli_option(name):
        """Return the CLI option ``name``, or ``None`` when it is not set."""
        return getattr(args, name, None) if args else None

    LOGGER.info("Initializing configuration...")
    # Default configuration. A deep copy is used so that the returned config
    # never shares nested dicts / lists with the module-level defaults.
    config_data = copy.deepcopy(DEFAULT_CONFIG)

    # Load config from specified JSON
    config_path = _cli_option("config")
    if config_path:
        LOGGER.debug("Loading configuration from %s.", config_path)
        try:
            with open(config_path, "r") as fh:
                loaded = json.load(fh)
            # A valid JSON file may still hold a list or a scalar, which
            # cannot be merged into the config.
            if not isinstance(loaded, dict):
                raise ValueError("top-level JSON value must be an object")
            config_data.update(loaded)
        except (IOError, ValueError) as exc:
            # Best effort: carry on with what we have (the defaults).
            LOGGER.error(
                "Unable to load configuration from file, using default configuration: %s.",
                exc,
            )

    # Overload config with arguments
    passes = _cli_option("passes")
    if passes is not None:
        LOGGER.debug("Overloading number of passes from CLI arguments: %d.", passes)
        config_data["passes"] = passes
    max_entries = _cli_option("max_entries")
    if max_entries is not None:
        LOGGER.debug(
            "Overloading maximum number of entries from CLI arguments: %d.",
            max_entries,
        )
        config_data["max_entries"] = max_entries
    port = _cli_option("port")
    if port is not None:
        LOGGER.debug("Overloading web app port: %d.", port)
        config_data["port"] = port
    host = _cli_option("host")
    if host is not None:
        LOGGER.debug("Overloading web app host: %s.", host)
        config_data["host"] = str(host)

    # Handle data_directory option
    data_dir = _cli_option("data_dir")
    if data_dir is not None:
        LOGGER.debug("Overloading data directory from CLI arguments.")
        config_data["data_directory"] = data_dir
    elif config_data["data_directory"] is None:
        config_data["data_directory"] = appdirs.user_data_dir("flatisfy", "flatisfy")
        LOGGER.debug("Using default XDG data directory: %s.", config_data["data_directory"])

    if not os.path.isdir(config_data["data_directory"]):
        LOGGER.info(
            "Creating data directory according to config: %s",
            config_data["data_directory"],
        )
        os.makedirs(config_data["data_directory"])
    # Checked separately so that the images directory is also created when
    # the data directory already existed without it.
    images_directory = os.path.join(config_data["data_directory"], "images")
    if not os.path.isdir(images_directory):
        os.makedirs(images_directory)

    if config_data["database"] is None:
        config_data["database"] = "sqlite:///" + os.path.join(config_data["data_directory"], "flatisfy.db")

    if config_data["search_index"] is None:
        config_data["search_index"] = os.path.join(config_data["data_directory"], "search_index")

    # Handle constraints filtering
    constraints_arg = _cli_option("constraints")
    if constraints_arg is not None:
        LOGGER.info(
            (
                "Filtering constraints from config according to CLI argument. "
                "Using only the following constraints: %s."
            ),
            constraints_arg.replace(",", ", "),
        )
        constraints_filter = constraints_arg.split(",")
        config_data["constraints"] = {k: v for k, v in config_data["constraints"].items() if k in constraints_filter}

    # Sanitize website url: make sure it ends with a slash. ``endswith`` is
    # used rather than indexing so that an empty string does not raise.
    website_url = config_data["website_url"]
    if website_url is not None and not website_url.endswith("/"):
        config_data["website_url"] = website_url + "/"

    config_validation = validate_config(config_data, check_with_data)
    if config_validation is True:
        LOGGER.info("Config has been fully initialized.")
        return config_data
    # On failure, ``validate_config`` returns the text of the failed check.
    LOGGER.error("Error in configuration: %s.", config_validation)
    return None
|
|
|
|
|
def init_config(output=None):
    """
    Dump the default configuration, as pretty-printed JSON.

    :param output: Path of the file to write the configuration to. When it
        is empty, ``None`` or ``"-"``, the configuration is printed on the
        standard output instead.
    """
    defaults = DEFAULT_CONFIG.copy()

    # No usable path given: print and stop here.
    if not output or output == "-":
        print(tools.pretty_json(defaults))
        return

    with open(output, "w") as handle:
        handle.write(tools.pretty_json(defaults))
|
|
|