Flatisfy is your new companion to ease your search for new housing :)

config.py 14KB


  1. # coding: utf-8
  2. """
  3. This module handles the configuration management for Flatisfy.
  4. It loads the default configuration, then overloads it with the provided config
  5. file and then overloads it with command-line options.
  6. """
  7. from __future__ import absolute_import, print_function, unicode_literals
  8. from builtins import str
  9. import json
  10. import logging
  11. import os
  12. import sys
  13. import traceback
  14. import appdirs
  15. from weboob.capabilities.housing import POSTS_TYPES, HOUSE_TYPES
  16. from flatisfy import data
  17. from flatisfy import tools
  18. from flatisfy.constants import TimeToModes
  19. from flatisfy.models.postal_code import PostalCode
  20. # Default configuration
  21. DEFAULT_CONFIG = {
  22. # Constraints to match
  23. "constraints": {
  24. "default": {
  25. "type": None, # RENT, SALE, SHARING
  26. "house_types": [], # List of house types, must be in APART, HOUSE,
  27. # PARKING, LAND, OTHER or UNKNOWN
  28. "postal_codes": [], # List of postal codes
  29. "insees": [], # List of postal codes
  30. "area": (None, None), # (min, max) in m^2
  31. "cost": (None, None), # (min, max) in currency unit
  32. "rooms": (None, None), # (min, max)
  33. "bedrooms": (None, None), # (min, max)
  34. "minimum_nb_photos": None, # min number of photos
  35. "description_should_contain": [], # list of terms
  36. "description_should_not_contain": [
  37. "vendu",
  38. "Vendu",
  39. "VENDU",
  40. "recherche",
  41. ],
  42. "time_to": {} # Dict mapping names to {"gps": [lat, lng],
  43. # "time": (min, max),
  44. # "mode": Valid mode }
  45. # Time is in seconds
  46. }
  47. },
  48. # Whether or not to store personal data from housing posts (phone number
  49. # etc)
  50. "store_personal_data": False,
  51. # Max distance between an housing and a found station, to avoid
  52. # false-positive
  53. "max_distance_housing_station": 1500,
  54. # Score to consider two flats as being duplicates
  55. "duplicate_threshold": 15,
  56. # Score to consider two images as being duplicates through hash comparison
  57. "duplicate_image_hash_threshold": 10,
  58. # Whether images should be downloaded and served locally
  59. "serve_images_locally": True,
  60. # Navitia API key
  61. "navitia_api_key": None,
  62. # Mapbox API key
  63. "mapbox_api_key": None,
  64. # Number of filtering passes to run
  65. "passes": 3,
  66. # Maximum number of entries to fetch
  67. "max_entries": None,
  68. # Directory in wich data will be put. ``None`` is XDG default location.
  69. "data_directory": None,
  70. # Path to the modules directory containing all Weboob modules. ``None`` if
  71. # ``weboob_modules`` package is pip-installed, and you want to use
  72. # ``pkgresource`` to automatically find it.
  73. "modules_path": None,
  74. # SQLAlchemy URI to the database to use
  75. "database": None,
  76. # Path to the Whoosh search index file. Use ``None`` to put it in
  77. # ``data_directory``.
  78. "search_index": None,
  79. # Web app port
  80. "port": 8080,
  81. # Web app host to listen on
  82. "host": "127.0.0.1",
  83. # Web server to use to serve the webapp (see Bottle deployment doc)
  84. "webserver": None,
  85. # List of Weboob backends to use (default to any backend available)
  86. "backends": None,
  87. # Should email notifications be sent?
  88. "send_email": False,
  89. "smtp_server": "localhost",
  90. "smtp_port": 25,
  91. "smtp_username": None,
  92. "smtp_password": None,
  93. "smtp_from": "noreply@flatisfy.org",
  94. "smtp_to": [],
  95. "notification_lang": "en",
  96. # The web site url, to be used in email notifications. (doesn't matter
  97. # whether the trailing slash is present or not)
  98. "website_url": "http://127.0.0.1:8080",
  99. "ignore_station": False,
  100. }
  101. LOGGER = logging.getLogger(__name__)
  102. def validate_config(config, check_with_data):
  103. """
  104. Check that the config passed as argument is a valid configuration.
  105. :param config: A config dictionary to fetch.
  106. :param check_with_data: Whether we should use the available OpenData to
  107. check the config values.
  108. :return: ``True`` if the configuration is valid, ``False`` otherwise.
  109. """
  110. def _check_constraints_bounds(bounds):
  111. """
  112. Check the bounds for numeric constraints.
  113. """
  114. assert isinstance(bounds, list)
  115. assert len(bounds) == 2
  116. assert all(x is None or (isinstance(x, (float, int)) and x >= 0) for x in bounds)
  117. if bounds[0] is not None and bounds[1] is not None:
  118. assert bounds[1] > bounds[0]
  119. try:
  120. # Note: The traceback fetching code only handle single line asserts.
  121. # Then, we disable line-too-long pylint check and E501 flake8 checks
  122. # and use long lines whenever needed, in order to have the full assert
  123. # message in the log output.
  124. # pylint: disable=locally-disabled,line-too-long
  125. assert config["passes"] in [0, 1, 2, 3]
  126. assert config["max_entries"] is None or (
  127. isinstance(config["max_entries"], int) and config["max_entries"] > 0
  128. ) # noqa: E501
  129. assert config["data_directory"] is None or isinstance(config["data_directory"], str) # noqa: E501
  130. assert os.path.isdir(config["data_directory"])
  131. assert isinstance(config["search_index"], str)
  132. assert config["modules_path"] is None or isinstance(config["modules_path"], str) # noqa: E501
  133. assert config["database"] is None or isinstance(config["database"], str) # noqa: E501
  134. assert isinstance(config["port"], int)
  135. assert isinstance(config["host"], str)
  136. assert config["webserver"] is None or isinstance(config["webserver"], str) # noqa: E501
  137. assert config["backends"] is None or isinstance(config["backends"], list) # noqa: E501
  138. assert isinstance(config["send_email"], bool)
  139. assert config["smtp_server"] is None or isinstance(config["smtp_server"], str) # noqa: E501
  140. assert config["smtp_port"] is None or isinstance(config["smtp_port"], int) # noqa: E501
  141. assert config["smtp_username"] is None or isinstance(config["smtp_username"], str) # noqa: E501
  142. assert config["smtp_password"] is None or isinstance(config["smtp_password"], str) # noqa: E501
  143. assert config["smtp_to"] is None or isinstance(config["smtp_to"], list)
  144. assert config["notification_lang"] is None or isinstance(config["notification_lang"], str)
  145. assert isinstance(config["store_personal_data"], bool)
  146. assert isinstance(config["max_distance_housing_station"], (int, float))
  147. assert isinstance(config["duplicate_threshold"], int)
  148. assert isinstance(config["duplicate_image_hash_threshold"], int)
  149. # API keys
  150. assert config["navitia_api_key"] is None or isinstance(config["navitia_api_key"], str) # noqa: E501
  151. assert config["mapbox_api_key"] is None or isinstance(config["mapbox_api_key"], str) # noqa: E501
  152. assert config["ignore_station"] is None or isinstance(config["ignore_station"], bool) # noqa: E501
  153. # Ensure constraints are ok
  154. assert config["constraints"]
  155. for constraint in config["constraints"].values():
  156. assert "type" in constraint
  157. assert isinstance(constraint["type"], str)
  158. assert constraint["type"].upper() in POSTS_TYPES.__members__
  159. assert "minimum_nb_photos" in constraint
  160. if constraint["minimum_nb_photos"]:
  161. assert isinstance(constraint["minimum_nb_photos"], int)
  162. assert constraint["minimum_nb_photos"] >= 0
  163. assert "description_should_contain" in constraint
  164. assert isinstance(constraint["description_should_contain"], list)
  165. if constraint["description_should_contain"]:
  166. for term in constraint["description_should_contain"]:
  167. assert isinstance(term, str)
  168. assert "description_should_not_contain" in constraint
  169. assert isinstance(constraint["description_should_not_contain"], list)
  170. if constraint["description_should_not_contain"]:
  171. for term in constraint["description_should_not_contain"]:
  172. assert isinstance(term, str)
  173. assert "house_types" in constraint
  174. assert constraint["house_types"]
  175. for house_type in constraint["house_types"]:
  176. assert house_type.upper() in HOUSE_TYPES.__members__
  177. assert "postal_codes" in constraint
  178. assert constraint["postal_codes"]
  179. assert all(isinstance(x, str) for x in constraint["postal_codes"])
  180. if "insee_codes" in constraint:
  181. assert constraint["insee_codes"]
  182. assert all(isinstance(x, str) for x in constraint["insee_codes"])
  183. if check_with_data:
  184. # Ensure data is built into db
  185. data.preprocess_data(config, force=False)
  186. # Check postal codes
  187. opendata = data.load_data(PostalCode, constraint, config)
  188. opendata_postal_codes = [x.postal_code for x in opendata]
  189. opendata_insee_codes = [x.insee_code for x in opendata]
  190. for postal_code in constraint["postal_codes"]:
  191. assert postal_code in opendata_postal_codes # noqa: E501
  192. if "insee_codes" in constraint:
  193. for insee in constraint["insee_codes"]:
  194. assert insee in opendata_insee_codes # noqa: E501
  195. assert "area" in constraint
  196. _check_constraints_bounds(constraint["area"])
  197. assert "cost" in constraint
  198. _check_constraints_bounds(constraint["cost"])
  199. assert "rooms" in constraint
  200. _check_constraints_bounds(constraint["rooms"])
  201. assert "bedrooms" in constraint
  202. _check_constraints_bounds(constraint["bedrooms"])
  203. assert "time_to" in constraint
  204. assert isinstance(constraint["time_to"], dict)
  205. for name, item in constraint["time_to"].items():
  206. assert isinstance(name, str)
  207. assert "gps" in item
  208. assert isinstance(item["gps"], list)
  209. assert len(item["gps"]) == 2
  210. assert "time" in item
  211. _check_constraints_bounds(item["time"])
  212. if "mode" in item:
  213. TimeToModes[item["mode"]]
  214. return True
  215. except (AssertionError, KeyError):
  216. _, _, exc_traceback = sys.exc_info()
  217. return traceback.extract_tb(exc_traceback)[-1][-1]
  218. def load_config(args=None, check_with_data=True):
  219. """
  220. Load the configuration from file.
  221. :param args: An argparse args structure.
  222. :param check_with_data: Whether we should use the available OpenData to
  223. check the config values. Defaults to ``True``.
  224. :return: The loaded config dict.
  225. """
  226. LOGGER.info("Initializing configuration...")
  227. # Default configuration
  228. config_data = DEFAULT_CONFIG.copy()
  229. # Load config from specified JSON
  230. if args and getattr(args, "config", None):
  231. LOGGER.debug("Loading configuration from %s.", args.config)
  232. try:
  233. with open(args.config, "r") as fh:
  234. config_data.update(json.load(fh))
  235. except (IOError, ValueError) as exc:
  236. LOGGER.error(
  237. "Unable to load configuration from file, using default configuration: %s.",
  238. exc,
  239. )
  240. # Overload config with arguments
  241. if args and getattr(args, "passes", None) is not None:
  242. LOGGER.debug("Overloading number of passes from CLI arguments: %d.", args.passes)
  243. config_data["passes"] = args.passes
  244. if args and getattr(args, "max_entries", None) is not None:
  245. LOGGER.debug(
  246. "Overloading maximum number of entries from CLI arguments: %d.",
  247. args.max_entries,
  248. )
  249. config_data["max_entries"] = args.max_entries
  250. if args and getattr(args, "port", None) is not None:
  251. LOGGER.debug("Overloading web app port: %d.", args.port)
  252. config_data["port"] = args.port
  253. if args and getattr(args, "host", None) is not None:
  254. LOGGER.debug("Overloading web app host: %s.", args.host)
  255. config_data["host"] = str(args.host)
  256. # Handle data_directory option
  257. if args and getattr(args, "data_dir", None) is not None:
  258. LOGGER.debug("Overloading data directory from CLI arguments.")
  259. config_data["data_directory"] = args.data_dir
  260. elif config_data["data_directory"] is None:
  261. config_data["data_directory"] = appdirs.user_data_dir("flatisfy", "flatisfy")
  262. LOGGER.debug("Using default XDG data directory: %s.", config_data["data_directory"])
  263. if not os.path.isdir(config_data["data_directory"]):
  264. LOGGER.info(
  265. "Creating data directory according to config: %s",
  266. config_data["data_directory"],
  267. )
  268. os.makedirs(config_data["data_directory"])
  269. os.makedirs(os.path.join(config_data["data_directory"], "images"))
  270. if config_data["database"] is None:
  271. config_data["database"] = "sqlite:///" + os.path.join(config_data["data_directory"], "flatisfy.db")
  272. if config_data["search_index"] is None:
  273. config_data["search_index"] = os.path.join(config_data["data_directory"], "search_index")
  274. # Handle constraints filtering
  275. if args and getattr(args, "constraints", None) is not None:
  276. LOGGER.info(
  277. (
  278. "Filtering constraints from config according to CLI argument. "
  279. "Using only the following constraints: %s."
  280. ),
  281. args.constraints.replace(",", ", "),
  282. )
  283. constraints_filter = args.constraints.split(",")
  284. config_data["constraints"] = {k: v for k, v in config_data["constraints"].items() if k in constraints_filter}
  285. # Sanitize website url
  286. if config_data["website_url"] is not None:
  287. if config_data["website_url"][-1] != "/":
  288. config_data["website_url"] += "/"
  289. config_validation = validate_config(config_data, check_with_data)
  290. if config_validation is True:
  291. LOGGER.info("Config has been fully initialized.")
  292. return config_data
  293. LOGGER.error("Error in configuration: %s.", config_validation)
  294. return None
  295. def init_config(output=None):
  296. """
  297. Initialize an empty configuration file.
  298. :param output: File to output content to. Defaults to ``stdin``.
  299. """
  300. config_data = DEFAULT_CONFIG.copy()
  301. if output and output != "-":
  302. with open(output, "w") as fh:
  303. fh.write(tools.pretty_json(config_data))
  304. else:
  305. print(tools.pretty_json(config_data))