Flatisfy is your new companion to ease your search for new housing :)

fetch.py 11KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309
  1. # coding: utf-8
  2. """
  3. This module contains all the code related to fetching and loading flats lists.
  4. """
  5. from __future__ import absolute_import, print_function, unicode_literals
  6. from builtins import str
  7. import collections
  8. import itertools
  9. import json
  10. import logging
  11. from flatisfy import database
  12. from flatisfy import tools
  13. from flatisfy.constants import BACKENDS_BY_PRECEDENCE
  14. from flatisfy.models import flat as flat_model
  15. LOGGER = logging.getLogger(__name__)
  16. try:
  17. from weboob.capabilities.housing import Query, POSTS_TYPES, HOUSE_TYPES
  18. from weboob.core.bcall import CallErrors
  19. from weboob.core.ouiboube import WebNip
  20. from weboob.tools.json import WeboobEncoder
  21. except ImportError:
  22. LOGGER.error("Weboob is not available on your system. Make sure you installed it.")
  23. raise
  24. class WebOOBProxy(object):
  25. """
  26. Wrapper around WebOOB ``WebNip`` class, to fetch housing posts without
  27. having to spawn a subprocess.
  28. """
  29. @staticmethod
  30. def version():
  31. """
  32. Get WebOOB version.
  33. :return: The installed WebOOB version.
  34. """
  35. return WebNip.VERSION
  36. @staticmethod
  37. def restore_decimal_fields(flat):
  38. """
  39. Parse fields expected to be in Decimal type to float. They were dumped
  40. as str in the JSON dump process.
  41. :param flat: A flat dict.
  42. :return: A flat dict with Decimal fields converted to float.
  43. """
  44. for field in ["area", "cost", "rooms", "bedrooms", "price_per_meter"]:
  45. try:
  46. flat[field] = float(flat[field])
  47. except (TypeError, ValueError):
  48. flat[field] = None
  49. except KeyError:
  50. pass
  51. return flat
  52. def __init__(self, config):
  53. """
  54. Create a WebOOB handle and try to load the modules.
  55. :param config: A config dict.
  56. """
  57. # Default backends
  58. if not config["backends"]:
  59. backends = BACKENDS_BY_PRECEDENCE
  60. else:
  61. backends = config["backends"]
  62. # Create base WebNip object
  63. self.webnip = WebNip(modules_path=config["modules_path"])
  64. # Create backends
  65. self.backends = [self.webnip.load_backend(module, module, params={}) for module in backends]
  66. def __enter__(self):
  67. return self
  68. def __exit__(self, *args):
  69. self.webnip.deinit()
  70. def build_queries(self, constraints_dict):
  71. """
  72. Build WebOOB ``weboob.capabilities.housing.Query`` objects from the
  73. constraints defined in the configuration. Each query has at most 3
  74. cities, to comply with housing websites limitations.
  75. :param constraints_dict: A dictionary of constraints, as defined in the
  76. config.
  77. :return: A list of WebOOB ``weboob.capabilities.housing.Query``
  78. objects. Returns ``None`` if an error occurred.
  79. """
  80. queries = []
  81. # First, find all matching cities for the postal codes in constraints
  82. matching_cities = []
  83. for postal_code in constraints_dict["postal_codes"]:
  84. try:
  85. for city in self.webnip.do("search_city", postal_code):
  86. matching_cities.append(city)
  87. except CallErrors as exc:
  88. # If an error occured, just log it
  89. LOGGER.error(
  90. ("An error occured while building query for postal code %s: %s"),
  91. postal_code,
  92. str(exc),
  93. )
  94. if not matching_cities:
  95. # If postal code gave no match, warn the user
  96. LOGGER.warn("Postal code %s could not be matched with a city.", postal_code)
  97. # Remove "TOUTES COMMUNES" entry which are duplicates of the individual
  98. # cities entries in Logicimmo module.
  99. matching_cities = [
  100. city
  101. for city in matching_cities
  102. if not (city.backend == "logicimmo" and city.name.startswith("TOUTES COMMUNES"))
  103. ]
  104. # Then, build queries by grouping cities by at most 3
  105. for cities_batch in tools.batch(matching_cities, 3):
  106. query = Query()
  107. query.cities = list(cities_batch)
  108. try:
  109. query.house_types = [
  110. getattr(HOUSE_TYPES, house_type.upper()) for house_type in constraints_dict["house_types"]
  111. ]
  112. except AttributeError:
  113. LOGGER.error("Invalid house types constraint.")
  114. return None
  115. try:
  116. query.type = getattr(POSTS_TYPES, constraints_dict["type"].upper())
  117. except AttributeError:
  118. LOGGER.error("Invalid post type constraint.")
  119. return None
  120. query.area_min = constraints_dict["area"][0]
  121. query.area_max = constraints_dict["area"][1]
  122. query.cost_min = constraints_dict["cost"][0]
  123. query.cost_max = constraints_dict["cost"][1]
  124. query.nb_rooms = constraints_dict["rooms"][0]
  125. queries.append(query)
  126. return queries
  127. def query(self, query, max_entries=None, store_personal_data=False):
  128. """
  129. Fetch the housings posts matching a given WebOOB query.
  130. :param query: A WebOOB `weboob.capabilities.housing.Query`` object.
  131. :param max_entries: Maximum number of entries to fetch.
  132. :param store_personal_data: Whether personal data should be fetched
  133. from housing posts (phone number etc).
  134. :return: The matching housing posts, dumped as a list of JSON objects.
  135. """
  136. housings = []
  137. # List the useful backends for this specific query
  138. useful_backends = [x.backend for x in query.cities]
  139. # TODO: Handle max_entries better
  140. try:
  141. for housing in itertools.islice(
  142. self.webnip.do(
  143. "search_housings",
  144. query,
  145. # Only run the call on the required backends.
  146. # Otherwise, WebOOB is doing weird stuff and returning
  147. # nonsense.
  148. backends=[x for x in self.backends if x.name in useful_backends],
  149. ),
  150. max_entries,
  151. ):
  152. if not store_personal_data:
  153. housing.phone = None
  154. housings.append(json.dumps(housing, cls=WeboobEncoder))
  155. except CallErrors as exc:
  156. # If an error occured, just log it
  157. LOGGER.error("An error occured while fetching the housing posts: %s", str(exc))
  158. return housings
  159. def info(self, full_flat_id, store_personal_data=False):
  160. """
  161. Get information (details) about an housing post.
  162. :param full_flat_id: A WebOOB housing post id, in complete form
  163. (ID@BACKEND)
  164. :param store_personal_data: Whether personal data should be fetched
  165. from housing posts (phone number etc).
  166. :return: The details in JSON.
  167. """
  168. flat_id, backend_name = full_flat_id.rsplit("@", 1)
  169. try:
  170. backend = next(backend for backend in self.backends if backend.name == backend_name)
  171. except StopIteration:
  172. LOGGER.error("Backend %s is not available.", backend_name)
  173. return "{}"
  174. try:
  175. housing = backend.get_housing(flat_id)
  176. if not store_personal_data:
  177. # Ensure phone is cleared
  178. housing.phone = None
  179. else:
  180. # Ensure phone is fetched
  181. backend.fillobj(housing, "phone")
  182. # Otherwise, we miss the @backend afterwards
  183. housing.id = full_flat_id
  184. return json.dumps(housing, cls=WeboobEncoder)
  185. except Exception as exc: # pylint: disable=broad-except
  186. # If an error occured, just log it
  187. LOGGER.error("An error occured while fetching housing %s: %s", full_flat_id, str(exc))
  188. return "{}"
  189. def fetch_flats(config):
  190. """
  191. Fetch the available flats using the Flatboob / WebOOB config.
  192. :param config: A config dict.
  193. :return: A dict mapping constraint in config to all available matching
  194. flats.
  195. """
  196. fetched_flats = {}
  197. for constraint_name, constraint in config["constraints"].items():
  198. LOGGER.info("Loading flats for constraint %s...", constraint_name)
  199. with WebOOBProxy(config) as webOOB_proxy:
  200. queries = webOOB_proxy.build_queries(constraint)
  201. housing_posts = []
  202. for query in queries:
  203. housing_posts.extend(webOOB_proxy.query(query, config["max_entries"], config["store_personal_data"]))
  204. housing_posts = housing_posts[: config["max_entries"]]
  205. LOGGER.info("Fetched %d flats.", len(housing_posts))
  206. constraint_flats_list = [json.loads(flat) for flat in housing_posts]
  207. constraint_flats_list = [WebOOBProxy.restore_decimal_fields(flat) for flat in constraint_flats_list]
  208. fetched_flats[constraint_name] = constraint_flats_list
  209. return fetched_flats
  210. def fetch_details(config, flat_id):
  211. """
  212. Fetch the additional details for a flat using Flatboob / WebOOB.
  213. :param config: A config dict.
  214. :param flat_id: ID of the flat to fetch details for.
  215. :return: A flat dict with all the available data.
  216. """
  217. with WebOOBProxy(config) as webOOB_proxy:
  218. LOGGER.info("Loading additional details for flat %s.", flat_id)
  219. webOOB_output = webOOB_proxy.info(flat_id, config["store_personal_data"])
  220. flat_details = json.loads(webOOB_output)
  221. flat_details = WebOOBProxy.restore_decimal_fields(flat_details)
  222. LOGGER.info("Fetched details for flat %s.", flat_id)
  223. return flat_details
  224. def load_flats_from_file(json_file, config):
  225. """
  226. Load a dumped flats list from JSON file.
  227. :param json_file: The file to load housings list from.
  228. :return: A dict mapping constraint in config to all available matching
  229. flats.
  230. .. note::
  231. As we do not know which constraint is met by a given flat, all the
  232. flats are returned for any available constraint, and they will be
  233. filtered out afterwards.
  234. """
  235. flats_list = []
  236. try:
  237. LOGGER.info("Loading flats list from file %s", json_file)
  238. with open(json_file, "r") as fh:
  239. flats_list = json.load(fh)
  240. LOGGER.info("Found %d flats.", len(flats_list))
  241. except (IOError, ValueError):
  242. LOGGER.error("File %s is not a valid dump file.", json_file)
  243. return {constraint_name: flats_list for constraint_name in config["constraints"]}
  244. def load_flats_from_db(config):
  245. """
  246. Load flats from database.
  247. :param config: A config dict.
  248. :return: A dict mapping constraint in config to all available matching
  249. flats.
  250. """
  251. get_session = database.init_db(config["database"], config["search_index"])
  252. loaded_flats = collections.defaultdict(list)
  253. with get_session() as session:
  254. for flat in session.query(flat_model.Flat).all():
  255. loaded_flats[flat.flatisfy_constraint].append(flat.json_api_repr())
  256. return loaded_flats