flatisfy/flatisfy/fetch.py
Phyks (Lucas Verney) 7fd44b7e8f No longer make queries with more than three cities
Some postal codes correspond to multiple cities, then sometimes we were
spawning queries with more than three cities. This should no longer be
the case. This fixes https://github.com/Phyks/Flatisfy/issues/10.
2017-10-24 15:50:19 -04:00

315 lines
10 KiB
Python

# coding: utf-8
"""
This module contains all the code related to fetching and loading flats lists.
"""
from __future__ import absolute_import, print_function, unicode_literals
import collections
import itertools
import json
import logging
from flatisfy import database
from flatisfy import tools
from flatisfy.models import flat as flat_model
LOGGER = logging.getLogger(__name__)
try:
from weboob.capabilities.housing import Query
from weboob.core.bcall import CallErrors
from weboob.core.ouiboube import WebNip
from weboob.tools.json import WeboobEncoder
except ImportError:
LOGGER.error("Weboob is not available on your system. Make sure you "
"installed it.")
raise
class WeboobProxy(object):
"""
Wrapper around Weboob ``WebNip`` class, to fetch housing posts without
having to spawn a subprocess.
"""
@staticmethod
def version():
"""
Get Weboob version.
:return: The installed Weboob version.
"""
return WebNip.VERSION
@staticmethod
def restore_decimal_fields(flat):
"""
Parse fields expected to be in Decimal type to float. They were dumped
as str in the JSON dump process.
:param flat: A flat dict.
:return: A flat dict with Decimal fields converted to float.
"""
for field in ["area", "cost", "rooms", "bedrooms", "price_per_meter"]:
try:
flat[field] = float(flat[field])
except (TypeError, ValueError):
flat[field] = None
except KeyError:
pass
return flat
def __init__(self, config):
"""
Create a Weboob handle and try to load the modules.
:param config: A config dict.
"""
# Default backends
if not config["backends"]:
backends = ["foncia", "seloger", "pap", "leboncoin", "logicimmo",
"explorimmo", "entreparticuliers"]
else:
backends = config["backends"]
# Create base WebNip object
self.webnip = WebNip(modules_path=config["modules_path"])
# Create backends
self.backends = [
self.webnip.load_backend(
module,
module,
params={}
)
for module in backends
]
def __enter__(self):
return self
def __exit__(self, *args):
self.webnip.deinit()
def build_queries(self, constraints_dict):
"""
Build Weboob ``weboob.capabilities.housing.Query`` objects from the
constraints defined in the configuration. Each query has at most 3
cities, to comply with housing websites limitations.
:param constraints_dict: A dictionary of constraints, as defined in the
config.
:return: A list of Weboob ``weboob.capabilities.housing.Query``
objects. Returns ``None`` if an error occurred.
"""
queries = []
# First, find all matching cities for the postal codes in constraints
matching_cities = []
for postal_code in constraints_dict["postal_codes"]:
try:
for city in self.webnip.do("search_city", postal_code):
matching_cities.append(city)
except CallErrors as exc:
# If an error occured, just log it
LOGGER.error(
(
"An error occured while building query for "
"postal code %s: %s"
),
postal_code,
str(exc)
)
if not matching_cities:
# If postal code gave no match, warn the user
LOGGER.warn(
"Postal code %s could not be matched with a city.",
postal_code
)
# Then, build queries by grouping cities by at most 3
for cities_batch in tools.batch(matching_cities, 3):
query = Query()
query.cities = list(cities_batch)
try:
query.house_types = [
getattr(
Query.HOUSE_TYPES,
house_type.upper()
)
for house_type in constraints_dict["house_types"]
]
except AttributeError:
LOGGER.error("Invalid house types constraint.")
return None
try:
query.type = getattr(
Query,
"TYPE_{}".format(constraints_dict["type"].upper())
)
except AttributeError:
LOGGER.error("Invalid post type constraint.")
return None
query.area_min = constraints_dict["area"][0]
query.area_max = constraints_dict["area"][1]
query.cost_min = constraints_dict["cost"][0]
query.cost_max = constraints_dict["cost"][1]
query.nb_rooms = constraints_dict["rooms"][0]
queries.append(query)
return queries
def query(self, query, max_entries=None):
"""
Fetch the housings posts matching a given Weboob query.
:param query: A Weboob `weboob.capabilities.housing.Query`` object.
:param max_entries: Maximum number of entries to fetch.
:return: The matching housing posts, dumped as a list of JSON objects.
"""
housings = []
# TODO: Handle max_entries better
try:
for housing in itertools.islice(
self.webnip.do('search_housings', query),
max_entries
):
housings.append(json.dumps(housing, cls=WeboobEncoder))
except CallErrors as exc:
# If an error occured, just log it
LOGGER.error(
"An error occured while fetching the housing posts: %s",
str(exc)
)
return housings
def info(self, full_flat_id):
"""
Get information (details) about an housing post.
:param full_flat_id: A Weboob housing post id, in complete form
(ID@BACKEND)
:return: The details in JSON.
"""
flat_id, backend_name = full_flat_id.rsplit("@", 1)
try:
backend = next(
backend
for backend in self.backends
if backend.name == backend_name
)
except StopIteration:
LOGGER.error("Backend %s is not available.", backend_name)
return "{}"
try:
housing = backend.get_housing(flat_id)
# Otherwise, we miss the @backend afterwards
housing.id = full_flat_id
return json.dumps(housing, cls=WeboobEncoder)
except Exception as exc: # pylint: disable=broad-except
# If an error occured, just log it
LOGGER.error(
"An error occured while fetching housing %s: %s",
full_flat_id,
str(exc)
)
return "{}"
def fetch_flats(config):
"""
Fetch the available flats using the Flatboob / Weboob config.
:param config: A config dict.
:return: A dict mapping constraint in config to all available matching
flats.
"""
fetched_flats = {}
for constraint_name, constraint in config["constraints"].items():
LOGGER.info("Loading flats for constraint %s...", constraint_name)
with WeboobProxy(config) as weboob_proxy:
queries = weboob_proxy.build_queries(constraint)
housing_posts = []
for query in queries:
housing_posts.extend(
weboob_proxy.query(query, config["max_entries"])
)
LOGGER.info("Fetched %d flats.", len(housing_posts))
constraint_flats_list = [json.loads(flat) for flat in housing_posts]
constraint_flats_list = [WeboobProxy.restore_decimal_fields(flat)
for flat in constraint_flats_list]
fetched_flats[constraint_name] = constraint_flats_list
return fetched_flats
def fetch_details(config, flat_id):
"""
Fetch the additional details for a flat using Flatboob / Weboob.
:param config: A config dict.
:param flat_id: ID of the flat to fetch details for.
:return: A flat dict with all the available data.
"""
with WeboobProxy(config) as weboob_proxy:
LOGGER.info("Loading additional details for flat %s.", flat_id)
weboob_output = weboob_proxy.info(flat_id)
flat_details = json.loads(weboob_output)
flat_details = WeboobProxy.restore_decimal_fields(flat_details)
LOGGER.info("Fetched details for flat %s.", flat_id)
return flat_details
def load_flats_from_file(json_file, config):
"""
Load a dumped flats list from JSON file.
:param json_file: The file to load housings list from.
:return: A dict mapping constraint in config to all available matching
flats.
.. note::
As we do not know which constraint is met by a given flat, all the
flats are returned for any available constraint, and they will be
filtered out afterwards.
"""
flats_list = []
try:
LOGGER.info("Loading flats list from file %s", json_file)
with open(json_file, "r") as fh:
flats_list = json.load(fh)
LOGGER.info("Found %d flats.", len(flats_list))
except (IOError, ValueError):
LOGGER.error("File %s is not a valid dump file.", json_file)
return {
constraint_name: flats_list
for constraint_name in config["constraints"]
}
def load_flats_from_db(config):
"""
Load flats from database.
:param config: A config dict.
:return: A dict mapping constraint in config to all available matching
flats.
"""
get_session = database.init_db(config["database"], config["search_index"])
loaded_flats = collections.defaultdict(list)
with get_session() as session:
for flat in session.query(flat_model.Flat).all():
loaded_flats[flat.flatisfy_constraint].append(flat.json_api_repr())
return loaded_flats