flatisfy/flatisfy/fetch.py

301 lines
9.4 KiB
Python

# coding: utf-8
"""
This module contains all the code related to fetching and loading flats lists.
"""
from __future__ import absolute_import, print_function, unicode_literals
import itertools
import json
import logging
from flatisfy import database
from flatisfy import tools
from flatisfy.models import flat as flat_model
LOGGER = logging.getLogger(__name__)
try:
from weboob.capabilities.housing import Query
from weboob.core.bcall import CallErrors
from weboob.core.ouiboube import WebNip
from weboob.tools.json import WeboobEncoder
except ImportError:
LOGGER.error("Weboob is not available on your system. Make sure you "
"installed it.")
raise
class WeboobProxy(object):
"""
Wrapper around Weboob ``WebNip`` class, to fetch housing posts without
having to spawn a subprocess.
"""
@staticmethod
def version():
"""
Get Weboob version.
:return: The installed Weboob version.
"""
return WebNip.VERSION
@staticmethod
def restore_decimal_fields(flat):
"""
Parse fields expected to be in Decimal type to float. They were dumped
as str in the JSON dump process.
:param flat: A flat dict.
:return: A flat dict with Decimal fields converted to float.
"""
for field in ["area", "cost", "rooms", "bedrooms", "price_per_meter"]:
try:
flat[field] = float(flat[field])
except (TypeError, ValueError):
flat[field] = None
except KeyError:
pass
return flat
def __init__(self, config):
"""
Create a Weboob handle and try to load the modules.
:param config: A config dict.
"""
# Default backends
if not config["backends"]:
backends = ["seloger", "pap", "leboncoin", "logicimmo",
"explorimmo", "entreparticuliers"]
else:
backends = config["backends"]
# Create base WebNip object
self.webnip = WebNip(modules_path=config["modules_path"])
# Create backends
self.backends = [
self.webnip.load_backend(
module,
module,
params={}
)
for module in backends
]
def __enter__(self):
return self
def __exit__(self, *args):
self.webnip.deinit()
def build_queries(self, constraints_dict):
"""
Build Weboob ``weboob.capabilities.housing.Query`` objects from the
constraints defined in the configuration. Each query has at most 3
postal codes, to comply with housing websites limitations.
:param constraints_dict: A dictionary of constraints, as defined in the
config.
:return: A list of Weboob ``weboob.capabilities.housing.Query``
objects. Returns ``None`` if an error occurred.
"""
queries = []
for postal_codes in tools.batch(constraints_dict["postal_codes"], 3):
query = Query()
query.cities = []
for postal_code in postal_codes:
matching_cities = []
try:
for city in self.webnip.do("search_city", postal_code):
matching_cities.append(city)
except CallErrors as exc:
# If an error occured, just log it
LOGGER.error(
(
"An error occured while building query for "
"postal code %s: %s"
),
postal_code,
str(exc)
)
if not matching_cities:
# If postal code gave no match, warn the user
LOGGER.warn(
"Postal code %s could not be matched with a city.",
postal_code
)
# Append the matched cities to the query
for city in matching_cities:
query.cities.append(city)
try:
query.house_types = [
getattr(
Query.HOUSE_TYPES,
house_type.upper()
)
for house_type in constraints_dict["house_types"]
]
except AttributeError:
LOGGER.error("Invalid house types constraint.")
return None
try:
query.type = getattr(
Query,
"TYPE_{}".format(constraints_dict["type"].upper())
)
except AttributeError:
LOGGER.error("Invalid post type constraint.")
return None
query.area_min = constraints_dict["area"][0]
query.area_max = constraints_dict["area"][1]
query.cost_min = constraints_dict["cost"][0]
query.cost_max = constraints_dict["cost"][1]
query.nb_rooms = constraints_dict["rooms"][0]
queries.append(query)
return queries
def query(self, query, max_entries=None):
"""
Fetch the housings posts matching a given Weboob query.
:param query: A Weboob `weboob.capabilities.housing.Query`` object.
:param max_entries: Maximum number of entries to fetch.
:return: The matching housing posts, dumped as a list of JSON objects.
"""
housings = []
# TODO: Handle max_entries better
try:
for housing in itertools.islice(
self.webnip.do('search_housings', query),
max_entries
):
housings.append(json.dumps(housing, cls=WeboobEncoder))
except CallErrors as exc:
# If an error occured, just log it
LOGGER.error(
"An error occured while fetching the housing posts: %s",
str(exc)
)
return housings
def info(self, full_flat_id):
"""
Get information (details) about an housing post.
:param full_flat_id: A Weboob housing post id, in complete form
(ID@BACKEND)
:return: The details in JSON.
"""
flat_id, backend_name = full_flat_id.rsplit("@", 1)
try:
backend = next(
backend
for backend in self.backends
if backend.name == backend_name
)
except StopIteration:
LOGGER.error("Backend %s is not available.", backend_name)
return "{}"
try:
housing = backend.get_housing(flat_id)
# Otherwise, we miss the @backend afterwards
housing.id = full_flat_id
return json.dumps(housing, cls=WeboobEncoder)
except CallErrors as exc:
# If an error occured, just log it
LOGGER.error(
"An error occured while fetching housing %s: %s",
full_flat_id,
str(exc)
)
def fetch_flats_list(config):
"""
Fetch the available flats using the Flatboob / Weboob config.
:param config: A config dict.
:return: A list of all available flats.
"""
flats_list = []
with WeboobProxy(config) as weboob_proxy:
LOGGER.info("Loading flats...")
queries = weboob_proxy.build_queries(config["constraints"])
housing_posts = []
for query in queries:
housing_posts.extend(
weboob_proxy.query(query, config["max_entries"])
)
LOGGER.info("Fetched %d flats.", len(housing_posts))
flats_list = [json.loads(flat) for flat in housing_posts]
flats_list = [WeboobProxy.restore_decimal_fields(flat)
for flat in flats_list]
return flats_list
def fetch_details(config, flat_id):
"""
Fetch the additional details for a flat using Flatboob / Weboob.
:param config: A config dict.
:param flat_id: ID of the flat to fetch details for.
:return: A flat dict with all the available data.
"""
with WeboobProxy(config) as weboob_proxy:
LOGGER.info("Loading additional details for flat %s.", flat_id)
weboob_output = weboob_proxy.info(flat_id)
flat_details = json.loads(weboob_output)
flat_details = WeboobProxy.restore_decimal_fields(flat_details)
LOGGER.info("Fetched details for flat %s.", flat_id)
return flat_details
def load_flats_list_from_file(json_file):
"""
Load a dumped flats list from JSON file.
:param json_file: The file to load housings list from.
:return: A list of all the flats in the dump file.
"""
flats_list = []
try:
LOGGER.info("Loading flats list from file %s", json_file)
with open(json_file, "r") as fh:
flats_list = json.load(fh)
LOGGER.info("Found %d flats.", len(flats_list))
except (IOError, ValueError):
LOGGER.error("File %s is not a valid dump file.", json_file)
return flats_list
def load_flats_list_from_db(config):
"""
Load flats from database.
:param config: A config dict.
:return: A list of all the flats in the database.
"""
flats_list = []
get_session = database.init_db(config["database"], config["search_index"])
with get_session() as session:
# TODO: Better serialization
flats_list = [flat.json_api_repr()
for flat in session.query(flat_model.Flat).all()]
return flats_list