2017-04-03 17:29:29 +02:00
|
|
|
# coding: utf-8
|
|
|
|
"""
|
|
|
|
Main commands available for flatisfy.
|
|
|
|
"""
|
|
|
|
from __future__ import absolute_import, print_function, unicode_literals
|
|
|
|
|
2017-04-27 17:08:10 +02:00
|
|
|
import collections
|
2017-04-13 23:24:31 +02:00
|
|
|
import logging
|
|
|
|
|
2017-04-03 17:29:29 +02:00
|
|
|
import flatisfy.filters
|
|
|
|
from flatisfy import database
|
2017-06-13 18:19:16 +02:00
|
|
|
from flatisfy import email
|
2017-04-03 17:29:29 +02:00
|
|
|
from flatisfy.models import flat as flat_model
|
2017-06-15 15:48:16 +02:00
|
|
|
from flatisfy.models import postal_code as postal_code_model
|
|
|
|
from flatisfy.models import public_transport as public_transport_model
|
2017-04-03 17:29:29 +02:00
|
|
|
from flatisfy import fetch
|
|
|
|
from flatisfy import tools
|
2017-04-27 17:08:10 +02:00
|
|
|
from flatisfy.filters import metadata
|
2017-04-03 17:29:29 +02:00
|
|
|
from flatisfy.web import app as web_app
|
|
|
|
|
|
|
|
|
2017-04-13 23:24:31 +02:00
|
|
|
# Module-level logger namespaced to this module (standard logging convention).
LOGGER = logging.getLogger(__name__)
|
|
|
|
|
|
|
|
|
2017-06-16 16:21:13 +02:00
|
|
|
def filter_flats_list(config, constraint_name, flats_list, fetch_details=True):
    """
    Filter the available flats list. Then, filter it according to criteria.

    :param config: A config dict.
    :param constraint_name: The constraint name that the ``flats_list`` should
        satisfy.
    :param fetch_details: Whether additional details should be fetched between
        the two passes.
    :param flats_list: The initial list of flat objects to filter.
    :return: A dict mapping flat status and list of flat objects.
    """
    # Attach the flatisfy metadata entry to every flat before filtering.
    flats_list = metadata.init(flats_list, constraint_name)

    # Look up the constraint to apply; without it we cannot filter at all.
    try:
        constraint = config["constraints"][constraint_name]
    except KeyError:
        LOGGER.error(
            "Missing constraint %s. Skipping filtering for these posts.",
            constraint_name
        )
        return {
            "new": [],
            "duplicate": [],
            "ignored": []
        }

    passes = config["passes"]
    pass1_result = collections.defaultdict(list)
    pass2_result = collections.defaultdict(list)
    pass3_result = collections.defaultdict(list)

    # First pass: use only the readily available infos to discard as many
    # unwanted postings as possible.
    if passes > 0:
        pass1_result = flatisfy.filters.first_pass(flats_list,
                                                   constraint,
                                                   config)
    else:
        pass1_result["new"] = flats_list

    # Optionally enrich the surviving flats with per-flat details fetched
    # from the backends, merged on top of the existing dicts.
    if fetch_details:
        for idx, flat in enumerate(pass1_result["new"]):
            extra_infos = fetch.fetch_details(config, flat["id"])
            pass1_result["new"][idx] = tools.merge_dicts(flat, extra_infos)

    # Second pass: consolidate everything and exploit the additional infos.
    if passes > 1:
        pass2_result = flatisfy.filters.second_pass(
            pass1_result["new"], constraint, config
        )
    else:
        pass2_result["new"] = pass1_result["new"]

    # Third pass: improved deduplication.
    if passes > 2:
        pass3_result = flatisfy.filters.third_pass(
            pass2_result["new"], config
        )
    else:
        pass3_result["new"] = pass2_result["new"]

    # Aggregate the rejects from every pass; "new" is whatever survived the
    # last pass that actually ran.
    return {
        "new": pass3_result["new"],
        "duplicate": (
            pass1_result["duplicate"] +
            pass2_result["duplicate"] +
            pass3_result["duplicate"]
        ),
        "ignored": (
            pass1_result["ignored"] +
            pass2_result["ignored"] +
            pass3_result["ignored"]
        )
    }
|
2017-04-03 17:29:29 +02:00
|
|
|
|
|
|
|
|
2017-06-16 16:21:13 +02:00
|
|
|
def filter_fetched_flats(config, fetched_flats, fetch_details=True):
    """
    Filter the available flats list. Then, filter it according to criteria.

    :param config: A config dict.
    :param fetch_details: Whether additional details should be fetched between
        the two passes.
    :param fetched_flats: The initial dict mapping constraints to the list of
        fetched flat objects to filter.
    :return: A dict mapping constraints to a dict mapping flat status and list
        of flat objects.
    """
    # Replace each constraint's raw flats list with its filtered result,
    # in place (iterate over a snapshot of the keys since values are
    # reassigned during the loop).
    for constraint_name in list(fetched_flats):
        fetched_flats[constraint_name] = filter_flats_list(
            config,
            constraint_name,
            fetched_flats[constraint_name],
            fetch_details
        )
    return fetched_flats
|
|
|
|
|
|
|
|
|
2017-04-27 16:37:39 +02:00
|
|
|
def import_and_filter(config, load_from_db=False):
    """
    Fetch the available flats list. Then, filter it according to criteria.
    Finally, store it in the database.

    Flats already present in the database are merged (UPDATE) rather than
    re-inserted, and a user-set status is never overwritten by an automated
    one. Optionally sends an email notification listing the new flats.

    :param config: A config dict.
    :param load_from_db: Whether to load flats from database or fetch them
        using Weboob.
    :return: ``None``.
    """
    # Fetch and filter flats list
    if load_from_db:
        fetched_flats = fetch.load_flats_from_db(config)
    else:
        fetched_flats = fetch.fetch_flats(config)
    # Do not fetch additional details if we loaded data from the db.
    flats_by_status = filter_fetched_flats(config, fetched_flats=fetched_flats,
                                           fetch_details=(not load_from_db))
    # Create database connection
    get_session = database.init_db(config["database"], config["search_index"])

    # Collect flats ending up with "new" status, for the email notification.
    new_flats = []

    LOGGER.info("Merging fetched flats in database...")
    # Flatten the flats_by_status dict: drop the constraint level and group
    # all flats by status only.
    flatten_flats_by_status = collections.defaultdict(list)
    for flats in flats_by_status.values():
        for status, flats_list in flats.items():
            flatten_flats_by_status[status].extend(flats_list)

    with get_session() as session:
        for status, flats_list in flatten_flats_by_status.items():
            # Build SQLAlchemy Flat model objects for every available flat
            flats_objects = {
                flat_dict["id"]: flat_model.Flat.from_dict(flat_dict)
                for flat_dict in flats_list
            }

            if flats_objects:
                # If there are some flats, try to merge them with the ones in
                # db
                existing_flats_queries = session.query(flat_model.Flat).filter(
                    flat_model.Flat.id.in_(flats_objects.keys())
                )
                for each in existing_flats_queries.all():
                    # For each flat to merge, take care not to overwrite the
                    # status if the user defined it
                    flat_object = flats_objects[each.id]
                    if each.status in flat_model.AUTOMATED_STATUSES:
                        # Stored status was set automatically: safe to replace
                        # it with the freshly computed one.
                        flat_object.status = getattr(
                            flat_model.FlatStatus, status
                        )
                    else:
                        # User-defined status: keep it.
                        flat_object.status = each.status
                    # For each flat already in the db, merge it (UPDATE)
                    # instead of adding it
                    session.merge(flats_objects.pop(each.id))

            # For any other flat, it is not already in the database, so we can
            # just set the status field without worrying
            for flat in flats_objects.values():
                flat.status = getattr(flat_model.FlatStatus, status)
                if flat.status == flat_model.FlatStatus.new:
                    new_flats.append(flat)

            # Remaining objects were popped out of the merge loop above, so
            # these are all genuine inserts.
            session.add_all(flats_objects.values())

    if config["send_email"]:
        email.send_notification(config, new_flats)

    LOGGER.info("Done!")
|
2017-04-03 17:29:29 +02:00
|
|
|
|
|
|
|
|
2017-04-13 23:24:31 +02:00
|
|
|
def purge_db(config):
    """
    Purge the database.

    :param config: A config dict.
    :return: ``None``
    """
    get_session = database.init_db(config["database"], config["search_index"])

    with get_session() as session:
        # Delete every flat in the db
        LOGGER.info("Purge all flats from the database.")
        for flat_object in session.query(flat_model.Flat).all():
            # Use (slower) deletion by object, to ensure whoosh index is
            # updated
            session.delete(flat_object)

        # Postal codes and public transport stops carry no search index,
        # so a bulk DELETE is fine here.
        LOGGER.info("Purge all postal codes from the database.")
        session.query(postal_code_model.PostalCode).delete()

        LOGGER.info("Purge all public transportations from the database.")
        session.query(public_transport_model.PublicTransport).delete()
|
2017-04-13 23:24:31 +02:00
|
|
|
|
|
|
|
|
2017-04-03 17:29:29 +02:00
|
|
|
def serve(config):
    """
    Serve the web app.

    :param config: A config dict.
    :return: ``None``, long-running process.
    """
    app = web_app.get_app(config)

    # Fall back to the quiet default server when none is configured: Bottle
    # is used with Canister for standard logging, so the stock WSGIRef
    # request logging would be redundant noise.
    server = config.get("webserver", None) or web_app.QuietWSGIRefServer

    app.run(host=config["host"], port=config["port"], server=server)
|