Refilter command and backends in config
* Add a refilter command * Add a backend option in config to only enable some backends.
This commit is contained in:
parent
18ef841672
commit
1d98c631e0
@ -32,9 +32,11 @@ The available commands are:
|
||||
|
||||
* `init-config` to generate an empty configuration file, either on `stdout`
|
||||
or in the specified file.
|
||||
* `build-data` to rebuild OpenData datasets.
|
||||
* `fetch` to load and filter housing posts and output a JSON dump.
|
||||
* `filter` to filter a previously fetched list of housings posts, provided as
|
||||
a JSON dump.
|
||||
* `filter` to re-filter the flats in the database (and update their status)
|
||||
according to changes in config. It can also filter a previously fetched list
|
||||
of housing posts, provided as a JSON dump (with the `--input` argument).
|
||||
* `import` to import and filter housing posts into the database.
|
||||
* `serve` to serve the built-in webapp with the development server. Do not use
|
||||
in production.
|
||||
|
@ -11,6 +11,7 @@ import sys
|
||||
import flatisfy.config
|
||||
from flatisfy import cmds
|
||||
from flatisfy import data
|
||||
from flatisfy import fetch
|
||||
from flatisfy import tools
|
||||
|
||||
|
||||
@ -76,14 +77,18 @@ def parse_args(argv=None):
|
||||
help="Fetch housings posts")
|
||||
|
||||
# Filter subcommand parser
|
||||
parser_filter = subparsers.add_parser("filter", parents=[parent_parser],
|
||||
help=(
|
||||
"Filter housings posts. No "
|
||||
"fetching of additional infos "
|
||||
"is done."))
|
||||
parser_filter = subparsers.add_parser(
|
||||
"filter", parents=[parent_parser],
|
||||
help="Filter housings posts according to constraints in config."
|
||||
)
|
||||
parser_filter.add_argument(
|
||||
"input",
|
||||
help="JSON dump of the housings post to filter."
|
||||
"--input",
|
||||
help=(
|
||||
"Optional JSON dump of the housings post to filter. If provided, "
|
||||
"no additional fetching of infos is done, and the script outputs "
|
||||
"a filtered JSON dump on stdout. If not provided, update status "
|
||||
"of the flats in the database."
|
||||
)
|
||||
)
|
||||
|
||||
# Import subcommand parser
|
||||
@ -149,7 +154,9 @@ def main():
|
||||
# Fetch command
|
||||
if args.cmd == "fetch":
|
||||
# Fetch and filter flats list
|
||||
flats_list, _ = cmds.fetch_and_filter(config)
|
||||
flats_list = fetch.fetch_flats_list(config)
|
||||
flats_list, _ = cmds.filter_flats(config, flats_list=flats_list,
|
||||
fetch_details=True)
|
||||
# Sort by cost
|
||||
flats_list = tools.sort_list_of_dicts_by(flats_list, "cost")
|
||||
|
||||
@ -159,18 +166,26 @@ def main():
|
||||
# Filter command
|
||||
elif args.cmd == "filter":
|
||||
# Load and filter flats list
|
||||
flats_list = cmds.load_and_filter(args.input, config)
|
||||
if args.input:
|
||||
flats_list = fetch.load_flats_list_from_file(args.input)
|
||||
|
||||
flats_list, _ = cmds.filter_flats(config, flats_list=flats_list,
|
||||
fetch_details=False)
|
||||
|
||||
# Sort by cost
|
||||
flats_list = tools.sort_list_of_dicts_by(flats_list, "cost")
|
||||
|
||||
# Output to stdout
|
||||
print(
|
||||
tools.pretty_json(flats_list)
|
||||
)
|
||||
else:
|
||||
cmds.import_and_filter(config, load_from_db=True)
|
||||
# Import command
|
||||
elif args.cmd == "import":
|
||||
# TODO: Do not fetch details for already imported flats / use the last
|
||||
# timestamp
|
||||
cmds.import_and_filter(config)
|
||||
cmds.import_and_filter(config, load_from_db=False)
|
||||
# Purge command
|
||||
elif args.cmd == "purge":
|
||||
cmds.purge_db(config)
|
||||
|
@ -17,18 +17,17 @@ from flatisfy.web import app as web_app
|
||||
LOGGER = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def fetch_and_filter(config):
|
||||
def filter_flats(config, flats_list=None, fetch_details=True):
|
||||
"""
|
||||
Fetch the available flats list. Then, filter it according to criteria.
|
||||
Filter the available flats list. Then, filter it according to criteria.
|
||||
|
||||
:param config: A config dict.
|
||||
:param fetch_details: Whether additional details should be fetched between
|
||||
the two passes.
|
||||
:param flats_list: The initial list of flat objects to filter.
|
||||
:return: A tuple of the list of all matching flats and the list of ignored
|
||||
flats.
|
||||
"""
|
||||
# TODO: Reduce load on housings listing websites
|
||||
# Fetch flats list with flatboobs
|
||||
flats_list = fetch.fetch_flats_list(config)
|
||||
|
||||
# Do a first pass with the available infos to try to remove as much
|
||||
# unwanted postings as possible
|
||||
if config["passes"] > 0:
|
||||
@ -39,6 +38,7 @@ def fetch_and_filter(config):
|
||||
# additional infos
|
||||
if config["passes"] > 1:
|
||||
# Load additional infos
|
||||
if fetch_details:
|
||||
for i, flat in enumerate(flats_list):
|
||||
details = fetch.fetch_details(config, flat["id"])
|
||||
flats_list[i] = tools.merge_dicts(flat, details)
|
||||
@ -51,44 +51,23 @@ def fetch_and_filter(config):
|
||||
return flats_list, ignored_flats
|
||||
|
||||
|
||||
def load_and_filter(housing_file, config):
|
||||
"""
|
||||
Load the dumped flats list. Then, filter it according to criteria.
|
||||
|
||||
:param housing_file: The JSON file to load flats from.
|
||||
:param config: A config dict.
|
||||
:return: A tuple of the list of all matching flats and the list of ignored
|
||||
flats.
|
||||
"""
|
||||
# Load flats list
|
||||
flats_list = fetch.load_flats_list(housing_file)
|
||||
|
||||
# Do a first pass with the available infos to try to remove as much
|
||||
# unwanted postings as possible
|
||||
if config["passes"] > 0:
|
||||
flats_list, ignored_flats = flatisfy.filters.first_pass(flats_list,
|
||||
config)
|
||||
|
||||
# Do a second pass to consolidate all the infos we found
|
||||
if config["passes"] > 1:
|
||||
flats_list, extra_ignored_flats = flatisfy.filters.second_pass(
|
||||
flats_list, config
|
||||
)
|
||||
ignored_flats.extend(extra_ignored_flats)
|
||||
|
||||
return flats_list, ignored_flats
|
||||
|
||||
|
||||
def import_and_filter(config):
|
||||
def import_and_filter(config, load_from_db=False):
|
||||
"""
|
||||
Fetch the available flats list. Then, filter it according to criteria.
|
||||
Finally, store it in the database.
|
||||
|
||||
:param config: A config dict.
|
||||
:param load_from_db: Whether to load flats from database or fetch them
|
||||
using Weboob.
|
||||
:return: ``None``.
|
||||
"""
|
||||
# Fetch and filter flats list
|
||||
flats_list, ignored_list = fetch_and_filter(config)
|
||||
if load_from_db:
|
||||
flats_list = fetch.load_flats_list_from_db(config)
|
||||
else:
|
||||
flats_list = fetch.fetch_flats_list(config)
|
||||
flats_list, ignored_list = filter_flats(config, flats_list=flats_list,
|
||||
fetch_details=True)
|
||||
# Create database connection
|
||||
get_session = database.init_db(config["database"])
|
||||
|
||||
|
@ -54,7 +54,9 @@ DEFAULT_CONFIG = {
|
||||
# Web app host to listen on
|
||||
"host": "127.0.0.1",
|
||||
# Web server to use to serve the webapp (see Bottle deployment doc)
|
||||
"webserver": None
|
||||
"webserver": None,
|
||||
# List of Weboob backends to use (default to any backend available)
|
||||
"backends": None
|
||||
}
|
||||
|
||||
LOGGER = logging.getLogger(__name__)
|
||||
@ -135,6 +137,7 @@ def validate_config(config):
|
||||
assert isinstance(config["port"], int)
|
||||
assert isinstance(config["host"], str)
|
||||
assert config["webserver"] is None or isinstance(config["webserver"], str) # noqa: E501
|
||||
assert config["backends"] is None or isinstance(config["backends"], list) # noqa: E501
|
||||
|
||||
return True
|
||||
except (AssertionError, KeyError):
|
||||
|
@ -37,7 +37,10 @@ def _preprocess_ratp(output_dir):
|
||||
ratp_data = collections.defaultdict(list)
|
||||
for item in ratp_data_raw:
|
||||
stop_name = item["fields"]["stop_name"].lower()
|
||||
ratp_data[stop_name].append(item["fields"]["coord"])
|
||||
ratp_data[stop_name].append({
|
||||
"gps": item["fields"]["coord"],
|
||||
"name": item["fields"]["stop_name"]
|
||||
})
|
||||
|
||||
# Output it
|
||||
with open(os.path.join(output_dir, "ratp.json"), "w") as fh:
|
||||
|
@ -8,7 +8,9 @@ import itertools
|
||||
import json
|
||||
import logging
|
||||
|
||||
from flatisfy import database
|
||||
from flatisfy import tools
|
||||
from flatisfy.models import flat as flat_model
|
||||
|
||||
LOGGER = logging.getLogger(__name__)
|
||||
|
||||
@ -59,6 +61,13 @@ class WeboobProxy(object):
|
||||
|
||||
:param config: A config dict.
|
||||
"""
|
||||
# Default backends
|
||||
if not config["backends"]:
|
||||
backends = ["seloger", "pap", "leboncoin", "logicimmo",
|
||||
"explorimmo", "entreparticuliers"]
|
||||
else:
|
||||
backends = config["backends"]
|
||||
|
||||
# Create base WebNip object
|
||||
self.webnip = WebNip(modules_path=config["modules_path"])
|
||||
|
||||
@ -69,8 +78,7 @@ class WeboobProxy(object):
|
||||
module,
|
||||
params={}
|
||||
)
|
||||
for module in ["seloger", "pap", "leboncoin", "logicimmo",
|
||||
"explorimmo", "entreparticuliers"]
|
||||
for module in backends
|
||||
]
|
||||
|
||||
def __enter__(self):
|
||||
@ -210,13 +218,13 @@ def fetch_details(config, flat_id):
|
||||
weboob_output = weboob_proxy.info(flat_id)
|
||||
|
||||
flat_details = json.loads(weboob_output)
|
||||
flats_details = WeboobProxy.restore_decimal_fields(flat_details)
|
||||
flat_details = WeboobProxy.restore_decimal_fields(flat_details)
|
||||
LOGGER.info("Fetched details for flat %s.", flat_id)
|
||||
|
||||
return flat_details
|
||||
|
||||
|
||||
def load_flats_list(json_file):
|
||||
def load_flats_list_from_file(json_file):
|
||||
"""
|
||||
Load a dumped flats list from JSON file.
|
||||
|
||||
@ -232,3 +240,20 @@ def load_flats_list(json_file):
|
||||
except (IOError, ValueError):
|
||||
LOGGER.error("File %s is not a valid dump file.", json_file)
|
||||
return flats_list
|
||||
|
||||
|
||||
def load_flats_list_from_db(config):
    """
    Load all the flats stored in the database.

    :param config: A config dict. Its ``"database"`` entry is passed to
        ``database.init_db`` to get a session factory.
    :return: A list holding the ``json_api_repr()`` of every flat returned by
        the query on the ``Flat`` model.
    """
    # Start from an empty result so that something is always returned, even
    # if the session block below does not assign it.
    serialized_flats = []
    session_factory = database.init_db(config["database"])

    with session_factory() as session:
        # TODO: Better serialization
        stored_flats = session.query(flat_model.Flat).all()
        serialized_flats = [
            stored_flat.json_api_repr() for stored_flat in stored_flats
        ]

    return serialized_flats
|
||||
|
@ -31,8 +31,13 @@ def init(flats_list):
|
||||
if "flatisfy" not in flat:
|
||||
flat["flatisfy"] = {}
|
||||
# Move url key to urls
|
||||
if "urls" not in flat:
|
||||
if "url" in flat:
|
||||
flat["urls"] = [flat["url"]]
|
||||
else:
|
||||
flat["urls"] = []
|
||||
# Create merged_ids key
|
||||
if "merged_ids" not in flat:
|
||||
flat["merged_ids"] = [flat["id"]]
|
||||
|
||||
return flats_list
|
||||
@ -261,16 +266,18 @@ def guess_stations(flats_list, config, distance_threshold=1500):
|
||||
# of coordinates, for efficiency. Note that multiple stations
|
||||
# with the same name exist in a city, hence the list of
|
||||
# coordinates.
|
||||
for station_gps in opendata["stations"][station[0]]:
|
||||
distance = tools.distance(station_gps, postal_code_gps)
|
||||
for station_data in opendata["stations"][station[0]]:
|
||||
distance = tools.distance(station_data["gps"],
|
||||
postal_code_gps)
|
||||
if distance < distance_threshold:
|
||||
# If at least one of the coordinates for a given
|
||||
# station is close enough, that's ok and we can add
|
||||
# the station
|
||||
good_matched_stations.append({
|
||||
"name": station[0],
|
||||
"key": station[0],
|
||||
"name": station_data["name"],
|
||||
"confidence": station[1],
|
||||
"gps": station_gps
|
||||
"gps": station_data["gps"]
|
||||
})
|
||||
break
|
||||
LOGGER.debug(
|
||||
|
@ -64,7 +64,6 @@ class DatabasePlugin(object):
|
||||
if self.KEYWORD not in callback_args:
|
||||
# If no need for a db session, call the route callback
|
||||
return callback
|
||||
else:
|
||||
def wrapper(*args, **kwargs):
|
||||
"""
|
||||
Wrap the callback in a call to get_session.
|
||||
|
@ -197,7 +197,7 @@ export default {
|
||||
},
|
||||
displayedStations () {
|
||||
if (this.flat.flatisfy_stations.length > 0) {
|
||||
const stationsNames = this.flat.flatisfy_stations.map(station => capitalize(station.name))
|
||||
const stationsNames = this.flat.flatisfy_stations.map(station => station.name)
|
||||
return stationsNames.join(', ')
|
||||
} else {
|
||||
return null
|
||||
|
Loading…
Reference in New Issue
Block a user