Refilter command and backends in config
* Add a refilter command * Add a backend option in config to only enable some backends.
This commit is contained in:
parent
18ef841672
commit
1d98c631e0
@ -32,9 +32,11 @@ The available commands are:
|
|||||||
|
|
||||||
* `init-config` to generate an empty configuration file, either on `stdout`
|
* `init-config` to generate an empty configuration file, either on `stdout`
|
||||||
or in the specified file.
|
or in the specified file.
|
||||||
|
* `build-data` to rebuild OpenData datasets.
|
||||||
* `fetch` to load and filter housings posts and output a JSON dump.
|
* `fetch` to load and filter housings posts and output a JSON dump.
|
||||||
* `filter` to filter a previously fetched list of housings posts, provided as
|
* `filter` to filter again the flats in the database (and update their status)
|
||||||
a JSON dump.
|
according to changes in config. It can also filter a previously fetched list
|
||||||
|
of housings posts, provided as a JSON dump (with a `--input` argument).
|
||||||
* `import` to import and filter housing posts into the database.
|
* `import` to import and filter housing posts into the database.
|
||||||
* `serve` to serve the built-in webapp with the development server. Do not use
|
* `serve` to serve the built-in webapp with the development server. Do not use
|
||||||
in production.
|
in production.
|
||||||
|
@ -11,6 +11,7 @@ import sys
|
|||||||
import flatisfy.config
|
import flatisfy.config
|
||||||
from flatisfy import cmds
|
from flatisfy import cmds
|
||||||
from flatisfy import data
|
from flatisfy import data
|
||||||
|
from flatisfy import fetch
|
||||||
from flatisfy import tools
|
from flatisfy import tools
|
||||||
|
|
||||||
|
|
||||||
@ -76,14 +77,18 @@ def parse_args(argv=None):
|
|||||||
help="Fetch housings posts")
|
help="Fetch housings posts")
|
||||||
|
|
||||||
# Filter subcommand parser
|
# Filter subcommand parser
|
||||||
parser_filter = subparsers.add_parser("filter", parents=[parent_parser],
|
parser_filter = subparsers.add_parser(
|
||||||
help=(
|
"filter", parents=[parent_parser],
|
||||||
"Filter housings posts. No "
|
help="Filter housings posts according to constraints in config."
|
||||||
"fetching of additional infos "
|
)
|
||||||
"is done."))
|
|
||||||
parser_filter.add_argument(
|
parser_filter.add_argument(
|
||||||
"input",
|
"--input",
|
||||||
help="JSON dump of the housings post to filter."
|
help=(
|
||||||
|
"Optional JSON dump of the housings post to filter. If provided, "
|
||||||
|
"no additional fetching of infos is done, and the script outputs "
|
||||||
|
"a filtered JSON dump on stdout. If not provided, update status "
|
||||||
|
"of the flats in the database."
|
||||||
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
# Import subcommand parser
|
# Import subcommand parser
|
||||||
@ -149,7 +154,9 @@ def main():
|
|||||||
# Fetch command
|
# Fetch command
|
||||||
if args.cmd == "fetch":
|
if args.cmd == "fetch":
|
||||||
# Fetch and filter flats list
|
# Fetch and filter flats list
|
||||||
flats_list, _ = cmds.fetch_and_filter(config)
|
flats_list = fetch.fetch_flats_list(config)
|
||||||
|
flats_list, _ = cmds.filter_flats(config, flats_list=flats_list,
|
||||||
|
fetch_details=True)
|
||||||
# Sort by cost
|
# Sort by cost
|
||||||
flats_list = tools.sort_list_of_dicts_by(flats_list, "cost")
|
flats_list = tools.sort_list_of_dicts_by(flats_list, "cost")
|
||||||
|
|
||||||
@ -159,18 +166,26 @@ def main():
|
|||||||
# Filter command
|
# Filter command
|
||||||
elif args.cmd == "filter":
|
elif args.cmd == "filter":
|
||||||
# Load and filter flats list
|
# Load and filter flats list
|
||||||
flats_list = cmds.load_and_filter(args.input, config)
|
if args.input:
|
||||||
# Sort by cost
|
flats_list = fetch.load_flats_list_from_file(args.input)
|
||||||
flats_list = tools.sort_list_of_dicts_by(flats_list, "cost")
|
|
||||||
|
|
||||||
print(
|
flats_list, _ = cmds.filter_flats(config, flats_list=flats_list,
|
||||||
tools.pretty_json(flats_list)
|
fetch_details=False)
|
||||||
)
|
|
||||||
|
# Sort by cost
|
||||||
|
flats_list = tools.sort_list_of_dicts_by(flats_list, "cost")
|
||||||
|
|
||||||
|
# Output to stdout
|
||||||
|
print(
|
||||||
|
tools.pretty_json(flats_list)
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
cmds.import_and_filter(config, load_from_db=True)
|
||||||
# Import command
|
# Import command
|
||||||
elif args.cmd == "import":
|
elif args.cmd == "import":
|
||||||
# TODO: Do not fetch details for already imported flats / use the last
|
# TODO: Do not fetch details for already imported flats / use the last
|
||||||
# timestamp
|
# timestamp
|
||||||
cmds.import_and_filter(config)
|
cmds.import_and_filter(config, load_from_db=False)
|
||||||
# Purge command
|
# Purge command
|
||||||
elif args.cmd == "purge":
|
elif args.cmd == "purge":
|
||||||
cmds.purge_db(config)
|
cmds.purge_db(config)
|
||||||
|
@ -17,18 +17,17 @@ from flatisfy.web import app as web_app
|
|||||||
LOGGER = logging.getLogger(__name__)
|
LOGGER = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
def fetch_and_filter(config):
|
def filter_flats(config, flats_list=None, fetch_details=True):
|
||||||
"""
|
"""
|
||||||
Fetch the available flats list. Then, filter it according to criteria.
|
Filter the given flats list according to the criteria defined in config.
|
||||||
|
|
||||||
:param config: A config dict.
|
:param config: A config dict.
|
||||||
|
:param fetch_details: Whether additional details should be fetched between
|
||||||
|
the two passes.
|
||||||
|
:param flats_list: The initial list of flat objects to filter.
|
||||||
:return: A tuple of the list of all matching flats and the list of ignored
|
:return: A tuple of the list of all matching flats and the list of ignored
|
||||||
flats.
|
flats.
|
||||||
"""
|
"""
|
||||||
# TODO: Reduce load on housings listing websites
|
|
||||||
# Fetch flats list with flatboobs
|
|
||||||
flats_list = fetch.fetch_flats_list(config)
|
|
||||||
|
|
||||||
# Do a first pass with the available infos to try to remove as much
|
# Do a first pass with the available infos to try to remove as much
|
||||||
# unwanted postings as possible
|
# unwanted postings as possible
|
||||||
if config["passes"] > 0:
|
if config["passes"] > 0:
|
||||||
@ -39,9 +38,10 @@ def fetch_and_filter(config):
|
|||||||
# additional infos
|
# additional infos
|
||||||
if config["passes"] > 1:
|
if config["passes"] > 1:
|
||||||
# Load additional infos
|
# Load additional infos
|
||||||
for i, flat in enumerate(flats_list):
|
if fetch_details:
|
||||||
details = fetch.fetch_details(config, flat["id"])
|
for i, flat in enumerate(flats_list):
|
||||||
flats_list[i] = tools.merge_dicts(flat, details)
|
details = fetch.fetch_details(config, flat["id"])
|
||||||
|
flats_list[i] = tools.merge_dicts(flat, details)
|
||||||
|
|
||||||
flats_list, extra_ignored_flats = flatisfy.filters.second_pass(
|
flats_list, extra_ignored_flats = flatisfy.filters.second_pass(
|
||||||
flats_list, config
|
flats_list, config
|
||||||
@ -51,44 +51,23 @@ def fetch_and_filter(config):
|
|||||||
return flats_list, ignored_flats
|
return flats_list, ignored_flats
|
||||||
|
|
||||||
|
|
||||||
def load_and_filter(housing_file, config):
|
def import_and_filter(config, load_from_db=False):
|
||||||
"""
|
|
||||||
Load the dumped flats list. Then, filter it according to criteria.
|
|
||||||
|
|
||||||
:param housing_file: The JSON file to load flats from.
|
|
||||||
:param config: A config dict.
|
|
||||||
:return: A tuple of the list of all matching flats and the list of ignored
|
|
||||||
flats.
|
|
||||||
"""
|
|
||||||
# Load flats list
|
|
||||||
flats_list = fetch.load_flats_list(housing_file)
|
|
||||||
|
|
||||||
# Do a first pass with the available infos to try to remove as much
|
|
||||||
# unwanted postings as possible
|
|
||||||
if config["passes"] > 0:
|
|
||||||
flats_list, ignored_flats = flatisfy.filters.first_pass(flats_list,
|
|
||||||
config)
|
|
||||||
|
|
||||||
# Do a second pass to consolidate all the infos we found
|
|
||||||
if config["passes"] > 1:
|
|
||||||
flats_list, extra_ignored_flats = flatisfy.filters.second_pass(
|
|
||||||
flats_list, config
|
|
||||||
)
|
|
||||||
ignored_flats.extend(extra_ignored_flats)
|
|
||||||
|
|
||||||
return flats_list, ignored_flats
|
|
||||||
|
|
||||||
|
|
||||||
def import_and_filter(config):
|
|
||||||
"""
|
"""
|
||||||
Fetch the available flats list. Then, filter it according to criteria.
|
Fetch the available flats list. Then, filter it according to criteria.
|
||||||
Finally, store it in the database.
|
Finally, store it in the database.
|
||||||
|
|
||||||
:param config: A config dict.
|
:param config: A config dict.
|
||||||
|
:param load_from_db: Whether to load flats from database or fetch them
|
||||||
|
using Weboob.
|
||||||
:return: ``None``.
|
:return: ``None``.
|
||||||
"""
|
"""
|
||||||
# Fetch and filter flats list
|
# Fetch and filter flats list
|
||||||
flats_list, ignored_list = fetch_and_filter(config)
|
if load_from_db:
|
||||||
|
flats_list = fetch.load_flats_list_from_db(config)
|
||||||
|
else:
|
||||||
|
flats_list = fetch.fetch_flats_list(config)
|
||||||
|
flats_list, ignored_list = filter_flats(config, flats_list=flats_list,
|
||||||
|
fetch_details=True)
|
||||||
# Create database connection
|
# Create database connection
|
||||||
get_session = database.init_db(config["database"])
|
get_session = database.init_db(config["database"])
|
||||||
|
|
||||||
|
@ -54,7 +54,9 @@ DEFAULT_CONFIG = {
|
|||||||
# Web app host to listen on
|
# Web app host to listen on
|
||||||
"host": "127.0.0.1",
|
"host": "127.0.0.1",
|
||||||
# Web server to use to serve the webapp (see Bottle deployment doc)
|
# Web server to use to serve the webapp (see Bottle deployment doc)
|
||||||
"webserver": None
|
"webserver": None,
|
||||||
|
# List of Weboob backends to use (defaults to the built-in backends list if None)
|
||||||
|
"backends": None
|
||||||
}
|
}
|
||||||
|
|
||||||
LOGGER = logging.getLogger(__name__)
|
LOGGER = logging.getLogger(__name__)
|
||||||
@ -135,6 +137,7 @@ def validate_config(config):
|
|||||||
assert isinstance(config["port"], int)
|
assert isinstance(config["port"], int)
|
||||||
assert isinstance(config["host"], str)
|
assert isinstance(config["host"], str)
|
||||||
assert config["webserver"] is None or isinstance(config["webserver"], str) # noqa: E501
|
assert config["webserver"] is None or isinstance(config["webserver"], str) # noqa: E501
|
||||||
|
assert config["backends"] is None or isinstance(config["backends"], list) # noqa: E501
|
||||||
|
|
||||||
return True
|
return True
|
||||||
except (AssertionError, KeyError):
|
except (AssertionError, KeyError):
|
||||||
|
@ -37,7 +37,10 @@ def _preprocess_ratp(output_dir):
|
|||||||
ratp_data = collections.defaultdict(list)
|
ratp_data = collections.defaultdict(list)
|
||||||
for item in ratp_data_raw:
|
for item in ratp_data_raw:
|
||||||
stop_name = item["fields"]["stop_name"].lower()
|
stop_name = item["fields"]["stop_name"].lower()
|
||||||
ratp_data[stop_name].append(item["fields"]["coord"])
|
ratp_data[stop_name].append({
|
||||||
|
"gps": item["fields"]["coord"],
|
||||||
|
"name": item["fields"]["stop_name"]
|
||||||
|
})
|
||||||
|
|
||||||
# Output it
|
# Output it
|
||||||
with open(os.path.join(output_dir, "ratp.json"), "w") as fh:
|
with open(os.path.join(output_dir, "ratp.json"), "w") as fh:
|
||||||
|
@ -8,7 +8,9 @@ import itertools
|
|||||||
import json
|
import json
|
||||||
import logging
|
import logging
|
||||||
|
|
||||||
|
from flatisfy import database
|
||||||
from flatisfy import tools
|
from flatisfy import tools
|
||||||
|
from flatisfy.models import flat as flat_model
|
||||||
|
|
||||||
LOGGER = logging.getLogger(__name__)
|
LOGGER = logging.getLogger(__name__)
|
||||||
|
|
||||||
@ -59,6 +61,13 @@ class WeboobProxy(object):
|
|||||||
|
|
||||||
:param config: A config dict.
|
:param config: A config dict.
|
||||||
"""
|
"""
|
||||||
|
# Default backends
|
||||||
|
if not config["backends"]:
|
||||||
|
backends = ["seloger", "pap", "leboncoin", "logicimmo",
|
||||||
|
"explorimmo", "entreparticuliers"]
|
||||||
|
else:
|
||||||
|
backends = config["backends"]
|
||||||
|
|
||||||
# Create base WebNip object
|
# Create base WebNip object
|
||||||
self.webnip = WebNip(modules_path=config["modules_path"])
|
self.webnip = WebNip(modules_path=config["modules_path"])
|
||||||
|
|
||||||
@ -69,8 +78,7 @@ class WeboobProxy(object):
|
|||||||
module,
|
module,
|
||||||
params={}
|
params={}
|
||||||
)
|
)
|
||||||
for module in ["seloger", "pap", "leboncoin", "logicimmo",
|
for module in backends
|
||||||
"explorimmo", "entreparticuliers"]
|
|
||||||
]
|
]
|
||||||
|
|
||||||
def __enter__(self):
|
def __enter__(self):
|
||||||
@ -210,13 +218,13 @@ def fetch_details(config, flat_id):
|
|||||||
weboob_output = weboob_proxy.info(flat_id)
|
weboob_output = weboob_proxy.info(flat_id)
|
||||||
|
|
||||||
flat_details = json.loads(weboob_output)
|
flat_details = json.loads(weboob_output)
|
||||||
flats_details = WeboobProxy.restore_decimal_fields(flat_details)
|
flat_details = WeboobProxy.restore_decimal_fields(flat_details)
|
||||||
LOGGER.info("Fetched details for flat %s.", flat_id)
|
LOGGER.info("Fetched details for flat %s.", flat_id)
|
||||||
|
|
||||||
return flat_details
|
return flat_details
|
||||||
|
|
||||||
|
|
||||||
def load_flats_list(json_file):
|
def load_flats_list_from_file(json_file):
|
||||||
"""
|
"""
|
||||||
Load a dumped flats list from JSON file.
|
Load a dumped flats list from JSON file.
|
||||||
|
|
||||||
@ -232,3 +240,20 @@ def load_flats_list(json_file):
|
|||||||
except (IOError, ValueError):
|
except (IOError, ValueError):
|
||||||
LOGGER.error("File %s is not a valid dump file.", json_file)
|
LOGGER.error("File %s is not a valid dump file.", json_file)
|
||||||
return flats_list
|
return flats_list
|
||||||
|
|
||||||
|
|
||||||
|
def load_flats_list_from_db(config):
|
||||||
|
"""
|
||||||
|
Load flats from database.
|
||||||
|
|
||||||
|
:param config: A config dict.
|
||||||
|
:return: A list of all the flats in the database.
|
||||||
|
"""
|
||||||
|
flats_list = []
|
||||||
|
get_session = database.init_db(config["database"])
|
||||||
|
|
||||||
|
with get_session() as session:
|
||||||
|
# TODO: Better serialization
|
||||||
|
flats_list = [flat.json_api_repr()
|
||||||
|
for flat in session.query(flat_model.Flat).all()]
|
||||||
|
return flats_list
|
||||||
|
@ -31,9 +31,14 @@ def init(flats_list):
|
|||||||
if "flatisfy" not in flat:
|
if "flatisfy" not in flat:
|
||||||
flat["flatisfy"] = {}
|
flat["flatisfy"] = {}
|
||||||
# Move url key to urls
|
# Move url key to urls
|
||||||
flat["urls"] = [flat["url"]]
|
if "urls" not in flat:
|
||||||
|
if "url" in flat:
|
||||||
|
flat["urls"] = [flat["url"]]
|
||||||
|
else:
|
||||||
|
flat["urls"] = []
|
||||||
# Create merged_ids key
|
# Create merged_ids key
|
||||||
flat["merged_ids"] = [flat["id"]]
|
if "merged_ids" not in flat:
|
||||||
|
flat["merged_ids"] = [flat["id"]]
|
||||||
|
|
||||||
return flats_list
|
return flats_list
|
||||||
|
|
||||||
@ -261,16 +266,18 @@ def guess_stations(flats_list, config, distance_threshold=1500):
|
|||||||
# of coordinates, for efficiency. Note that multiple stations
|
# of coordinates, for efficiency. Note that multiple stations
|
||||||
# with the same name exist in a city, hence the list of
|
# with the same name exist in a city, hence the list of
|
||||||
# coordinates.
|
# coordinates.
|
||||||
for station_gps in opendata["stations"][station[0]]:
|
for station_data in opendata["stations"][station[0]]:
|
||||||
distance = tools.distance(station_gps, postal_code_gps)
|
distance = tools.distance(station_data["gps"],
|
||||||
|
postal_code_gps)
|
||||||
if distance < distance_threshold:
|
if distance < distance_threshold:
|
||||||
# If at least one of the coordinates for a given
|
# If at least one of the coordinates for a given
|
||||||
# station is close enough, that's ok and we can add
|
# station is close enough, that's ok and we can add
|
||||||
# the station
|
# the station
|
||||||
good_matched_stations.append({
|
good_matched_stations.append({
|
||||||
"name": station[0],
|
"key": station[0],
|
||||||
|
"name": station_data["name"],
|
||||||
"confidence": station[1],
|
"confidence": station[1],
|
||||||
"gps": station_gps
|
"gps": station_data["gps"]
|
||||||
})
|
})
|
||||||
break
|
break
|
||||||
LOGGER.debug(
|
LOGGER.debug(
|
||||||
|
@ -64,16 +64,15 @@ class DatabasePlugin(object):
|
|||||||
if self.KEYWORD not in callback_args:
|
if self.KEYWORD not in callback_args:
|
||||||
# If no need for a db session, call the route callback
|
# If no need for a db session, call the route callback
|
||||||
return callback
|
return callback
|
||||||
else:
|
def wrapper(*args, **kwargs):
|
||||||
def wrapper(*args, **kwargs):
|
"""
|
||||||
"""
|
Wrap the callback in a call to get_session.
|
||||||
Wrap the callback in a call to get_session.
|
"""
|
||||||
"""
|
with self.get_session() as session:
|
||||||
with self.get_session() as session:
|
# Get a db session and pass it to the callback
|
||||||
# Get a db session and pass it to the callback
|
kwargs[self.KEYWORD] = session
|
||||||
kwargs[self.KEYWORD] = session
|
return callback(*args, **kwargs)
|
||||||
return callback(*args, **kwargs)
|
return wrapper
|
||||||
return wrapper
|
|
||||||
|
|
||||||
|
|
||||||
Plugin = DatabasePlugin
|
Plugin = DatabasePlugin
|
||||||
|
@ -197,7 +197,7 @@ export default {
|
|||||||
},
|
},
|
||||||
displayedStations () {
|
displayedStations () {
|
||||||
if (this.flat.flatisfy_stations.length > 0) {
|
if (this.flat.flatisfy_stations.length > 0) {
|
||||||
const stationsNames = this.flat.flatisfy_stations.map(station => capitalize(station.name))
|
const stationsNames = this.flat.flatisfy_stations.map(station => station.name)
|
||||||
return stationsNames.join(', ')
|
return stationsNames.join(', ')
|
||||||
} else {
|
} else {
|
||||||
return null
|
return null
|
||||||
|
Loading…
Reference in New Issue
Block a user