131 lines
3.2 KiB
Python
Executable File
131 lines
3.2 KiB
Python
Executable File
# coding: utf-8
|
|
#!/usr/bin/env python3
|
|
import json
|
|
import os
|
|
import subprocess
|
|
import sys
|
|
|
|
from fuzzywuzzy import process as fuzzyprocess
|
|
|
|
import config
|
|
|
|
|
|
def pretty_json(json_str):
|
|
return json.dumps(json_str, indent=4, separators=(',', ': '),
|
|
sort_keys=True)
|
|
|
|
|
|
def preprocess_data():
|
|
if not os.path.isdir("build"):
|
|
os.mkdir("build")
|
|
|
|
if not os.path.isfile("build/ratp.json"):
|
|
ratp_data = []
|
|
with open("data/ratp.json", "r") as fh:
|
|
ratp_data = json.load(fh)
|
|
ratp_data = sorted(
|
|
list(set(
|
|
x["fields"]["stop_name"].lower() for x in ratp_data
|
|
))
|
|
)
|
|
with open("build/ratp.json", "w") as fh:
|
|
fh.write(pretty_json(ratp_data))
|
|
|
|
|
|
def fetch_flats_list():
|
|
flats_list = []
|
|
for query in config.QUERIES:
|
|
flatboob_output = subprocess.check_output(
|
|
["flatboob", "-n", "0", "-f", "json", "load", query]
|
|
)
|
|
flats_list.extend(json.loads(flatboob_output))
|
|
return flats_list
|
|
|
|
|
|
def remove_duplicates(flats_list):
|
|
unique_flats_list = []
|
|
ids = []
|
|
for flat in flats_list:
|
|
if flat["id"] in ids:
|
|
continue
|
|
ids.append(id)
|
|
unique_flats_list.append(flat)
|
|
return unique_flats_list
|
|
|
|
|
|
def sort_by(flats_list, key="cost"):
|
|
return sorted(flats_list, key=lambda x: x["cost"])
|
|
|
|
|
|
def refine_params(flats_list):
|
|
def filter_conditions(x):
|
|
is_ok = True
|
|
if "cost" in x:
|
|
cost = x["cost"]
|
|
is_ok = (
|
|
is_ok and
|
|
(cost < config.PARAMS["max_cost"] and
|
|
cost > config.PARAMS["min_cost"])
|
|
)
|
|
if "area" in x:
|
|
area = x["area"]
|
|
is_ok = (
|
|
is_ok and
|
|
(area < config.PARAMS["max_area"] and
|
|
area > config.PARAMS["min_area"])
|
|
)
|
|
return is_ok
|
|
|
|
return filter(filter_conditions, flats_list)
|
|
|
|
|
|
def match_ratp(flats_list):
|
|
ratp_stations = []
|
|
with open("build/ratp.json", "r") as fh:
|
|
ratp_stations = json.load(fh)
|
|
|
|
for flat in flats_list:
|
|
if "station" in flat and flat["station"]:
|
|
# There is some station fetched by flatboob, try to match it
|
|
flat["ratp_station"] = fuzzyprocess.extractOne(
|
|
flat["station"], ratp_stations
|
|
)
|
|
# TODO: Cross-check station location to choose the best fit
|
|
|
|
return flats_list
|
|
|
|
|
|
def main(dumpfile=None):
|
|
if dumpfile is None:
|
|
flats_list = fetch_flats_list()
|
|
else:
|
|
with open(dumpfile, "r") as fh:
|
|
flats_list = json.load(fh)
|
|
|
|
# First pass
|
|
flats_list = remove_duplicates(flats_list)
|
|
flats_list = sort_by(flats_list, "cost")
|
|
flats_list = refine_params(flats_list)
|
|
|
|
# TODO: flats_list = match_ratp(flats_list)
|
|
|
|
# TODO: Second pass, loading additional infos for each entry
|
|
|
|
return flats_list
|
|
|
|
|
|
if __name__ == "__main__":
|
|
if len(sys.argv) > 1:
|
|
dumpfile = sys.argv[1]
|
|
else:
|
|
dumpfile = None
|
|
|
|
try:
|
|
preprocess_data()
|
|
flats_list = main(dumpfile)
|
|
print(
|
|
pretty_json(flats_list)
|
|
)
|
|
except KeyboardInterrupt:
|
|
pass
|