flatisfy/flat.py

131 lines
3.2 KiB
Python
Executable File

# coding: utf-8
#!/usr/bin/env python3
import json
import os
import subprocess
import sys
from fuzzywuzzy import process as fuzzyprocess
import config
def pretty_json(json_str):
return json.dumps(json_str, indent=4, separators=(',', ': '),
sort_keys=True)
def preprocess_data():
if not os.path.isdir("build"):
os.mkdir("build")
if not os.path.isfile("build/ratp.json"):
ratp_data = []
with open("data/ratp.json", "r") as fh:
ratp_data = json.load(fh)
ratp_data = sorted(
list(set(
x["fields"]["stop_name"].lower() for x in ratp_data
))
)
with open("build/ratp.json", "w") as fh:
fh.write(pretty_json(ratp_data))
def fetch_flats_list():
flats_list = []
for query in config.QUERIES:
flatboob_output = subprocess.check_output(
["flatboob", "-n", "0", "-f", "json", "load", query]
)
flats_list.extend(json.loads(flatboob_output))
return flats_list
def remove_duplicates(flats_list):
unique_flats_list = []
ids = []
for flat in flats_list:
if flat["id"] in ids:
continue
ids.append(id)
unique_flats_list.append(flat)
return unique_flats_list
def sort_by(flats_list, key="cost"):
return sorted(flats_list, key=lambda x: x["cost"])
def refine_params(flats_list):
def filter_conditions(x):
is_ok = True
if "cost" in x:
cost = x["cost"]
is_ok = (
is_ok and
(cost < config.PARAMS["max_cost"] and
cost > config.PARAMS["min_cost"])
)
if "area" in x:
area = x["area"]
is_ok = (
is_ok and
(area < config.PARAMS["max_area"] and
area > config.PARAMS["min_area"])
)
return is_ok
return filter(filter_conditions, flats_list)
def match_ratp(flats_list):
ratp_stations = []
with open("build/ratp.json", "r") as fh:
ratp_stations = json.load(fh)
for flat in flats_list:
if "station" in flat and flat["station"]:
# There is some station fetched by flatboob, try to match it
flat["ratp_station"] = fuzzyprocess.extractOne(
flat["station"], ratp_stations
)
# TODO: Cross-check station location to choose the best fit
return flats_list
def main(dumpfile=None):
if dumpfile is None:
flats_list = fetch_flats_list()
else:
with open(dumpfile, "r") as fh:
flats_list = json.load(fh)
# First pass
flats_list = remove_duplicates(flats_list)
flats_list = sort_by(flats_list, "cost")
flats_list = refine_params(flats_list)
# TODO: flats_list = match_ratp(flats_list)
# TODO: Second pass, loading additional infos for each entry
return flats_list
if __name__ == "__main__":
if len(sys.argv) > 1:
dumpfile = sys.argv[1]
else:
dumpfile = None
try:
preprocess_data()
flats_list = main(dumpfile)
print(
pretty_json(flats_list)
)
except KeyboardInterrupt:
pass