Reformat with black (max-line-length=120)

This commit is contained in:
Gautier P 2021-01-26 16:49:43 +01:00
parent 582a868a1d
commit a92db5e8ee
17 changed files with 130 additions and 390 deletions

10
.editorconfig Normal file
View File

@ -0,0 +1,10 @@
root = true
[*]
indent_style = space
indent_size = 4
end_of_line = lf
insert_final_newline = true
[*.py]
max_line_length=120

View File

@ -28,15 +28,11 @@ def parse_args(argv=None):
"""
Create parser and parse arguments.
"""
parser = argparse.ArgumentParser(
prog="Flatisfy", description="Find the perfect flat."
)
parser = argparse.ArgumentParser(prog="Flatisfy", description="Find the perfect flat.")
# Parent parser containing arguments common to any subcommand
parent_parser = argparse.ArgumentParser(add_help=False)
parent_parser.add_argument(
"--data-dir", help="Location of Flatisfy data directory."
)
parent_parser.add_argument("--data-dir", help="Location of Flatisfy data directory.")
parent_parser.add_argument("--config", help="Configuration file to use.")
parent_parser.add_argument(
"--passes",
@ -44,12 +40,8 @@ def parse_args(argv=None):
type=int,
help="Number of passes to do on the filtered data.",
)
parent_parser.add_argument(
"--max-entries", type=int, help="Maximum number of entries to fetch."
)
parent_parser.add_argument(
"-v", "--verbose", action="store_true", help="Verbose logging output."
)
parent_parser.add_argument("--max-entries", type=int, help="Maximum number of entries to fetch.")
parent_parser.add_argument("-v", "--verbose", action="store_true", help="Verbose logging output.")
parent_parser.add_argument("-vv", action="store_true", help="Debug logging output.")
parent_parser.add_argument(
"--constraints",
@ -61,17 +53,13 @@ def parse_args(argv=None):
subparsers = parser.add_subparsers(dest="cmd", help="Available subcommands")
# Build data subcommand
subparsers.add_parser(
"build-data", parents=[parent_parser], help="Build necessary data"
)
subparsers.add_parser("build-data", parents=[parent_parser], help="Build necessary data")
# Init config subcommand
parser_init_config = subparsers.add_parser(
"init-config", parents=[parent_parser], help="Initialize empty configuration."
)
parser_init_config.add_argument(
"output", nargs="?", help="Output config file. Use '-' for stdout."
)
parser_init_config.add_argument("output", nargs="?", help="Output config file. Use '-' for stdout.")
# Fetch subcommand parser
subparsers.add_parser("fetch", parents=[parent_parser], help="Fetch housings posts")
@ -93,9 +81,7 @@ def parse_args(argv=None):
)
# Import subcommand parser
import_filter = subparsers.add_parser(
"import", parents=[parent_parser], help="Import housing posts in database."
)
import_filter = subparsers.add_parser("import", parents=[parent_parser], help="Import housing posts in database.")
import_filter.add_argument(
"--new-only",
action="store_true",
@ -106,9 +92,7 @@ def parse_args(argv=None):
subparsers.add_parser("purge", parents=[parent_parser], help="Purge database.")
# Serve subcommand parser
parser_serve = subparsers.add_parser(
"serve", parents=[parent_parser], help="Serve the web app."
)
parser_serve = subparsers.add_parser("serve", parents=[parent_parser], help="Serve the web app.")
parser_serve.add_argument("--port", type=int, help="Port to bind to.")
parser_serve.add_argument("--host", help="Host to listen on.")
@ -170,14 +154,9 @@ def main():
if args.cmd == "fetch":
# Fetch and filter flats list
fetched_flats = fetch.fetch_flats(config)
fetched_flats = cmds.filter_fetched_flats(
config, fetched_flats=fetched_flats, fetch_details=True
)
fetched_flats = cmds.filter_fetched_flats(config, fetched_flats=fetched_flats, fetch_details=True)
# Sort by cost
fetched_flats = {
k: tools.sort_list_of_dicts_by(v["new"], "cost")
for k, v in fetched_flats.items()
}
fetched_flats = {k: tools.sort_list_of_dicts_by(v["new"], "cost") for k, v in fetched_flats.items()}
print(tools.pretty_json(fetched_flats))
return
@ -187,15 +166,10 @@ def main():
if args.input:
fetched_flats = fetch.load_flats_from_file(args.input, config)
fetched_flats = cmds.filter_fetched_flats(
config, fetched_flats=fetched_flats, fetch_details=False
)
fetched_flats = cmds.filter_fetched_flats(config, fetched_flats=fetched_flats, fetch_details=False)
# Sort by cost
fetched_flats = {
k: tools.sort_list_of_dicts_by(v["new"], "cost")
for k, v in fetched_flats.items()
}
fetched_flats = {k: tools.sort_list_of_dicts_by(v["new"], "cost") for k, v in fetched_flats.items()}
# Output to stdout
print(tools.pretty_json(fetched_flats))

View File

@ -123,9 +123,7 @@ def validate_config(config, check_with_data):
"""
assert isinstance(bounds, list)
assert len(bounds) == 2
assert all(
x is None or (isinstance(x, (float, int)) and x >= 0) for x in bounds
)
assert all(x is None or (isinstance(x, (float, int)) and x >= 0) for x in bounds)
if bounds[0] is not None and bounds[1] is not None:
assert bounds[1] > bounds[0]
@ -141,45 +139,25 @@ def validate_config(config, check_with_data):
isinstance(config["max_entries"], int) and config["max_entries"] > 0
) # noqa: E501
assert config["data_directory"] is None or isinstance(
config["data_directory"], str
) # noqa: E501
assert config["data_directory"] is None or isinstance(config["data_directory"], str) # noqa: E501
assert os.path.isdir(config["data_directory"])
assert isinstance(config["search_index"], str)
assert config["modules_path"] is None or isinstance(
config["modules_path"], str
) # noqa: E501
assert config["modules_path"] is None or isinstance(config["modules_path"], str) # noqa: E501
assert config["database"] is None or isinstance(
config["database"], str
) # noqa: E501
assert config["database"] is None or isinstance(config["database"], str) # noqa: E501
assert isinstance(config["port"], int)
assert isinstance(config["host"], str)
assert config["webserver"] is None or isinstance(
config["webserver"], str
) # noqa: E501
assert config["backends"] is None or isinstance(
config["backends"], list
) # noqa: E501
assert config["webserver"] is None or isinstance(config["webserver"], str) # noqa: E501
assert config["backends"] is None or isinstance(config["backends"], list) # noqa: E501
assert isinstance(config["send_email"], bool)
assert config["smtp_server"] is None or isinstance(
config["smtp_server"], str
) # noqa: E501
assert config["smtp_port"] is None or isinstance(
config["smtp_port"], int
) # noqa: E501
assert config["smtp_username"] is None or isinstance(
config["smtp_username"], str
) # noqa: E501
assert config["smtp_password"] is None or isinstance(
config["smtp_password"], str
) # noqa: E501
assert config["smtp_server"] is None or isinstance(config["smtp_server"], str) # noqa: E501
assert config["smtp_port"] is None or isinstance(config["smtp_port"], int) # noqa: E501
assert config["smtp_username"] is None or isinstance(config["smtp_username"], str) # noqa: E501
assert config["smtp_password"] is None or isinstance(config["smtp_password"], str) # noqa: E501
assert config["smtp_to"] is None or isinstance(config["smtp_to"], list)
assert config["notification_lang"] is None or isinstance(
config["notification_lang"], str
)
assert config["notification_lang"] is None or isinstance(config["notification_lang"], str)
assert isinstance(config["store_personal_data"], bool)
assert isinstance(config["max_distance_housing_station"], (int, float))
@ -187,16 +165,10 @@ def validate_config(config, check_with_data):
assert isinstance(config["duplicate_image_hash_threshold"], int)
# API keys
assert config["navitia_api_key"] is None or isinstance(
config["navitia_api_key"], str
) # noqa: E501
assert config["mapbox_api_key"] is None or isinstance(
config["mapbox_api_key"], str
) # noqa: E501
assert config["navitia_api_key"] is None or isinstance(config["navitia_api_key"], str) # noqa: E501
assert config["mapbox_api_key"] is None or isinstance(config["mapbox_api_key"], str) # noqa: E501
assert config["ignore_station"] is None or isinstance(
config["ignore_station"], bool
) # noqa: E501
assert config["ignore_station"] is None or isinstance(config["ignore_station"], bool) # noqa: E501
# Ensure constraints are ok
assert config["constraints"]
@ -234,10 +206,7 @@ def validate_config(config, check_with_data):
# Ensure data is built into db
data.preprocess_data(config, force=False)
# Check postal codes
opendata_postal_codes = [
x.postal_code
for x in data.load_data(PostalCode, constraint, config)
]
opendata_postal_codes = [x.postal_code for x in data.load_data(PostalCode, constraint, config)]
for postal_code in constraint["postal_codes"]:
assert postal_code in opendata_postal_codes # noqa: E501
@ -292,16 +261,13 @@ def load_config(args=None, check_with_data=True):
config_data.update(json.load(fh))
except (IOError, ValueError) as exc:
LOGGER.error(
"Unable to load configuration from file, "
"using default configuration: %s.",
"Unable to load configuration from file, " "using default configuration: %s.",
exc,
)
# Overload config with arguments
if args and getattr(args, "passes", None) is not None:
LOGGER.debug(
"Overloading number of passes from CLI arguments: %d.", args.passes
)
LOGGER.debug("Overloading number of passes from CLI arguments: %d.", args.passes)
config_data["passes"] = args.passes
if args and getattr(args, "max_entries", None) is not None:
LOGGER.debug(
@ -322,9 +288,7 @@ def load_config(args=None, check_with_data=True):
config_data["data_directory"] = args.data_dir
elif config_data["data_directory"] is None:
config_data["data_directory"] = appdirs.user_data_dir("flatisfy", "flatisfy")
LOGGER.debug(
"Using default XDG data directory: %s.", config_data["data_directory"]
)
LOGGER.debug("Using default XDG data directory: %s.", config_data["data_directory"])
if not os.path.isdir(config_data["data_directory"]):
LOGGER.info(
@ -335,14 +299,10 @@ def load_config(args=None, check_with_data=True):
os.makedirs(os.path.join(config_data["data_directory"], "images"))
if config_data["database"] is None:
config_data["database"] = "sqlite:///" + os.path.join(
config_data["data_directory"], "flatisfy.db"
)
config_data["database"] = "sqlite:///" + os.path.join(config_data["data_directory"], "flatisfy.db")
if config_data["search_index"] is None:
config_data["search_index"] = os.path.join(
config_data["data_directory"], "search_index"
)
config_data["search_index"] = os.path.join(config_data["data_directory"], "search_index")
# Handle constraints filtering
if args and getattr(args, "constraints", None) is not None:
@ -354,11 +314,7 @@ def load_config(args=None, check_with_data=True):
args.constraints.replace(",", ", "),
)
constraints_filter = args.constraints.split(",")
config_data["constraints"] = {
k: v
for k, v in config_data["constraints"].items()
if k in constraints_filter
}
config_data["constraints"] = {k: v for k, v in config_data["constraints"].items() if k in constraints_filter}
# Sanitize website url
if config_data["website_url"] is not None:

View File

@ -50,10 +50,7 @@ def preprocess_data(config, force=False):
# Check if a build is required
get_session = database.init_db(config["database"], config["search_index"])
with get_session() as session:
is_built = (
session.query(PublicTransport).count() > 0
and session.query(PostalCode).count() > 0
)
is_built = session.query(PublicTransport).count() > 0 and session.query(PostalCode).count() > 0
if is_built and not force:
# No need to rebuild the database, skip
return False
@ -66,9 +63,7 @@ def preprocess_data(config, force=False):
for preprocess in data_files.PREPROCESSING_FUNCTIONS:
data_objects = preprocess()
if not data_objects:
raise flatisfy.exceptions.DataBuildError(
"Error with %s." % preprocess.__name__
)
raise flatisfy.exceptions.DataBuildError("Error with %s." % preprocess.__name__)
with get_session() as session:
session.add_all(data_objects)
LOGGER.info("Done building data!")

View File

@ -114,19 +114,11 @@ def french_postal_codes_to_quarter(postal_code):
}
subdivision = next(
(
i
for i, departments in department_to_subdivision.items()
if departement in departments
),
(i for i, departments in department_to_subdivision.items() if departement in departments),
None,
)
return next(
(
i
for i, subdivisions in subdivision_to_quarters.items()
if subdivision in subdivisions
),
(i for i, subdivisions in subdivision_to_quarters.items() if subdivision in subdivisions),
None,
)
@ -165,9 +157,7 @@ def _preprocess_laposte():
)
continue
name = normalize_string(
titlecase.titlecase(fields["nom_de_la_commune"]), lowercase=False
)
name = normalize_string(titlecase.titlecase(fields["nom_de_la_commune"]), lowercase=False)
if (fields["code_postal"], name) in seen_postal_codes:
continue
@ -183,9 +173,7 @@ def _preprocess_laposte():
)
)
except KeyError:
LOGGER.info(
"Missing data for postal code %s, skipping it.", fields["code_postal"]
)
LOGGER.info("Missing data for postal code %s, skipping it.", fields["code_postal"])
return postal_codes_data
@ -201,15 +189,11 @@ def _preprocess_public_transport():
for area, data_file in TRANSPORT_DATA_FILES.items():
LOGGER.info("Building from public transport data %s.", data_file)
try:
with io.open(
os.path.join(MODULE_DIR, data_file), "r", encoding="utf-8"
) as fh:
with io.open(os.path.join(MODULE_DIR, data_file), "r", encoding="utf-8") as fh:
filereader = csv.reader(fh)
next(filereader, None) # Skip first row (headers)
for row in filereader:
public_transport_data.append(
PublicTransport(name=row[2], area=area, lat=row[3], lng=row[4])
)
public_transport_data.append(PublicTransport(name=row[2], area=area, lat=row[3], lng=row[4]))
except (IOError, IndexError):
LOGGER.error("Invalid raw opendata file: %s.", data_file)
return []

View File

@ -92,23 +92,17 @@ class IndexService(object):
for model in session.new:
model_class = model.__class__
if hasattr(model_class, "__searchable__"):
self.to_update.setdefault(model_class.__name__, []).append(
("new", model)
)
self.to_update.setdefault(model_class.__name__, []).append(("new", model))
for model in session.deleted:
model_class = model.__class__
if hasattr(model_class, "__searchable__"):
self.to_update.setdefault(model_class.__name__, []).append(
("deleted", model)
)
self.to_update.setdefault(model_class.__name__, []).append(("deleted", model))
for model in session.dirty:
model_class = model.__class__
if hasattr(model_class, "__searchable__"):
self.to_update.setdefault(model_class.__name__, []).append(
("changed", model)
)
self.to_update.setdefault(model_class.__name__, []).append(("changed", model))
def after_commit(self, session):
"""
@ -129,16 +123,11 @@ class IndexService(object):
# added as a new doc. Could probably replace this with a whoosh
# update.
writer.delete_by_term(
primary_field, text_type(getattr(model, primary_field))
)
writer.delete_by_term(primary_field, text_type(getattr(model, primary_field)))
if change_type in ("new", "changed"):
attrs = dict((key, getattr(model, key)) for key in searchable)
attrs = {
attr: text_type(getattr(model, attr))
for attr in attrs.keys()
}
attrs = {attr: text_type(getattr(model, attr)) for attr in attrs.keys()}
attrs[primary_field] = text_type(getattr(model, primary_field))
writer.add_document(**attrs)

View File

@ -16,9 +16,7 @@ from email.utils import formatdate, make_msgid
LOGGER = logging.getLogger(__name__)
def send_email(
server, port, subject, _from, _to, txt, html, username=None, password=None
):
def send_email(server, port, subject, _from, _to, txt, html, username=None, password=None):
"""
Send an email

View File

@ -24,9 +24,7 @@ try:
from weboob.core.ouiboube import WebNip
from weboob.tools.json import WeboobEncoder
except ImportError:
LOGGER.error(
"Weboob is not available on your system. Make sure you " "installed it."
)
LOGGER.error("Weboob is not available on your system. Make sure you " "installed it.")
raise
@ -79,9 +77,7 @@ class WebOOBProxy(object):
self.webnip = WebNip(modules_path=config["modules_path"])
# Create backends
self.backends = [
self.webnip.load_backend(module, module, params={}) for module in backends
]
self.backends = [self.webnip.load_backend(module, module, params={}) for module in backends]
def __enter__(self):
return self
@ -118,18 +114,14 @@ class WebOOBProxy(object):
if not matching_cities:
# If postal code gave no match, warn the user
LOGGER.warn(
"Postal code %s could not be matched with a city.", postal_code
)
LOGGER.warn("Postal code %s could not be matched with a city.", postal_code)
# Remove "TOUTES COMMUNES" entries which are duplicates of the individual
# cities entries in Logicimmo module.
matching_cities = [
city
for city in matching_cities
if not (
city.backend == "logicimmo" and city.name.startswith("TOUTES COMMUNES")
)
if not (city.backend == "logicimmo" and city.name.startswith("TOUTES COMMUNES"))
]
# Then, build queries by grouping cities by at most 3
@ -139,8 +131,7 @@ class WebOOBProxy(object):
try:
query.house_types = [
getattr(HOUSE_TYPES, house_type.upper())
for house_type in constraints_dict["house_types"]
getattr(HOUSE_TYPES, house_type.upper()) for house_type in constraints_dict["house_types"]
]
except AttributeError:
LOGGER.error("Invalid house types constraint.")
@ -193,9 +184,7 @@ class WebOOBProxy(object):
housings.append(json.dumps(housing, cls=WeboobEncoder))
except CallErrors as exc:
# If an error occurred, just log it
LOGGER.error(
"An error occured while fetching the housing posts: %s", str(exc)
)
LOGGER.error("An error occured while fetching the housing posts: %s", str(exc))
return housings
def info(self, full_flat_id, store_personal_data=False):
@ -210,9 +199,7 @@ class WebOOBProxy(object):
"""
flat_id, backend_name = full_flat_id.rsplit("@", 1)
try:
backend = next(
backend for backend in self.backends if backend.name == backend_name
)
backend = next(backend for backend in self.backends if backend.name == backend_name)
except StopIteration:
LOGGER.error("Backend %s is not available.", backend_name)
return "{}"
@ -231,9 +218,7 @@ class WebOOBProxy(object):
return json.dumps(housing, cls=WeboobEncoder)
except Exception as exc: # pylint: disable=broad-except
# If an error occurred, just log it
LOGGER.error(
"An error occured while fetching housing %s: %s", full_flat_id, str(exc)
)
LOGGER.error("An error occured while fetching housing %s: %s", full_flat_id, str(exc))
return "{}"
@ -253,18 +238,12 @@ def fetch_flats(config):
queries = webOOB_proxy.build_queries(constraint)
housing_posts = []
for query in queries:
housing_posts.extend(
webOOB_proxy.query(
query, config["max_entries"], config["store_personal_data"]
)
)
housing_posts.extend(webOOB_proxy.query(query, config["max_entries"], config["store_personal_data"]))
housing_posts = housing_posts[: config["max_entries"]]
LOGGER.info("Fetched %d flats.", len(housing_posts))
constraint_flats_list = [json.loads(flat) for flat in housing_posts]
constraint_flats_list = [
WebOOBProxy.restore_decimal_fields(flat) for flat in constraint_flats_list
]
constraint_flats_list = [WebOOBProxy.restore_decimal_fields(flat) for flat in constraint_flats_list]
fetched_flats[constraint_name] = constraint_flats_list
return fetched_flats

View File

@ -169,9 +169,7 @@ def detect(flats_list, key="id", merge=True, should_intersect=False):
# Sort matching flats by backend precedence
matching_flats.sort(
key=lambda flat: next(
i
for (i, backend) in enumerate(BACKENDS_BY_PRECEDENCE)
if flat["id"].endswith(backend)
i for (i, backend) in enumerate(BACKENDS_BY_PRECEDENCE) if flat["id"].endswith(backend)
),
reverse=True,
)
@ -199,9 +197,7 @@ def detect(flats_list, key="id", merge=True, should_intersect=False):
if should_intersect:
# We added some flats twice with the above method, let's deduplicate on
# id.
unique_flats_list, _ = detect(
unique_flats_list, key="id", merge=True, should_intersect=False
)
unique_flats_list, _ = detect(unique_flats_list, key="id", merge=True, should_intersect=False)
return unique_flats_list, duplicate_flats
@ -274,18 +270,14 @@ def get_duplicate_score(flat1, flat2, photo_cache, hash_threshold):
# If the two flats are from the same website and have a
# different float part, consider they cannot be duplicates. See
# https://framagit.org/phyks/Flatisfy/issues/100.
both_are_from_same_backend = (
flat1["id"].split("@")[-1] == flat2["id"].split("@")[-1]
)
both_are_from_same_backend = flat1["id"].split("@")[-1] == flat2["id"].split("@")[-1]
both_have_float_part = (flat1["area"] % 1) > 0 and (flat2["area"] % 1) > 0
both_have_equal_float_part = (flat1["area"] % 1) == (flat2["area"] % 1)
if both_have_float_part and both_are_from_same_backend:
assert both_have_equal_float_part
if flat1.get("photos", []) and flat2.get("photos", []):
n_common_photos = find_number_common_photos(
flat1["photos"], flat2["photos"], photo_cache, hash_threshold
)
n_common_photos = find_number_common_photos(flat1["photos"], flat2["photos"], photo_cache, hash_threshold)
min_number_photos = min(len(flat1["photos"]), len(flat2["photos"]))
@ -332,18 +324,13 @@ def deep_detect(flats_list, config):
if flat2["id"] in matching_flats[flat1["id"]]:
continue
n_common_items = get_duplicate_score(
flat1, flat2, photo_cache, config["duplicate_image_hash_threshold"]
)
n_common_items = get_duplicate_score(flat1, flat2, photo_cache, config["duplicate_image_hash_threshold"])
# Minimal score to consider they are duplicates
if n_common_items >= config["duplicate_threshold"]:
# Mark flats as duplicates
LOGGER.info(
(
"Found duplicates using deep detection: (%s, %s). "
"Score is %d."
),
("Found duplicates using deep detection: (%s, %s). " "Score is %d."),
flat1["id"],
flat2["id"],
n_common_items,
@ -369,9 +356,7 @@ def deep_detect(flats_list, config):
to_merge = sorted(
[flat for flat in flats_list if flat["id"] in matching_flats[flat_id]],
key=lambda flat: next(
i
for (i, backend) in enumerate(BACKENDS_BY_PRECEDENCE)
if flat["id"].endswith(backend)
i for (i, backend) in enumerate(BACKENDS_BY_PRECEDENCE) if flat["id"].endswith(backend)
),
reverse=True,
)

View File

@ -22,9 +22,7 @@ def download_images(flats_list, config):
:param flats_list: A list of flats dicts.
:param config: A config dict.
"""
photo_cache = ImageCache(
storage_dir=os.path.join(config["data_directory"], "images")
)
photo_cache = ImageCache(storage_dir=os.path.join(config["data_directory"], "images"))
for flat in flats_list:
for photo in flat["photos"]:
# Download photo

View File

@ -97,11 +97,7 @@ def fuzzy_match(query, choices, limit=3, threshold=75):
# Get the matches (normalized strings)
# Keep only ``limit`` matches.
matches = sorted(
[
(choice, len(choice))
for choice in tools.uniqify(unique_normalized_choices)
if choice in normalized_query
],
[(choice, len(choice)) for choice in tools.uniqify(unique_normalized_choices) if choice in normalized_query],
key=lambda x: x[1],
reverse=True,
)
@ -115,11 +111,7 @@ def fuzzy_match(query, choices, limit=3, threshold=75):
# Convert back matches to original strings
# Also filter out matches below threshold
matches = [
(choices[normalized_choices.index(x[0])], x[1])
for x in matches
if x[1] >= threshold
]
matches = [(choices[normalized_choices.index(x[0])], x[1]) for x in matches if x[1] >= threshold]
return matches
@ -135,16 +127,10 @@ def guess_location_position(location, cities, constraint):
# Find associated postal codes
matched_postal_codes = []
for matched_city_name, _ in matched_cities:
postal_code_objects_for_city = [
x for x in cities if x.name == matched_city_name
]
matched_postal_codes.extend(
pc.postal_code for pc in postal_code_objects_for_city
)
postal_code_objects_for_city = [x for x in cities if x.name == matched_city_name]
matched_postal_codes.extend(pc.postal_code for pc in postal_code_objects_for_city)
# Try to match them with postal codes in config constraint
matched_postal_codes_in_config = set(matched_postal_codes) & set(
constraint["postal_codes"]
)
matched_postal_codes_in_config = set(matched_postal_codes) & set(constraint["postal_codes"])
if matched_postal_codes_in_config:
# If there are some matched postal codes which are also in
# config, use them preferentially. This avoids ignoring
@ -158,18 +144,14 @@ def guess_location_position(location, cities, constraint):
# take the city position
for matched_city_name, _ in matched_cities:
postal_code_objects_for_city = [
x
for x in cities
if x.name == matched_city_name and x.postal_code == postal_code
x for x in cities if x.name == matched_city_name and x.postal_code == postal_code
]
if len(postal_code_objects_for_city):
position = {
"lat": postal_code_objects_for_city[0].lat,
"lng": postal_code_objects_for_city[0].lng,
}
LOGGER.debug(
("Found position %s using city %s."), position, matched_city_name
)
LOGGER.debug(("Found position %s using city %s."), position, matched_city_name)
break
return (postal_code, position)
@ -228,30 +210,18 @@ def guess_postal_code(flats_list, constraint, config, distance_threshold=20000):
# Then fetch position (and postal_code if it couldn't be found earlier)
if postal_code:
cities = [
x for x in opendata["postal_codes"] if x.postal_code == postal_code
]
cities = [x for x in opendata["postal_codes"] if x.postal_code == postal_code]
(_, position) = guess_location_position(location, cities, constraint)
else:
(postal_code, position) = guess_location_position(
location, opendata["postal_codes"], constraint
)
(postal_code, position) = guess_location_position(location, opendata["postal_codes"], constraint)
# Check that postal code is not too far from the ones listed in config,
# limit bad fuzzy matching
if postal_code and distance_threshold:
distance = min(
tools.distance(
next(
(x.lat, x.lng)
for x in opendata["postal_codes"]
if x.postal_code == postal_code
),
next(
(x.lat, x.lng)
for x in opendata["postal_codes"]
if x.postal_code == constraint_postal_code
),
next((x.lat, x.lng) for x in opendata["postal_codes"] if x.postal_code == postal_code),
next((x.lat, x.lng) for x in opendata["postal_codes"] if x.postal_code == constraint_postal_code),
)
for constraint_postal_code in constraint["postal_codes"]
)
@ -314,9 +284,7 @@ def guess_stations(flats_list, constraint, config):
if not flat_station:
# Skip everything if empty station
LOGGER.info(
"No stations field for flat %s, skipping stations lookup.", flat["id"]
)
LOGGER.info("No stations field for flat %s, skipping stations lookup.", flat["id"])
continue
# Weboob modules can return several stations in a comma-separated list.
@ -345,22 +313,14 @@ def guess_stations(flats_list, constraint, config):
if postal_code:
# If there is a postal code, check that the matched station is
# close to it
postal_code_gps = next(
(x.lat, x.lng)
for x in opendata["postal_codes"]
if x.postal_code == postal_code
)
postal_code_gps = next((x.lat, x.lng) for x in opendata["postal_codes"] if x.postal_code == postal_code)
for station in matched_stations:
# Note that multiple stations with the same name exist in a
# city, hence the list of stations objects for a given matching
# station name.
stations_objects = [
x for x in opendata["stations"] if x.name == station[0]
]
stations_objects = [x for x in opendata["stations"] if x.name == station[0]]
for station_data in stations_objects:
distance = tools.distance(
(station_data.lat, station_data.lng), postal_code_gps
)
distance = tools.distance((station_data.lat, station_data.lng), postal_code_gps)
if distance < distance_threshold:
# If at least one of the coordinates for a given
# station is close enough, that's ok and we can add
@ -375,19 +335,14 @@ def guess_stations(flats_list, constraint, config):
)
break
LOGGER.info(
(
"Station %s is too far from flat %s (%dm > %dm), "
"discarding this station."
),
("Station %s is too far from flat %s (%dm > %dm), " "discarding this station."),
station[0],
flat["id"],
int(distance),
int(distance_threshold),
)
else:
LOGGER.info(
"No postal code for flat %s, skipping stations detection.", flat["id"]
)
LOGGER.info("No postal code for flat %s, skipping stations detection.", flat["id"])
if not good_matched_stations:
# No stations found, log it and continue with next housing
@ -460,8 +415,7 @@ def compute_travel_times(flats_list, constraint, config):
station["gps"], place["gps"], TimeToModes[mode], config
)
if time_from_station_dict and (
time_from_station_dict["time"] < time_to_place_dict
or time_to_place_dict is None
time_from_station_dict["time"] < time_to_place_dict or time_to_place_dict is None
):
# If starting from this station makes the route to the
# specified place shorter, update

View File

@ -182,22 +182,14 @@ class Flat(BASE):
# Handle flatisfy metadata
flat_dict = flat_dict.copy()
if "flatisfy" in flat_dict:
flat_dict["flatisfy_stations"] = flat_dict["flatisfy"].get(
"matched_stations", []
)
flat_dict["flatisfy_postal_code"] = flat_dict["flatisfy"].get(
"postal_code", None
)
flat_dict["flatisfy_stations"] = flat_dict["flatisfy"].get("matched_stations", [])
flat_dict["flatisfy_postal_code"] = flat_dict["flatisfy"].get("postal_code", None)
flat_dict["flatisfy_position"] = flat_dict["flatisfy"].get("position", None)
flat_dict["flatisfy_time_to"] = flat_dict["flatisfy"].get("time_to", {})
flat_dict["flatisfy_constraint"] = flat_dict["flatisfy"].get(
"constraint", "default"
)
flat_dict["flatisfy_constraint"] = flat_dict["flatisfy"].get("constraint", "default")
del flat_dict["flatisfy"]
flat_dict = {
k: v for k, v in flat_dict.items() if k in inspect(Flat).columns.keys()
}
flat_dict = {k: v for k, v in flat_dict.items() if k in inspect(Flat).columns.keys()}
return Flat(**flat_dict)
def __repr__(self):

View File

@ -65,9 +65,7 @@ class TestTexts(unittest.TestCase):
tools.convert_arabic_to_roman_in_text("Dans le 15e arrondissement"),
)
self.assertEqual(
"XXeme arr.", tools.convert_arabic_to_roman_in_text("20eme arr.")
)
self.assertEqual("XXeme arr.", tools.convert_arabic_to_roman_in_text("20eme arr."))
self.assertEqual(
"A AIX EN PROVENCE",
@ -121,25 +119,19 @@ class TestPhoneNumbers(unittest.TestCase):
"""
Checks phone numbers with international prefixes.
"""
self.assertEqual(
"0605040302", duplicates.homogeneize_phone_number("+33605040302")
)
self.assertEqual("0605040302", duplicates.homogeneize_phone_number("+33605040302"))
def test_dots_separators(self):
"""
Checks phone numbers with dots.
"""
self.assertEqual(
"0605040302", duplicates.homogeneize_phone_number("06.05.04.03.02")
)
self.assertEqual("0605040302", duplicates.homogeneize_phone_number("06.05.04.03.02"))
def test_spaces_separators(self):
"""
Checks phone numbers with spaces.
"""
self.assertEqual(
"0605040302", duplicates.homogeneize_phone_number("06 05 04 03 02")
)
self.assertEqual("0605040302", duplicates.homogeneize_phone_number("06 05 04 03 02"))
class TestPhotos(unittest.TestCase):
@ -157,11 +149,7 @@ class TestPhotos(unittest.TestCase):
"""
photo = {"url": TESTS_DATA_DIR + "127028739@seloger.jpg"}
self.assertTrue(
duplicates.compare_photos(
photo, photo, self.IMAGE_CACHE, self.HASH_THRESHOLD
)
)
self.assertTrue(duplicates.compare_photos(photo, photo, self.IMAGE_CACHE, self.HASH_THRESHOLD))
def test_different_photos(self):
"""
@ -256,9 +244,7 @@ class TestImageCache(unittest.TestCase):
"""
def __init__(self, *args, **kwargs):
self.IMAGE_CACHE = ImageCache( # pylint: disable=invalid-name
storage_dir=tempfile.mkdtemp(prefix="flatisfy-")
)
self.IMAGE_CACHE = ImageCache(storage_dir=tempfile.mkdtemp(prefix="flatisfy-")) # pylint: disable=invalid-name
super(TestImageCache, self).__init__(*args, **kwargs)
def test_invalid_url(self):
@ -297,9 +283,7 @@ class TestDuplicates(unittest.TestCase):
"""
Generates a fake flat post.
"""
backend = BACKENDS_BY_PRECEDENCE[
random.randint(0, len(BACKENDS_BY_PRECEDENCE) - 1)
]
backend = BACKENDS_BY_PRECEDENCE[random.randint(0, len(BACKENDS_BY_PRECEDENCE) - 1)]
return {
"id": str(random.randint(100000, 199999)) + "@" + backend,
"phone": "0607080910",
@ -331,9 +315,7 @@ class TestDuplicates(unittest.TestCase):
"""
flat1 = self.generate_fake_flat()
flat2 = copy.deepcopy(flat1)
score = duplicates.get_duplicate_score(
flat1, flat2, self.IMAGE_CACHE, self.HASH_THRESHOLD
)
score = duplicates.get_duplicate_score(flat1, flat2, self.IMAGE_CACHE, self.HASH_THRESHOLD)
self.assertGreaterEqual(score, self.DUPLICATES_MIN_SCORE_WITHOUT_PHOTOS)
def test_different_prices(self):
@ -344,9 +326,7 @@ class TestDuplicates(unittest.TestCase):
flat2 = copy.deepcopy(flat1)
flat2["cost"] += 1000
score = duplicates.get_duplicate_score(
flat1, flat2, self.IMAGE_CACHE, self.HASH_THRESHOLD
)
score = duplicates.get_duplicate_score(flat1, flat2, self.IMAGE_CACHE, self.HASH_THRESHOLD)
self.assertLess(score, self.DUPLICATES_MIN_SCORE_WITHOUT_PHOTOS)
def test_different_rooms(self):
@ -358,9 +338,7 @@ class TestDuplicates(unittest.TestCase):
flat2 = copy.deepcopy(flat1)
flat2["rooms"] += 1
score = duplicates.get_duplicate_score(
flat1, flat2, self.IMAGE_CACHE, self.HASH_THRESHOLD
)
score = duplicates.get_duplicate_score(flat1, flat2, self.IMAGE_CACHE, self.HASH_THRESHOLD)
self.assertLess(score, self.DUPLICATES_MIN_SCORE_WITHOUT_PHOTOS)
def test_different_areas(self):
@ -371,9 +349,7 @@ class TestDuplicates(unittest.TestCase):
flat2 = copy.deepcopy(flat1)
flat2["area"] += 10
score = duplicates.get_duplicate_score(
flat1, flat2, self.IMAGE_CACHE, self.HASH_THRESHOLD
)
score = duplicates.get_duplicate_score(flat1, flat2, self.IMAGE_CACHE, self.HASH_THRESHOLD)
self.assertLess(score, self.DUPLICATES_MIN_SCORE_WITHOUT_PHOTOS)
def test_different_areas_decimals(self):
@ -386,9 +362,7 @@ class TestDuplicates(unittest.TestCase):
flat1["area"] = 50.65
flat2["area"] = 50.37
score = duplicates.get_duplicate_score(
flat1, flat2, self.IMAGE_CACHE, self.HASH_THRESHOLD
)
score = duplicates.get_duplicate_score(flat1, flat2, self.IMAGE_CACHE, self.HASH_THRESHOLD)
self.assertLess(score, self.DUPLICATES_MIN_SCORE_WITHOUT_PHOTOS)
def test_different_phones(self):
@ -400,9 +374,7 @@ class TestDuplicates(unittest.TestCase):
flat2 = copy.deepcopy(flat1)
flat2["phone"] = "0708091011"
score = duplicates.get_duplicate_score(
flat1, flat2, self.IMAGE_CACHE, self.HASH_THRESHOLD
)
score = duplicates.get_duplicate_score(flat1, flat2, self.IMAGE_CACHE, self.HASH_THRESHOLD)
self.assertLess(score, self.DUPLICATES_MIN_SCORE_WITHOUT_PHOTOS)
def test_real_duplicates(self):
@ -412,9 +384,7 @@ class TestDuplicates(unittest.TestCase):
"""
flats = self.load_files("127028739@seloger", "14428129@explorimmo")
score = duplicates.get_duplicate_score(
flats[0], flats[1], self.IMAGE_CACHE, self.HASH_THRESHOLD
)
score = duplicates.get_duplicate_score(flats[0], flats[1], self.IMAGE_CACHE, self.HASH_THRESHOLD)
self.assertGreaterEqual(score, self.DUPLICATES_MIN_SCORE_WITH_PHOTOS)
# TODO: fixme, find new testing examples

View File

@ -90,9 +90,7 @@ def convert_arabic_to_roman_in_text(text):
:returns: The corresponding text with arabic literals converted to
roman.
"""
return re.sub(
r"(\d+)", lambda matchobj: convert_arabic_to_roman(matchobj.group(0)), text
)
return re.sub(r"(\d+)", lambda matchobj: convert_arabic_to_roman(matchobj.group(0)), text)
def hash_dict(func):
@ -155,9 +153,7 @@ def pretty_json(data):
"toto": "ok"
}
"""
return json.dumps(
data, cls=DateAwareJSONEncoder, indent=4, separators=(",", ": "), sort_keys=True
)
return json.dumps(data, cls=DateAwareJSONEncoder, indent=4, separators=(",", ": "), sort_keys=True)
def batch(iterable, size):
@ -296,10 +292,7 @@ def distance(gps1, gps2):
long2 = math.radians(gps2[1])
# pylint: disable=locally-disabled,invalid-name
a = (
math.sin((lat2 - lat1) / 2.0) ** 2
+ math.cos(lat1) * math.cos(lat2) * math.sin((long2 - long1) / 2.0) ** 2
)
a = math.sin((lat2 - lat1) / 2.0) ** 2 + math.cos(lat1) * math.cos(lat2) * math.sin((long2 - long1) / 2.0) ** 2
c = 2.0 * math.atan2(math.sqrt(a), math.sqrt(1 - a))
earth_radius = 6371000
@ -329,9 +322,7 @@ def merge_dicts(*args):
if len(args) == 1:
return args[0]
flat1, flat2 = args[
:2
] # pylint: disable=locally-disabled,unbalanced-tuple-unpacking,line-too-long
flat1, flat2 = args[:2] # pylint: disable=locally-disabled,unbalanced-tuple-unpacking,line-too-long
merged_flat = {}
for k, value2 in flat2.items():
value1 = flat1.get(k, None)
@ -408,9 +399,7 @@ def get_travel_time_between(latlng_from, latlng_to, mode, config):
sections.append(
{
"geojson": section["geojson"],
"color": (
section["display_informations"].get("color", None)
),
"color": (section["display_informations"].get("color", None)),
}
)
elif section["type"] == "street_network":
@ -427,8 +416,7 @@ def get_travel_time_between(latlng_from, latlng_to, mode, config):
) as exc:
# Ignore any possible exception
LOGGER.warning(
"An exception occurred during travel time lookup on "
"Navitia: %s.",
"An exception occurred during travel time lookup on " "Navitia: %s.",
str(exc),
)
else:
@ -467,9 +455,7 @@ def get_travel_time_between(latlng_from, latlng_to, mode, config):
route = response.geojson()["features"][0]
# Fix longitude/latitude inversion in geojson output
geometry = route["geometry"]
geometry["coordinates"] = [
(x[1], x[0]) for x in geometry["coordinates"]
]
geometry["coordinates"] = [(x[1], x[0]) for x in geometry["coordinates"]]
sections = [{"geojson": geometry, "color": "000"}]
travel_time = route["properties"]["duration"]
except (requests.exceptions.RequestException, IndexError, KeyError) as exc:

View File

@ -28,9 +28,7 @@ class QuietWSGIRefServer(bottle.WSGIRefServer):
quiet = True
def run(self, app):
app.log.info(
"Server is now up and ready! Listening on %s:%s." % (self.host, self.port)
)
app.log.info("Server is now up and ready! Listening on %s:%s." % (self.host, self.port))
super(QuietWSGIRefServer, self).run(app)
@ -61,11 +59,7 @@ def get_app(config):
app.install(canister.Canister())
# Use DateAwareJSONEncoder to dump JSON strings
# From http://stackoverflow.com/questions/21282040/bottle-framework-how-to-return-datetime-in-json-response#comment55718456_21282666. pylint: disable=locally-disabled,line-too-long
app.install(
bottle.JSONPlugin(
json_dumps=functools.partial(json.dumps, cls=DateAwareJSONEncoder)
)
)
app.install(bottle.JSONPlugin(json_dumps=functools.partial(json.dumps, cls=DateAwareJSONEncoder)))
# Enable CORS
@app.hook("after_request")
@ -76,9 +70,7 @@ def get_app(config):
# The str() call is required as we import unicode_literals and the WSGI
# headers list should have plain str type.
bottle.response.headers[str("Access-Control-Allow-Origin")] = str("*")
bottle.response.headers[str("Access-Control-Allow-Methods")] = str(
"PUT, GET, POST, DELETE, OPTIONS, PATCH"
)
bottle.response.headers[str("Access-Control-Allow-Methods")] = str("PUT, GET, POST, DELETE, OPTIONS, PATCH")
bottle.response.headers[str("Access-Control-Allow-Headers")] = str(
"Origin, Accept, Content-Type, X-Requested-With, X-CSRF-Token"
)
@ -86,9 +78,7 @@ def get_app(config):
# API v1 routes
app.route("/api/v1", ["GET", "OPTIONS"], api_routes.index_v1)
app.route(
"/api/v1/time_to_places", ["GET", "OPTIONS"], api_routes.time_to_places_v1
)
app.route("/api/v1/time_to_places", ["GET", "OPTIONS"], api_routes.time_to_places_v1)
app.route("/api/v1/flats", ["GET", "OPTIONS"], api_routes.flats_v1)
app.route("/api/v1/flats/:flat_id", ["GET", "OPTIONS"], api_routes.flat_v1)
@ -130,9 +120,7 @@ def get_app(config):
app.route(
"/data/img/<filename:path>",
"GET",
lambda filename: bottle.static_file(
filename, root=os.path.join(config["data_directory"], "images")
),
lambda filename: bottle.static_file(filename, root=os.path.join(config["data_directory"], "images")),
)
return app

View File

@ -83,9 +83,7 @@ def _JSONApiSpec(query, model, default_sorting=None):
try:
sorting.append(getattr(model, default_sorting))
except AttributeError:
raise ValueError(
"Invalid default sorting key provided: {}.".format(default_sorting)
)
raise ValueError("Invalid default sorting key provided: {}.".format(default_sorting))
return filters, page_number, page_size, sorting
@ -104,9 +102,7 @@ def _serialize_flat(flat, config):
postal_codes = {}
for constraint_name, constraint in config["constraints"].items():
postal_codes[constraint_name] = flatisfy.data.load_data(
PostalCode, constraint, config
)
postal_codes[constraint_name] = flatisfy.data.load_data(PostalCode, constraint, config)
try:
assert flat["flatisfy_postal_code"]
@ -287,9 +283,7 @@ def time_to_places_v1(config):
try:
places = {}
for constraint_name, constraint in config["constraints"].items():
places[constraint_name] = {
k: v["gps"] for k, v in constraint["time_to"].items()
}
places[constraint_name] = {k: v["gps"] for k, v in constraint["time_to"].items()}
return {"data": places}
except Exception as exc: # pylint: disable= broad-except
return JSONError(500, str(exc))
@ -342,11 +336,7 @@ def search_v1(db, config):
except ValueError as exc:
return JSONError(400, str(exc))
flats_db_query = (
flat_model.Flat.search_query(db, query)
.filter_by(**filters)
.order_by(*sorting)
)
flats_db_query = flat_model.Flat.search_query(db, query).filter_by(**filters).order_by(*sorting)
flats = [
_serialize_flat(flat, config)
for flat in itertools.islice(
@ -381,9 +371,7 @@ def ics_feed_v1(config, db):
cal = vobject.iCalendar()
try:
flats_with_visits = db.query(flat_model.Flat).filter(
flat_model.Flat.visit_date.isnot(None)
)
flats_with_visits = db.query(flat_model.Flat).filter(flat_model.Flat.visit_date.isnot(None))
for flat in flats_with_visits:
vevent = cal.add("vevent")

10
wsgi.py
View File

@ -13,9 +13,7 @@ from flatisfy.web import app as web_app
class Args:
config = os.path.join(
os.path.dirname(os.path.realpath(__file__)), "config/config.json"
)
config = os.path.join(os.path.dirname(os.path.realpath(__file__)), "config/config.json")
LOGGER = logging.getLogger("flatisfy")
@ -23,11 +21,7 @@ LOGGER = logging.getLogger("flatisfy")
CONFIG = flatisfy.config.load_config(Args())
if CONFIG is None:
LOGGER.error(
"Invalid configuration. Exiting. "
"Run init-config before if this is the first time "
"you run Flatisfy."
)
LOGGER.error("Invalid configuration. Exiting. Run init-config before if this is the first time you run Flatisfy.")
sys.exit(1)