reformat with black

This commit is contained in:
Gautier P 2021-01-26 14:39:52 +01:00
parent 9c5afac91c
commit 42909bd46f
30 changed files with 860 additions and 973 deletions

View File

@ -18,7 +18,8 @@
import os
import sys
sys.path.insert(0, os.path.abspath('..'))
sys.path.insert(0, os.path.abspath(".."))
# -- General configuration ------------------------------------------------
@ -30,19 +31,19 @@ sys.path.insert(0, os.path.abspath('..'))
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = [
'sphinx.ext.autodoc',
'sphinx.ext.viewcode',
"sphinx.ext.autodoc",
"sphinx.ext.viewcode",
]
# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']
templates_path = ["_templates"]
# The suffix(es) of source filenames.
# You can specify multiple suffixes as a list of strings:
#
source_suffix = ['.rst', '.md']
source_suffix = [".rst", ".md"]
source_parsers = {
'.md': 'recommonmark.parser.CommonMarkParser',
".md": "recommonmark.parser.CommonMarkParser",
}
# The encoding of source files.
@ -50,21 +51,21 @@ source_parsers = {
# source_encoding = 'utf-8-sig'
# The master toctree document.
master_doc = 'index'
master_doc = "index"
# General information about the project.
project = u'Flatisfy'
copyright = u'2017, Phyks (Lucas Verney)'
author = u'Phyks (Lucas Verney)'
project = u"Flatisfy"
copyright = u"2017, Phyks (Lucas Verney)"
author = u"Phyks (Lucas Verney)"
# The version info for the project you're documenting, acts as replacement for
# |version| and |release|, also used in various other places throughout the
# built documents.
#
# The short X.Y version.
version = u'0.1'
version = u"0.1"
# The full version, including alpha/beta/rc tags.
release = u'0.1'
release = u"0.1"
# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
@ -85,7 +86,7 @@ language = None
# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# These patterns also affect html_static_path and html_extra_path
exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"]
# The reST default role (used for this markup: `text`) to use for all
# documents.
@ -107,7 +108,7 @@ exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
# show_authors = False
# The name of the Pygments (syntax highlighting) style to use.
pygments_style = 'sphinx'
pygments_style = "sphinx"
# A list of ignored prefixes for module index sorting.
# modindex_common_prefix = []
@ -124,7 +125,7 @@ todo_include_todos = False
# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
#
html_theme = 'classic'
html_theme = "classic"
# Theme options are theme-specific and customize the look and feel of a theme
# further. For a list of options available for each theme, see the
@ -158,7 +159,7 @@ html_theme = 'classic'
# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ['_static']
html_static_path = ["_static"]
# Add any extra paths that contain custom files (such as robots.txt or
# .htaccess) here, relative to this directory. These files are copied
@ -238,34 +239,36 @@ html_static_path = ['_static']
# html_search_scorer = 'scorer.js'
# Output file base name for HTML help builder.
htmlhelp_basename = 'Flatisfydoc'
htmlhelp_basename = "Flatisfydoc"
# -- Options for LaTeX output ---------------------------------------------
latex_elements = {
# The paper size ('letterpaper' or 'a4paper').
#
# 'papersize': 'letterpaper',
# The font size ('10pt', '11pt' or '12pt').
#
# 'pointsize': '10pt',
# Additional stuff for the LaTeX preamble.
#
# 'preamble': '',
# Latex figure (float) alignment
#
# 'figure_align': 'htbp',
# The paper size ('letterpaper' or 'a4paper').
#
# 'papersize': 'letterpaper',
# The font size ('10pt', '11pt' or '12pt').
#
# 'pointsize': '10pt',
# Additional stuff for the LaTeX preamble.
#
# 'preamble': '',
# Latex figure (float) alignment
#
# 'figure_align': 'htbp',
}
# Grouping the document tree into LaTeX files. List of tuples
# (source start file, target name, title,
# author, documentclass [howto, manual, or own class]).
latex_documents = [
(master_doc, 'Flatisfy.tex', u'Flatisfy Documentation',
u'Phyks (Lucas Verney)', 'manual'),
(
master_doc,
"Flatisfy.tex",
u"Flatisfy Documentation",
u"Phyks (Lucas Verney)",
"manual",
),
]
# The name of an image file (relative to this directory) to place at the top of
@ -305,10 +308,7 @@ latex_documents = [
# One entry per manual page. List of tuples
# (source start file, name, description, authors, manual section).
man_pages = [
(master_doc, 'flatisfy', u'Flatisfy Documentation',
[author], 1)
]
man_pages = [(master_doc, "flatisfy", u"Flatisfy Documentation", [author], 1)]
# If true, show URL addresses after external links.
#
@ -321,9 +321,15 @@ man_pages = [
# (source start file, target name, title, author,
# dir menu entry, description, category)
texinfo_documents = [
(master_doc, 'Flatisfy', u'Flatisfy Documentation',
author, 'Flatisfy', 'One line description of project.',
'Miscellaneous'),
(
master_doc,
"Flatisfy",
u"Flatisfy Documentation",
author,
"Flatisfy",
"One line description of project.",
"Miscellaneous",
),
]
# Documents to append as an appendix to all manuals.

View File

@ -17,6 +17,7 @@ from flatisfy import data
from flatisfy import fetch
from flatisfy import tools
from flatisfy import tests
# pylint: enable=locally-disabled,wrong-import-position
@ -27,68 +28,59 @@ def parse_args(argv=None):
"""
Create parser and parse arguments.
"""
parser = argparse.ArgumentParser(prog="Flatisfy",
description="Find the perfect flat.")
parser = argparse.ArgumentParser(
prog="Flatisfy", description="Find the perfect flat."
)
# Parent parser containing arguments common to any subcommand
parent_parser = argparse.ArgumentParser(add_help=False)
parent_parser.add_argument(
"--data-dir",
help="Location of Flatisfy data directory."
"--data-dir", help="Location of Flatisfy data directory."
)
parent_parser.add_argument("--config", help="Configuration file to use.")
parent_parser.add_argument(
"--passes",
choices=[0, 1, 2, 3],
type=int,
help="Number of passes to do on the filtered data.",
)
parent_parser.add_argument(
"--config",
help="Configuration file to use."
"--max-entries", type=int, help="Maximum number of entries to fetch."
)
parent_parser.add_argument(
"--passes", choices=[0, 1, 2, 3], type=int,
help="Number of passes to do on the filtered data."
"-v", "--verbose", action="store_true", help="Verbose logging output."
)
parent_parser.add_argument("-vv", action="store_true", help="Debug logging output.")
parent_parser.add_argument(
"--max-entries", type=int,
help="Maximum number of entries to fetch."
)
parent_parser.add_argument(
"-v", "--verbose", action="store_true",
help="Verbose logging output."
)
parent_parser.add_argument(
"-vv", action="store_true",
help="Debug logging output."
)
parent_parser.add_argument(
"--constraints", type=str,
help="Comma-separated list of constraints to consider."
"--constraints",
type=str,
help="Comma-separated list of constraints to consider.",
)
# Subcommands
subparsers = parser.add_subparsers(
dest="cmd", help="Available subcommands"
)
subparsers = parser.add_subparsers(dest="cmd", help="Available subcommands")
# Build data subcommand
subparsers.add_parser(
"build-data", parents=[parent_parser],
help="Build necessary data"
"build-data", parents=[parent_parser], help="Build necessary data"
)
# Init config subcommand
parser_init_config = subparsers.add_parser(
"init-config", parents=[parent_parser],
help="Initialize empty configuration."
"init-config", parents=[parent_parser], help="Initialize empty configuration."
)
parser_init_config.add_argument(
"output", nargs="?", help="Output config file. Use '-' for stdout."
)
# Fetch subcommand parser
subparsers.add_parser("fetch", parents=[parent_parser],
help="Fetch housings posts")
subparsers.add_parser("fetch", parents=[parent_parser], help="Fetch housings posts")
# Filter subcommand parser
parser_filter = subparsers.add_parser(
"filter", parents=[parent_parser],
help="Filter housings posts according to constraints in config."
"filter",
parents=[parent_parser],
help="Filter housings posts according to constraints in config.",
)
parser_filter.add_argument(
"--input",
@ -97,34 +89,31 @@ def parse_args(argv=None):
"no additional fetching of infos is done, and the script outputs "
"a filtered JSON dump on stdout. If not provided, update status "
"of the flats in the database."
)
),
)
# Import subcommand parser
import_filter = subparsers.add_parser(
"import", parents=[parent_parser],
help="Import housing posts in database.")
"import", parents=[parent_parser], help="Import housing posts in database."
)
import_filter.add_argument(
"--new-only",
action="store_true",
help=(
"Download new housing posts only but do not refresh existing ones"
)
help=("Download new housing posts only but do not refresh existing ones"),
)
# Purge subcommand parser
subparsers.add_parser("purge", parents=[parent_parser],
help="Purge database.")
subparsers.add_parser("purge", parents=[parent_parser], help="Purge database.")
# Serve subcommand parser
parser_serve = subparsers.add_parser("serve", parents=[parent_parser],
help="Serve the web app.")
parser_serve = subparsers.add_parser(
"serve", parents=[parent_parser], help="Serve the web app."
)
parser_serve.add_argument("--port", type=int, help="Port to bind to.")
parser_serve.add_argument("--host", help="Host to listen on.")
# Test subcommand parser
subparsers.add_parser("test", parents=[parent_parser],
help="Unit testing.")
subparsers.add_parser("test", parents=[parent_parser], help="Unit testing.")
return parser.parse_args(argv)
@ -139,15 +128,15 @@ def main():
# Set logger
if args.vv:
logging.getLogger('').setLevel(logging.DEBUG)
logging.getLogger('sqlalchemy.engine').setLevel(logging.DEBUG)
logging.getLogger("").setLevel(logging.DEBUG)
logging.getLogger("sqlalchemy.engine").setLevel(logging.DEBUG)
elif args.verbose:
logging.getLogger('').setLevel(logging.INFO)
logging.getLogger("").setLevel(logging.INFO)
# sqlalchemy INFO level is way too loud, just stick with WARNING
logging.getLogger('sqlalchemy.engine').setLevel(logging.WARNING)
logging.getLogger("sqlalchemy.engine").setLevel(logging.WARNING)
else:
logging.getLogger('').setLevel(logging.WARNING)
logging.getLogger('sqlalchemy.engine').setLevel(logging.WARNING)
logging.getLogger("").setLevel(logging.WARNING)
logging.getLogger("sqlalchemy.engine").setLevel(logging.WARNING)
# Init-config command
if args.cmd == "init-config":
@ -161,9 +150,11 @@ def main():
else:
config = flatisfy.config.load_config(args, check_with_data=True)
if config is None:
LOGGER.error("Invalid configuration. Exiting. "
"Run init-config before if this is the first time "
"you run Flatisfy.")
LOGGER.error(
"Invalid configuration. Exiting. "
"Run init-config before if this is the first time "
"you run Flatisfy."
)
sys.exit(1)
# Purge command
@ -179,18 +170,16 @@ def main():
if args.cmd == "fetch":
# Fetch and filter flats list
fetched_flats = fetch.fetch_flats(config)
fetched_flats = cmds.filter_fetched_flats(config,
fetched_flats=fetched_flats,
fetch_details=True)
fetched_flats = cmds.filter_fetched_flats(
config, fetched_flats=fetched_flats, fetch_details=True
)
# Sort by cost
fetched_flats = {
k: tools.sort_list_of_dicts_by(v["new"], "cost")
for k, v in fetched_flats.items()
}
print(
tools.pretty_json(fetched_flats)
)
print(tools.pretty_json(fetched_flats))
return
# Filter command
elif args.cmd == "filter":
@ -199,9 +188,7 @@ def main():
fetched_flats = fetch.load_flats_from_file(args.input, config)
fetched_flats = cmds.filter_fetched_flats(
config,
fetched_flats=fetched_flats,
fetch_details=False
config, fetched_flats=fetched_flats, fetch_details=False
)
# Sort by cost
@ -211,9 +198,7 @@ def main():
}
# Output to stdout
print(
tools.pretty_json(fetched_flats)
)
print(tools.pretty_json(fetched_flats))
else:
cmds.import_and_filter(config, load_from_db=True)
return

View File

@ -23,7 +23,9 @@ import time
LOGGER = logging.getLogger(__name__)
def filter_flats_list(config, constraint_name, flats_list, fetch_details=True, past_flats=None):
def filter_flats_list(
config, constraint_name, flats_list, fetch_details=True, past_flats=None
):
"""
Filter the available flats list. Then, filter it according to criteria.
@ -45,13 +47,9 @@ def filter_flats_list(config, constraint_name, flats_list, fetch_details=True, p
except KeyError:
LOGGER.error(
"Missing constraint %s. Skipping filtering for these posts.",
constraint_name
constraint_name,
)
return {
"new": [],
"duplicate": [],
"ignored": []
}
return {"new": [], "duplicate": [], "ignored": []}
first_pass_result = collections.defaultdict(list)
second_pass_result = collections.defaultdict(list)
@ -59,9 +57,7 @@ def filter_flats_list(config, constraint_name, flats_list, fetch_details=True, p
# Do a first pass with the available information to try to remove as many
# unwanted postings as possible
if config["passes"] > 0:
first_pass_result = flatisfy.filters.first_pass(flats_list,
constraint,
config)
first_pass_result = flatisfy.filters.first_pass(flats_list, constraint, config)
else:
first_pass_result["new"] = flats_list
@ -95,8 +91,7 @@ def filter_flats_list(config, constraint_name, flats_list, fetch_details=True, p
# Do a third pass to deduplicate better
if config["passes"] > 2:
third_pass_result = flatisfy.filters.third_pass(
second_pass_result["new"],
config
second_pass_result["new"], config
)
else:
third_pass_result["new"] = second_pass_result["new"]
@ -104,15 +99,15 @@ def filter_flats_list(config, constraint_name, flats_list, fetch_details=True, p
return {
"new": third_pass_result["new"],
"duplicate": (
first_pass_result["duplicate"] +
second_pass_result["duplicate"] +
third_pass_result["duplicate"]
first_pass_result["duplicate"]
+ second_pass_result["duplicate"]
+ third_pass_result["duplicate"]
),
"ignored": (
first_pass_result["ignored"] +
second_pass_result["ignored"] +
third_pass_result["ignored"]
)
first_pass_result["ignored"]
+ second_pass_result["ignored"]
+ third_pass_result["ignored"]
),
}
@ -134,7 +129,7 @@ def filter_fetched_flats(config, fetched_flats, fetch_details=True, past_flats={
constraint_name,
flats_list,
fetch_details,
past_flats.get(constraint_name, None)
past_flats.get(constraint_name, None),
)
return fetched_flats
@ -156,9 +151,12 @@ def import_and_filter(config, load_from_db=False, new_only=False):
else:
fetched_flats = fetch.fetch_flats(config)
# Do not fetch additional details if we loaded data from the db.
flats_by_status = filter_fetched_flats(config, fetched_flats=fetched_flats,
fetch_details=(not load_from_db),
past_flats=past_flats if new_only else {})
flats_by_status = filter_fetched_flats(
config,
fetched_flats=fetched_flats,
fetch_details=(not load_from_db),
past_flats=past_flats if new_only else {},
)
# Create database connection
get_session = database.init_db(config["database"], config["search_index"])
@ -175,7 +173,7 @@ def import_and_filter(config, load_from_db=False, new_only=False):
# Set is_expired to true for all existing flats.
# This will be set back to false if we find them during importing.
for flat in session.query(flat_model.Flat).all():
flat.is_expired = True;
flat.is_expired = True
for status, flats_list in flatten_flats_by_status.items():
# Build SQLAlchemy Flat model objects for every available flat
@ -195,9 +193,7 @@ def import_and_filter(config, load_from_db=False, new_only=False):
# status if the user defined it
flat_object = flats_objects[each.id]
if each.status in flat_model.AUTOMATED_STATUSES:
flat_object.status = getattr(
flat_model.FlatStatus, status
)
flat_object.status = getattr(flat_model.FlatStatus, status)
else:
flat_object.status = each.status
@ -223,11 +219,8 @@ def import_and_filter(config, load_from_db=False, new_only=False):
LOGGER.info(f"Found {len(new_flats)} new flats.")
# Touch a file to indicate last update timestamp
ts_file = os.path.join(
config["data_directory"],
"timestamp"
)
with open(ts_file, 'w'):
ts_file = os.path.join(config["data_directory"], "timestamp")
with open(ts_file, "w"):
os.utime(ts_file, None)
LOGGER.info("Done!")
@ -270,5 +263,8 @@ def serve(config):
# standard logging
server = web_app.QuietWSGIRefServer
print("Launching web viewer running on http://%s:%s" % (config["host"], config["port"]))
print(
"Launching web viewer running on http://%s:%s"
% (config["host"], config["port"])
)
app.run(host=config["host"], port=config["port"], server=server)

View File

@ -30,7 +30,7 @@ DEFAULT_CONFIG = {
"default": {
"type": None, # RENT, SALE, SHARING
"house_types": [], # List of house types, must be in APART, HOUSE,
# PARKING, LAND, OTHER or UNKNOWN
# PARKING, LAND, OTHER or UNKNOWN
"postal_codes": [], # List of postal codes
"area": (None, None), # (min, max) in m^2
"cost": (None, None), # (min, max) in currency unit
@ -42,12 +42,12 @@ DEFAULT_CONFIG = {
"vendu",
"Vendu",
"VENDU",
"recherche"
"recherche",
],
"time_to": {} # Dict mapping names to {"gps": [lat, lng],
# "time": (min, max),
# "mode": Valid mode }
# Time is in seconds
# "time": (min, max),
# "mode": Valid mode }
# Time is in seconds
}
},
# Whether or not to store personal data from housing posts (phone number
@ -91,7 +91,7 @@ DEFAULT_CONFIG = {
"backends": None,
# Should email notifications be sent?
"send_email": False,
"smtp_server": 'localhost',
"smtp_server": "localhost",
"smtp_port": 25,
"smtp_username": None,
"smtp_password": None,
@ -115,6 +115,7 @@ def validate_config(config, check_with_data):
check the config values.
:return: ``True`` if the configuration is valid, ``False`` otherwise.
"""
def _check_constraints_bounds(bounds):
"""
Check the bounds for numeric constraints.
@ -122,12 +123,7 @@ def validate_config(config, check_with_data):
assert isinstance(bounds, list)
assert len(bounds) == 2
assert all(
x is None or
(
isinstance(x, (float, int)) and
x >= 0
)
for x in bounds
x is None or (isinstance(x, (float, int)) and x >= 0) for x in bounds
)
if bounds[0] is not None and bounds[1] is not None:
assert bounds[1] > bounds[0]
@ -140,25 +136,45 @@ def validate_config(config, check_with_data):
# pylint: disable=locally-disabled,line-too-long
assert config["passes"] in [0, 1, 2, 3]
assert config["max_entries"] is None or (isinstance(config["max_entries"], int) and config["max_entries"] > 0) # noqa: E501
assert config["max_entries"] is None or (
isinstance(config["max_entries"], int) and config["max_entries"] > 0
) # noqa: E501
assert config["data_directory"] is None or isinstance(config["data_directory"], str) # noqa: E501
assert config["data_directory"] is None or isinstance(
config["data_directory"], str
) # noqa: E501
assert os.path.isdir(config["data_directory"])
assert isinstance(config["search_index"], str)
assert config["modules_path"] is None or isinstance(config["modules_path"], str) # noqa: E501
assert config["modules_path"] is None or isinstance(
config["modules_path"], str
) # noqa: E501
assert config["database"] is None or isinstance(config["database"], str) # noqa: E501
assert config["database"] is None or isinstance(
config["database"], str
) # noqa: E501
assert isinstance(config["port"], int)
assert isinstance(config["host"], str)
assert config["webserver"] is None or isinstance(config["webserver"], str) # noqa: E501
assert config["backends"] is None or isinstance(config["backends"], list) # noqa: E501
assert config["webserver"] is None or isinstance(
config["webserver"], str
) # noqa: E501
assert config["backends"] is None or isinstance(
config["backends"], list
) # noqa: E501
assert isinstance(config["send_email"], bool)
assert config["smtp_server"] is None or isinstance(config["smtp_server"], str) # noqa: E501
assert config["smtp_port"] is None or isinstance(config["smtp_port"], int) # noqa: E501
assert config["smtp_username"] is None or isinstance(config["smtp_username"], str) # noqa: E501
assert config["smtp_password"] is None or isinstance(config["smtp_password"], str) # noqa: E501
assert config["smtp_server"] is None or isinstance(
config["smtp_server"], str
) # noqa: E501
assert config["smtp_port"] is None or isinstance(
config["smtp_port"], int
) # noqa: E501
assert config["smtp_username"] is None or isinstance(
config["smtp_username"], str
) # noqa: E501
assert config["smtp_password"] is None or isinstance(
config["smtp_password"], str
) # noqa: E501
assert config["smtp_to"] is None or isinstance(config["smtp_to"], list)
assert isinstance(config["store_personal_data"], bool)
@ -167,10 +183,16 @@ def validate_config(config, check_with_data):
assert isinstance(config["duplicate_image_hash_threshold"], int)
# API keys
assert config["navitia_api_key"] is None or isinstance(config["navitia_api_key"], str) # noqa: E501
assert config["mapbox_api_key"] is None or isinstance(config["mapbox_api_key"], str) # noqa: E501
assert config["navitia_api_key"] is None or isinstance(
config["navitia_api_key"], str
) # noqa: E501
assert config["mapbox_api_key"] is None or isinstance(
config["mapbox_api_key"], str
) # noqa: E501
assert config["ignore_station"] is None or isinstance(config["ignore_station"], bool) # noqa: E501
assert config["ignore_station"] is None or isinstance(
config["ignore_station"], bool
) # noqa: E501
# Ensure constraints are ok
assert config["constraints"]
@ -191,8 +213,7 @@ def validate_config(config, check_with_data):
assert isinstance(term, str)
assert "description_should_not_contain" in constraint
assert isinstance(constraint["description_should_not_contain"],
list)
assert isinstance(constraint["description_should_not_contain"], list)
if constraint["description_should_not_contain"]:
for term in constraint["description_should_not_contain"]:
assert isinstance(term, str)
@ -269,20 +290,19 @@ def load_config(args=None, check_with_data=True):
LOGGER.error(
"Unable to load configuration from file, "
"using default configuration: %s.",
exc
exc,
)
# Overload config with arguments
if args and getattr(args, "passes", None) is not None:
LOGGER.debug(
"Overloading number of passes from CLI arguments: %d.",
args.passes
"Overloading number of passes from CLI arguments: %d.", args.passes
)
config_data["passes"] = args.passes
if args and getattr(args, "max_entries", None) is not None:
LOGGER.debug(
"Overloading maximum number of entries from CLI arguments: %d.",
args.max_entries
args.max_entries,
)
config_data["max_entries"] = args.max_entries
if args and getattr(args, "port", None) is not None:
@ -297,37 +317,37 @@ def load_config(args=None, check_with_data=True):
LOGGER.debug("Overloading data directory from CLI arguments.")
config_data["data_directory"] = args.data_dir
elif config_data["data_directory"] is None:
config_data["data_directory"] = appdirs.user_data_dir(
"flatisfy",
"flatisfy"
config_data["data_directory"] = appdirs.user_data_dir("flatisfy", "flatisfy")
LOGGER.debug(
"Using default XDG data directory: %s.", config_data["data_directory"]
)
LOGGER.debug("Using default XDG data directory: %s.",
config_data["data_directory"])
if not os.path.isdir(config_data["data_directory"]):
LOGGER.info("Creating data directory according to config: %s",
config_data["data_directory"])
LOGGER.info(
"Creating data directory according to config: %s",
config_data["data_directory"],
)
os.makedirs(config_data["data_directory"])
os.makedirs(os.path.join(config_data["data_directory"], "images"))
if config_data["database"] is None:
config_data["database"] = "sqlite:///" + os.path.join(
config_data["data_directory"],
"flatisfy.db"
config_data["data_directory"], "flatisfy.db"
)
if config_data["search_index"] is None:
config_data["search_index"] = os.path.join(
config_data["data_directory"],
"search_index"
config_data["data_directory"], "search_index"
)
# Handle constraints filtering
if args and getattr(args, "constraints", None) is not None:
LOGGER.info(
("Filtering constraints from config according to CLI argument. "
"Using only the following constraints: %s."),
args.constraints.replace(",", ", ")
(
"Filtering constraints from config according to CLI argument. "
"Using only the following constraints: %s."
),
args.constraints.replace(",", ", "),
)
constraints_filter = args.constraints.split(",")
config_data["constraints"] = {
@ -338,8 +358,8 @@ def load_config(args=None, check_with_data=True):
# Sanitize website url
if config_data["website_url"] is not None:
if config_data["website_url"][-1] != '/':
config_data["website_url"] += '/'
if config_data["website_url"][-1] != "/":
config_data["website_url"] += "/"
config_validation = validate_config(config_data, check_with_data)
if config_validation is True:

View File

@ -16,7 +16,7 @@ BACKENDS_BY_PRECEDENCE = [
"pap",
"leboncoin",
"explorimmo",
"logicimmo"
"logicimmo",
]

View File

@ -24,11 +24,13 @@ except ImportError:
try:
from functools32 import lru_cache
except ImportError:
def lru_cache(maxsize=None): # pylint: disable=unused-argument
"""
Identity implementation of ``lru_cache`` for fallback.
"""
return lambda func: func
LOGGER.warning(
"`functools.lru_cache` is not available on your system. Consider "
"installing `functools32` Python module if using Python2 for "
@ -49,8 +51,8 @@ def preprocess_data(config, force=False):
get_session = database.init_db(config["database"], config["search_index"])
with get_session() as session:
is_built = (
session.query(PublicTransport).count() > 0 and
session.query(PostalCode).count() > 0
session.query(PublicTransport).count() > 0
and session.query(PostalCode).count() > 0
)
if is_built and not force:
# No need to rebuild the database, skip
@ -96,10 +98,7 @@ def load_data(model, constraint, config):
# Load data for each area
areas = list(set(areas))
for area in areas:
results.extend(
session.query(model)
.filter(model.area == area).all()
)
results.extend(session.query(model).filter(model.area == area).all())
# Expunge loaded data from the session to be able to use them
# afterwards
session.expunge_all()

View File

@ -24,8 +24,8 @@ MODULE_DIR = os.path.dirname(os.path.realpath(__file__))
titlecase.set_small_word_list(
# Add French small words
r"l|d|un|une|et|à|a|sur|ou|le|la|de|lès|les|" +
titlecase.SMALL
r"l|d|un|une|et|à|a|sur|ou|le|la|de|lès|les|"
+ titlecase.SMALL
)
TRANSPORT_DATA_FILES = {
@ -33,7 +33,7 @@ TRANSPORT_DATA_FILES = {
"FR-NW": "stops_fr-nw.txt",
"FR-NE": "stops_fr-ne.txt",
"FR-SW": "stops_fr-sw.txt",
"FR-SE": "stops_fr-se.txt"
"FR-SE": "stops_fr-se.txt",
}
@ -51,8 +51,20 @@ def french_postal_codes_to_quarter(postal_code):
# French departements
# Taken from Wikipedia data.
department_to_subdivision = {
"FR-ARA": ["01", "03", "07", "15", "26", "38", "42", "43", "63", "69",
"73", "74"],
"FR-ARA": [
"01",
"03",
"07",
"15",
"26",
"38",
"42",
"43",
"63",
"69",
"73",
"74",
],
"FR-BFC": ["21", "25", "39", "58", "70", "71", "89", "90"],
"FR-BRE": ["22", "29", "35", "44", "56"],
"FR-CVL": ["18", "28", "36", "37", "41", "45"],
@ -61,19 +73,44 @@ def french_postal_codes_to_quarter(postal_code):
"FR-HDF": ["02", "59", "60", "62", "80"],
"FR-IDF": ["75", "77", "78", "91", "92", "93", "94", "95"],
"FR-NOR": ["14", "27", "50", "61", "76"],
"FR-NAQ": ["16", "17", "19", "23", "24", "33", "40", "47", "64", "79",
"86", "87"],
"FR-OCC": ["09", "11", "12", "30", "31", "32", "34", "46", "48", "65",
"66", "81", "82"],
"FR-NAQ": [
"16",
"17",
"19",
"23",
"24",
"33",
"40",
"47",
"64",
"79",
"86",
"87",
],
"FR-OCC": [
"09",
"11",
"12",
"30",
"31",
"32",
"34",
"46",
"48",
"65",
"66",
"81",
"82",
],
"FR-PDL": ["44", "49", "53", "72", "85"],
"FR-PAC": ["04", "05", "06", "13", "83", "84"]
"FR-PAC": ["04", "05", "06", "13", "83", "84"],
}
subdivision_to_quarters = {
'FR-IDF': ['FR-IDF'],
'FR-NW': ['FR-BRE', 'FR-CVL', 'FR-NOR', 'FR-PDL'],
'FR-NE': ['FR-BFC', 'FR-GES', 'FR-HDF'],
'FR-SE': ['FR-ARA', 'FR-COR', 'FR-PAC', 'FR-OCC'],
'FR-SW': ['FR-NAQ']
"FR-IDF": ["FR-IDF"],
"FR-NW": ["FR-BRE", "FR-CVL", "FR-NOR", "FR-PDL"],
"FR-NE": ["FR-BFC", "FR-GES", "FR-HDF"],
"FR-SE": ["FR-ARA", "FR-COR", "FR-PAC", "FR-OCC"],
"FR-SW": ["FR-NAQ"],
}
subdivision = next(
@ -82,7 +119,7 @@ def french_postal_codes_to_quarter(postal_code):
for i, departments in department_to_subdivision.items()
if departement in departments
),
None
None,
)
return next(
(
@ -90,7 +127,7 @@ def french_postal_codes_to_quarter(postal_code):
for i, subdivisions in subdivision_to_quarters.items()
if subdivision in subdivisions
),
None
None,
)
@ -106,9 +143,7 @@ def _preprocess_laposte():
raw_laposte_data = []
# Load opendata file
try:
with io.open(
os.path.join(MODULE_DIR, data_file), "r", encoding='utf-8'
) as fh:
with io.open(os.path.join(MODULE_DIR, data_file), "r", encoding="utf-8") as fh:
raw_laposte_data = json.load(fh)
except (IOError, ValueError):
LOGGER.error("Invalid raw LaPoste opendata file.")
@ -126,29 +161,31 @@ def _preprocess_laposte():
if area is None:
LOGGER.info(
"No matching area found for postal code %s, skipping it.",
fields["code_postal"]
fields["code_postal"],
)
continue
name = normalize_string(
titlecase.titlecase(fields["nom_de_la_commune"]),
lowercase=False
titlecase.titlecase(fields["nom_de_la_commune"]), lowercase=False
)
if (fields["code_postal"], name) in seen_postal_codes:
continue
seen_postal_codes.append((fields["code_postal"], name))
postal_codes_data.append(PostalCode(
area=area,
postal_code=fields["code_postal"],
name=name,
lat=fields["coordonnees_gps"][0],
lng=fields["coordonnees_gps"][1]
))
postal_codes_data.append(
PostalCode(
area=area,
postal_code=fields["code_postal"],
name=name,
lat=fields["coordonnees_gps"][0],
lng=fields["coordonnees_gps"][1],
)
)
except KeyError:
LOGGER.info("Missing data for postal code %s, skipping it.",
fields["code_postal"])
LOGGER.info(
"Missing data for postal code %s, skipping it.", fields["code_postal"]
)
return postal_codes_data
@ -164,17 +201,15 @@ def _preprocess_public_transport():
for area, data_file in TRANSPORT_DATA_FILES.items():
LOGGER.info("Building from public transport data %s.", data_file)
try:
with io.open(os.path.join(MODULE_DIR, data_file), "r",
encoding='utf-8') as fh:
with io.open(
os.path.join(MODULE_DIR, data_file), "r", encoding="utf-8"
) as fh:
filereader = csv.reader(fh)
next(filereader, None) # Skip first row (headers)
for row in filereader:
public_transport_data.append(PublicTransport(
name=row[2],
area=area,
lat=row[3],
lng=row[4]
))
public_transport_data.append(
PublicTransport(name=row[2], area=area, lat=row[3], lng=row[4])
)
except (IOError, IndexError):
LOGGER.error("Invalid raw opendata file: %s.", data_file)
return []
@ -183,7 +218,4 @@ def _preprocess_public_transport():
# List of all the available preprocessing functions. Order can be important.
PREPROCESSING_FUNCTIONS = [
_preprocess_laposte,
_preprocess_public_transport
]
PREPROCESSING_FUNCTIONS = [_preprocess_laposte, _preprocess_public_transport]

View File

@ -47,9 +47,7 @@ def init_db(database_uri=None, search_db_uri=None):
Session = sessionmaker(bind=engine) # pylint: disable=locally-disabled,invalid-name
if search_db_uri:
index_service = IndexService(
whoosh_base=search_db_uri
)
index_service = IndexService(whoosh_base=search_db_uri)
index_service.register_class(flatisfy.models.flat.Flat)
@contextmanager

View File

@ -50,4 +50,4 @@ class StringyJSON(types.TypeDecorator):
# TypeEngine.with_variant says "use StringyJSON instead when
# connecting to 'sqlite'"
# pylint: disable=locally-disabled,invalid-name
MagicJSON = types.JSON().with_variant(StringyJSON, 'sqlite')
MagicJSON = types.JSON().with_variant(StringyJSON, "sqlite")

View File

@ -30,7 +30,6 @@ from whoosh.qparser import MultifieldParser
class IndexService(object):
def __init__(self, config=None, whoosh_base=None):
if not whoosh_base and config:
whoosh_base = config.get("WHOOSH_BASE")
@ -84,8 +83,7 @@ class IndexService(object):
primary = field.name
continue
if field.name in model_class.__searchable__:
schema[field.name] = whoosh.fields.TEXT(
analyzer=StemmingAnalyzer())
schema[field.name] = whoosh.fields.TEXT(analyzer=StemmingAnalyzer())
return Schema(**schema), primary
def before_commit(self, session):
@ -93,21 +91,24 @@ class IndexService(object):
for model in session.new:
model_class = model.__class__
if hasattr(model_class, '__searchable__'):
if hasattr(model_class, "__searchable__"):
self.to_update.setdefault(model_class.__name__, []).append(
("new", model))
("new", model)
)
for model in session.deleted:
model_class = model.__class__
if hasattr(model_class, '__searchable__'):
if hasattr(model_class, "__searchable__"):
self.to_update.setdefault(model_class.__name__, []).append(
("deleted", model))
("deleted", model)
)
for model in session.dirty:
model_class = model.__class__
if hasattr(model_class, '__searchable__'):
if hasattr(model_class, "__searchable__"):
self.to_update.setdefault(model_class.__name__, []).append(
("changed", model))
("changed", model)
)
def after_commit(self, session):
"""
@ -129,11 +130,11 @@ class IndexService(object):
# update.
writer.delete_by_term(
primary_field, text_type(getattr(model, primary_field)))
primary_field, text_type(getattr(model, primary_field))
)
if change_type in ("new", "changed"):
attrs = dict((key, getattr(model, key))
for key in searchable)
attrs = dict((key, getattr(model, key)) for key in searchable)
attrs = {
attr: text_type(getattr(model, attr))
for attr in attrs.keys()
@ -158,8 +159,7 @@ class Searcher(object):
self.parser = MultifieldParser(list(fields), index.schema)
def __call__(self, session, query, limit=None):
results = self.index.searcher().search(
self.parser.parse(query), limit=limit)
results = self.index.searcher().search(self.parser.parse(query), limit=limit)
keys = [x[self.primary] for x in results]
primary_column = getattr(self.model_class, self.primary)

View File

@ -16,7 +16,9 @@ from email.utils import formatdate, make_msgid
LOGGER = logging.getLogger(__name__)
def send_email(server, port, subject, _from, _to, txt, html, username=None, password=None):
def send_email(
server, port, subject, _from, _to, txt, html, username=None, password=None
):
"""
Send an email
@ -36,15 +38,15 @@ def send_email(server, port, subject, _from, _to, txt, html, username=None, pass
if username or password:
server.login(username or "", password or "")
msg = MIMEMultipart('alternative')
msg['Subject'] = subject
msg['From'] = _from
msg['To'] = ', '.join(_to)
msg['Date'] = formatdate()
msg['Message-ID'] = make_msgid()
msg = MIMEMultipart("alternative")
msg["Subject"] = subject
msg["From"] = _from
msg["To"] = ", ".join(_to)
msg["Date"] = formatdate()
msg["Message-ID"] = make_msgid()
msg.attach(MIMEText(txt, 'plain', 'utf-8'))
msg.attach(MIMEText(html, 'html', 'utf-8'))
msg.attach(MIMEText(txt, "plain", "utf-8"))
msg.attach(MIMEText(html, "html", "utf-8"))
server.sendmail(_from, _to, msg.as_string())
server.quit()
@ -61,7 +63,7 @@ def send_notification(config, flats):
if not flats:
return
txt = u'Hello dear user,\n\nThe following new flats have been found:\n\n'
txt = "Hello dear user,\n\nThe following new flats have been found:\n\n"
html = """
<html>
<head></head>
@ -81,10 +83,8 @@ def send_notification(config, flats):
cost = str(flat.cost)
currency = str(flat.currency)
txt += (
'- {}: {}#/flat/{} (area: {}, cost: {} {})\n'.format(
title, website_url, flat_id, area, cost, currency
)
txt += "- {}: {}#/flat/{} (area: {}, cost: {} {})\n".format(
title, website_url, flat_id, area, cost, currency
)
html += """
@ -92,26 +92,28 @@ def send_notification(config, flats):
<a href="{}#/flat/{}">{}</a>
(area: {}, cost: {} {})
</li>
""".format(website_url, flat_id, title, area, cost, currency)
""".format(
website_url, flat_id, title, area, cost, currency
)
html += "</ul>"
signature = (
u"\nHope you'll find what you were looking for.\n\nBye!\nFlatisfy"
)
signature = "\nHope you'll find what you were looking for.\n\nBye!\nFlatisfy"
txt += signature
html += signature.replace('\n', '<br>')
html += signature.replace("\n", "<br>")
html += """</p>
</body>
</html>"""
send_email(config["smtp_server"],
config["smtp_port"],
"New flats found!",
config["smtp_from"],
config["smtp_to"],
txt,
html,
config.get("smtp_username"),
config.get("smtp_password"))
send_email(
config["smtp_server"],
config["smtp_port"],
"New flats found!",
config["smtp_from"],
config["smtp_to"],
txt,
html,
config.get("smtp_username"),
config.get("smtp_password"),
)

View File

@ -10,4 +10,5 @@ class DataBuildError(Exception):
"""
Error occurring on building a data file.
"""
pass

View File

@ -24,8 +24,9 @@ try:
from weboob.core.ouiboube import WebNip
from weboob.tools.json import WeboobEncoder
except ImportError:
LOGGER.error("Weboob is not available on your system. Make sure you "
"installed it.")
LOGGER.error(
"Weboob is not available on your system. Make sure you " "installed it."
)
raise
@ -34,6 +35,7 @@ class WebOOBProxy(object):
Wrapper around WebOOB ``WebNip`` class, to fetch housing posts without
having to spawn a subprocess.
"""
@staticmethod
def version():
"""
@ -78,12 +80,7 @@ class WebOOBProxy(object):
# Create backends
self.backends = [
self.webnip.load_backend(
module,
module,
params={}
)
for module in backends
self.webnip.load_backend(module, module, params={}) for module in backends
]
def __enter__(self):
@ -114,19 +111,15 @@ class WebOOBProxy(object):
except CallErrors as exc:
# If an error occurred, just log it
LOGGER.error(
(
"An error occured while building query for "
"postal code %s: %s"
),
("An error occured while building query for " "postal code %s: %s"),
postal_code,
str(exc)
str(exc),
)
if not matching_cities:
# If postal code gave no match, warn the user
LOGGER.warn(
"Postal code %s could not be matched with a city.",
postal_code
"Postal code %s could not be matched with a city.", postal_code
)
# Remove "TOUTES COMMUNES" entries which are duplicates of the individual
@ -134,8 +127,9 @@ class WebOOBProxy(object):
matching_cities = [
city
for city in matching_cities
if not (city.backend == 'logicimmo' and
city.name.startswith('TOUTES COMMUNES'))
if not (
city.backend == "logicimmo" and city.name.startswith("TOUTES COMMUNES")
)
]
# Then, build queries by grouping cities by at most 3
@ -145,10 +139,7 @@ class WebOOBProxy(object):
try:
query.house_types = [
getattr(
HOUSE_TYPES,
house_type.upper()
)
getattr(HOUSE_TYPES, house_type.upper())
for house_type in constraints_dict["house_types"]
]
except AttributeError:
@ -156,10 +147,7 @@ class WebOOBProxy(object):
return None
try:
query.type = getattr(
POSTS_TYPES,
constraints_dict["type"].upper()
)
query.type = getattr(POSTS_TYPES, constraints_dict["type"].upper())
except AttributeError:
LOGGER.error("Invalid post type constraint.")
return None
@ -190,16 +178,15 @@ class WebOOBProxy(object):
# TODO: Handle max_entries better
try:
for housing in itertools.islice(
self.webnip.do(
'search_housings',
query,
# Only run the call on the required backends.
# Otherwise, WebOOB is doing weird stuff and returning
# nonsense.
backends=[x for x in self.backends
if x.name in useful_backends]
),
max_entries
self.webnip.do(
"search_housings",
query,
# Only run the call on the required backends.
# Otherwise, WebOOB is doing weird stuff and returning
# nonsense.
backends=[x for x in self.backends if x.name in useful_backends],
),
max_entries,
):
if not store_personal_data:
housing.phone = None
@ -207,8 +194,7 @@ class WebOOBProxy(object):
except CallErrors as exc:
# If an error occurred, just log it
LOGGER.error(
"An error occured while fetching the housing posts: %s",
str(exc)
"An error occured while fetching the housing posts: %s", str(exc)
)
return housings
@ -225,9 +211,7 @@ class WebOOBProxy(object):
flat_id, backend_name = full_flat_id.rsplit("@", 1)
try:
backend = next(
backend
for backend in self.backends
if backend.name == backend_name
backend for backend in self.backends if backend.name == backend_name
)
except StopIteration:
LOGGER.error("Backend %s is not available.", backend_name)
@ -240,7 +224,7 @@ class WebOOBProxy(object):
housing.phone = None
else:
# Ensure phone is fetched
backend.fillobj(housing, 'phone')
backend.fillobj(housing, "phone")
# Otherwise, we miss the @backend afterwards
housing.id = full_flat_id
@ -248,9 +232,7 @@ class WebOOBProxy(object):
except Exception as exc: # pylint: disable=broad-except
# If an error occurred, just log it
LOGGER.error(
"An error occured while fetching housing %s: %s",
full_flat_id,
str(exc)
"An error occured while fetching housing %s: %s", full_flat_id, str(exc)
)
return "{}"
@ -272,15 +254,17 @@ def fetch_flats(config):
housing_posts = []
for query in queries:
housing_posts.extend(
webOOB_proxy.query(query, config["max_entries"],
config["store_personal_data"])
webOOB_proxy.query(
query, config["max_entries"], config["store_personal_data"]
)
)
housing_posts = housing_posts[:config["max_entries"]]
housing_posts = housing_posts[: config["max_entries"]]
LOGGER.info("Fetched %d flats.", len(housing_posts))
constraint_flats_list = [json.loads(flat) for flat in housing_posts]
constraint_flats_list = [WebOOBProxy.restore_decimal_fields(flat)
for flat in constraint_flats_list]
constraint_flats_list = [
WebOOBProxy.restore_decimal_fields(flat) for flat in constraint_flats_list
]
fetched_flats[constraint_name] = constraint_flats_list
return fetched_flats
@ -295,8 +279,7 @@ def fetch_details(config, flat_id):
"""
with WebOOBProxy(config) as webOOB_proxy:
LOGGER.info("Loading additional details for flat %s.", flat_id)
webOOB_output = webOOB_proxy.info(flat_id,
config["store_personal_data"])
webOOB_output = webOOB_proxy.info(flat_id, config["store_personal_data"])
flat_details = json.loads(webOOB_output)
flat_details = WebOOBProxy.restore_decimal_fields(flat_details)
@ -327,10 +310,7 @@ def load_flats_from_file(json_file, config):
LOGGER.info("Found %d flats.", len(flats_list))
except (IOError, ValueError):
LOGGER.error("File %s is not a valid dump file.", json_file)
return {
constraint_name: flats_list
for constraint_name in config["constraints"]
}
return {constraint_name: flats_list for constraint_name in config["constraints"]}
def load_flats_from_db(config):

View File

@ -36,10 +36,7 @@ def refine_with_housing_criteria(flats_list, constraint):
for i, flat in enumerate(flats_list):
# Check postal code
postal_code = flat["flatisfy"].get("postal_code", None)
if (
postal_code and
postal_code not in constraint["postal_codes"]
):
if postal_code and postal_code not in constraint["postal_codes"]:
LOGGER.info("Postal code for flat %s is out of range.", flat["id"])
is_ok[i] = is_ok[i] and False
@ -47,37 +44,32 @@ def refine_with_housing_criteria(flats_list, constraint):
for place_name, time in flat["flatisfy"].get("time_to", {}).items():
time = time["time"]
is_within_interval = tools.is_within_interval(
time,
*(constraint["time_to"][place_name]["time"])
time, *(constraint["time_to"][place_name]["time"])
)
if not is_within_interval:
LOGGER.info("Flat %s is too far from place %s: %ds.",
flat["id"], place_name, time)
LOGGER.info(
"Flat %s is too far from place %s: %ds.",
flat["id"],
place_name,
time,
)
is_ok[i] = is_ok[i] and is_within_interval
# Check other fields
for field in ["area", "cost", "rooms", "bedrooms"]:
interval = constraint[field]
is_within_interval = tools.is_within_interval(
flat.get(field, None),
*interval
flat.get(field, None), *interval
)
if not is_within_interval:
LOGGER.info("%s for flat %s is out of range.",
field.capitalize(), flat["id"])
LOGGER.info(
"%s for flat %s is out of range.", field.capitalize(), flat["id"]
)
is_ok[i] = is_ok[i] and is_within_interval
return (
[
flat
for i, flat in enumerate(flats_list)
if is_ok[i]
],
[
flat
for i, flat in enumerate(flats_list)
if not is_ok[i]
]
[flat for i, flat in enumerate(flats_list) if is_ok[i]],
[flat for i, flat in enumerate(flats_list) if not is_ok[i]],
)
@ -104,47 +96,37 @@ def refine_with_details_criteria(flats_list, constraint):
for i, flat in enumerate(flats_list):
# Check number of pictures
has_enough_photos = tools.is_within_interval(
len(flat.get('photos', [])),
constraint['minimum_nb_photos'],
None
len(flat.get("photos", [])), constraint["minimum_nb_photos"], None
)
if not has_enough_photos:
LOGGER.info(
"Flat %s only has %d photos, it should have at least %d.",
flat["id"],
len(flat['photos']),
constraint['minimum_nb_photos']
len(flat["photos"]),
constraint["minimum_nb_photos"],
)
is_ok[i] = False
for term in constraint["description_should_contain"]:
if term.lower() not in flat['text'].lower():
if term.lower() not in flat["text"].lower():
LOGGER.info(
("Description for flat %s does not contain required term '%s'."),
flat["id"],
term
term,
)
is_ok[i] = False
for term in constraint["description_should_not_contain"]:
if term.lower() in flat['text'].lower():
if term.lower() in flat["text"].lower():
LOGGER.info(
("Description for flat %s contains blacklisted term '%s'."),
flat["id"],
term
term,
)
is_ok[i] = False
return (
[
flat
for i, flat in enumerate(flats_list)
if is_ok[i]
],
[
flat
for i, flat in enumerate(flats_list)
if not is_ok[i]
]
[flat for i, flat in enumerate(flats_list) if is_ok[i]],
[flat for i, flat in enumerate(flats_list) if not is_ok[i]],
)
@ -185,14 +167,10 @@ def first_pass(flats_list, constraint, config):
flats_list = metadata.guess_stations(flats_list, constraint, config)
# Remove returned housing posts that do not match criteria
flats_list, ignored_list = refine_with_housing_criteria(flats_list,
constraint)
flats_list, ignored_list = refine_with_housing_criteria(flats_list, constraint)
return {"new": flats_list, "ignored": ignored_list, "duplicate": duplicates_by_urls}
return {
"new": flats_list,
"ignored": ignored_list,
"duplicate": duplicates_by_urls
}
@tools.timeit
def second_pass(flats_list, constraint, config):
@ -226,22 +204,17 @@ def second_pass(flats_list, constraint, config):
flats_list = metadata.compute_travel_times(flats_list, constraint, config)
# Remove returned housing posts that do not match criteria
flats_list, ignored_list = refine_with_housing_criteria(flats_list,
constraint)
flats_list, ignored_list = refine_with_housing_criteria(flats_list, constraint)
# Remove returned housing posts which do not match criteria relying on
# fetched details.
flats_list, ignored_list = refine_with_details_criteria(flats_list,
constraint)
flats_list, ignored_list = refine_with_details_criteria(flats_list, constraint)
if config["serve_images_locally"]:
images.download_images(flats_list, config)
return {
"new": flats_list,
"ignored": ignored_list,
"duplicate": []
}
return {"new": flats_list, "ignored": ignored_list, "duplicate": []}
@tools.timeit
def third_pass(flats_list, config):
@ -260,8 +233,4 @@ def third_pass(flats_list, config):
# Deduplicate the list using every available data
flats_list, duplicate_flats = duplicates.deep_detect(flats_list, config)
return {
"new": flats_list,
"ignored": [],
"duplicate": duplicate_flats
}
return {"new": flats_list, "ignored": [], "duplicate": duplicate_flats}

View File

@ -16,10 +16,12 @@ import PIL.Image
LOGGER = logging.getLogger(__name__)
class MemoryCache(object):
"""
A cache in memory.
"""
@staticmethod
def on_miss(key):
"""
@ -87,6 +89,7 @@ class ImageCache(MemoryCache):
"""
A cache for images, stored in memory.
"""
@staticmethod
def compute_filename(url):
"""
@ -113,10 +116,7 @@ class ImageCache(MemoryCache):
filepath = None
# Try to load from local folder
if self.storage_dir:
filepath = os.path.join(
self.storage_dir,
self.compute_filename(url)
)
filepath = os.path.join(self.storage_dir, self.compute_filename(url))
if os.path.isfile(filepath):
return PIL.Image.open(filepath)
# Otherwise, fetch it

View File

@ -35,14 +35,14 @@ def homogeneize_phone_number(numbers):
clean_numbers = []
for number in numbers.split(','):
for number in numbers.split(","):
number = number.strip()
number = number.replace(".", "")
number = number.replace(" ", "")
number = number.replace("-", "")
number = number.replace("(", "")
number = number.replace(")", "")
number = re.sub(r'^\+\d\d', "", number)
number = re.sub(r"^\+\d\d", "", number)
if not number.startswith("0"):
number = "0" + number
@ -94,12 +94,7 @@ def compare_photos(photo1, photo2, photo_cache, hash_threshold):
return False
def find_number_common_photos(
flat1_photos,
flat2_photos,
photo_cache,
hash_threshold
):
def find_number_common_photos(flat1_photos, flat2_photos, photo_cache, hash_threshold):
"""
Compute the number of common photos between the two lists of photos for the
flats.
@ -174,22 +169,23 @@ def detect(flats_list, key="id", merge=True, should_intersect=False):
# Sort matching flats by backend precedence
matching_flats.sort(
key=lambda flat: next(
i for (i, backend) in enumerate(BACKENDS_BY_PRECEDENCE)
i
for (i, backend) in enumerate(BACKENDS_BY_PRECEDENCE)
if flat["id"].endswith(backend)
),
reverse=True
reverse=True,
)
if len(matching_flats) > 1:
LOGGER.info("Found duplicates using key \"%s\": %s.",
key,
[flat["id"] for flat in matching_flats])
LOGGER.info(
'Found duplicates using key "%s": %s.',
key,
[flat["id"] for flat in matching_flats],
)
# Otherwise, check the policy
if merge:
# If a merge is requested, do the merge
unique_flats_list.append(
tools.merge_dicts(*matching_flats)
)
unique_flats_list.append(tools.merge_dicts(*matching_flats))
else:
# Otherwise, just keep the most important of them
unique_flats_list.append(matching_flats[-1])
@ -203,8 +199,9 @@ def detect(flats_list, key="id", merge=True, should_intersect=False):
if should_intersect:
# We added some flats twice with the above method, let's deduplicate on
# id.
unique_flats_list, _ = detect(unique_flats_list, key="id", merge=True,
should_intersect=False)
unique_flats_list, _ = detect(
unique_flats_list, key="id", merge=True, should_intersect=False
)
return unique_flats_list, duplicate_flats
@ -250,14 +247,12 @@ def get_duplicate_score(flat1, flat2, photo_cache, hash_threshold):
# They should have the same postal code, if available
if (
"flatisfy" in flat1 and "flatisfy" in flat2 and
flat1["flatisfy"].get("postal_code", None) and
flat2["flatisfy"].get("postal_code", None)
"flatisfy" in flat1
and "flatisfy" in flat2
and flat1["flatisfy"].get("postal_code", None)
and flat2["flatisfy"].get("postal_code", None)
):
assert (
flat1["flatisfy"]["postal_code"] ==
flat2["flatisfy"]["postal_code"]
)
assert flat1["flatisfy"]["postal_code"] == flat2["flatisfy"]["postal_code"]
n_common_items += 1
# TODO: Better text comparison (one included in the other, fuzzymatch)
@ -282,25 +277,17 @@ def get_duplicate_score(flat1, flat2, photo_cache, hash_threshold):
both_are_from_same_backend = (
flat1["id"].split("@")[-1] == flat2["id"].split("@")[-1]
)
both_have_float_part = (
(flat1["area"] % 1) > 0 and (flat2["area"] % 1) > 0
)
both_have_equal_float_part = (
(flat1["area"] % 1) == (flat2["area"] % 1)
)
both_have_float_part = (flat1["area"] % 1) > 0 and (flat2["area"] % 1) > 0
both_have_equal_float_part = (flat1["area"] % 1) == (flat2["area"] % 1)
if both_have_float_part and both_are_from_same_backend:
assert both_have_equal_float_part
if flat1.get("photos", []) and flat2.get("photos", []):
n_common_photos = find_number_common_photos(
flat1["photos"],
flat2["photos"],
photo_cache,
hash_threshold
flat1["photos"], flat2["photos"], photo_cache, hash_threshold
)
min_number_photos = min(len(flat1["photos"]),
len(flat2["photos"]))
min_number_photos = min(len(flat1["photos"]), len(flat2["photos"]))
# Either all the photos are the same, or there are at least
# three common photos.
@ -332,9 +319,7 @@ def deep_detect(flats_list, config):
storage_dir = os.path.join(config["data_directory"], "images")
else:
storage_dir = None
photo_cache = ImageCache(
storage_dir=storage_dir
)
photo_cache = ImageCache(storage_dir=storage_dir)
LOGGER.info("Running deep duplicates detection.")
matching_flats = collections.defaultdict(list)
@ -348,29 +333,30 @@ def deep_detect(flats_list, config):
continue
n_common_items = get_duplicate_score(
flat1,
flat2,
photo_cache,
config["duplicate_image_hash_threshold"]
flat1, flat2, photo_cache, config["duplicate_image_hash_threshold"]
)
# Minimal score to consider they are duplicates
if n_common_items >= config["duplicate_threshold"]:
# Mark flats as duplicates
LOGGER.info(
("Found duplicates using deep detection: (%s, %s). "
"Score is %d."),
(
"Found duplicates using deep detection: (%s, %s). "
"Score is %d."
),
flat1["id"],
flat2["id"],
n_common_items
n_common_items,
)
matching_flats[flat1["id"]].append(flat2["id"])
matching_flats[flat2["id"]].append(flat1["id"])
if photo_cache.total():
LOGGER.debug("Photo cache: hits: %d%% / misses: %d%%.",
photo_cache.hit_rate(),
photo_cache.miss_rate())
LOGGER.debug(
"Photo cache: hits: %d%% / misses: %d%%.",
photo_cache.hit_rate(),
photo_cache.miss_rate(),
)
seen_ids = []
duplicate_flats = []
@ -381,16 +367,13 @@ def deep_detect(flats_list, config):
seen_ids.extend(matching_flats[flat_id])
to_merge = sorted(
[
flat
for flat in flats_list
if flat["id"] in matching_flats[flat_id]
],
[flat for flat in flats_list if flat["id"] in matching_flats[flat_id]],
key=lambda flat: next(
i for (i, backend) in enumerate(BACKENDS_BY_PRECEDENCE)
i
for (i, backend) in enumerate(BACKENDS_BY_PRECEDENCE)
if flat["id"].endswith(backend)
),
reverse=True
reverse=True,
)
unique_flats_list.append(tools.merge_dicts(*to_merge))
# The ID of the added merged flat will be the one of the last item

View File

@ -29,7 +29,9 @@ def download_images(flats_list, config):
for i, flat in enumerate(flats_list):
LOGGER.info(
"Downloading photos for flat %d/%d: %s.",
i + 1, flats_list_length, flat["id"]
i + 1,
flats_list_length,
flat["id"],
)
for photo in flat["photos"]:
# Download photo

View File

@ -103,7 +103,7 @@ def fuzzy_match(query, choices, limit=3, threshold=75):
if choice in normalized_query
],
key=lambda x: x[1],
reverse=True
reverse=True,
)
if limit:
matches = matches[:limit]
@ -111,10 +111,7 @@ def fuzzy_match(query, choices, limit=3, threshold=75):
# Update confidence
if matches:
max_confidence = max(match[1] for match in matches)
matches = [
(x[0], int(x[1] / max_confidence * 100))
for x in matches
]
matches = [(x[0], int(x[1] / max_confidence * 100)) for x in matches]
# Convert back matches to original strings
# Also filter out matches below threshold
@ -126,32 +123,27 @@ def fuzzy_match(query, choices, limit=3, threshold=75):
return matches
def guess_location_position(location, cities, constraint):
# try to find a city
# Find all fuzzy-matching cities
postal_code = None
position = None
matched_cities = fuzzy_match(
location,
[x.name for x in cities],
limit=None
)
matched_cities = fuzzy_match(location, [x.name for x in cities], limit=None)
if matched_cities:
# Find associated postal codes
matched_postal_codes = []
for matched_city_name, _ in matched_cities:
postal_code_objects_for_city = [
x for x in cities
if x.name == matched_city_name
x for x in cities if x.name == matched_city_name
]
matched_postal_codes.extend(
pc.postal_code
for pc in postal_code_objects_for_city
pc.postal_code for pc in postal_code_objects_for_city
)
# Try to match them with postal codes in config constraint
matched_postal_codes_in_config = (
set(matched_postal_codes) & set(constraint["postal_codes"])
matched_postal_codes_in_config = set(matched_postal_codes) & set(
constraint["postal_codes"]
)
if matched_postal_codes_in_config:
# If there are some matched postal codes which are also in
@ -166,14 +158,17 @@ def guess_location_position(location, cities, constraint):
# take the city position
for matched_city_name, _ in matched_cities:
postal_code_objects_for_city = [
x for x in cities
x
for x in cities
if x.name == matched_city_name and x.postal_code == postal_code
]
if len(postal_code_objects_for_city):
position = {"lat": postal_code_objects_for_city[0].lat, "lng": postal_code_objects_for_city[0].lng}
position = {
"lat": postal_code_objects_for_city[0].lat,
"lng": postal_code_objects_for_city[0].lng,
}
LOGGER.debug(
("Found position %s using city %s."),
position, matched_city_name
("Found position %s using city %s."), position, matched_city_name
)
break
@ -194,25 +189,20 @@ def guess_postal_code(flats_list, constraint, config, distance_threshold=20000):
:return: An updated list of flats dict with guessed postal code.
"""
opendata = {
"postal_codes": data.load_data(PostalCode, constraint, config)
}
opendata = {"postal_codes": data.load_data(PostalCode, constraint, config)}
for flat in flats_list:
location = flat.get("location", None)
if not location:
addr = flat.get("address", None)
if addr:
location = addr['full_address']
location = addr["full_address"]
if not location:
# Skip everything if empty location
LOGGER.info(
(
"No location field for flat %s, skipping postal "
"code lookup. (%s)"
),
("No location field for flat %s, skipping postal " "code lookup. (%s)"),
flat["id"],
flat.get("address")
flat.get("address"),
)
continue
@ -230,17 +220,22 @@ def guess_postal_code(flats_list, constraint, config, distance_threshold=20000):
LOGGER.debug(
"Found postal code in location field for flat %s: %s.",
flat["id"], postal_code
flat["id"],
postal_code,
)
except AssertionError:
postal_code = None
# Then fetch position (and postal_code if it couldn't be found earlier)
if postal_code:
cities = [x for x in opendata["postal_codes"] if x.postal_code == postal_code]
cities = [
x for x in opendata["postal_codes"] if x.postal_code == postal_code
]
(_, position) = guess_location_position(location, cities, constraint)
else:
(postal_code, position) = guess_location_position(location, opendata["postal_codes"], constraint)
(postal_code, position) = guess_location_position(
location, opendata["postal_codes"], constraint
)
# Check that postal code is not too far from the ones listed in config,
# limit bad fuzzy matching
@ -256,17 +251,19 @@ def guess_postal_code(flats_list, constraint, config, distance_threshold=20000):
(x.lat, x.lng)
for x in opendata["postal_codes"]
if x.postal_code == constraint_postal_code
)
),
)
for constraint_postal_code in constraint["postal_codes"]
)
if distance > distance_threshold:
LOGGER.info(
("Postal code %s found for flat %s @ %s is off-constraints "
"(distance is %dm > %dm). Let's consider it is an "
"artifact match and keep the post without this postal "
"code."),
(
"Postal code %s found for flat %s @ %s is off-constraints "
"(distance is %dm > %dm). Let's consider it is an "
"artifact match and keep the post without this postal "
"code."
),
postal_code,
flat["id"],
location,
@ -282,7 +279,9 @@ def guess_postal_code(flats_list, constraint, config, distance_threshold=20000):
if existing_postal_code and existing_postal_code != postal_code:
LOGGER.warning(
"Replacing previous postal code %s by %s for flat %s.",
existing_postal_code, postal_code, flat["id"]
existing_postal_code,
postal_code,
flat["id"],
)
flat["flatisfy"]["postal_code"] = postal_code
else:
@ -304,10 +303,10 @@ def guess_stations(flats_list, constraint, config):
:return: An updated list of flats dict with guessed nearby stations.
"""
distance_threshold = config['max_distance_housing_station']
distance_threshold = config["max_distance_housing_station"]
opendata = {
"postal_codes": data.load_data(PostalCode, constraint, config),
"stations": data.load_data(PublicTransport, constraint, config)
"stations": data.load_data(PublicTransport, constraint, config),
}
for flat in flats_list:
@ -316,13 +315,12 @@ def guess_stations(flats_list, constraint, config):
if not flat_station:
# Skip everything if empty station
LOGGER.info(
"No stations field for flat %s, skipping stations lookup.",
flat["id"]
"No stations field for flat %s, skipping stations lookup.", flat["id"]
)
continue
# Weboob modules can return several stations in a comma-separated list.
flat_stations = flat_station.split(',')
flat_stations = flat_station.split(",")
# But some stations containing a comma exist, so let's add the initial
# value to the list of stations to check if there was one.
if len(flat_stations) > 1:
@ -334,7 +332,7 @@ def guess_stations(flats_list, constraint, config):
tentative_station,
[x.name for x in opendata["stations"]],
limit=10,
threshold=50
threshold=50,
)
# Keep only one occurrence of each station
@ -361,32 +359,34 @@ def guess_stations(flats_list, constraint, config):
]
for station_data in stations_objects:
distance = tools.distance(
(station_data.lat, station_data.lng),
postal_code_gps
(station_data.lat, station_data.lng), postal_code_gps
)
if distance < distance_threshold:
# If at least one of the coordinates for a given
# station is close enough, that's ok and we can add
# the station
good_matched_stations.append({
"key": station[0],
"name": station_data.name,
"confidence": station[1],
"gps": (station_data.lat, station_data.lng)
})
good_matched_stations.append(
{
"key": station[0],
"name": station_data.name,
"confidence": station[1],
"gps": (station_data.lat, station_data.lng),
}
)
break
LOGGER.info(
("Station %s is too far from flat %s (%dm > %dm), "
"discarding this station."),
(
"Station %s is too far from flat %s (%dm > %dm), "
"discarding this station."
),
station[0],
flat["id"],
int(distance),
int(distance_threshold)
int(distance_threshold),
)
else:
LOGGER.info(
"No postal code for flat %s, skipping stations detection.",
flat["id"]
"No postal code for flat %s, skipping stations detection.", flat["id"]
)
if not good_matched_stations:
@ -394,7 +394,7 @@ def guess_stations(flats_list, constraint, config):
LOGGER.info(
"No stations found for flat %s, matching %s.",
flat["id"],
flat["station"]
flat["station"],
)
continue
@ -402,29 +402,20 @@ def guess_stations(flats_list, constraint, config):
"Found stations for flat %s: %s (matching %s).",
flat["id"],
", ".join(x["name"] for x in good_matched_stations),
flat["station"]
flat["station"],
)
# If some stations were already filled in and the result is different,
# display some warning to the user
if (
"matched_stations" in flat["flatisfy"] and
(
# Do a set comparison, as ordering is not important
set([
station["name"]
for station in flat["flatisfy"]["matched_stations"]
]) !=
set([
station["name"]
for station in good_matched_stations
])
)
if "matched_stations" in flat["flatisfy"] and (
# Do a set comparison, as ordering is not important
set([station["name"] for station in flat["flatisfy"]["matched_stations"]])
!= set([station["name"] for station in good_matched_stations])
):
LOGGER.warning(
"Replacing previously fetched stations for flat %s. Found "
"stations differ from the previously found ones.",
flat["id"]
flat["id"],
)
flat["flatisfy"]["matched_stations"] = good_matched_stations
@ -449,9 +440,8 @@ def compute_travel_times(flats_list, constraint, config):
if not flat["flatisfy"].get("matched_stations", []):
# Skip any flat without matched stations
LOGGER.info(
"Skipping travel time computation for flat %s. No matched "
"stations.",
flat["id"]
"Skipping travel time computation for flat %s. No matched " "stations.",
flat["id"],
)
continue
@ -467,15 +457,11 @@ def compute_travel_times(flats_list, constraint, config):
for station in flat["flatisfy"]["matched_stations"]:
# Time from station is a dict with time and route
time_from_station_dict = tools.get_travel_time_between(
station["gps"],
place["gps"],
TimeToModes[mode],
config
station["gps"], place["gps"], TimeToModes[mode], config
)
if (
time_from_station_dict and
(time_from_station_dict["time"] < time_to_place_dict or
time_to_place_dict is None)
if time_from_station_dict and (
time_from_station_dict["time"] < time_to_place_dict
or time_to_place_dict is None
):
# If starting from this station makes the route to the
# specified place shorter, update
@ -484,7 +470,10 @@ def compute_travel_times(flats_list, constraint, config):
if time_to_place_dict:
LOGGER.info(
"Travel time between %s and flat %s by %s is %ds.",
place_name, flat["id"], mode, time_to_place_dict["time"]
place_name,
flat["id"],
mode,
time_to_place_dict["time"],
)
flat["flatisfy"]["time_to"][place_name] = time_to_place_dict
return flats_list

View File

@ -11,7 +11,15 @@ import enum
import arrow
from sqlalchemy import (
Boolean, Column, DateTime, Enum, Float, SmallInteger, String, Text, inspect
Boolean,
Column,
DateTime,
Enum,
Float,
SmallInteger,
String,
Text,
inspect,
)
from sqlalchemy.orm import validates
@ -26,6 +34,7 @@ class FlatUtilities(enum.Enum):
"""
An enum of the possible utilities status for a flat entry.
"""
included = 10
unknown = 0
excluded = -10
@ -35,6 +44,7 @@ class FlatStatus(enum.Enum):
"""
An enum of the possible status for a flat entry.
"""
user_deleted = -100
duplicate = -20
ignored = -10
@ -47,21 +57,16 @@ class FlatStatus(enum.Enum):
# List of statuses that are automatically handled, and which the user cannot
# manually set through the UI.
AUTOMATED_STATUSES = [
FlatStatus.new,
FlatStatus.duplicate,
FlatStatus.ignored
]
AUTOMATED_STATUSES = [FlatStatus.new, FlatStatus.duplicate, FlatStatus.ignored]
class Flat(BASE):
"""
SQLAlchemy ORM model to store a flat.
"""
__tablename__ = "flats"
__searchable__ = [
"title", "text", "station", "location", "details", "notes"
]
__searchable__ = ["title", "text", "station", "location", "details", "notes"]
# Weboob data
id = Column(String, primary_key=True)
@ -99,7 +104,7 @@ class Flat(BASE):
# Date for visit
visit_date = Column(DateTime)
@validates('utilities')
@validates("utilities")
def validate_utilities(self, _, utilities):
"""
Utilities validation method
@ -124,8 +129,7 @@ class Flat(BASE):
try:
return getattr(FlatStatus, status)
except (AttributeError, TypeError):
LOGGER.warn("Unkown flat status %s, ignoring it.",
status)
LOGGER.warn("Unkown flat status %s, ignoring it.", status)
return self.status.default.arg
@validates("notation")
@ -137,7 +141,7 @@ class Flat(BASE):
notation = int(notation)
assert notation >= 0 and notation <= 5
except (ValueError, AssertionError):
raise ValueError('notation should be an integer between 0 and 5')
raise ValueError("notation should be an integer between 0 and 5")
return notation
@validates("date")
@ -178,25 +182,22 @@ class Flat(BASE):
# Handle flatisfy metadata
flat_dict = flat_dict.copy()
if "flatisfy" in flat_dict:
flat_dict["flatisfy_stations"] = (
flat_dict["flatisfy"].get("matched_stations", [])
flat_dict["flatisfy_stations"] = flat_dict["flatisfy"].get(
"matched_stations", []
)
flat_dict["flatisfy_postal_code"] = (
flat_dict["flatisfy"].get("postal_code", None)
flat_dict["flatisfy_postal_code"] = flat_dict["flatisfy"].get(
"postal_code", None
)
flat_dict["flatisfy_position"] = (
flat_dict["flatisfy"].get("position", None)
)
flat_dict["flatisfy_time_to"] = (
flat_dict["flatisfy"].get("time_to", {})
)
flat_dict["flatisfy_constraint"] = (
flat_dict["flatisfy"].get("constraint", "default")
flat_dict["flatisfy_position"] = flat_dict["flatisfy"].get("position", None)
flat_dict["flatisfy_time_to"] = flat_dict["flatisfy"].get("time_to", {})
flat_dict["flatisfy_constraint"] = flat_dict["flatisfy"].get(
"constraint", "default"
)
del flat_dict["flatisfy"]
flat_dict = {k: v for k, v in flat_dict.items()
if k in inspect(Flat).columns.keys()}
flat_dict = {
k: v for k, v in flat_dict.items() if k in inspect(Flat).columns.keys()
}
return Flat(**flat_dict)
def __repr__(self):
@ -207,11 +208,7 @@ class Flat(BASE):
Return a dict representation of this flat object that is JSON
serializable.
"""
flat_repr = {
k: v
for k, v in self.__dict__.items()
if not k.startswith("_")
}
flat_repr = {k: v for k, v in self.__dict__.items() if not k.startswith("_")}
if isinstance(flat_repr["status"], FlatStatus):
flat_repr["status"] = flat_repr["status"].name
if isinstance(flat_repr["utilities"], FlatUtilities):

View File

@ -7,9 +7,7 @@ from __future__ import absolute_import, print_function, unicode_literals
import logging
from sqlalchemy import (
Column, Float, Integer, String, UniqueConstraint
)
from sqlalchemy import Column, Float, Integer, String, UniqueConstraint
from flatisfy.database.base import BASE
@ -21,6 +19,7 @@ class PostalCode(BASE):
"""
SQLAlchemy ORM model to store a postal code opendata.
"""
__tablename__ = "postal_codes"
id = Column(Integer, primary_key=True)
@ -41,8 +40,4 @@ class PostalCode(BASE):
Return a dict representation of this postal code object that is JSON
serializable.
"""
return {
k: v
for k, v in self.__dict__.items()
if not k.startswith("_")
}
return {k: v for k, v in self.__dict__.items() if not k.startswith("_")}

View File

@ -7,9 +7,7 @@ from __future__ import absolute_import, print_function, unicode_literals
import logging
from sqlalchemy import (
Column, Float, Integer, String
)
from sqlalchemy import Column, Float, Integer, String
from flatisfy.database.base import BASE
@ -21,6 +19,7 @@ class PublicTransport(BASE):
"""
SQLAlchemy ORM model to store public transport opendata.
"""
__tablename__ = "public_transports"
id = Column(Integer, primary_key=True)

View File

@ -30,6 +30,7 @@ class LocalImageCache(ImageCache):
"""
A local cache for images, stored in memory.
"""
@staticmethod
def on_miss(path):
"""
@ -46,48 +47,36 @@ class TestTexts(unittest.TestCase):
"""
Checks string normalizations.
"""
def test_roman_numbers(self):
"""
Checks roman numbers replacement.
"""
self.assertEqual(
"XIV",
tools.convert_arabic_to_roman("14")
)
self.assertEqual("XIV", tools.convert_arabic_to_roman("14"))
self.assertEqual(
"XXXIX",
tools.convert_arabic_to_roman("39")
)
self.assertEqual("XXXIX", tools.convert_arabic_to_roman("39"))
self.assertEqual(
"40",
tools.convert_arabic_to_roman("40")
)
self.assertEqual("40", tools.convert_arabic_to_roman("40"))
self.assertEqual(
"1987",
tools.convert_arabic_to_roman("1987")
)
self.assertEqual("1987", tools.convert_arabic_to_roman("1987"))
self.assertEqual(
"Dans le XVe arrondissement",
tools.convert_arabic_to_roman_in_text("Dans le 15e arrondissement")
tools.convert_arabic_to_roman_in_text("Dans le 15e arrondissement"),
)
self.assertEqual(
"XXeme arr.",
tools.convert_arabic_to_roman_in_text("20eme arr.")
"XXeme arr.", tools.convert_arabic_to_roman_in_text("20eme arr.")
)
self.assertEqual(
"A AIX EN PROVENCE",
tools.convert_arabic_to_roman_in_text("A AIX EN PROVENCE")
tools.convert_arabic_to_roman_in_text("A AIX EN PROVENCE"),
)
self.assertEqual(
"Montigny Le Bretonneux",
tools.convert_arabic_to_roman_in_text("Montigny Le Bretonneux")
tools.convert_arabic_to_roman_in_text("Montigny Le Bretonneux"),
)
def test_roman_numbers_in_text(self):
@ -97,58 +86,43 @@ class TestTexts(unittest.TestCase):
"""
self.assertEqual(
"dans le XVe arrondissement",
tools.normalize_string("Dans le 15e arrondissement")
tools.normalize_string("Dans le 15e arrondissement"),
)
self.assertEqual(
"paris XVe, 75005",
tools.normalize_string("Paris 15e, 75005")
)
self.assertEqual("paris XVe, 75005", tools.normalize_string("Paris 15e, 75005"))
self.assertEqual(
"paris xve, 75005",
tools.normalize_string("Paris XVe, 75005")
)
self.assertEqual("paris xve, 75005", tools.normalize_string("Paris XVe, 75005"))
def test_multiple_whitespaces(self):
"""
Checks whitespaces are collapsed.
"""
self.assertEqual(
"avec ascenseur",
tools.normalize_string("avec ascenseur")
)
self.assertEqual("avec ascenseur", tools.normalize_string("avec ascenseur"))
def test_whitespace_trim(self):
"""
Checks that trailing and beginning whitespaces are trimmed.
"""
self.assertEqual(
"rennes 35000",
tools.normalize_string(" Rennes 35000 ")
)
self.assertEqual("rennes 35000", tools.normalize_string(" Rennes 35000 "))
def test_accents(self):
"""
Checks accents are replaced.
"""
self.assertEqual(
"eeeaui",
tools.normalize_string(u"éèêàüï")
)
self.assertEqual("eeeaui", tools.normalize_string(u"éèêàüï"))
class TestPhoneNumbers(unittest.TestCase):
"""
Checks phone numbers normalizations.
"""
def test_prefix(self):
"""
Checks phone numbers with international prefixes.
"""
self.assertEqual(
"0605040302",
duplicates.homogeneize_phone_number("+33605040302")
"0605040302", duplicates.homogeneize_phone_number("+33605040302")
)
def test_dots_separators(self):
@ -156,8 +130,7 @@ class TestPhoneNumbers(unittest.TestCase):
Checks phone numbers with dots.
"""
self.assertEqual(
"0605040302",
duplicates.homogeneize_phone_number("06.05.04.03.02")
"0605040302", duplicates.homogeneize_phone_number("06.05.04.03.02")
)
def test_spaces_separators(self):
@ -165,8 +138,7 @@ class TestPhoneNumbers(unittest.TestCase):
Checks phone numbers with spaces.
"""
self.assertEqual(
"0605040302",
duplicates.homogeneize_phone_number("06 05 04 03 02")
"0605040302", duplicates.homogeneize_phone_number("06 05 04 03 02")
)
@ -183,92 +155,106 @@ class TestPhotos(unittest.TestCase):
"""
Compares a photo against itself.
"""
photo = {
"url": TESTS_DATA_DIR + "127028739@seloger.jpg"
}
photo = {"url": TESTS_DATA_DIR + "127028739@seloger.jpg"}
self.assertTrue(duplicates.compare_photos(
photo,
photo,
self.IMAGE_CACHE,
self.HASH_THRESHOLD
))
self.assertTrue(
duplicates.compare_photos(
photo, photo, self.IMAGE_CACHE, self.HASH_THRESHOLD
)
)
def test_different_photos(self):
"""
Compares two different photos.
"""
self.assertFalse(duplicates.compare_photos(
{"url": TESTS_DATA_DIR + "127028739@seloger.jpg"},
{"url": TESTS_DATA_DIR + "127028739-2@seloger.jpg"},
self.IMAGE_CACHE,
self.HASH_THRESHOLD
))
self.assertFalse(
duplicates.compare_photos(
{"url": TESTS_DATA_DIR + "127028739@seloger.jpg"},
{"url": TESTS_DATA_DIR + "127028739-2@seloger.jpg"},
self.IMAGE_CACHE,
self.HASH_THRESHOLD,
)
)
self.assertFalse(duplicates.compare_photos(
{"url": TESTS_DATA_DIR + "127028739-2@seloger.jpg"},
{"url": TESTS_DATA_DIR + "127028739-3@seloger.jpg"},
self.IMAGE_CACHE,
self.HASH_THRESHOLD
))
self.assertFalse(
duplicates.compare_photos(
{"url": TESTS_DATA_DIR + "127028739-2@seloger.jpg"},
{"url": TESTS_DATA_DIR + "127028739-3@seloger.jpg"},
self.IMAGE_CACHE,
self.HASH_THRESHOLD,
)
)
def test_matching_photos(self):
"""
Compares two matching photos with different size and source.
"""
self.assertTrue(duplicates.compare_photos(
{"url": TESTS_DATA_DIR + "127028739@seloger.jpg"},
{"url": TESTS_DATA_DIR + "14428129@explorimmo.jpg"},
self.IMAGE_CACHE,
self.HASH_THRESHOLD
))
self.assertTrue(
duplicates.compare_photos(
{"url": TESTS_DATA_DIR + "127028739@seloger.jpg"},
{"url": TESTS_DATA_DIR + "14428129@explorimmo.jpg"},
self.IMAGE_CACHE,
self.HASH_THRESHOLD,
)
)
self.assertTrue(duplicates.compare_photos(
{"url": TESTS_DATA_DIR + "127028739-2@seloger.jpg"},
{"url": TESTS_DATA_DIR + "14428129-2@explorimmo.jpg"},
self.IMAGE_CACHE,
self.HASH_THRESHOLD
))
self.assertTrue(
duplicates.compare_photos(
{"url": TESTS_DATA_DIR + "127028739-2@seloger.jpg"},
{"url": TESTS_DATA_DIR + "14428129-2@explorimmo.jpg"},
self.IMAGE_CACHE,
self.HASH_THRESHOLD,
)
)
self.assertTrue(duplicates.compare_photos(
{"url": TESTS_DATA_DIR + "127028739-3@seloger.jpg"},
{"url": TESTS_DATA_DIR + "14428129-3@explorimmo.jpg"},
self.IMAGE_CACHE,
self.HASH_THRESHOLD
))
self.assertTrue(
duplicates.compare_photos(
{"url": TESTS_DATA_DIR + "127028739-3@seloger.jpg"},
{"url": TESTS_DATA_DIR + "14428129-3@explorimmo.jpg"},
self.IMAGE_CACHE,
self.HASH_THRESHOLD,
)
)
self.assertTrue(duplicates.compare_photos(
{"url": TESTS_DATA_DIR + "127028739@seloger.jpg"},
{"url": TESTS_DATA_DIR + "127028739-watermark@seloger.jpg"},
self.IMAGE_CACHE,
self.HASH_THRESHOLD
))
self.assertTrue(
duplicates.compare_photos(
{"url": TESTS_DATA_DIR + "127028739@seloger.jpg"},
{"url": TESTS_DATA_DIR + "127028739-watermark@seloger.jpg"},
self.IMAGE_CACHE,
self.HASH_THRESHOLD,
)
)
def test_matching_cropped_photos(self):
"""
Compares two matching photos with one being cropped.
"""
# Fixme: the image hash threshold should be 10 ideally
self.assertTrue(duplicates.compare_photos(
{"url": TESTS_DATA_DIR + "vertical.jpg"},
{"url": TESTS_DATA_DIR + "vertical-cropped.jpg"},
self.IMAGE_CACHE,
20
))
self.assertTrue(
duplicates.compare_photos(
{"url": TESTS_DATA_DIR + "vertical.jpg"},
{"url": TESTS_DATA_DIR + "vertical-cropped.jpg"},
self.IMAGE_CACHE,
20,
)
)
# Fixme: the image hash threshold should be 10 ideally
self.assertTrue(duplicates.compare_photos(
{"url": TESTS_DATA_DIR + "13783671@explorimmo.jpg"},
{"url": TESTS_DATA_DIR + "124910113@seloger.jpg"},
self.IMAGE_CACHE,
20
))
self.assertTrue(
duplicates.compare_photos(
{"url": TESTS_DATA_DIR + "13783671@explorimmo.jpg"},
{"url": TESTS_DATA_DIR + "124910113@seloger.jpg"},
self.IMAGE_CACHE,
20,
)
)
class TestImageCache(unittest.TestCase):
"""
Checks image cache is working as expected.
"""
def __init__(self, *args, **kwargs):
self.IMAGE_CACHE = ImageCache( # pylint: disable=invalid-name
storage_dir=tempfile.mkdtemp(prefix="flatisfy-")
@ -280,27 +266,22 @@ class TestImageCache(unittest.TestCase):
Check that it returns nothing on an invalid URL.
"""
# See https://framagit.org/phyks/Flatisfy/issues/116.
self.assertIsNone(
self.IMAGE_CACHE.get("https://httpbin.org/status/404")
)
self.assertIsNone(
self.IMAGE_CACHE.get("https://httpbin.org/status/500")
)
self.assertIsNone(self.IMAGE_CACHE.get("https://httpbin.org/status/404"))
self.assertIsNone(self.IMAGE_CACHE.get("https://httpbin.org/status/500"))
def test_invalid_data(self):
"""
Check that it returns nothing on an invalid data.
"""
# See https://framagit.org/phyks/Flatisfy/issues/116.
self.assertIsNone(
self.IMAGE_CACHE.get("https://httpbin.org/")
)
self.assertIsNone(self.IMAGE_CACHE.get("https://httpbin.org/"))
class TestDuplicates(unittest.TestCase):
"""
Checks duplicates detection.
"""
DUPLICATES_MIN_SCORE_WITHOUT_PHOTOS = 8 # pylint: disable=invalid-name
DUPLICATES_MIN_SCORE_WITH_PHOTOS = 15 # pylint: disable=invalid-name
HASH_THRESHOLD = 10 # pylint: disable=invalid-name
@ -326,7 +307,7 @@ class TestDuplicates(unittest.TestCase):
"utilities": "",
"area": random.randint(200, 1500) / 10,
"cost": random.randint(100000, 300000),
"bedrooms": random.randint(1, 4)
"bedrooms": random.randint(1, 4),
}
@staticmethod
@ -351,8 +332,7 @@ class TestDuplicates(unittest.TestCase):
flat1 = self.generate_fake_flat()
flat2 = copy.deepcopy(flat1)
score = duplicates.get_duplicate_score(
flat1, flat2,
self.IMAGE_CACHE, self.HASH_THRESHOLD
flat1, flat2, self.IMAGE_CACHE, self.HASH_THRESHOLD
)
self.assertGreaterEqual(score, self.DUPLICATES_MIN_SCORE_WITHOUT_PHOTOS)
@ -365,8 +345,7 @@ class TestDuplicates(unittest.TestCase):
flat2["cost"] += 1000
score = duplicates.get_duplicate_score(
flat1, flat2,
self.IMAGE_CACHE, self.HASH_THRESHOLD
flat1, flat2, self.IMAGE_CACHE, self.HASH_THRESHOLD
)
self.assertLess(score, self.DUPLICATES_MIN_SCORE_WITHOUT_PHOTOS)
@ -380,8 +359,7 @@ class TestDuplicates(unittest.TestCase):
flat2["rooms"] += 1
score = duplicates.get_duplicate_score(
flat1, flat2,
self.IMAGE_CACHE, self.HASH_THRESHOLD
flat1, flat2, self.IMAGE_CACHE, self.HASH_THRESHOLD
)
self.assertLess(score, self.DUPLICATES_MIN_SCORE_WITHOUT_PHOTOS)
@ -394,8 +372,7 @@ class TestDuplicates(unittest.TestCase):
flat2["area"] += 10
score = duplicates.get_duplicate_score(
flat1, flat2,
self.IMAGE_CACHE, self.HASH_THRESHOLD
flat1, flat2, self.IMAGE_CACHE, self.HASH_THRESHOLD
)
self.assertLess(score, self.DUPLICATES_MIN_SCORE_WITHOUT_PHOTOS)
@ -410,8 +387,7 @@ class TestDuplicates(unittest.TestCase):
flat2["area"] = 50.37
score = duplicates.get_duplicate_score(
flat1, flat2,
self.IMAGE_CACHE, self.HASH_THRESHOLD
flat1, flat2, self.IMAGE_CACHE, self.HASH_THRESHOLD
)
self.assertLess(score, self.DUPLICATES_MIN_SCORE_WITHOUT_PHOTOS)
@ -425,8 +401,7 @@ class TestDuplicates(unittest.TestCase):
flat2["phone"] = "0708091011"
score = duplicates.get_duplicate_score(
flat1, flat2,
self.IMAGE_CACHE, self.HASH_THRESHOLD
flat1, flat2, self.IMAGE_CACHE, self.HASH_THRESHOLD
)
self.assertLess(score, self.DUPLICATES_MIN_SCORE_WITHOUT_PHOTOS)
@ -435,14 +410,10 @@ class TestDuplicates(unittest.TestCase):
Two flats with same price, area and rooms quantity should be detected
as duplicates.
"""
flats = self.load_files(
"127028739@seloger",
"14428129@explorimmo"
)
flats = self.load_files("127028739@seloger", "14428129@explorimmo")
score = duplicates.get_duplicate_score(
flats[0], flats[1],
self.IMAGE_CACHE, self.HASH_THRESHOLD
flats[0], flats[1], self.IMAGE_CACHE, self.HASH_THRESHOLD
)
self.assertGreaterEqual(score, self.DUPLICATES_MIN_SCORE_WITH_PHOTOS)
@ -502,8 +473,13 @@ def run():
"""
LOGGER.info("Running tests…")
try:
for testsuite in [TestTexts, TestPhoneNumbers, TestImageCache,
TestDuplicates, TestPhotos]:
for testsuite in [
TestTexts,
TestPhoneNumbers,
TestImageCache,
TestDuplicates,
TestPhotos,
]:
suite = unittest.TestLoader().loadTestsFromTestCase(testsuite)
result = unittest.TextTestRunner(verbosity=2).run(suite)
assert result.wasSuccessful()

View File

@ -3,9 +3,7 @@
This module contains basic utility functions, such as pretty printing of JSON
output, checking that a value is within a given interval etc.
"""
from __future__ import (
absolute_import, division, print_function, unicode_literals
)
from __future__ import absolute_import, division, print_function, unicode_literals
import datetime
import itertools
@ -41,7 +39,7 @@ def next_weekday(d, weekday):
:returns: The datetime object for the next given weekday.
"""
days_ahead = weekday - d.weekday()
if days_ahead <= 0: # Target day already happened this week
if days_ahead <= 0: # Target day already happened this week
days_ahead += 7
return d + datetime.timedelta(days_ahead)
@ -61,8 +59,18 @@ def convert_arabic_to_roman(arabic):
return arabic
to_roman = {
1: 'I', 2: 'II', 3: 'III', 4: 'IV', 5: 'V', 6: 'VI', 7: 'VII',
8: 'VIII', 9: 'IX', 10: 'X', 20: 'XX', 30: 'XXX'
1: "I",
2: "II",
3: "III",
4: "IV",
5: "V",
6: "VI",
7: "VII",
8: "VIII",
9: "IX",
10: "X",
20: "XX",
30: "XXX",
}
roman_chars_list = []
count = 1
@ -71,7 +79,7 @@ def convert_arabic_to_roman(arabic):
if digit != 0:
roman_chars_list.append(to_roman[digit * count])
count *= 10
return ''.join(roman_chars_list[::-1])
return "".join(roman_chars_list[::-1])
def convert_arabic_to_roman_in_text(text):
@ -83,9 +91,7 @@ def convert_arabic_to_roman_in_text(text):
arabic.
"""
return re.sub(
r'(\d+)',
lambda matchobj: convert_arabic_to_roman(matchobj.group(0)),
text
r"(\d+)", lambda matchobj: convert_arabic_to_roman(matchobj.group(0)), text
)
@ -96,11 +102,13 @@ def hash_dict(func):
From https://stackoverflow.com/a/44776960.
"""
class HDict(dict):
"""
Wrap a mutable dictionary so that it is hashable (the hash is computed
from its JSON dump). Useful to be compatible with lru_cache
"""
def __hash__(self):
return hash(json.dumps(self))
@ -108,17 +116,10 @@ def hash_dict(func):
"""
The wrapped function
"""
args = tuple(
[
HDict(arg) if isinstance(arg, dict) else arg
for arg in args
]
)
kwargs = {
k: HDict(v) if isinstance(v, dict) else v
for k, v in kwargs.items()
}
args = tuple([HDict(arg) if isinstance(arg, dict) else arg for arg in args])
kwargs = {k: HDict(v) if isinstance(v, dict) else v for k, v in kwargs.items()}
return func(*args, **kwargs)
return wrapped
@ -126,6 +127,7 @@ class DateAwareJSONEncoder(json.JSONEncoder):
"""
Extend the default JSON encoder to serialize datetimes to iso strings.
"""
def default(self, o): # pylint: disable=locally-disabled,E0202
if isinstance(o, (datetime.date, datetime.datetime)):
return o.isoformat()
@ -153,9 +155,9 @@ def pretty_json(data):
"toto": "ok"
}
"""
return json.dumps(data, cls=DateAwareJSONEncoder,
indent=4, separators=(',', ': '),
sort_keys=True)
return json.dumps(
data, cls=DateAwareJSONEncoder, indent=4, separators=(",", ": "), sort_keys=True
)
def batch(iterable, size):
@ -295,8 +297,8 @@ def distance(gps1, gps2):
# pylint: disable=locally-disabled,invalid-name
a = (
math.sin((lat2 - lat1) / 2.0)**2 +
math.cos(lat1) * math.cos(lat2) * math.sin((long2 - long1) / 2.0)**2
math.sin((lat2 - lat1) / 2.0) ** 2
+ math.cos(lat1) * math.cos(lat2) * math.sin((long2 - long1) / 2.0) ** 2
)
c = 2.0 * math.atan2(math.sqrt(a), math.sqrt(1 - a))
earth_radius = 6371000
@ -327,7 +329,9 @@ def merge_dicts(*args):
if len(args) == 1:
return args[0]
flat1, flat2 = args[:2] # pylint: disable=locally-disabled,unbalanced-tuple-unpacking,line-too-long
flat1, flat2 = args[
:2
] # pylint: disable=locally-disabled,unbalanced-tuple-unpacking,line-too-long
merged_flat = {}
for k, value2 in flat2.items():
value1 = flat1.get(k, None)
@ -385,13 +389,14 @@ def get_travel_time_between(latlng_from, latlng_to, mode, config):
"from": "%s;%s" % (latlng_from[1], latlng_from[0]),
"to": "%s;%s" % (latlng_to[1], latlng_to[0]),
"datetime": date_from.isoformat(),
"count": 1
"count": 1,
}
try:
# Do the query to Navitia API
req = requests.get(
NAVITIA_ENDPOINT, params=payload,
auth=(config["navitia_api_key"], "")
NAVITIA_ENDPOINT,
params=payload,
auth=(config["navitia_api_key"], ""),
)
req.raise_for_status()
@ -400,28 +405,31 @@ def get_travel_time_between(latlng_from, latlng_to, mode, config):
for section in journeys["sections"]:
if section["type"] == "public_transport":
# Public transport
sections.append({
"geojson": section["geojson"],
"color": (
section["display_informations"].get("color", None)
)
})
sections.append(
{
"geojson": section["geojson"],
"color": (
section["display_informations"].get("color", None)
),
}
)
elif section["type"] == "street_network":
# Walking
sections.append({
"geojson": section["geojson"],
"color": None
})
sections.append({"geojson": section["geojson"], "color": None})
else:
# Skip anything else
continue
except (requests.exceptions.RequestException,
ValueError, IndexError, KeyError) as exc:
except (
requests.exceptions.RequestException,
ValueError,
IndexError,
KeyError,
) as exc:
# Ignore any possible exception
LOGGER.warning(
"An exception occurred during travel time lookup on "
"Navitia: %s.",
str(exc)
str(exc),
)
else:
LOGGER.warning(
@ -430,50 +438,45 @@ def get_travel_time_between(latlng_from, latlng_to, mode, config):
)
elif mode in [TimeToModes.WALK, TimeToModes.BIKE, TimeToModes.CAR]:
MAPBOX_MODES = {
TimeToModes.WALK: 'mapbox/walking',
TimeToModes.BIKE: 'mapbox/cycling',
TimeToModes.CAR: 'mapbox/driving'
TimeToModes.WALK: "mapbox/walking",
TimeToModes.BIKE: "mapbox/cycling",
TimeToModes.CAR: "mapbox/driving",
}
# Check that Mapbox API key is available
if config["mapbox_api_key"]:
try:
service = mapbox.Directions(
access_token=config['mapbox_api_key']
)
service = mapbox.Directions(access_token=config["mapbox_api_key"])
origin = {
'type': 'Feature',
'properties': {'name': 'Start'},
'geometry': {
'type': 'Point',
'coordinates': [latlng_from[1], latlng_from[0]]}}
"type": "Feature",
"properties": {"name": "Start"},
"geometry": {
"type": "Point",
"coordinates": [latlng_from[1], latlng_from[0]],
},
}
destination = {
'type': 'Feature',
'properties': {'name': 'End'},
'geometry': {
'type': 'Point',
'coordinates': [latlng_to[1], latlng_to[0]]}}
response = service.directions(
[origin, destination], MAPBOX_MODES[mode]
)
"type": "Feature",
"properties": {"name": "End"},
"geometry": {
"type": "Point",
"coordinates": [latlng_to[1], latlng_to[0]],
},
}
response = service.directions([origin, destination], MAPBOX_MODES[mode])
response.raise_for_status()
route = response.geojson()['features'][0]
route = response.geojson()["features"][0]
# Fix longitude/latitude inversion in geojson output
geometry = route['geometry']
geometry['coordinates'] = [
(x[1], x[0]) for x in geometry['coordinates']
geometry = route["geometry"]
geometry["coordinates"] = [
(x[1], x[0]) for x in geometry["coordinates"]
]
sections = [{
"geojson": geometry,
"color": "000"
}]
travel_time = route['properties']['duration']
except (requests.exceptions.RequestException,
IndexError, KeyError) as exc:
sections = [{"geojson": geometry, "color": "000"}]
travel_time = route["properties"]["duration"]
except (requests.exceptions.RequestException, IndexError, KeyError) as exc:
# Ignore any possible exception
LOGGER.warning(
"An exception occurred during travel time lookup on "
"Mapbox: %s.",
str(exc)
"An exception occurred during travel time lookup on " "Mapbox: %s.",
str(exc),
)
else:
LOGGER.warning(
@ -482,10 +485,7 @@ def get_travel_time_between(latlng_from, latlng_to, mode, config):
)
if travel_time:
return {
"time": travel_time,
"sections": sections
}
return {"time": travel_time, "sections": sections}
return None
@ -493,6 +493,7 @@ def timeit(func):
"""
A decorator that logs how much time was spent in the function.
"""
def wrapped(*args, **kwargs):
"""
The wrapped function
@ -502,4 +503,5 @@ def timeit(func):
runtime = time.time() - before
LOGGER.info("%s -- Execution took %s seconds.", func.__name__, runtime)
return res
return wrapped

View File

@ -2,9 +2,7 @@
"""
This module contains the definition of the Bottle web app.
"""
from __future__ import (
absolute_import, division, print_function, unicode_literals
)
from __future__ import absolute_import, division, print_function, unicode_literals
import functools
import json
@ -25,13 +23,13 @@ class QuietWSGIRefServer(bottle.WSGIRefServer):
Quiet implementation of Bottle built-in WSGIRefServer, as `Canister` is
handling the logging through standard Python logging.
"""
# pylint: disable=locally-disabled,too-few-public-methods
quiet = True
def run(self, app):
app.log.info(
'Server is now up and ready! Listening on %s:%s.' %
(self.host, self.port)
"Server is now up and ready! Listening on %s:%s." % (self.host, self.port)
)
super(QuietWSGIRefServer, self).run(app)
@ -42,12 +40,10 @@ def _serve_static_file(filename):
"""
return bottle.static_file(
filename,
root=os.path.join(
os.path.dirname(os.path.realpath(__file__)),
"static"
)
root=os.path.join(os.path.dirname(os.path.realpath(__file__)), "static"),
)
def get_app(config):
"""
Get a Bottle app instance with all the routes set-up.
@ -72,40 +68,42 @@ def get_app(config):
)
# Enable CORS
@app.hook('after_request')
@app.hook("after_request")
def enable_cors():
"""
Add CORS headers at each request.
"""
# The str() call is required as we import unicode_literals and WSGI
# headers list should have plain str type.
bottle.response.headers[str('Access-Control-Allow-Origin')] = str('*')
bottle.response.headers[str('Access-Control-Allow-Methods')] = str(
'PUT, GET, POST, DELETE, OPTIONS, PATCH'
bottle.response.headers[str("Access-Control-Allow-Origin")] = str("*")
bottle.response.headers[str("Access-Control-Allow-Methods")] = str(
"PUT, GET, POST, DELETE, OPTIONS, PATCH"
)
bottle.response.headers[str('Access-Control-Allow-Headers')] = str(
'Origin, Accept, Content-Type, X-Requested-With, X-CSRF-Token'
bottle.response.headers[str("Access-Control-Allow-Headers")] = str(
"Origin, Accept, Content-Type, X-Requested-With, X-CSRF-Token"
)
# API v1 routes
app.route("/api/v1", ["GET", "OPTIONS"], api_routes.index_v1)
app.route("/api/v1/time_to_places", ["GET", "OPTIONS"],
api_routes.time_to_places_v1)
app.route(
"/api/v1/time_to_places", ["GET", "OPTIONS"], api_routes.time_to_places_v1
)
app.route("/api/v1/flats", ["GET", "OPTIONS"], api_routes.flats_v1)
app.route("/api/v1/flats/:flat_id", ["GET", "OPTIONS"], api_routes.flat_v1)
app.route("/api/v1/flats/:flat_id", ["PATCH", "OPTIONS"],
api_routes.update_flat_v1)
app.route("/api/v1/flats/:flat_id", ["PATCH", "OPTIONS"], api_routes.update_flat_v1)
app.route("/api/v1/ics/visits.ics", ["GET", "OPTIONS"],
api_routes.ics_feed_v1)
app.route("/api/v1/ics/visits.ics", ["GET", "OPTIONS"], api_routes.ics_feed_v1)
app.route("/api/v1/search", ["POST", "OPTIONS"], api_routes.search_v1)
app.route("/api/v1/opendata", ["GET", "OPTIONS"], api_routes.opendata_index_v1)
app.route("/api/v1/opendata/postal_codes", ["GET", "OPTIONS"],
api_routes.opendata_postal_codes_v1)
app.route(
"/api/v1/opendata/postal_codes",
["GET", "OPTIONS"],
api_routes.opendata_postal_codes_v1,
)
app.route("/api/v1/metadata", ["GET", "OPTIONS"], api_routes.metadata_v1)
@ -113,29 +111,28 @@ def get_app(config):
app.route("/", "GET", lambda: _serve_static_file("index.html"))
# Static files
app.route("/favicon.ico", "GET",
lambda: _serve_static_file("favicon.ico"))
app.route("/favicon.ico", "GET", lambda: _serve_static_file("favicon.ico"))
app.route(
"/assets/<filename:path>", "GET",
lambda filename: _serve_static_file("/assets/{}".format(filename))
"/assets/<filename:path>",
"GET",
lambda filename: _serve_static_file("/assets/{}".format(filename)),
)
app.route(
"/img/<filename:path>", "GET",
lambda filename: _serve_static_file("/img/{}".format(filename))
"/img/<filename:path>",
"GET",
lambda filename: _serve_static_file("/img/{}".format(filename)),
)
app.route(
"/.well-known/<filename:path>", "GET",
lambda filename: _serve_static_file("/.well-known/{}".format(filename))
"/.well-known/<filename:path>",
"GET",
lambda filename: _serve_static_file("/.well-known/{}".format(filename)),
)
app.route(
"/data/img/<filename:path>", "GET",
"/data/img/<filename:path>",
"GET",
lambda filename: bottle.static_file(
filename,
root=os.path.join(
config["data_directory"],
"images"
)
)
filename, root=os.path.join(config["data_directory"], "images")
),
)
return app

View File

@ -7,9 +7,7 @@ This module is heavily based on code from
[Bottle-SQLAlchemy](https://github.com/iurisilvio/bottle-sqlalchemy) which is
licensed under MIT license.
"""
from __future__ import (
absolute_import, division, print_function, unicode_literals
)
from __future__ import absolute_import, division, print_function, unicode_literals
import functools
import inspect
@ -22,7 +20,8 @@ class ConfigPlugin(object):
A Bottle plugin to automatically pass the config object to the routes
specifying they need it.
"""
name = 'config'
name = "config"
api = 2
KEYWORD = "config"
@ -41,9 +40,7 @@ class ConfigPlugin(object):
if not isinstance(other, ConfigPlugin):
continue
else:
raise bottle.PluginError(
"Found another conflicting Config plugin."
)
raise bottle.PluginError("Found another conflicting Config plugin.")
def apply(self, callback, route):
"""

View File

@ -7,9 +7,7 @@ This module is heavily based on code from
[Bottle-SQLAlchemy](https://github.com/iurisilvio/bottle-sqlalchemy) which is
licensed under MIT license.
"""
from __future__ import (
absolute_import, division, print_function, unicode_literals
)
from __future__ import absolute_import, division, print_function, unicode_literals
import inspect
@ -21,7 +19,8 @@ class DatabasePlugin(object):
A Bottle plugin to automatically pass an SQLAlchemy database session object
to the routes specifying they need it.
"""
name = 'database'
name = "database"
api = 2
KEYWORD = "db"
@ -41,9 +40,7 @@ class DatabasePlugin(object):
if not isinstance(other, DatabasePlugin):
continue
else:
raise bottle.PluginError(
"Found another conflicting Database plugin."
)
raise bottle.PluginError("Found another conflicting Database plugin.")
def apply(self, callback, route):
"""
@ -64,6 +61,7 @@ class DatabasePlugin(object):
if self.KEYWORD not in callback_args:
# If no need for a db session, call the route callback
return callback
def wrapper(*args, **kwargs):
"""
Wrap the callback in a call to get_session.
@ -72,6 +70,7 @@ class DatabasePlugin(object):
# Get a db session and pass it to the callback
kwargs[self.KEYWORD] = session
return callback(*args, **kwargs)
return wrapper

View File

@ -2,9 +2,7 @@
"""
This module contains the definition of the web app API routes.
"""
from __future__ import (
absolute_import, division, print_function, unicode_literals
)
from __future__ import absolute_import, division, print_function, unicode_literals
import datetime
import itertools
@ -60,26 +58,24 @@ def _JSONApiSpec(query, model, default_sorting=None):
# Handle pagination according to JSON API spec
page_number, page_size = 0, None
try:
if 'page[size]' in query:
page_size = int(query['page[size]'])
if "page[size]" in query:
page_size = int(query["page[size]"])
assert page_size > 0
if 'page[number]' in query:
page_number = int(query['page[number]'])
if "page[number]" in query:
page_number = int(query["page[number]"])
assert page_number >= 0
except (AssertionError, ValueError):
raise ValueError("Invalid pagination provided.")
# Handle sorting according to JSON API spec
sorting = []
if 'sort' in query:
for index in query['sort'].split(','):
if "sort" in query:
for index in query["sort"].split(","):
try:
sort_field = getattr(model, index.lstrip('-'))
sort_field = getattr(model, index.lstrip("-"))
except AttributeError:
raise ValueError(
"Invalid sorting key provided: {}.".format(index)
)
if index.startswith('-'):
raise ValueError("Invalid sorting key provided: {}.".format(index))
if index.startswith("-"):
sort_field = sort_field.desc()
sorting.append(sort_field)
# Default sorting options
@ -88,9 +84,7 @@ def _JSONApiSpec(query, model, default_sorting=None):
sorting.append(getattr(model, default_sorting))
except AttributeError:
raise ValueError(
"Invalid default sorting key provided: {}.".format(
default_sorting
)
"Invalid default sorting key provided: {}.".format(default_sorting)
)
return filters, page_number, page_size, sorting
@ -125,7 +119,7 @@ def _serialize_flat(flat, config):
flat["flatisfy_postal_code"] = {
"postal_code": flat["flatisfy_postal_code"],
"name": postal_code_data.name,
"gps": (postal_code_data.lat, postal_code_data.lng)
"gps": (postal_code_data.lat, postal_code_data.lng),
}
except (AssertionError, StopIteration):
flat["flatisfy_postal_code"] = {}
@ -148,7 +142,7 @@ def index_v1():
"search": "/api/v1/search",
"ics": "/api/v1/ics/visits.ics",
"time_to_places": "/api/v1/time_to_places",
"metadata": "/api/v1/metadata"
"metadata": "/api/v1/metadata",
}
@ -179,36 +173,32 @@ def flats_v1(config, db):
:return: The available flats objects in a JSON ``data`` dict.
"""
if bottle.request.method == 'OPTIONS':
if bottle.request.method == "OPTIONS":
# CORS
return ''
return ""
try:
try:
filters, page_number, page_size, sorting = _JSONApiSpec(
bottle.request.query,
flat_model.Flat,
default_sorting='cost'
bottle.request.query, flat_model.Flat, default_sorting="cost"
)
except ValueError as exc:
return JSONError(400, str(exc))
# Build flat list
db_query = (
db.query(flat_model.Flat).filter_by(**filters).order_by(*sorting)
)
db_query = db.query(flat_model.Flat).filter_by(**filters).order_by(*sorting)
flats = [
_serialize_flat(flat, config)
for flat in itertools.islice(
db_query,
page_number * page_size if page_size else None,
page_number * page_size + page_size if page_size else None
page_number * page_size + page_size if page_size else None,
)
]
return {
"data": flats,
"page": page_number,
"items_per_page": page_size if page_size else len(flats)
"items_per_page": page_size if page_size else len(flats),
}
except Exception as exc: # pylint: disable= broad-except
return JSONError(500, str(exc))
@ -224,7 +214,7 @@ def flat_v1(flat_id, config, db):
:return: The flat object in a JSON ``data`` dict.
"""
if bottle.request.method == 'OPTIONS':
if bottle.request.method == "OPTIONS":
# CORS
return {}
@ -234,9 +224,7 @@ def flat_v1(flat_id, config, db):
if not flat:
return JSONError(404, "No flat with id {}.".format(flat_id))
return {
"data": _serialize_flat(flat, config)
}
return {"data": _serialize_flat(flat, config)}
except Exception as exc: # pylint: disable= broad-except
return JSONError(500, str(exc))
@ -260,7 +248,7 @@ def update_flat_v1(flat_id, config, db):
:return: The new flat object in a JSON ``data`` dict.
"""
if bottle.request.method == 'OPTIONS':
if bottle.request.method == "OPTIONS":
# CORS
return {}
@ -274,14 +262,9 @@ def update_flat_v1(flat_id, config, db):
for key, value in json_body.items():
setattr(flat, key, value)
except ValueError as exc:
return JSONError(
400,
"Invalid payload provided: {}.".format(str(exc))
)
return JSONError(400, "Invalid payload provided: {}.".format(str(exc)))
return {
"data": _serialize_flat(flat, config)
}
return {"data": _serialize_flat(flat, config)}
except Exception as exc: # pylint: disable= broad-except
return JSONError(500, str(exc))
@ -297,7 +280,7 @@ def time_to_places_v1(config):
:return: The JSON dump of the places to compute time to (dict of places
names mapped to GPS coordinates).
"""
if bottle.request.method == 'OPTIONS':
if bottle.request.method == "OPTIONS":
# CORS
return {}
@ -305,12 +288,9 @@ def time_to_places_v1(config):
places = {}
for constraint_name, constraint in config["constraints"].items():
places[constraint_name] = {
k: v["gps"]
for k, v in constraint["time_to"].items()
k: v["gps"] for k, v in constraint["time_to"].items()
}
return {
"data": places
}
return {"data": places}
except Exception as exc: # pylint: disable= broad-except
return JSONError(500, str(exc))
@ -345,7 +325,7 @@ def search_v1(db, config):
:return: The matching flat objects in a JSON ``data`` dict.
"""
if bottle.request.method == 'OPTIONS':
if bottle.request.method == "OPTIONS":
# CORS
return {}
@ -357,30 +337,29 @@ def search_v1(db, config):
try:
filters, page_number, page_size, sorting = _JSONApiSpec(
bottle.request.query,
flat_model.Flat,
default_sorting='cost'
bottle.request.query, flat_model.Flat, default_sorting="cost"
)
except ValueError as exc:
return JSONError(400, str(exc))
flats_db_query = (flat_model.Flat
.search_query(db, query)
.filter_by(**filters)
.order_by(*sorting))
flats_db_query = (
flat_model.Flat.search_query(db, query)
.filter_by(**filters)
.order_by(*sorting)
)
flats = [
_serialize_flat(flat, config)
for flat in itertools.islice(
flats_db_query,
page_number * page_size if page_size else None,
page_number * page_size + page_size if page_size else None
page_number * page_size + page_size if page_size else None,
)
]
return {
"data": flats,
"page": page_number,
"items_per_page": page_size if page_size else len(flats)
"items_per_page": page_size if page_size else len(flats),
}
except Exception as exc: # pylint: disable= broad-except
return JSONError(500, str(exc))
@ -396,7 +375,7 @@ def ics_feed_v1(config, db):
:return: The ICS feed for the visits.
"""
if bottle.request.method == 'OPTIONS':
if bottle.request.method == "OPTIONS":
# CORS
return {}
@ -407,24 +386,24 @@ def ics_feed_v1(config, db):
)
for flat in flats_with_visits:
vevent = cal.add('vevent')
vevent.add('dtstart').value = flat.visit_date
vevent.add('dtend').value = (
flat.visit_date + datetime.timedelta(hours=1)
)
vevent.add('summary').value = 'Visit - {}'.format(flat.title)
vevent = cal.add("vevent")
vevent.add("dtstart").value = flat.visit_date
vevent.add("dtend").value = flat.visit_date + datetime.timedelta(hours=1)
vevent.add("summary").value = "Visit - {}".format(flat.title)
description = (
'{} (area: {}, cost: {} {})\n{}#/flat/{}\n'.format(
flat.title, flat.area, flat.cost, flat.currency,
config['website_url'], flat.id
)
description = "{} (area: {}, cost: {} {})\n{}#/flat/{}\n".format(
flat.title,
flat.area,
flat.cost,
flat.currency,
config["website_url"],
flat.id,
)
description += '\n{}\n'.format(flat.text)
description += "\n{}\n".format(flat.text)
if flat.notes:
description += '\n{}\n'.format(flat.notes)
description += "\n{}\n".format(flat.notes)
vevent.add('description').value = description
vevent.add("description").value = description
except Exception: # pylint: disable= broad-except
pass
@ -439,13 +418,11 @@ def opendata_index_v1():
GET /api/v1/opendata
"""
if bottle.request.method == 'OPTIONS':
if bottle.request.method == "OPTIONS":
# CORS
return {}
return {
"postal_codes": "/api/v1/opendata/postal_codes"
}
return {"postal_codes": "/api/v1/opendata/postal_codes"}
def opendata_postal_codes_v1(db):
@ -476,36 +453,36 @@ def opendata_postal_codes_v1(db):
:return: The postal codes data from opendata.
"""
if bottle.request.method == 'OPTIONS':
if bottle.request.method == "OPTIONS":
# CORS
return {}
try:
try:
filters, page_number, page_size, sorting = _JSONApiSpec(
bottle.request.query,
PostalCode,
default_sorting='postal_code'
bottle.request.query, PostalCode, default_sorting="postal_code"
)
except ValueError as exc:
return JSONError(400, str(exc))
db_query = db.query(PostalCode).filter_by(**filters).order_by(*sorting)
postal_codes = [
x.json_api_repr() for x in itertools.islice(
x.json_api_repr()
for x in itertools.islice(
db_query,
page_number * page_size if page_size else None,
page_number * page_size + page_size if page_size else None
page_number * page_size + page_size if page_size else None,
)
]
return {
"data": postal_codes,
"page": page_number,
"items_per_page": page_size if page_size else len(postal_codes)
"items_per_page": page_size if page_size else len(postal_codes),
}
except Exception as exc: # pylint: disable= broad-except
return JSONError(500, str(exc))
def metadata_v1(config):
"""
API v1 metadata of the application.
@ -516,25 +493,18 @@ def metadata_v1(config):
:return: The application metadata.
"""
if bottle.request.method == 'OPTIONS':
if bottle.request.method == "OPTIONS":
# CORS
return {}
try:
last_update = None
try:
ts_file = os.path.join(
config['data_directory'],
'timestamp'
)
ts_file = os.path.join(config["data_directory"], "timestamp")
last_update = os.path.getmtime(ts_file)
except OSError:
pass
return {
'data': {
'last_update': last_update
}
}
return {"data": {"last_update": last_update}}
except Exception as exc: # pylint: disable= broad-except
return JSONError(500, str(exc))

View File

@ -36,8 +36,7 @@ def run_migrations_offline():
"""
url = config.get_main_option("sqlalchemy.url")
context.configure(
url=url, target_metadata=target_metadata, literal_binds=True)
context.configure(url=url, target_metadata=target_metadata, literal_binds=True)
with context.begin_transaction():
context.run_migrations()
@ -52,18 +51,17 @@ def run_migrations_online():
"""
connectable = engine_from_config(
config.get_section(config.config_ini_section),
prefix='sqlalchemy.',
poolclass=pool.NullPool)
prefix="sqlalchemy.",
poolclass=pool.NullPool,
)
with connectable.connect() as connection:
context.configure(
connection=connection,
target_metadata=target_metadata
)
context.configure(connection=connection, target_metadata=target_metadata)
with context.begin_transaction():
context.run_migrations()
if context.is_offline_mode():
run_migrations_offline()
else:

View File

@ -10,21 +10,15 @@ import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = '8155b83242eb'
revision = "8155b83242eb"
down_revision = None
branch_labels = None
depends_on = None
def upgrade():
op.add_column(
'flats',
sa.Column('is_expired', sa.Boolean(), default=False)
)
op.add_column("flats", sa.Column("is_expired", sa.Boolean(), default=False))
def downgrade():
op.drop_column(
'flats',
'is_expired'
)
op.drop_column("flats", "is_expired")

13
wsgi.py
View File

@ -12,10 +12,9 @@ import flatisfy.config
from flatisfy.web import app as web_app
class Args():
class Args:
config = os.path.join(
os.path.dirname(os.path.realpath(__file__)),
"config/config.json"
os.path.dirname(os.path.realpath(__file__)), "config/config.json"
)
@ -24,9 +23,11 @@ LOGGER = logging.getLogger("flatisfy")
CONFIG = flatisfy.config.load_config(Args())
if CONFIG is None:
LOGGER.error("Invalid configuration. Exiting. "
"Run init-config before if this is the first time "
"you run Flatisfy.")
LOGGER.error(
"Invalid configuration. Exiting. "
"Run init-config before if this is the first time "
"you run Flatisfy."
)
sys.exit(1)