@ -18,7 +18,8 @@
import os import os
import sys import sys
sys.path.insert(0, os.path.abspath('..'))
sys.path.insert(0, os.path.abspath(".."))
# -- General configuration ------------------------------------------------ # -- General configuration ------------------------------------------------
@ -30,19 +31,19 @@ sys.path.insert(0, os.path.abspath('..'))
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones. # ones.
extensions = [ extensions = [
'sphinx.ext.autodoc', "sphinx.ext.autodoc",
'sphinx.ext.viewcode', "sphinx.ext.viewcode",
] ]
# Add any paths that contain templates here, relative to this directory. # Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates'] templates_path = ["_templates"]
# The suffix(es) of source filenames. # The suffix(es) of source filenames.
# You can specify multiple suffix as a list of string: # You can specify multiple suffix as a list of string:
# #
source_suffix = ['.rst', '.md'] source_suffix = [".rst", ".md"]
source_parsers = { source_parsers = {
'.md': 'recommonmark.parser.CommonMarkParser', ".md": "recommonmark.parser.CommonMarkParser",
} }
# The encoding of source files. # The encoding of source files.
@ -50,21 +51,21 @@ source_parsers = {
# source_encoding = 'utf-8-sig' # source_encoding = 'utf-8-sig'
# The master toctree document. # The master toctree document.
master_doc = 'index' master_doc = "index"
# General information about the project. # General information about the project.
project = u'Flatisfy' project = u"Flatisfy"
copyright = u'2017, Phyks (Lucas Verney)' copyright = u"2017, Phyks (Lucas Verney)"
author = u'Phyks (Lucas Verney)' author = u"Phyks (Lucas Verney)"
# The version info for the project you're documenting, acts as replacement for # The version info for the project you're documenting, acts as replacement for
# |version| and |release|, also used in various other places throughout the # |version| and |release|, also used in various other places throughout the
# built documents. # built documents.
# #
# The short X.Y version. # The short X.Y version.
version = u'0.1' version = u"0.1"
# The full version, including alpha/beta/rc tags. # The full version, including alpha/beta/rc tags.
release = u'0.1' release = u"0.1"
# The language for content autogenerated by Sphinx. Refer to documentation # The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages. # for a list of supported languages.
@ -85,7 +86,7 @@ language = None
# List of patterns, relative to source directory, that match files and # List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files. # directories to ignore when looking for source files.
# This patterns also effect to html_static_path and html_extra_path # This patterns also effect to html_static_path and html_extra_path
exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"]
# The reST default role (used for this markup: `text`) to use for all # The reST default role (used for this markup: `text`) to use for all
# documents. # documents.
@ -107,7 +108,7 @@ exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
# show_authors = False # show_authors = False
# The name of the Pygments (syntax highlighting) style to use. # The name of the Pygments (syntax highlighting) style to use.
pygments_style = 'sphinx' pygments_style = "sphinx"
# A list of ignored prefixes for module index sorting. # A list of ignored prefixes for module index sorting.
# modindex_common_prefix = [] # modindex_common_prefix = []
@ -124,7 +125,7 @@ todo_include_todos = False
# The theme to use for HTML and HTML Help pages. See the documentation for # The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes. # a list of builtin themes.
# #
html_theme = 'classic' html_theme = "classic"
# Theme options are theme-specific and customize the look and feel of a theme # Theme options are theme-specific and customize the look and feel of a theme
# further. For a list of options available for each theme, see the # further. For a list of options available for each theme, see the
@ -158,7 +159,7 @@ html_theme = 'classic'
# Add any paths that contain custom static files (such as style sheets) here, # Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files, # relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css". # so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ['_static'] html_static_path = ["_static"]
# Add any extra paths that contain custom files (such as robots.txt or # Add any extra paths that contain custom files (such as robots.txt or
# .htaccess) here, relative to this directory. These files are copied # .htaccess) here, relative to this directory. These files are copied
@ -238,7 +239,7 @@ html_static_path = ['_static']
# html_search_scorer = 'scorer.js' # html_search_scorer = 'scorer.js'
# Output file base name for HTML help builder. # Output file base name for HTML help builder.
htmlhelp_basename = 'Flatisfydoc' htmlhelp_basename = "Flatisfydoc"
# -- Options for LaTeX output --------------------------------------------- # -- Options for LaTeX output ---------------------------------------------
@ -246,15 +247,12 @@ latex_elements = {
# The paper size ('letterpaper' or 'a4paper'). # The paper size ('letterpaper' or 'a4paper').
# #
# 'papersize': 'letterpaper', # 'papersize': 'letterpaper',
# The font size ('10pt', '11pt' or '12pt'). # The font size ('10pt', '11pt' or '12pt').
# #
# 'pointsize': '10pt', # 'pointsize': '10pt',
# Additional stuff for the LaTeX preamble. # Additional stuff for the LaTeX preamble.
# #
# 'preamble': '', # 'preamble': '',
# Latex figure (float) alignment # Latex figure (float) alignment
# #
# 'figure_align': 'htbp', # 'figure_align': 'htbp',
@ -264,8 +262,13 @@ latex_elements = {
# (source start file, target name, title, # (source start file, target name, title,
# author, documentclass [howto, manual, or own class]). # author, documentclass [howto, manual, or own class]).
latex_documents = [ latex_documents = [
(master_doc, 'Flatisfy.tex', u'Flatisfy Documentation', (
u'Phyks (Lucas Verney)', 'manual'), master_doc,
u"Flatisfy Documentation",
u"Phyks (Lucas Verney)",
] ]
# The name of an image file (relative to this directory) to place at the top of # The name of an image file (relative to this directory) to place at the top of
@ -305,10 +308,7 @@ latex_documents = [
# One entry per manual page. List of tuples # One entry per manual page. List of tuples
# (source start file, name, description, authors, manual section). # (source start file, name, description, authors, manual section).
man_pages = [ man_pages = [(master_doc, "flatisfy", u"Flatisfy Documentation", [author], 1)]
(master_doc, 'flatisfy', u'Flatisfy Documentation',
[author], 1)
# If true, show URL addresses after external links. # If true, show URL addresses after external links.
# #
@ -321,9 +321,15 @@ man_pages = [
# (source start file, target name, title, author, # (source start file, target name, title, author,
# dir menu entry, description, category) # dir menu entry, description, category)
texinfo_documents = [ texinfo_documents = [
(master_doc, 'Flatisfy', u'Flatisfy Documentation', (
author, 'Flatisfy', 'One line description of project.', master_doc,
'Miscellaneous'), "Flatisfy",
u"Flatisfy Documentation",
"One line description of project.",
] ]
# Documents to append as an appendix to all manuals. # Documents to append as an appendix to all manuals.

@ -17,6 +17,7 @@ from flatisfy import data
from flatisfy import fetch from flatisfy import fetch
from flatisfy import tools from flatisfy import tools
from flatisfy import tests from flatisfy import tests
# pylint: enable=locally-disabled,wrong-import-position # pylint: enable=locally-disabled,wrong-import-position
@ -27,68 +28,59 @@ def parse_args(argv=None):
""" """
Create parser and parse arguments. Create parser and parse arguments.
""" """
parser = argparse.ArgumentParser(prog="Flatisfy", parser = argparse.ArgumentParser(
description="Find the perfect flat.") prog="Flatisfy", description="Find the perfect flat."
# Parent parser containing arguments common to any subcommand # Parent parser containing arguments common to any subcommand
parent_parser = argparse.ArgumentParser(add_help=False) parent_parser = argparse.ArgumentParser(add_help=False)
parent_parser.add_argument( parent_parser.add_argument(
"--data-dir", "--data-dir", help="Location of Flatisfy data directory."
help="Location of Flatisfy data directory." )
parent_parser.add_argument("--config", help="Configuration file to use.")
choices=[0, 1, 2, 3],
help="Number of passes to do on the filtered data.",
) )
parent_parser.add_argument( parent_parser.add_argument(
"--config", "--max-entries", type=int, help="Maximum number of entries to fetch."
help="Configuration file to use."
) )
parent_parser.add_argument( parent_parser.add_argument(
"--passes", choices=[0, 1, 2, 3], type=int, "-v", "--verbose", action="store_true", help="Verbose logging output."
help="Number of passes to do on the filtered data."
) )
parent_parser.add_argument("-vv", action="store_true", help="Debug logging output.")
parent_parser.add_argument( parent_parser.add_argument(
"--max-entries", type=int, "--constraints",
help="Maximum number of entries to fetch." type=str,
) help="Comma-separated list of constraints to consider.",
"-v", "--verbose", action="store_true",
help="Verbose logging output."
"-vv", action="store_true",
help="Debug logging output."
"--constraints", type=str,
help="Comma-separated list of constraints to consider."
) )
# Subcommands # Subcommands
subparsers = parser.add_subparsers( subparsers = parser.add_subparsers(dest="cmd", help="Available subcommands")
dest="cmd", help="Available subcommands"
# Build data subcommand # Build data subcommand
subparsers.add_parser( subparsers.add_parser(
"build-data", parents=[parent_parser], "build-data", parents=[parent_parser], help="Build necessary data"
help="Build necessary data"
) )
# Init config subcommand # Init config subcommand
parser_init_config = subparsers.add_parser( parser_init_config = subparsers.add_parser(
"init-config", parents=[parent_parser], "init-config", parents=[parent_parser], help="Initialize empty configuration."
help="Initialize empty configuration."
) )
parser_init_config.add_argument( parser_init_config.add_argument(
"output", nargs="?", help="Output config file. Use '-' for stdout." "output", nargs="?", help="Output config file. Use '-' for stdout."
) )
# Fetch subcommand parser # Fetch subcommand parser
subparsers.add_parser("fetch", parents=[parent_parser], subparsers.add_parser("fetch", parents=[parent_parser], help="Fetch housings posts")
help="Fetch housings posts")
# Filter subcommand parser # Filter subcommand parser
parser_filter = subparsers.add_parser( parser_filter = subparsers.add_parser(
"filter", parents=[parent_parser], "filter",
help="Filter housings posts according to constraints in config." parents=[parent_parser],
help="Filter housings posts according to constraints in config.",
) )
parser_filter.add_argument( parser_filter.add_argument(
"--input", "--input",
@ -97,34 +89,31 @@ def parse_args(argv=None):
"no additional fetching of infos is done, and the script outputs " "no additional fetching of infos is done, and the script outputs "
"a filtered JSON dump on stdout. If not provided, update status " "a filtered JSON dump on stdout. If not provided, update status "
"of the flats in the database." "of the flats in the database."
) ),
) )
# Import subcommand parser # Import subcommand parser
import_filter = subparsers.add_parser( import_filter = subparsers.add_parser(
"import", parents=[parent_parser], "import", parents=[parent_parser], help="Import housing posts in database."
help="Import housing posts in database.") )
import_filter.add_argument( import_filter.add_argument(
"--new-only", "--new-only",
action="store_true", action="store_true",
help=( help=("Download new housing posts only but do not refresh existing ones"),
"Download new housing posts only but do not refresh existing ones"
) )
# Purge subcommand parser # Purge subcommand parser
subparsers.add_parser("purge", parents=[parent_parser], subparsers.add_parser("purge", parents=[parent_parser], help="Purge database.")
help="Purge database.")
# Serve subcommand parser # Serve subcommand parser
parser_serve = subparsers.add_parser("serve", parents=[parent_parser], parser_serve = subparsers.add_parser(
help="Serve the web app.") "serve", parents=[parent_parser], help="Serve the web app."
parser_serve.add_argument("--port", type=int, help="Port to bind to.") parser_serve.add_argument("--port", type=int, help="Port to bind to.")
parser_serve.add_argument("--host", help="Host to listen on.") parser_serve.add_argument("--host", help="Host to listen on.")
# Test subcommand parser # Test subcommand parser
subparsers.add_parser("test", parents=[parent_parser], subparsers.add_parser("test", parents=[parent_parser], help="Unit testing.")
help="Unit testing.")
return parser.parse_args(argv) return parser.parse_args(argv)
@ -139,15 +128,15 @@ def main():
# Set logger # Set logger
if args.vv: if args.vv:
logging.getLogger('').setLevel(logging.DEBUG) logging.getLogger("").setLevel(logging.DEBUG)
logging.getLogger('sqlalchemy.engine').setLevel(logging.DEBUG) logging.getLogger("sqlalchemy.engine").setLevel(logging.DEBUG)
elif args.verbose: elif args.verbose:
logging.getLogger('').setLevel(logging.INFO) logging.getLogger("").setLevel(logging.INFO)
# sqlalchemy INFO level is way too loud, just stick with WARNING # sqlalchemy INFO level is way too loud, just stick with WARNING
logging.getLogger('sqlalchemy.engine').setLevel(logging.WARNING) logging.getLogger("sqlalchemy.engine").setLevel(logging.WARNING)
else: else:
logging.getLogger('').setLevel(logging.WARNING) logging.getLogger("").setLevel(logging.WARNING)
logging.getLogger('sqlalchemy.engine').setLevel(logging.WARNING) logging.getLogger("sqlalchemy.engine").setLevel(logging.WARNING)
# Init-config command # Init-config command
if args.cmd == "init-config": if args.cmd == "init-config":
@ -161,9 +150,11 @@ def main():
else: else:
config = flatisfy.config.load_config(args, check_with_data=True) config = flatisfy.config.load_config(args, check_with_data=True)
if config is None: if config is None:
LOGGER.error("Invalid configuration. Exiting. " LOGGER.error(
"Invalid configuration. Exiting. "
"Run init-config before if this is the first time " "Run init-config before if this is the first time "
"you run Flatisfy.") "you run Flatisfy."
sys.exit(1) sys.exit(1)
# Purge command # Purge command
@ -179,18 +170,16 @@ def main():
if args.cmd == "fetch": if args.cmd == "fetch":
# Fetch and filter flats list # Fetch and filter flats list
fetched_flats = fetch.fetch_flats(config) fetched_flats = fetch.fetch_flats(config)
fetched_flats = cmds.filter_fetched_flats(config, fetched_flats = cmds.filter_fetched_flats(
fetched_flats=fetched_flats, config, fetched_flats=fetched_flats, fetch_details=True
fetch_details=True) )
# Sort by cost # Sort by cost
fetched_flats = { fetched_flats = {
k: tools.sort_list_of_dicts_by(v["new"], "cost") k: tools.sort_list_of_dicts_by(v["new"], "cost")
for k, v in fetched_flats.items() for k, v in fetched_flats.items()
} }
print( print(tools.pretty_json(fetched_flats))
return return
# Filter command # Filter command
elif args.cmd == "filter": elif args.cmd == "filter":
@ -199,9 +188,7 @@ def main():
fetched_flats = fetch.load_flats_from_file(args.input, config) fetched_flats = fetch.load_flats_from_file(args.input, config)
fetched_flats = cmds.filter_fetched_flats( fetched_flats = cmds.filter_fetched_flats(
config, config, fetched_flats=fetched_flats, fetch_details=False
) )
# Sort by cost # Sort by cost
@ -211,9 +198,7 @@ def main():
} }
# Output to stdout # Output to stdout
print( print(tools.pretty_json(fetched_flats))
else: else:
cmds.import_and_filter(config, load_from_db=True) cmds.import_and_filter(config, load_from_db=True)
return return

@ -23,7 +23,9 @@ import time
LOGGER = logging.getLogger(__name__) LOGGER = logging.getLogger(__name__)
def filter_flats_list(config, constraint_name, flats_list, fetch_details=True, past_flats=None): def filter_flats_list(
config, constraint_name, flats_list, fetch_details=True, past_flats=None
""" """
Filter the available flats list. Then, filter it according to criteria. Filter the available flats list. Then, filter it according to criteria.
@ -45,13 +47,9 @@ def filter_flats_list(config, constraint_name, flats_list, fetch_details=True, p
except KeyError: except KeyError:
LOGGER.error( LOGGER.error(
"Missing constraint %s. Skipping filtering for these posts.", "Missing constraint %s. Skipping filtering for these posts.",
constraint_name constraint_name,
) )
return { return {"new": [], "duplicate": [], "ignored": []}
"new": [],
"duplicate": [],
"ignored": []
first_pass_result = collections.defaultdict(list) first_pass_result = collections.defaultdict(list)
second_pass_result = collections.defaultdict(list) second_pass_result = collections.defaultdict(list)
@ -59,9 +57,7 @@ def filter_flats_list(config, constraint_name, flats_list, fetch_details=True, p
# Do a first pass with the available infos to try to remove as much # Do a first pass with the available infos to try to remove as much
# unwanted postings as possible # unwanted postings as possible
if config["passes"] > 0: if config["passes"] > 0:
first_pass_result = flatisfy.filters.first_pass(flats_list, first_pass_result = flatisfy.filters.first_pass(flats_list, constraint, config)
else: else:
first_pass_result["new"] = flats_list first_pass_result["new"] = flats_list
@ -95,8 +91,7 @@ def filter_flats_list(config, constraint_name, flats_list, fetch_details=True, p
# Do a third pass to deduplicate better # Do a third pass to deduplicate better
if config["passes"] > 2: if config["passes"] > 2:
third_pass_result = flatisfy.filters.third_pass( third_pass_result = flatisfy.filters.third_pass(
second_pass_result["new"], second_pass_result["new"], config
) )
else: else:
third_pass_result["new"] = second_pass_result["new"] third_pass_result["new"] = second_pass_result["new"]
@ -104,15 +99,15 @@ def filter_flats_list(config, constraint_name, flats_list, fetch_details=True, p
return { return {
"new": third_pass_result["new"], "new": third_pass_result["new"],
"duplicate": ( "duplicate": (
first_pass_result["duplicate"] + first_pass_result["duplicate"]
second_pass_result["duplicate"] + + second_pass_result["duplicate"]
third_pass_result["duplicate"] + third_pass_result["duplicate"]
), ),
"ignored": ( "ignored": (
first_pass_result["ignored"] + first_pass_result["ignored"]
second_pass_result["ignored"] + + second_pass_result["ignored"]
third_pass_result["ignored"] + third_pass_result["ignored"]
) ),
} }
@ -134,7 +129,7 @@ def filter_fetched_flats(config, fetched_flats, fetch_details=True, past_flats={
constraint_name, constraint_name,
flats_list, flats_list,
fetch_details, fetch_details,
past_flats.get(constraint_name, None) past_flats.get(constraint_name, None),
) )
return fetched_flats return fetched_flats
@ -156,9 +151,12 @@ def import_and_filter(config, load_from_db=False, new_only=False):
else: else:
fetched_flats = fetch.fetch_flats(config) fetched_flats = fetch.fetch_flats(config)
# Do not fetch additional details if we loaded data from the db. # Do not fetch additional details if we loaded data from the db.
flats_by_status = filter_fetched_flats(config, fetched_flats=fetched_flats, flats_by_status = filter_fetched_flats(
fetch_details=(not load_from_db), fetch_details=(not load_from_db),
past_flats=past_flats if new_only else {}) past_flats=past_flats if new_only else {},
# Create database connection # Create database connection
get_session = database.init_db(config["database"], config["search_index"]) get_session = database.init_db(config["database"], config["search_index"])
@ -175,7 +173,7 @@ def import_and_filter(config, load_from_db=False, new_only=False):
# Set is_expired to true for all existing flats. # Set is_expired to true for all existing flats.
# This will be set back to false if we find them during importing. # This will be set back to false if we find them during importing.
for flat in session.query(flat_model.Flat).all(): for flat in session.query(flat_model.Flat).all():
flat.is_expired = True; flat.is_expired = True
for status, flats_list in flatten_flats_by_status.items(): for status, flats_list in flatten_flats_by_status.items():
# Build SQLAlchemy Flat model objects for every available flat # Build SQLAlchemy Flat model objects for every available flat
@ -195,9 +193,7 @@ def import_and_filter(config, load_from_db=False, new_only=False):
# status if the user defined it # status if the user defined it
flat_object = flats_objects[] flat_object = flats_objects[]
if each.status in flat_model.AUTOMATED_STATUSES: if each.status in flat_model.AUTOMATED_STATUSES:
flat_object.status = getattr( flat_object.status = getattr(flat_model.FlatStatus, status)
flat_model.FlatStatus, status
else: else:
flat_object.status = each.status flat_object.status = each.status
@ -223,11 +219,8 @@ def import_and_filter(config, load_from_db=False, new_only=False):"Found {len(new_flats)} new flats.")"Found {len(new_flats)} new flats.")
# Touch a file to indicate last update timestamp # Touch a file to indicate last update timestamp
ts_file = os.path.join( ts_file = os.path.join(config["data_directory"], "timestamp")
config["data_directory"], with open(ts_file, "w"):
with open(ts_file, 'w'):
os.utime(ts_file, None) os.utime(ts_file, None)"Done!")"Done!")
@ -270,5 +263,8 @@ def serve(config):
# standard logging # standard logging
server = web_app.QuietWSGIRefServer server = web_app.QuietWSGIRefServer
print("Launching web viewer running on http://%s:%s" % (config["host"], config["port"])) print(
"Launching web viewer running on http://%s:%s"
% (config["host"], config["port"])
)["host"], port=config["port"], server=server)["host"], port=config["port"], server=server)

@ -42,7 +42,7 @@ DEFAULT_CONFIG = {
"vendu", "vendu",
"Vendu", "Vendu",
"recherche" "recherche",
], ],
"time_to": {} # Dict mapping names to {"gps": [lat, lng], "time_to": {} # Dict mapping names to {"gps": [lat, lng],
# "time": (min, max), # "time": (min, max),
@ -91,7 +91,7 @@ DEFAULT_CONFIG = {
"backends": None, "backends": None,
# Should email notifications be sent? # Should email notifications be sent?
"send_email": False, "send_email": False,
"smtp_server": 'localhost', "smtp_server": "localhost",
"smtp_port": 25, "smtp_port": 25,
"smtp_username": None, "smtp_username": None,
"smtp_password": None, "smtp_password": None,
@ -115,6 +115,7 @@ def validate_config(config, check_with_data):
check the config values. check the config values.
:return: ``True`` if the configuration is valid, ``False`` otherwise. :return: ``True`` if the configuration is valid, ``False`` otherwise.
""" """
def _check_constraints_bounds(bounds): def _check_constraints_bounds(bounds):
""" """
Check the bounds for numeric constraints. Check the bounds for numeric constraints.
@ -122,12 +123,7 @@ def validate_config(config, check_with_data):
assert isinstance(bounds, list) assert isinstance(bounds, list)
assert len(bounds) == 2 assert len(bounds) == 2
assert all( assert all(
x is None or x is None or (isinstance(x, (float, int)) and x >= 0) for x in bounds
isinstance(x, (float, int)) and
x >= 0
for x in bounds
) )
if bounds[0] is not None and bounds[1] is not None: if bounds[0] is not None and bounds[1] is not None:
assert bounds[1] > bounds[0] assert bounds[1] > bounds[0]
@ -140,25 +136,45 @@ def validate_config(config, check_with_data):
# pylint: disable=locally-disabled,line-too-long # pylint: disable=locally-disabled,line-too-long
assert config["passes"] in [0, 1, 2, 3] assert config["passes"] in [0, 1, 2, 3]
assert config["max_entries"] is None or (isinstance(config["max_entries"], int) and config["max_entries"] > 0) # noqa: E501 assert config["max_entries"] is None or (
isinstance(config["max_entries"], int) and config["max_entries"] > 0
) # noqa: E501
assert config["data_directory"] is None or isinstance(config["data_directory"], str) # noqa: E501 assert config["data_directory"] is None or isinstance(
config["data_directory"], str
) # noqa: E501
assert os.path.isdir(config["data_directory"]) assert os.path.isdir(config["data_directory"])
assert isinstance(config["search_index"], str) assert isinstance(config["search_index"], str)
assert config["modules_path"] is None or isinstance(config["modules_path"], str) # noqa: E501 assert config["modules_path"] is None or isinstance(
config["modules_path"], str
) # noqa: E501
assert config["database"] is None or isinstance(config["database"], str) # noqa: E501 assert config["database"] is None or isinstance(
config["database"], str
) # noqa: E501
assert isinstance(config["port"], int) assert isinstance(config["port"], int)
assert isinstance(config["host"], str) assert isinstance(config["host"], str)
assert config["webserver"] is None or isinstance(config["webserver"], str) # noqa: E501 assert config["webserver"] is None or isinstance(
assert config["backends"] is None or isinstance(config["backends"], list) # noqa: E501 config["webserver"], str
) # noqa: E501
assert config["backends"] is None or isinstance(
config["backends"], list
) # noqa: E501
assert isinstance(config["send_email"], bool) assert isinstance(config["send_email"], bool)
assert config["smtp_server"] is None or isinstance(config["smtp_server"], str) # noqa: E501 assert config["smtp_server"] is None or isinstance(
assert config["smtp_port"] is None or isinstance(config["smtp_port"], int) # noqa: E501 config["smtp_server"], str
assert config["smtp_username"] is None or isinstance(config["smtp_username"], str) # noqa: E501 ) # noqa: E501
assert config["smtp_password"] is None or isinstance(config["smtp_password"], str) # noqa: E501 assert config["smtp_port"] is None or isinstance(
config["smtp_port"], int
) # noqa: E501
assert config["smtp_username"] is None or isinstance(
config["smtp_username"], str
) # noqa: E501
assert config["smtp_password"] is None or isinstance(
config["smtp_password"], str
) # noqa: E501
assert config["smtp_to"] is None or isinstance(config["smtp_to"], list) assert config["smtp_to"] is None or isinstance(config["smtp_to"], list)
assert isinstance(config["store_personal_data"], bool) assert isinstance(config["store_personal_data"], bool)
@ -167,10 +183,16 @@ def validate_config(config, check_with_data):
assert isinstance(config["duplicate_image_hash_threshold"], int) assert isinstance(config["duplicate_image_hash_threshold"], int)
# API keys # API keys
assert config["navitia_api_key"] is None or isinstance(config["navitia_api_key"], str) # noqa: E501 assert config["navitia_api_key"] is None or isinstance(
assert config["mapbox_api_key"] is None or isinstance(config["mapbox_api_key"], str) # noqa: E501 config["navitia_api_key"], str
) # noqa: E501
assert config["mapbox_api_key"] is None or isinstance(
config["mapbox_api_key"], str
) # noqa: E501
assert config["ignore_station"] is None or isinstance(config["ignore_station"], bool) # noqa: E501 assert config["ignore_station"] is None or isinstance(
config["ignore_station"], bool
) # noqa: E501
# Ensure constraints are ok # Ensure constraints are ok
assert config["constraints"] assert config["constraints"]
@ -191,8 +213,7 @@ def validate_config(config, check_with_data):
assert isinstance(term, str) assert isinstance(term, str)
assert "description_should_not_contain" in constraint assert "description_should_not_contain" in constraint
assert isinstance(constraint["description_should_not_contain"], assert isinstance(constraint["description_should_not_contain"], list)
if constraint["description_should_not_contain"]: if constraint["description_should_not_contain"]:
for term in constraint["description_should_not_contain"]: for term in constraint["description_should_not_contain"]:
assert isinstance(term, str) assert isinstance(term, str)
@ -269,20 +290,19 @@ def load_config(args=None, check_with_data=True):
LOGGER.error( LOGGER.error(
"Unable to load configuration from file, " "Unable to load configuration from file, "
"using default configuration: %s.", "using default configuration: %s.",
exc exc,
) )
# Overload config with arguments # Overload config with arguments
if args and getattr(args, "passes", None) is not None: if args and getattr(args, "passes", None) is not None:
LOGGER.debug( LOGGER.debug(
"Overloading number of passes from CLI arguments: %d.", "Overloading number of passes from CLI arguments: %d.", args.passes
) )
config_data["passes"] = args.passes config_data["passes"] = args.passes
if args and getattr(args, "max_entries", None) is not None: if args and getattr(args, "max_entries", None) is not None:
LOGGER.debug( LOGGER.debug(
"Overloading maximum number of entries from CLI arguments: %d.", "Overloading maximum number of entries from CLI arguments: %d.",
args.max_entries args.max_entries,
) )
config_data["max_entries"] = args.max_entries config_data["max_entries"] = args.max_entries
if args and getattr(args, "port", None) is not None: if args and getattr(args, "port", None) is not None:
@ -297,37 +317,37 @@ def load_config(args=None, check_with_data=True):
LOGGER.debug("Overloading data directory from CLI arguments.") LOGGER.debug("Overloading data directory from CLI arguments.")
config_data["data_directory"] = args.data_dir config_data["data_directory"] = args.data_dir
elif config_data["data_directory"] is None: elif config_data["data_directory"] is None:
config_data["data_directory"] = appdirs.user_data_dir( config_data["data_directory"] = appdirs.user_data_dir("flatisfy", "flatisfy")
"flatisfy", LOGGER.debug(
"flatisfy" "Using default XDG data directory: %s.", config_data["data_directory"]
) )
LOGGER.debug("Using default XDG data directory: %s.",
if not os.path.isdir(config_data["data_directory"]): if not os.path.isdir(config_data["data_directory"]):"Creating data directory according to config: %s",
config_data["data_directory"]) "Creating data directory according to config: %s",
os.makedirs(config_data["data_directory"]) os.makedirs(config_data["data_directory"])
os.makedirs(os.path.join(config_data["data_directory"], "images")) os.makedirs(os.path.join(config_data["data_directory"], "images"))
if config_data["database"] is None: if config_data["database"] is None:
config_data["database"] = "sqlite:///" + os.path.join( config_data["database"] = "sqlite:///" + os.path.join(
config_data["data_directory"], config_data["data_directory"], "flatisfy.db"
) )
if config_data["search_index"] is None: if config_data["search_index"] is None:
config_data["search_index"] = os.path.join( config_data["search_index"] = os.path.join(
config_data["data_directory"], config_data["data_directory"], "search_index"
) )
# Handle constraints filtering # Handle constraints filtering
if args and getattr(args, "constraints", None) is not None: if args and getattr(args, "constraints", None) is not None:
("Filtering constraints from config according to CLI argument. " (
"Using only the following constraints: %s."), "Filtering constraints from config according to CLI argument. "
args.constraints.replace(",", ", ") "Using only the following constraints: %s."
args.constraints.replace(",", ", "),
) )
constraints_filter = args.constraints.split(",") constraints_filter = args.constraints.split(",")
config_data["constraints"] = { config_data["constraints"] = {
@ -338,8 +358,8 @@ def load_config(args=None, check_with_data=True):
# Sanitize website url # Sanitize website url
if config_data["website_url"] is not None: if config_data["website_url"] is not None:
if config_data["website_url"][-1] != '/': if config_data["website_url"][-1] != "/":
config_data["website_url"] += '/' config_data["website_url"] += "/"
config_validation = validate_config(config_data, check_with_data) config_validation = validate_config(config_data, check_with_data)
if config_validation is True: if config_validation is True:

@ -16,7 +16,7 @@ BACKENDS_BY_PRECEDENCE = [
"pap", "pap",
"leboncoin", "leboncoin",
"explorimmo", "explorimmo",
"logicimmo" "logicimmo",
] ]

@ -24,11 +24,13 @@ except ImportError:
try: try:
from functools32 import lru_cache from functools32 import lru_cache
except ImportError: except ImportError:
def lru_cache(maxsize=None): # pylint: disable=unused-argument def lru_cache(maxsize=None): # pylint: disable=unused-argument
""" """
Identity implementation of ``lru_cache`` for fallback. Identity implementation of ``lru_cache`` for fallback.
""" """
return lambda func: func return lambda func: func
LOGGER.warning( LOGGER.warning(
"`functools.lru_cache` is not available on your system. Consider " "`functools.lru_cache` is not available on your system. Consider "
"installing `functools32` Python module if using Python2 for " "installing `functools32` Python module if using Python2 for "
@ -49,8 +51,8 @@ def preprocess_data(config, force=False):
get_session = database.init_db(config["database"], config["search_index"]) get_session = database.init_db(config["database"], config["search_index"])
with get_session() as session: with get_session() as session:
is_built = ( is_built = (
session.query(PublicTransport).count() > 0 and session.query(PublicTransport).count() > 0
session.query(PostalCode).count() > 0 and session.query(PostalCode).count() > 0
) )
if is_built and not force: if is_built and not force:
# No need to rebuild the database, skip # No need to rebuild the database, skip
@ -96,10 +98,7 @@ def load_data(model, constraint, config):
# Load data for each area # Load data for each area
areas = list(set(areas)) areas = list(set(areas))
for area in areas: for area in areas:
results.extend( results.extend(session.query(model).filter(model.area == area).all())
.filter(model.area == area).all()
# Expunge loaded data from the session to be able to use them # Expunge loaded data from the session to be able to use them
# afterwards # afterwards
session.expunge_all() session.expunge_all()

@ -24,8 +24,8 @@ MODULE_DIR = os.path.dirname(os.path.realpath(__file__))
titlecase.set_small_word_list( titlecase.set_small_word_list(
# Add French small words # Add French small words
r"l|d|un|une|et|à|a|sur|ou|le|la|de|lès|les|" + r"l|d|un|une|et|à|a|sur|ou|le|la|de|lès|les|"
titlecase.SMALL + titlecase.SMALL
) )
@ -33,7 +33,7 @@ TRANSPORT_DATA_FILES = {
"FR-NW": "stops_fr-nw.txt", "FR-NW": "stops_fr-nw.txt",
"FR-NE": "stops_fr-ne.txt", "FR-NE": "stops_fr-ne.txt",
"FR-SW": "stops_fr-sw.txt", "FR-SW": "stops_fr-sw.txt",
"FR-SE": "stops_fr-se.txt" "FR-SE": "stops_fr-se.txt",
} }
@ -51,8 +51,20 @@ def french_postal_codes_to_quarter(postal_code):
# French departements # French departements
# Taken from Wikipedia data. # Taken from Wikipedia data.
department_to_subdivision = { department_to_subdivision = {
"FR-ARA": ["01", "03", "07", "15", "26", "38", "42", "43", "63", "69", "FR-ARA": [
"73", "74"], "01",
"FR-BFC": ["21", "25", "39", "58", "70", "71", "89", "90"], "FR-BFC": ["21", "25", "39", "58", "70", "71", "89", "90"],
"FR-BRE": ["22", "29", "35", "44", "56"], "FR-BRE": ["22", "29", "35", "44", "56"],
"FR-CVL": ["18", "28", "36", "37", "41", "45"], "FR-CVL": ["18", "28", "36", "37", "41", "45"],
@ -61,19 +73,44 @@ def french_postal_codes_to_quarter(postal_code):
"FR-HDF": ["02", "59", "60", "62", "80"], "FR-HDF": ["02", "59", "60", "62", "80"],
"FR-IDF": ["75", "77", "78", "91", "92", "93", "94", "95"], "FR-IDF": ["75", "77", "78", "91", "92", "93", "94", "95"],
"FR-NOR": ["14", "27", "50", "61", "76"], "FR-NOR": ["14", "27", "50", "61", "76"],
"FR-NAQ": ["16", "17", "19", "23", "24", "33", "40", "47", "64", "79", "FR-NAQ": [
"86", "87"], "16",
"FR-OCC": ["09", "11", "12", "30", "31", "32", "34", "46", "48", "65", "17",
"66", "81", "82"], "19",
"FR-OCC": [
"FR-PDL": ["44", "49", "53", "72", "85"], "FR-PDL": ["44", "49", "53", "72", "85"],
"FR-PAC": ["04", "05", "06", "13", "83", "84"] "FR-PAC": ["04", "05", "06", "13", "83", "84"],
} }
subdivision_to_quarters = { subdivision_to_quarters = {
'FR-IDF': ['FR-IDF'], "FR-IDF": ["FR-IDF"],
'FR-NW': ['FR-BRE', 'FR-CVL', 'FR-NOR', 'FR-PDL'], "FR-NW": ["FR-BRE", "FR-CVL", "FR-NOR", "FR-PDL"],
'FR-NE': ['FR-BFC', 'FR-GES', 'FR-HDF'], "FR-NE": ["FR-BFC", "FR-GES", "FR-HDF"],
'FR-SE': ['FR-ARA', 'FR-COR', 'FR-PAC', 'FR-OCC'], "FR-SE": ["FR-ARA", "FR-COR", "FR-PAC", "FR-OCC"],
'FR-SW': ['FR-NAQ'] "FR-SW": ["FR-NAQ"],
} }
subdivision = next( subdivision = next(
@ -82,7 +119,7 @@ def french_postal_codes_to_quarter(postal_code):
for i, departments in department_to_subdivision.items() for i, departments in department_to_subdivision.items()
if departement in departments if departement in departments
), ),
None None,
) )
return next( return next(
( (
@ -90,7 +127,7 @@ def french_postal_codes_to_quarter(postal_code):
for i, subdivisions in subdivision_to_quarters.items() for i, subdivisions in subdivision_to_quarters.items()
if subdivision in subdivisions if subdivision in subdivisions
), ),
None None,
) )
@ -106,9 +143,7 @@ def _preprocess_laposte():
raw_laposte_data = [] raw_laposte_data = []
# Load opendata file # Load opendata file
try: try:
with with, data_file), "r", encoding="utf-8") as fh:
os.path.join(MODULE_DIR, data_file), "r", encoding='utf-8'
) as fh:
raw_laposte_data = json.load(fh) raw_laposte_data = json.load(fh)
except (IOError, ValueError): except (IOError, ValueError):
LOGGER.error("Invalid raw LaPoste opendata file.") LOGGER.error("Invalid raw LaPoste opendata file.")
@ -126,29 +161,31 @@ def _preprocess_laposte():
if area is None: if area is None:
"No matching area found for postal code %s, skipping it.", "No matching area found for postal code %s, skipping it.",
fields["code_postal"] fields["code_postal"],
) )
continue continue
name = normalize_string( name = normalize_string(
titlecase.titlecase(fields["nom_de_la_commune"]), titlecase.titlecase(fields["nom_de_la_commune"]), lowercase=False
) )
if (fields["code_postal"], name) in seen_postal_codes: if (fields["code_postal"], name) in seen_postal_codes:
continue continue
seen_postal_codes.append((fields["code_postal"], name)) seen_postal_codes.append((fields["code_postal"], name))
postal_codes_data.append(PostalCode( postal_codes_data.append(
area=area, area=area,
postal_code=fields["code_postal"], postal_code=fields["code_postal"],
name=name, name=name,
lat=fields["coordonnees_gps"][0], lat=fields["coordonnees_gps"][0],
lng=fields["coordonnees_gps"][1] lng=fields["coordonnees_gps"][1],
)) )
except KeyError: except KeyError:"Missing data for postal code %s, skipping it.",
fields["code_postal"]) "Missing data for postal code %s, skipping it.", fields["code_postal"]
return postal_codes_data return postal_codes_data
@ -164,17 +201,15 @@ def _preprocess_public_transport():
for area, data_file in TRANSPORT_DATA_FILES.items(): for area, data_file in TRANSPORT_DATA_FILES.items():"Building from public transport data %s.", data_file)"Building from public transport data %s.", data_file)
try: try:
with, data_file), "r", with
encoding='utf-8') as fh: os.path.join(MODULE_DIR, data_file), "r", encoding="utf-8"
) as fh:
filereader = csv.reader(fh) filereader = csv.reader(fh)
next(filereader, None) # Skip first row (headers) next(filereader, None) # Skip first row (headers)
for row in filereader: for row in filereader:
public_transport_data.append(PublicTransport( public_transport_data.append(
name=row[2], PublicTransport(name=row[2], area=area, lat=row[3], lng=row[4])
area=area, )
except (IOError, IndexError): except (IOError, IndexError):
LOGGER.error("Invalid raw opendata file: %s.", data_file) LOGGER.error("Invalid raw opendata file: %s.", data_file)
return [] return []
@ -183,7 +218,4 @@ def _preprocess_public_transport():
# List of all the available preprocessing functions. Order can be important. # List of all the available preprocessing functions. Order can be important.
PREPROCESSING_FUNCTIONS = [ PREPROCESSING_FUNCTIONS = [_preprocess_laposte, _preprocess_public_transport]

@ -47,9 +47,7 @@ def init_db(database_uri=None, search_db_uri=None):
Session = sessionmaker(bind=engine) # pylint: disable=locally-disabled,invalid-name Session = sessionmaker(bind=engine) # pylint: disable=locally-disabled,invalid-name
if search_db_uri: if search_db_uri:
index_service = IndexService( index_service = IndexService(whoosh_base=search_db_uri)
index_service.register_class(flatisfy.models.flat.Flat) index_service.register_class(flatisfy.models.flat.Flat)
@contextmanager @contextmanager

@ -50,4 +50,4 @@ class StringyJSON(types.TypeDecorator):
# TypeEngine.with_variant says "use StringyJSON instead when # TypeEngine.with_variant says "use StringyJSON instead when
# connecting to 'sqlite'" # connecting to 'sqlite'"
# pylint: disable=locally-disabled,invalid-name # pylint: disable=locally-disabled,invalid-name
MagicJSON = types.JSON().with_variant(StringyJSON, 'sqlite') MagicJSON = types.JSON().with_variant(StringyJSON, "sqlite")

@ -30,7 +30,6 @@ from whoosh.qparser import MultifieldParser
class IndexService(object): class IndexService(object):
def __init__(self, config=None, whoosh_base=None): def __init__(self, config=None, whoosh_base=None):
if not whoosh_base and config: if not whoosh_base and config:
whoosh_base = config.get("WHOOSH_BASE") whoosh_base = config.get("WHOOSH_BASE")
@ -84,8 +83,7 @@ class IndexService(object):
primary = primary =
continue continue
if in model_class.__searchable__: if in model_class.__searchable__:
schema[] = whoosh.fields.TEXT( schema[] = whoosh.fields.TEXT(analyzer=StemmingAnalyzer())
return Schema(**schema), primary return Schema(**schema), primary
def before_commit(self, session): def before_commit(self, session):
@ -93,21 +91,24 @@ class IndexService(object):
for model in for model in
model_class = model.__class__ model_class = model.__class__
if hasattr(model_class, '__searchable__'): if hasattr(model_class, "__searchable__"):
self.to_update.setdefault(model_class.__name__, []).append( self.to_update.setdefault(model_class.__name__, []).append(
("new", model)) ("new", model)
for model in session.deleted: for model in session.deleted:
model_class = model.__class__ model_class = model.__class__
if hasattr(model_class, '__searchable__'): if hasattr(model_class, "__searchable__"):
self.to_update.setdefault(model_class.__name__, []).append( self.to_update.setdefault(model_class.__name__, []).append(
("deleted", model)) ("deleted", model)
for model in session.dirty: for model in session.dirty:
model_class = model.__class__ model_class = model.__class__
if hasattr(model_class, '__searchable__'): if hasattr(model_class, "__searchable__"):
self.to_update.setdefault(model_class.__name__, []).append( self.to_update.setdefault(model_class.__name__, []).append(
("changed", model)) ("changed", model)
def after_commit(self, session): def after_commit(self, session):
""" """
@ -129,11 +130,11 @@ class IndexService(object):
# update. # update.
writer.delete_by_term( writer.delete_by_term(
primary_field, text_type(getattr(model, primary_field))) primary_field, text_type(getattr(model, primary_field))
if change_type in ("new", "changed"): if change_type in ("new", "changed"):
attrs = dict((key, getattr(model, key)) attrs = dict((key, getattr(model, key)) for key in searchable)
for key in searchable)
attrs = { attrs = {
attr: text_type(getattr(model, attr)) attr: text_type(getattr(model, attr))
for attr in attrs.keys() for attr in attrs.keys()
@ -158,8 +159,7 @@ class Searcher(object):
self.parser = MultifieldParser(list(fields), index.schema) self.parser = MultifieldParser(list(fields), index.schema)
def __call__(self, session, query, limit=None): def __call__(self, session, query, limit=None):
results = self.index.searcher().search( results = self.index.searcher().search(self.parser.parse(query), limit=limit)
self.parser.parse(query), limit=limit)
keys = [x[self.primary] for x in results] keys = [x[self.primary] for x in results]
primary_column = getattr(self.model_class, self.primary) primary_column = getattr(self.model_class, self.primary)

@ -16,7 +16,9 @@ from email.utils import formatdate, make_msgid
LOGGER = logging.getLogger(__name__) LOGGER = logging.getLogger(__name__)
def send_email(server, port, subject, _from, _to, txt, html, username=None, password=None): def send_email(
server, port, subject, _from, _to, txt, html, username=None, password=None
""" """
Send an email Send an email
@ -36,15 +38,15 @@ def send_email(server, port, subject, _from, _to, txt, html, username=None, pass
if username or password: if username or password:
server.login(username or "", password or "") server.login(username or "", password or "")
msg = MIMEMultipart('alternative') msg = MIMEMultipart("alternative")
msg['Subject'] = subject msg["Subject"] = subject
msg['From'] = _from msg["From"] = _from
msg['To'] = ', '.join(_to) msg["To"] = ", ".join(_to)
msg['Date'] = formatdate() msg["Date"] = formatdate()
msg['Message-ID'] = make_msgid() msg["Message-ID"] = make_msgid()
msg.attach(MIMEText(txt, 'plain', 'utf-8')) msg.attach(MIMEText(txt, "plain", "utf-8"))
msg.attach(MIMEText(html, 'html', 'utf-8')) msg.attach(MIMEText(html, "html", "utf-8"))
server.sendmail(_from, _to, msg.as_string()) server.sendmail(_from, _to, msg.as_string())
server.quit() server.quit()
@ -61,7 +63,7 @@ def send_notification(config, flats):
if not flats: if not flats:
return return
txt = u'Hello dear user,\n\nThe following new flats have been found:\n\n' txt = "Hello dear user,\n\nThe following new flats have been found:\n\n"
html = """ html = """
<html> <html>
<head></head> <head></head>
@ -81,32 +83,31 @@ def send_notification(config, flats):
cost = str(flat.cost) cost = str(flat.cost)
currency = str(flat.currency) currency = str(flat.currency)
txt += ( txt += "- {}: {}#/flat/{} (area: {}, cost: {} {})\n".format(
'- {}: {}#/flat/{} (area: {}, cost: {} {})\n'.format(
title, website_url, flat_id, area, cost, currency title, website_url, flat_id, area, cost, currency
) )
html += """ html += """
<li> <li>
<a href="{}#/flat/{}">{}</a> <a href="{}#/flat/{}">{}</a>
(area: {}, cost: {} {}) (area: {}, cost: {} {})
</li> </li>
""".format(website_url, flat_id, title, area, cost, currency) """.format(
website_url, flat_id, title, area, cost, currency
html += "</ul>" html += "</ul>"
signature = ( signature = "\nHope you'll find what you were looking for.\n\nBye!\nFlatisfy"
u"\nHope you'll find what you were looking for.\n\nBye!\nFlatisfy"
txt += signature txt += signature
html += signature.replace('\n', '<br>') html += signature.replace("\n", "<br>")
html += """</p> html += """</p>
</body> </body>
</html>""" </html>"""
send_email(config["smtp_server"], send_email(
config["smtp_port"], config["smtp_port"],
"New flats found!", "New flats found!",
config["smtp_from"], config["smtp_from"],
@ -114,4 +115,5 @@ def send_notification(config, flats):
txt, txt,
html, html,
config.get("smtp_username"), config.get("smtp_username"),
config.get("smtp_password")) config.get("smtp_password"),

@ -10,4 +10,5 @@ class DataBuildError(Exception):
""" """
Error occurring on building a data file. Error occurring on building a data file.
""" """
pass pass

@ -24,8 +24,9 @@ try:
from weboob.core.ouiboube import WebNip from weboob.core.ouiboube import WebNip
from import WeboobEncoder from import WeboobEncoder
except ImportError: except ImportError:
LOGGER.error("Weboob is not available on your system. Make sure you " LOGGER.error(
"installed it.") "Weboob is not available on your system. Make sure you " "installed it."
raise raise
@ -34,6 +35,7 @@ class WebOOBProxy(object):
Wrapper around WebOOB ``WebNip`` class, to fetch housing posts without Wrapper around WebOOB ``WebNip`` class, to fetch housing posts without
having to spawn a subprocess. having to spawn a subprocess.
""" """
@staticmethod @staticmethod
def version(): def version():
""" """
@ -78,12 +80,7 @@ class WebOOBProxy(object):
# Create backends # Create backends
self.backends = [ self.backends = [
self.webnip.load_backend( self.webnip.load_backend(module, module, params={}) for module in backends
for module in backends
] ]
def __enter__(self): def __enter__(self):
@ -114,19 +111,15 @@ class WebOOBProxy(object):
except CallErrors as exc: except CallErrors as exc:
# If an error occured, just log it # If an error occured, just log it
LOGGER.error( LOGGER.error(
( ("An error occured while building query for " "postal code %s: %s"),
"An error occured while building query for "
"postal code %s: %s"
postal_code, postal_code,
str(exc) str(exc),
) )
if not matching_cities: if not matching_cities:
# If postal code gave no match, warn the user # If postal code gave no match, warn the user
LOGGER.warn( LOGGER.warn(
"Postal code %s could not be matched with a city.", "Postal code %s could not be matched with a city.", postal_code
) )
# Remove "TOUTES COMMUNES" entry which are duplicates of the individual # Remove "TOUTES COMMUNES" entry which are duplicates of the individual
@ -134,8 +127,9 @@ class WebOOBProxy(object):
matching_cities = [ matching_cities = [
city city
for city in matching_cities for city in matching_cities
if not (city.backend == 'logicimmo' and if not ('TOUTES COMMUNES')) city.backend == "logicimmo" and"TOUTES COMMUNES")
] ]
# Then, build queries by grouping cities by at most 3 # Then, build queries by grouping cities by at most 3
@ -145,10 +139,7 @@ class WebOOBProxy(object):
try: try:
query.house_types = [ query.house_types = [
getattr( getattr(HOUSE_TYPES, house_type.upper())
for house_type in constraints_dict["house_types"] for house_type in constraints_dict["house_types"]
] ]
except AttributeError: except AttributeError:
@ -156,10 +147,7 @@ class WebOOBProxy(object):
return None return None
try: try:
query.type = getattr( query.type = getattr(POSTS_TYPES, constraints_dict["type"].upper())
except AttributeError: except AttributeError:
LOGGER.error("Invalid post type constraint.") LOGGER.error("Invalid post type constraint.")
return None return None
@ -191,15 +179,14 @@ class WebOOBProxy(object):
try: try:
for housing in itertools.islice( for housing in itertools.islice(
'search_housings', "search_housings",
query, query,
# Only run the call on the required backends. # Only run the call on the required backends.
# Otherwise, WebOOB is doing weird stuff and returning # Otherwise, WebOOB is doing weird stuff and returning
# nonsense. # nonsense.
backends=[x for x in self.backends backends=[x for x in self.backends if in useful_backends],
if in useful_backends]
), ),
max_entries max_entries,
): ):
if not store_personal_data: if not store_personal_data: = None = None
@ -207,8 +194,7 @@ class WebOOBProxy(object):
except CallErrors as exc: except CallErrors as exc:
# If an error occured, just log it # If an error occured, just log it
LOGGER.error( LOGGER.error(
"An error occured while fetching the housing posts: %s", "An error occured while fetching the housing posts: %s", str(exc)
) )
return housings return housings
@ -225,9 +211,7 @@ class WebOOBProxy(object):
flat_id, backend_name = full_flat_id.rsplit("@", 1) flat_id, backend_name = full_flat_id.rsplit("@", 1)
try: try:
backend = next( backend = next(
backend backend for backend in self.backends if == backend_name
for backend in self.backends
if == backend_name
) )
except StopIteration: except StopIteration:
LOGGER.error("Backend %s is not available.", backend_name) LOGGER.error("Backend %s is not available.", backend_name)
@ -240,7 +224,7 @@ class WebOOBProxy(object): = None = None
else: else:
# Ensure phone is fetched # Ensure phone is fetched
backend.fillobj(housing, 'phone') backend.fillobj(housing, "phone")
# Otherwise, we miss the @backend afterwards # Otherwise, we miss the @backend afterwards = full_flat_id = full_flat_id
@ -248,9 +232,7 @@ class WebOOBProxy(object):
except Exception as exc: # pylint: disable=broad-except except Exception as exc: # pylint: disable=broad-except
# If an error occured, just log it # If an error occured, just log it
LOGGER.error( LOGGER.error(
"An error occured while fetching housing %s: %s", "An error occured while fetching housing %s: %s", full_flat_id, str(exc)
) )
return "{}" return "{}"
@ -272,15 +254,17 @@ def fetch_flats(config):
housing_posts = [] housing_posts = []
for query in queries: for query in queries:
housing_posts.extend( housing_posts.extend(
webOOB_proxy.query(query, config["max_entries"], webOOB_proxy.query(
config["store_personal_data"]) query, config["max_entries"], config["store_personal_data"]
) )
housing_posts = housing_posts[:config["max_entries"]] )
housing_posts = housing_posts[: config["max_entries"]]"Fetched %d flats.", len(housing_posts))"Fetched %d flats.", len(housing_posts))
constraint_flats_list = [json.loads(flat) for flat in housing_posts] constraint_flats_list = [json.loads(flat) for flat in housing_posts]
constraint_flats_list = [WebOOBProxy.restore_decimal_fields(flat) constraint_flats_list = [
for flat in constraint_flats_list] WebOOBProxy.restore_decimal_fields(flat) for flat in constraint_flats_list
fetched_flats[constraint_name] = constraint_flats_list fetched_flats[constraint_name] = constraint_flats_list
return fetched_flats return fetched_flats
@ -295,8 +279,7 @@ def fetch_details(config, flat_id):
""" """
with WebOOBProxy(config) as webOOB_proxy: with WebOOBProxy(config) as webOOB_proxy:"Loading additional details for flat %s.", flat_id)"Loading additional details for flat %s.", flat_id)
webOOB_output =, webOOB_output =, config["store_personal_data"])
flat_details = json.loads(webOOB_output) flat_details = json.loads(webOOB_output)
flat_details = WebOOBProxy.restore_decimal_fields(flat_details) flat_details = WebOOBProxy.restore_decimal_fields(flat_details)
@ -327,10 +310,7 @@ def load_flats_from_file(json_file, config):"Found %d flats.", len(flats_list))"Found %d flats.", len(flats_list))
except (IOError, ValueError): except (IOError, ValueError):
LOGGER.error("File %s is not a valid dump file.", json_file) LOGGER.error("File %s is not a valid dump file.", json_file)
return { return {constraint_name: flats_list for constraint_name in config["constraints"]}
constraint_name: flats_list
for constraint_name in config["constraints"]
def load_flats_from_db(config): def load_flats_from_db(config):

@ -36,10 +36,7 @@ def refine_with_housing_criteria(flats_list, constraint):
for i, flat in enumerate(flats_list): for i, flat in enumerate(flats_list):
# Check postal code # Check postal code
postal_code = flat["flatisfy"].get("postal_code", None) postal_code = flat["flatisfy"].get("postal_code", None)
if ( if postal_code and postal_code not in constraint["postal_codes"]:
postal_code and
postal_code not in constraint["postal_codes"]
):"Postal code for flat %s is out of range.", flat["id"])"Postal code for flat %s is out of range.", flat["id"])
is_ok[i] = is_ok[i] and False is_ok[i] = is_ok[i] and False
@ -47,37 +44,32 @@ def refine_with_housing_criteria(flats_list, constraint):
for place_name, time in flat["flatisfy"].get("time_to", {}).items(): for place_name, time in flat["flatisfy"].get("time_to", {}).items():
time = time["time"] time = time["time"]
is_within_interval = tools.is_within_interval( is_within_interval = tools.is_within_interval(
time, time, *(constraint["time_to"][place_name]["time"])
) )
if not is_within_interval: if not is_within_interval:"Flat %s is too far from place %s: %ds.",
flat["id"], place_name, time) "Flat %s is too far from place %s: %ds.",
is_ok[i] = is_ok[i] and is_within_interval is_ok[i] = is_ok[i] and is_within_interval
# Check other fields # Check other fields
for field in ["area", "cost", "rooms", "bedrooms"]: for field in ["area", "cost", "rooms", "bedrooms"]:
interval = constraint[field] interval = constraint[field]
is_within_interval = tools.is_within_interval( is_within_interval = tools.is_within_interval(
flat.get(field, None), flat.get(field, None), *interval
) )
if not is_within_interval: if not is_within_interval:"%s for flat %s is out of range.",
field.capitalize(), flat["id"]) "%s for flat %s is out of range.", field.capitalize(), flat["id"]
is_ok[i] = is_ok[i] and is_within_interval is_ok[i] = is_ok[i] and is_within_interval
return ( return (
[ [flat for i, flat in enumerate(flats_list) if is_ok[i]],
flat [flat for i, flat in enumerate(flats_list) if not is_ok[i]],
for i, flat in enumerate(flats_list)
if is_ok[i]
for i, flat in enumerate(flats_list)
if not is_ok[i]
) )
@ -104,47 +96,37 @@ def refine_with_details_criteria(flats_list, constraint):
for i, flat in enumerate(flats_list): for i, flat in enumerate(flats_list):
# Check number of pictures # Check number of pictures
has_enough_photos = tools.is_within_interval( has_enough_photos = tools.is_within_interval(
len(flat.get('photos', [])), len(flat.get("photos", [])), constraint["minimum_nb_photos"], None
) )
if not has_enough_photos: if not has_enough_photos:
"Flat %s only has %d photos, it should have at least %d.", "Flat %s only has %d photos, it should have at least %d.",
flat["id"], flat["id"],
len(flat['photos']), len(flat["photos"]),
constraint['minimum_nb_photos'] constraint["minimum_nb_photos"],
) )
is_ok[i] = False is_ok[i] = False
for term in constraint["description_should_contain"]: for term in constraint["description_should_contain"]:
if term.lower() not in flat['text'].lower(): if term.lower() not in flat["text"].lower():
("Description for flat %s does not contain required term '%s'."), ("Description for flat %s does not contain required term '%s'."),
flat["id"], flat["id"],
term term,
) )
is_ok[i] = False is_ok[i] = False
for term in constraint["description_should_not_contain"]: for term in constraint["description_should_not_contain"]:
if term.lower() in flat['text'].lower(): if term.lower() in flat["text"].lower():
("Description for flat %s contains blacklisted term '%s'."), ("Description for flat %s contains blacklisted term '%s'."),
flat["id"], flat["id"],
term term,
) )
is_ok[i] = False is_ok[i] = False
return ( return (
[ [flat for i, flat in enumerate(flats_list) if is_ok[i]],
flat [flat for i, flat in enumerate(flats_list) if not is_ok[i]],
for i, flat in enumerate(flats_list)
if is_ok[i]
for i, flat in enumerate(flats_list)
if not is_ok[i]
) )
@ -185,14 +167,10 @@ def first_pass(flats_list, constraint, config):
flats_list = metadata.guess_stations(flats_list, constraint, config) flats_list = metadata.guess_stations(flats_list, constraint, config)
# Remove returned housing posts that do not match criteria # Remove returned housing posts that do not match criteria
flats_list, ignored_list = refine_with_housing_criteria(flats_list, flats_list, ignored_list = refine_with_housing_criteria(flats_list, constraint)
return {"new": flats_list, "ignored": ignored_list, "duplicate": duplicates_by_urls}
return {
"new": flats_list,
"ignored": ignored_list,
"duplicate": duplicates_by_urls
@tools.timeit @tools.timeit
def second_pass(flats_list, constraint, config): def second_pass(flats_list, constraint, config):
@ -226,22 +204,17 @@ def second_pass(flats_list, constraint, config):
flats_list = metadata.compute_travel_times(flats_list, constraint, config) flats_list = metadata.compute_travel_times(flats_list, constraint, config)
# Remove returned housing posts that do not match criteria # Remove returned housing posts that do not match criteria
flats_list, ignored_list = refine_with_housing_criteria(flats_list, flats_list, ignored_list = refine_with_housing_criteria(flats_list, constraint)
# Remove returned housing posts which do not match criteria relying on # Remove returned housing posts which do not match criteria relying on
# fetched details. # fetched details.
flats_list, ignored_list = refine_with_details_criteria(flats_list, flats_list, ignored_list = refine_with_details_criteria(flats_list, constraint)
if config["serve_images_locally"]: if config["serve_images_locally"]:
images.download_images(flats_list, config) images.download_images(flats_list, config)
return { return {"new": flats_list, "ignored": ignored_list, "duplicate": []}
"new": flats_list,
"ignored": ignored_list,
"duplicate": []
@tools.timeit @tools.timeit
def third_pass(flats_list, config): def third_pass(flats_list, config):
@ -260,8 +233,4 @@ def third_pass(flats_list, config):
# Deduplicate the list using every available data # Deduplicate the list using every available data
flats_list, duplicate_flats = duplicates.deep_detect(flats_list, config) flats_list, duplicate_flats = duplicates.deep_detect(flats_list, config)
return { return {"new": flats_list, "ignored": [], "duplicate": duplicate_flats}
"new": flats_list,
"ignored": [],
"duplicate": duplicate_flats

View File

@ -16,10 +16,12 @@ import PIL.Image
LOGGER = logging.getLogger(__name__) LOGGER = logging.getLogger(__name__)
class MemoryCache(object): class MemoryCache(object):
""" """
A cache in memory. A cache in memory.
""" """
@staticmethod @staticmethod
def on_miss(key): def on_miss(key):
""" """
@ -87,6 +89,7 @@ class ImageCache(MemoryCache):
""" """
A cache for images, stored in memory. A cache for images, stored in memory.
""" """
@staticmethod @staticmethod
def compute_filename(url): def compute_filename(url):
""" """
@ -113,10 +116,7 @@ class ImageCache(MemoryCache):
filepath = None filepath = None
# Try to load from local folder # Try to load from local folder
if self.storage_dir: if self.storage_dir:
filepath = os.path.join( filepath = os.path.join(self.storage_dir, self.compute_filename(url))
if os.path.isfile(filepath): if os.path.isfile(filepath):
return return
# Otherwise, fetch it # Otherwise, fetch it

@ -35,14 +35,14 @@ def homogeneize_phone_number(numbers):
clean_numbers = [] clean_numbers = []
for number in numbers.split(','): for number in numbers.split(","):
number = number.strip() number = number.strip()
number = number.replace(".", "") number = number.replace(".", "")
number = number.replace(" ", "") number = number.replace(" ", "")
number = number.replace("-", "") number = number.replace("-", "")
number = number.replace("(", "") number = number.replace("(", "")
number = number.replace(")", "") number = number.replace(")", "")
number = re.sub(r'^\+\d\d', "", number) number = re.sub(r"^\+\d\d", "", number)
if not number.startswith("0"): if not number.startswith("0"):
number = "0" + number number = "0" + number
@ -94,12 +94,7 @@ def compare_photos(photo1, photo2, photo_cache, hash_threshold):
return False return False
def find_number_common_photos( def find_number_common_photos(flat1_photos, flat2_photos, photo_cache, hash_threshold):
""" """
Compute the number of common photos between the two lists of photos for the Compute the number of common photos between the two lists of photos for the
flats. flats.
@ -174,22 +169,23 @@ def detect(flats_list, key="id", merge=True, should_intersect=False):
# Sort matching flats by backend precedence # Sort matching flats by backend precedence
matching_flats.sort( matching_flats.sort(
key=lambda flat: next( key=lambda flat: next(
i for (i, backend) in enumerate(BACKENDS_BY_PRECEDENCE) i
for (i, backend) in enumerate(BACKENDS_BY_PRECEDENCE)
if flat["id"].endswith(backend) if flat["id"].endswith(backend)
), ),
reverse=True reverse=True,
) )
if len(matching_flats) > 1: if len(matching_flats) > 1:"Found duplicates using key \"%s\": %s.",
'Found duplicates using key "%s": %s.',
key, key,
[flat["id"] for flat in matching_flats]) [flat["id"] for flat in matching_flats],
# Otherwise, check the policy # Otherwise, check the policy
if merge: if merge:
# If a merge is requested, do the merge # If a merge is requested, do the merge
unique_flats_list.append( unique_flats_list.append(tools.merge_dicts(*matching_flats))
else: else:
# Otherwise, just keep the most important of them # Otherwise, just keep the most important of them
unique_flats_list.append(matching_flats[-1]) unique_flats_list.append(matching_flats[-1])
@ -203,8 +199,9 @@ def detect(flats_list, key="id", merge=True, should_intersect=False):
if should_intersect: if should_intersect:
# We added some flats twice with the above method, let's deduplicate on # We added some flats twice with the above method, let's deduplicate on
# id. # id.
unique_flats_list, _ = detect(unique_flats_list, key="id", merge=True, unique_flats_list, _ = detect(
should_intersect=False) unique_flats_list, key="id", merge=True, should_intersect=False
return unique_flats_list, duplicate_flats return unique_flats_list, duplicate_flats
@ -250,14 +247,12 @@ def get_duplicate_score(flat1, flat2, photo_cache, hash_threshold):
# They should have the same postal code, if available # They should have the same postal code, if available
if ( if (
"flatisfy" in flat1 and "flatisfy" in flat2 and "flatisfy" in flat1
flat1["flatisfy"].get("postal_code", None) and and "flatisfy" in flat2
flat2["flatisfy"].get("postal_code", None) and flat1["flatisfy"].get("postal_code", None)
and flat2["flatisfy"].get("postal_code", None)
): ):
assert ( assert flat1["flatisfy"]["postal_code"] == flat2["flatisfy"]["postal_code"]
flat1["flatisfy"]["postal_code"] ==
n_common_items += 1 n_common_items += 1
# TODO: Better text comparison (one included in the other, fuzzymatch) # TODO: Better text comparison (one included in the other, fuzzymatch)
@ -282,25 +277,17 @@ def get_duplicate_score(flat1, flat2, photo_cache, hash_threshold):
both_are_from_same_backend = ( both_are_from_same_backend = (
flat1["id"].split("@")[-1] == flat2["id"].split("@")[-1] flat1["id"].split("@")[-1] == flat2["id"].split("@")[-1]
) )
both_have_float_part = ( both_have_float_part = (flat1["area"] % 1) > 0 and (flat2["area"] % 1) > 0
(flat1["area"] % 1) > 0 and (flat2["area"] % 1) > 0 both_have_equal_float_part = (flat1["area"] % 1) == (flat2["area"] % 1)
both_have_equal_float_part = (
(flat1["area"] % 1) == (flat2["area"] % 1)
if both_have_float_part and both_are_from_same_backend: if both_have_float_part and both_are_from_same_backend:
assert both_have_equal_float_part assert both_have_equal_float_part
if flat1.get("photos", []) and flat2.get("photos", []): if flat1.get("photos", []) and flat2.get("photos", []):
n_common_photos = find_number_common_photos( n_common_photos = find_number_common_photos(
flat1["photos"], flat1["photos"], flat2["photos"], photo_cache, hash_threshold
) )
min_number_photos = min(len(flat1["photos"]), min_number_photos = min(len(flat1["photos"]), len(flat2["photos"]))
# Either all the photos are the same, or there are at least # Either all the photos are the same, or there are at least
# three common photos. # three common photos.
@ -332,9 +319,7 @@ def deep_detect(flats_list, config):
storage_dir = os.path.join(config["data_directory"], "images") storage_dir = os.path.join(config["data_directory"], "images")
else: else:
storage_dir = None storage_dir = None
photo_cache = ImageCache( photo_cache = ImageCache(storage_dir=storage_dir)
)"Running deep duplicates detection.")"Running deep duplicates detection.")
matching_flats = collections.defaultdict(list) matching_flats = collections.defaultdict(list)
@ -348,29 +333,30 @@ def deep_detect(flats_list, config):
continue continue
n_common_items = get_duplicate_score( n_common_items = get_duplicate_score(
flat1, flat1, flat2, photo_cache, config["duplicate_image_hash_threshold"]
) )
# Minimal score to consider they are duplicates # Minimal score to consider they are duplicates
if n_common_items >= config["duplicate_threshold"]: if n_common_items >= config["duplicate_threshold"]:
# Mark flats as duplicates # Mark flats as duplicates
("Found duplicates using deep detection: (%s, %s). " (
"Score is %d."), "Found duplicates using deep detection: (%s, %s). "
"Score is %d."
flat1["id"], flat1["id"],
flat2["id"], flat2["id"],
n_common_items n_common_items,
) )
matching_flats[flat1["id"]].append(flat2["id"]) matching_flats[flat1["id"]].append(flat2["id"])
matching_flats[flat2["id"]].append(flat1["id"]) matching_flats[flat2["id"]].append(flat1["id"])
if if
LOGGER.debug("Photo cache: hits: %d%% / misses: %d%%.", LOGGER.debug(
"Photo cache: hits: %d%% / misses: %d%%.",
photo_cache.hit_rate(), photo_cache.hit_rate(),
photo_cache.miss_rate()) photo_cache.miss_rate(),
seen_ids = [] seen_ids = []
duplicate_flats = [] duplicate_flats = []
@ -381,16 +367,13 @@ def deep_detect(flats_list, config):
seen_ids.extend(matching_flats[flat_id]) seen_ids.extend(matching_flats[flat_id])
to_merge = sorted( to_merge = sorted(
[ [flat for flat in flats_list if flat["id"] in matching_flats[flat_id]],
for flat in flats_list
if flat["id"] in matching_flats[flat_id]
key=lambda flat: next( key=lambda flat: next(
i for (i, backend) in enumerate(BACKENDS_BY_PRECEDENCE) i
for (i, backend) in enumerate(BACKENDS_BY_PRECEDENCE)
if flat["id"].endswith(backend) if flat["id"].endswith(backend)
), ),
reverse=True reverse=True,
) )
unique_flats_list.append(tools.merge_dicts(*to_merge)) unique_flats_list.append(tools.merge_dicts(*to_merge))
# The ID of the added merged flat will be the one of the last item # The ID of the added merged flat will be the one of the last item

@ -29,7 +29,9 @@ def download_images(flats_list, config):
for i, flat in enumerate(flats_list): for i, flat in enumerate(flats_list):
"Downloading photos for flat %d/%d: %s.", "Downloading photos for flat %d/%d: %s.",
i + 1, flats_list_length, flat["id"] i + 1,
) )
for photo in flat["photos"]: for photo in flat["photos"]:
# Download photo # Download photo

@ -103,7 +103,7 @@ def fuzzy_match(query, choices, limit=3, threshold=75):
if choice in normalized_query if choice in normalized_query
], ],
key=lambda x: x[1], key=lambda x: x[1],
reverse=True reverse=True,
) )
if limit: if limit:
matches = matches[:limit] matches = matches[:limit]
@ -111,10 +111,7 @@ def fuzzy_match(query, choices, limit=3, threshold=75):
# Update confidence # Update confidence
if matches: if matches:
max_confidence = max(match[1] for match in matches) max_confidence = max(match[1] for match in matches)
matches = [ matches = [(x[0], int(x[1] / max_confidence * 100)) for x in matches]
(x[0], int(x[1] / max_confidence * 100))
for x in matches
# Convert back matches to original strings # Convert back matches to original strings
# Also filter out matches below threshold # Also filter out matches below threshold
@ -126,32 +123,27 @@ def fuzzy_match(query, choices, limit=3, threshold=75):
return matches return matches
def guess_location_position(location, cities, constraint): def guess_location_position(location, cities, constraint):
# try to find a city # try to find a city
# Find all fuzzy-matching cities # Find all fuzzy-matching cities
postal_code = None postal_code = None
position = None position = None
matched_cities = fuzzy_match( matched_cities = fuzzy_match(location, [ for x in cities], limit=None)
[ for x in cities],
if matched_cities: if matched_cities:
# Find associated postal codes # Find associated postal codes
matched_postal_codes = [] matched_postal_codes = []
for matched_city_name, _ in matched_cities: for matched_city_name, _ in matched_cities:
postal_code_objects_for_city = [ postal_code_objects_for_city = [
x for x in cities x for x in cities if == matched_city_name
if == matched_city_name
] ]
matched_postal_codes.extend( matched_postal_codes.extend(
pc.postal_code pc.postal_code for pc in postal_code_objects_for_city
for pc in postal_code_objects_for_city
) )
# Try to match them with postal codes in config constraint # Try to match them with postal codes in config constraint
matched_postal_codes_in_config = ( matched_postal_codes_in_config = set(matched_postal_codes) & set(
set(matched_postal_codes) & set(constraint["postal_codes"]) constraint["postal_codes"]
) )
if matched_postal_codes_in_config: if matched_postal_codes_in_config:
# If there are some matched postal codes which are also in # If there are some matched postal codes which are also in
@ -166,14 +158,17 @@ def guess_location_position(location, cities, constraint):
# take the city position # take the city position
for matched_city_name, _ in matched_cities: for matched_city_name, _ in matched_cities:
postal_code_objects_for_city = [ postal_code_objects_for_city = [
x for x in cities x
for x in cities
if == matched_city_name and x.postal_code == postal_code if == matched_city_name and x.postal_code == postal_code
] ]
if len(postal_code_objects_for_city): if len(postal_code_objects_for_city):
position = {"lat": postal_code_objects_for_city[0].lat, "lng": postal_code_objects_for_city[0].lng} position = {
"lat": postal_code_objects_for_city[0].lat,
"lng": postal_code_objects_for_city[0].lng,
LOGGER.debug( LOGGER.debug(
("Found position %s using city %s."), ("Found position %s using city %s."), position, matched_city_name
position, matched_city_name
) )
break break
@ -194,25 +189,20 @@ def guess_postal_code(flats_list, constraint, config, distance_threshold=20000):
:return: An updated list of flats dict with guessed postal code. :return: An updated list of flats dict with guessed postal code.
""" """
opendata = { opendata = {"postal_codes": data.load_data(PostalCode, constraint, config)}
"postal_codes": data.load_data(PostalCode, constraint, config)
for flat in flats_list: for flat in flats_list:
location = flat.get("location", None) location = flat.get("location", None)
if not location: if not location:
addr = flat.get("address", None) addr = flat.get("address", None)
if addr: if addr:
location = addr['full_address'] location = addr["full_address"]
if not location: if not location:
# Skip everything if empty location # Skip everything if empty location
( ("No location field for flat %s, skipping postal " "code lookup. (%s)"),
"No location field for flat %s, skipping postal "
"code lookup. (%s)"
flat["id"], flat["id"],
flat.get("address") flat.get("address"),
) )
continue continue
@ -230,17 +220,22 @@ def guess_postal_code(flats_list, constraint, config, distance_threshold=20000):
LOGGER.debug( LOGGER.debug(
"Found postal code in location field for flat %s: %s.", "Found postal code in location field for flat %s: %s.",
flat["id"], postal_code flat["id"],
) )
except AssertionError: except AssertionError:
postal_code = None postal_code = None
# Then fetch position (and postal_code if it couldn't be found earlier) # Then fetch position (and postal_code if it couldn't be found earlier)
if postal_code: if postal_code:
cities = [x for x in opendata["postal_codes"] if x.postal_code == postal_code] cities = [
x for x in opendata["postal_codes"] if x.postal_code == postal_code
(_, position) = guess_location_position(location, cities, constraint) (_, position) = guess_location_position(location, cities, constraint)
else: else:
(postal_code, position) = guess_location_position(location, opendata["postal_codes"], constraint) (postal_code, position) = guess_location_position(
location, opendata["postal_codes"], constraint
# Check that postal code is not too far from the ones listed in config, # Check that postal code is not too far from the ones listed in config,
# limit bad fuzzy matching # limit bad fuzzy matching
@ -256,17 +251,19 @@ def guess_postal_code(flats_list, constraint, config, distance_threshold=20000):
(, x.lng) (, x.lng)
for x in opendata["postal_codes"] for x in opendata["postal_codes"]
if x.postal_code == constraint_postal_code if x.postal_code == constraint_postal_code
) ),
) )
for constraint_postal_code in constraint["postal_codes"] for constraint_postal_code in constraint["postal_codes"]
) )
if distance > distance_threshold: if distance > distance_threshold:
("Postal code %s found for flat %s @ %s is off-constraints " (
"Postal code %s found for flat %s @ %s is off-constraints "
"(distance is %dm > %dm). Let's consider it is an " "(distance is %dm > %dm). Let's consider it is an "
"artifact match and keep the post without this postal " "artifact match and keep the post without this postal "
"code."), "code."
postal_code, postal_code,
flat["id"], flat["id"],
location, location,
@ -282,7 +279,9 @@ def guess_postal_code(flats_list, constraint, config, distance_threshold=20000):
if existing_postal_code and existing_postal_code != postal_code: if existing_postal_code and existing_postal_code != postal_code:
LOGGER.warning( LOGGER.warning(
"Replacing previous postal code %s by %s for flat %s.", "Replacing previous postal code %s by %s for flat %s.",
existing_postal_code, postal_code, flat["id"] existing_postal_code,
) )
flat["flatisfy"]["postal_code"] = postal_code flat["flatisfy"]["postal_code"] = postal_code
else: else:
@ -304,10 +303,10 @@ def guess_stations(flats_list, constraint, config):
:return: An updated list of flats dict with guessed nearby stations. :return: An updated list of flats dict with guessed nearby stations.
""" """
distance_threshold = config['max_distance_housing_station'] distance_threshold = config["max_distance_housing_station"]
opendata = { opendata = {
"postal_codes": data.load_data(PostalCode, constraint, config), "postal_codes": data.load_data(PostalCode, constraint, config),
"stations": data.load_data(PublicTransport, constraint, config) "stations": data.load_data(PublicTransport, constraint, config),
} }
for flat in flats_list: for flat in flats_list:
@ -316,13 +315,12 @@ def guess_stations(flats_list, constraint, config):
if not flat_station: if not flat_station:
# Skip everything if empty station # Skip everything if empty station
"No stations field for flat %s, skipping stations lookup.", "No stations field for flat %s, skipping stations lookup.", flat["id"]
) )
continue continue
# Weboob modules can return several stations in a comma-separated list. # Weboob modules can return several stations in a comma-separated list.
flat_stations = flat_station.split(',') flat_stations = flat_station.split(",")
# But some stations containing a comma exist, so let's add the initial # But some stations containing a comma exist, so let's add the initial
# value to the list of stations to check if there was one. # value to the list of stations to check if there was one.
if len(flat_stations) > 1: if len(flat_stations) > 1:
@ -334,7 +332,7 @@ def guess_stations(flats_list, constraint, config):
tentative_station, tentative_station,
[ for x in opendata["stations"]], [ for x in opendata["stations"]],
limit=10, limit=10,
threshold=50 threshold=50,
) )
# Keep only one occurrence of each station # Keep only one occurrence of each station
@ -361,32 +359,34 @@ def guess_stations(flats_list, constraint, config):
] ]
for station_data in stations_objects: for station_data in stations_objects:
distance = tools.distance( distance = tools.distance(
(, station_data.lng), (, station_data.lng), postal_code_gps
) )
if distance < distance_threshold: if distance < distance_threshold:
# If at least one of the coordinates for a given # If at least one of the coordinates for a given
# station is close enough, that's ok and we can add # station is close enough, that's ok and we can add
# the station # the station
good_matched_stations.append({ good_matched_stations.append(
"key": station[0], "key": station[0],
"name":, "name":,
"confidence": station[1], "confidence": station[1],
"gps": (, station_data.lng) "gps": (, station_data.lng),
}) }
break break
("Station %s is too far from flat %s (%dm > %dm), " (
"discarding this station."), "Station %s is too far from flat %s (%dm > %dm), "
"discarding this station."
station[0], station[0],
flat["id"], flat["id"],
int(distance), int(distance),
int(distance_threshold) int(distance_threshold),
) )
else: else:
"No postal code for flat %s, skipping stations detection.", "No postal code for flat %s, skipping stations detection.", flat["id"]
) )
if not good_matched_stations: if not good_matched_stations:
@ -394,7 +394,7 @@ def guess_stations(flats_list, constraint, config):
"No stations found for flat %s, matching %s.", "No stations found for flat %s, matching %s.",
flat["id"], flat["id"],
flat["station"] flat["station"],
) )
continue continue
@ -402,29 +402,20 @@ def guess_stations(flats_list, constraint, config):
"Found stations for flat %s: %s (matching %s).", "Found stations for flat %s: %s (matching %s).",
flat["id"], flat["id"],
", ".join(x["name"] for x in good_matched_stations), ", ".join(x["name"] for x in good_matched_stations),
flat["station"] flat["station"],
) )
# If some stations were already filled in and the result is different, # If some stations were already filled in and the result is different,
# display some warning to the user # display some warning to the user
if ( if "matched_stations" in flat["flatisfy"] and (
"matched_stations" in flat["flatisfy"] and
# Do a set comparison, as ordering is not important # Do a set comparison, as ordering is not important
set([ set([station["name"] for station in flat["flatisfy"]["matched_stations"]])
station["name"] != set([station["name"] for station in good_matched_stations])
for station in flat["flatisfy"]["matched_stations"]
]) !=
for station in good_matched_stations
): ):
LOGGER.warning( LOGGER.warning(
"Replacing previously fetched stations for flat %s. Found " "Replacing previously fetched stations for flat %s. Found "
"stations differ from the previously found ones.", "stations differ from the previously found ones.",
flat["id"] flat["id"],
) )
flat["flatisfy"]["matched_stations"] = good_matched_stations flat["flatisfy"]["matched_stations"] = good_matched_stations
@ -449,9 +440,8 @@ def compute_travel_times(flats_list, constraint, config):
if not flat["flatisfy"].get("matched_stations", []): if not flat["flatisfy"].get("matched_stations", []):
# Skip any flat without matched stations # Skip any flat without matched stations
"Skipping travel time computation for flat %s. No matched " "Skipping travel time computation for flat %s. No matched " "stations.",
"stations.", flat["id"],
) )
continue continue
@ -467,15 +457,11 @@ def compute_travel_times(flats_list, constraint, config):
for station in flat["flatisfy"]["matched_stations"]: for station in flat["flatisfy"]["matched_stations"]:
# Time from station is a dict with time and route # Time from station is a dict with time and route
time_from_station_dict = tools.get_travel_time_between( time_from_station_dict = tools.get_travel_time_between(
station["gps"], station["gps"], place["gps"], TimeToModes[mode], config
) )
if ( if time_from_station_dict and (
time_from_station_dict and time_from_station_dict["time"] < time_to_place_dict
(time_from_station_dict["time"] < time_to_place_dict or or time_to_place_dict is None
time_to_place_dict is None)
): ):
# If starting from this station makes the route to the # If starting from this station makes the route to the
# specified place shorter, update # specified place shorter, update
@ -484,7 +470,10 @@ def compute_travel_times(flats_list, constraint, config):
if time_to_place_dict: if time_to_place_dict:
"Travel time between %s and flat %s by %s is %ds.", "Travel time between %s and flat %s by %s is %ds.",
place_name, flat["id"], mode, time_to_place_dict["time"] place_name,
) )
flat["flatisfy"]["time_to"][place_name] = time_to_place_dict flat["flatisfy"]["time_to"][place_name] = time_to_place_dict
return flats_list return flats_list

View File

@ -11,7 +11,15 @@ import enum
import arrow import arrow
from sqlalchemy import ( from sqlalchemy import (
Boolean, Column, DateTime, Enum, Float, SmallInteger, String, Text, inspect Boolean,
) )
from sqlalchemy.orm import validates from sqlalchemy.orm import validates
@ -26,6 +34,7 @@ class FlatUtilities(enum.Enum):
""" """
An enum of the possible utilities status for a flat entry. An enum of the possible utilities status for a flat entry.
""" """
included = 10 included = 10
unknown = 0 unknown = 0
excluded = -10 excluded = -10
@ -35,6 +44,7 @@ class FlatStatus(enum.Enum):
""" """
An enum of the possible status for a flat entry. An enum of the possible status for a flat entry.
""" """
user_deleted = -100 user_deleted = -100
duplicate = -20 duplicate = -20
ignored = -10 ignored = -10
@ -47,21 +57,16 @@ class FlatStatus(enum.Enum):
# List of statuses that are automatically handled, and which the user cannot # List of statuses that are automatically handled, and which the user cannot
# manually set through the UI. # manually set through the UI.
AUTOMATED_STATUSES = [ AUTOMATED_STATUSES = [, FlatStatus.duplicate, FlatStatus.ignored],
class Flat(BASE): class Flat(BASE):
""" """
SQLAlchemy ORM model to store a flat. SQLAlchemy ORM model to store a flat.
""" """
__tablename__ = "flats" __tablename__ = "flats"
__searchable__ = [ __searchable__ = ["title", "text", "station", "location", "details", "notes"]
"title", "text", "station", "location", "details", "notes"
# Weboob data # Weboob data
id = Column(String, primary_key=True) id = Column(String, primary_key=True)
@ -99,7 +104,7 @@ class Flat(BASE):
# Date for visit # Date for visit
visit_date = Column(DateTime) visit_date = Column(DateTime)
@validates('utilities') @validates("utilities")
def validate_utilities(self, _, utilities): def validate_utilities(self, _, utilities):
""" """
Utilities validation method Utilities validation method
@ -124,8 +129,7 @@ class Flat(BASE):
try: try:
return getattr(FlatStatus, status) return getattr(FlatStatus, status)
except (AttributeError, TypeError): except (AttributeError, TypeError):
LOGGER.warn("Unkown flat status %s, ignoring it.", LOGGER.warn("Unkown flat status %s, ignoring it.", status)
return self.status.default.arg return self.status.default.arg
@validates("notation") @validates("notation")
@ -137,7 +141,7 @@ class Flat(BASE):
notation = int(notation) notation = int(notation)
assert notation >= 0 and notation <= 5 assert notation >= 0 and notation <= 5
except (ValueError, AssertionError): except (ValueError, AssertionError):
raise ValueError('notation should be an integer between 0 and 5') raise ValueError("notation should be an integer between 0 and 5")
return notation return notation
@validates("date") @validates("date")
@ -178,25 +182,22 @@ class Flat(BASE):
# Handle flatisfy metadata # Handle flatisfy metadata
flat_dict = flat_dict.copy() flat_dict = flat_dict.copy()
if "flatisfy" in flat_dict: if "flatisfy" in flat_dict:
flat_dict["flatisfy_stations"] = ( flat_dict["flatisfy_stations"] = flat_dict["flatisfy"].get(
flat_dict["flatisfy"].get("matched_stations", []) "matched_stations", []
) )
flat_dict["flatisfy_postal_code"] = ( flat_dict["flatisfy_postal_code"] = flat_dict["flatisfy"].get(
flat_dict["flatisfy"].get("postal_code", None) "postal_code", None
) )
flat_dict["flatisfy_position"] = ( flat_dict["flatisfy_position"] = flat_dict["flatisfy"].get("position", None)
flat_dict["flatisfy"].get("position", None) flat_dict["flatisfy_time_to"] = flat_dict["flatisfy"].get("time_to", {})
) flat_dict["flatisfy_constraint"] = flat_dict["flatisfy"].get(
flat_dict["flatisfy_time_to"] = ( "constraint", "default"
flat_dict["flatisfy"].get("time_to", {})
flat_dict["flatisfy_constraint"] = (
flat_dict["flatisfy"].get("constraint", "default")
) )
del flat_dict["flatisfy"] del flat_dict["flatisfy"]
flat_dict = {k: v for k, v in flat_dict.items() flat_dict = {
if k in inspect(Flat).columns.keys()} k: v for k, v in flat_dict.items() if k in inspect(Flat).columns.keys()
return Flat(**flat_dict) return Flat(**flat_dict)
def __repr__(self): def __repr__(self):
@ -207,11 +208,7 @@ class Flat(BASE):
Return a dict representation of this flat object that is JSON Return a dict representation of this flat object that is JSON
serializable. serializable.
""" """
flat_repr = { flat_repr = {k: v for k, v in self.__dict__.items() if not k.startswith("_")}
k: v
for k, v in self.__dict__.items()
if not k.startswith("_")
if isinstance(flat_repr["status"], FlatStatus): if isinstance(flat_repr["status"], FlatStatus):
flat_repr["status"] = flat_repr["status"].name flat_repr["status"] = flat_repr["status"].name
if isinstance(flat_repr["utilities"], FlatUtilities): if isinstance(flat_repr["utilities"], FlatUtilities):

View File

@ -7,9 +7,7 @@ from __future__ import absolute_import, print_function, unicode_literals
import logging import logging
from sqlalchemy import ( from sqlalchemy import Column, Float, Integer, String, UniqueConstraint
Column, Float, Integer, String, UniqueConstraint
from flatisfy.database.base import BASE from flatisfy.database.base import BASE
@ -21,6 +19,7 @@ class PostalCode(BASE):
""" """
SQLAlchemy ORM model to store a postal code opendata. SQLAlchemy ORM model to store a postal code opendata.
""" """
__tablename__ = "postal_codes" __tablename__ = "postal_codes"
id = Column(Integer, primary_key=True) id = Column(Integer, primary_key=True)
@ -41,8 +40,4 @@ class PostalCode(BASE):
Return a dict representation of this postal code object that is JSON Return a dict representation of this postal code object that is JSON
serializable. serializable.
""" """
return { return {k: v for k, v in self.__dict__.items() if not k.startswith("_")}
k: v
for k, v in self.__dict__.items()
if not k.startswith("_")

View File

@ -7,9 +7,7 @@ from __future__ import absolute_import, print_function, unicode_literals
import logging import logging
from sqlalchemy import ( from sqlalchemy import Column, Float, Integer, String
Column, Float, Integer, String
from flatisfy.database.base import BASE from flatisfy.database.base import BASE
@ -21,6 +19,7 @@ class PublicTransport(BASE):
""" """
SQLAlchemy ORM model to store public transport opendata. SQLAlchemy ORM model to store public transport opendata.
""" """
__tablename__ = "public_transports" __tablename__ = "public_transports"
id = Column(Integer, primary_key=True) id = Column(Integer, primary_key=True)

View File

@ -30,6 +30,7 @@ class LocalImageCache(ImageCache):
""" """
A local cache for images, stored in memory. A local cache for images, stored in memory.
""" """
@staticmethod @staticmethod
def on_miss(path): def on_miss(path):
""" """
@ -46,48 +47,36 @@ class TestTexts(unittest.TestCase):
""" """
Checks string normalizations. Checks string normalizations.
""" """
def test_roman_numbers(self): def test_roman_numbers(self):
""" """
Checks roman numbers replacement. Checks roman numbers replacement.
""" """
self.assertEqual( self.assertEqual("XIV", tools.convert_arabic_to_roman("14"))
self.assertEqual( self.assertEqual("XXXIX", tools.convert_arabic_to_roman("39"))
self.assertEqual( self.assertEqual("40", tools.convert_arabic_to_roman("40"))
self.assertEqual( self.assertEqual("1987", tools.convert_arabic_to_roman("1987"))
self.assertEqual( self.assertEqual(
"Dans le XVe arrondissement", "Dans le XVe arrondissement",
tools.convert_arabic_to_roman_in_text("Dans le 15e arrondissement") tools.convert_arabic_to_roman_in_text("Dans le 15e arrondissement"),
) )
self.assertEqual( self.assertEqual(
"XXeme arr.", "XXeme arr.", tools.convert_arabic_to_roman_in_text("20eme arr.")
tools.convert_arabic_to_roman_in_text("20eme arr.")
) )
self.assertEqual( self.assertEqual(
tools.convert_arabic_to_roman_in_text("A AIX EN PROVENCE") tools.convert_arabic_to_roman_in_text("A AIX EN PROVENCE"),
) )
self.assertEqual( self.assertEqual(
"Montigny Le Bretonneux", "Montigny Le Bretonneux",
tools.convert_arabic_to_roman_in_text("Montigny Le Bretonneux") tools.convert_arabic_to_roman_in_text("Montigny Le Bretonneux"),
) )
def test_roman_numbers_in_text(self): def test_roman_numbers_in_text(self):
@ -97,58 +86,43 @@ class TestTexts(unittest.TestCase):
""" """
self.assertEqual( self.assertEqual(
"dans le XVe arrondissement", "dans le XVe arrondissement",
tools.normalize_string("Dans le 15e arrondissement") tools.normalize_string("Dans le 15e arrondissement"),
) )
self.assertEqual( self.assertEqual("paris XVe, 75005", tools.normalize_string("Paris 15e, 75005"))
"paris XVe, 75005",
tools.normalize_string("Paris 15e, 75005")
self.assertEqual( self.assertEqual("paris xve, 75005", tools.normalize_string("Paris XVe, 75005"))
"paris xve, 75005",
tools.normalize_string("Paris XVe, 75005")
def test_multiple_whitespaces(self): def test_multiple_whitespaces(self):
""" """
Checks whitespaces are collapsed. Checks whitespaces are collapsed.
""" """
self.assertEqual( self.assertEqual("avec ascenseur", tools.normalize_string("avec ascenseur"))
"avec ascenseur",
tools.normalize_string("avec ascenseur")
def test_whitespace_trim(self): def test_whitespace_trim(self):
""" """
Checks that trailing and beginning whitespaces are trimmed. Checks that trailing and beginning whitespaces are trimmed.
""" """
self.assertEqual( self.assertEqual("rennes 35000", tools.normalize_string(" Rennes 35000 "))
"rennes 35000",
tools.normalize_string(" Rennes 35000 ")
def test_accents(self): def test_accents(self):
""" """
Checks accents are replaced. Checks accents are replaced.
""" """
self.assertEqual( self.assertEqual("eeeaui", tools.normalize_string(u"éèêàüï"))
class TestPhoneNumbers(unittest.TestCase): class TestPhoneNumbers(unittest.TestCase):
""" """
Checks phone numbers normalizations. Checks phone numbers normalizations.
""" """
def test_prefix(self): def test_prefix(self):
""" """
Checks phone numbers with international prefixes. Checks phone numbers with international prefixes.
""" """
self.assertEqual( self.assertEqual(
"0605040302", "0605040302", duplicates.homogeneize_phone_number("+33605040302")
) )
def test_dots_separators(self): def test_dots_separators(self):
@ -156,8 +130,7 @@ class TestPhoneNumbers(unittest.TestCase):
Checks phone numbers with dots. Checks phone numbers with dots.
""" """
self.assertEqual( self.assertEqual(
"0605040302", "0605040302", duplicates.homogeneize_phone_number("")
) )
def test_spaces_separators(self): def test_spaces_separators(self):
@ -165,8 +138,7 @@ class TestPhoneNumbers(unittest.TestCase):
Checks phone numbers with spaces. Checks phone numbers with spaces.
""" """
self.assertEqual( self.assertEqual(
"0605040302", "0605040302", duplicates.homogeneize_phone_number("06 05 04 03 02")
duplicates.homogeneize_phone_number("06 05 04 03 02")
) )
@ -183,92 +155,106 @@ class TestPhotos(unittest.TestCase):
""" """
Compares a photo against itself. Compares a photo against itself.
""" """
photo = { photo = {"url": TESTS_DATA_DIR + "127028739@seloger.jpg"}
"url": TESTS_DATA_DIR + "127028739@seloger.jpg"
self.assertTrue(duplicates.compare_photos( self.assertTrue(
photo, duplicates.compare_photos(
photo, photo, photo, self.IMAGE_CACHE, self.HASH_THRESHOLD
def test_different_photos(self): def test_different_photos(self):
""" """
Compares two different photos. Compares two different photos.
""" """
self.assertFalse(duplicates.compare_photos( self.assertFalse(
{"url": TESTS_DATA_DIR + "127028739@seloger.jpg"}, {"url": TESTS_DATA_DIR + "127028739@seloger.jpg"},
{"url": TESTS_DATA_DIR + "127028739-2@seloger.jpg"}, {"url": TESTS_DATA_DIR + "127028739-2@seloger.jpg"},
)) )
self.assertFalse(duplicates.compare_photos( self.assertFalse(
{"url": TESTS_DATA_DIR + "127028739-2@seloger.jpg"}, {"url": TESTS_DATA_DIR + "127028739-2@seloger.jpg"},
{"url": TESTS_DATA_DIR + "127028739-3@seloger.jpg"}, {"url": TESTS_DATA_DIR + "127028739-3@seloger.jpg"},
)) )
def test_matching_photos(self): def test_matching_photos(self):
""" """
Compares two matching photos with different size and source. Compares two matching photos with different size and source.
""" """
self.assertTrue(duplicates.compare_photos( self.assertTrue(
{"url": TESTS_DATA_DIR + "127028739@seloger.jpg"}, {"url": TESTS_DATA_DIR + "127028739@seloger.jpg"},
{"url": TESTS_DATA_DIR + "14428129@explorimmo.jpg"}, {"url": TESTS_DATA_DIR + "14428129@explorimmo.jpg"},
)) )
self.assertTrue(duplicates.compare_photos( self.assertTrue(
{"url": TESTS_DATA_DIR + "127028739-2@seloger.jpg"}, {"url": TESTS_DATA_DIR + "127028739-2@seloger.jpg"},
{"url": TESTS_DATA_DIR + "14428129-2@explorimmo.jpg"}, {"url": TESTS_DATA_DIR + "14428129-2@explorimmo.jpg"},
)) )
self.assertTrue(duplicates.compare_photos( self.assertTrue(
{"url": TESTS_DATA_DIR + "127028739-3@seloger.jpg"}, {"url": TESTS_DATA_DIR + "127028739-3@seloger.jpg"},
{"url": TESTS_DATA_DIR + "14428129-3@explorimmo.jpg"}, {"url": TESTS_DATA_DIR + "14428129-3@explorimmo.jpg"},
)) )
self.assertTrue(duplicates.compare_photos( self.assertTrue(
{"url": TESTS_DATA_DIR + "127028739@seloger.jpg"}, {"url": TESTS_DATA_DIR + "127028739@seloger.jpg"},
{"url": TESTS_DATA_DIR + "127028739-watermark@seloger.jpg"}, {"url": TESTS_DATA_DIR + "127028739-watermark@seloger.jpg"},
)) )
def test_matching_cropped_photos(self): def test_matching_cropped_photos(self):
""" """
Compares two matching photos with one being cropped. Compares two matching photos with one being cropped.
""" """
# Fixme: the image hash threshold should be 10 ideally # Fixme: the image hash threshold should be 10 ideally
self.assertTrue(duplicates.compare_photos( self.assertTrue(
{"url": TESTS_DATA_DIR + "vertical.jpg"}, {"url": TESTS_DATA_DIR + "vertical.jpg"},
{"url": TESTS_DATA_DIR + "vertical-cropped.jpg"}, {"url": TESTS_DATA_DIR + "vertical-cropped.jpg"},
20 20,
)) )
# Fixme: the image hash threshold should be 10 ideally # Fixme: the image hash threshold should be 10 ideally
self.assertTrue(duplicates.compare_photos( self.assertTrue(
{"url": TESTS_DATA_DIR + "13783671@explorimmo.jpg"}, {"url": TESTS_DATA_DIR + "13783671@explorimmo.jpg"},
{"url": TESTS_DATA_DIR + "124910113@seloger.jpg"}, {"url": TESTS_DATA_DIR + "124910113@seloger.jpg"},
20 20,
)) )
class TestImageCache(unittest.TestCase): class TestImageCache(unittest.TestCase):
""" """
Checks image cache is working as expected. Checks image cache is working as expected.
""" """
def __init__(self, *args, **kwargs): def __init__(self, *args, **kwargs):
self.IMAGE_CACHE = ImageCache( # pylint: disable=invalid-name self.IMAGE_CACHE = ImageCache( # pylint: disable=invalid-name
storage_dir=tempfile.mkdtemp(prefix="flatisfy-") storage_dir=tempfile.mkdtemp(prefix="flatisfy-")
@ -280,27 +266,22 @@ class TestImageCache(unittest.TestCase):
Check that it returns nothing on an invalid URL. Check that it returns nothing on an invalid URL.
""" """
# See # See
self.assertIsNone( self.assertIsNone(self.IMAGE_CACHE.get(""))
self.IMAGE_CACHE.get("") self.assertIsNone(self.IMAGE_CACHE.get(""))
def test_invalid_data(self): def test_invalid_data(self):
""" """
Check that it returns nothing on invalid data. Check that it returns nothing on invalid data.
""" """
# See # See
self.assertIsNone( self.assertIsNone(self.IMAGE_CACHE.get(""))
class TestDuplicates(unittest.TestCase): class TestDuplicates(unittest.TestCase):
""" """
Checks duplicates detection. Checks duplicates detection.
""" """
DUPLICATES_MIN_SCORE_WITHOUT_PHOTOS = 8 # pylint: disable=invalid-name DUPLICATES_MIN_SCORE_WITHOUT_PHOTOS = 8 # pylint: disable=invalid-name
DUPLICATES_MIN_SCORE_WITH_PHOTOS = 15 # pylint: disable=invalid-name DUPLICATES_MIN_SCORE_WITH_PHOTOS = 15 # pylint: disable=invalid-name
HASH_THRESHOLD = 10 # pylint: disable=invalid-name HASH_THRESHOLD = 10 # pylint: disable=invalid-name
@ -326,7 +307,7 @@ class TestDuplicates(unittest.TestCase):
"utilities": "", "utilities": "",
"area": random.randint(200, 1500) / 10, "area": random.randint(200, 1500) / 10,
"cost": random.randint(100000, 300000), "cost": random.randint(100000, 300000),
"bedrooms": random.randint(1, 4) "bedrooms": random.randint(1, 4),
} }
@staticmethod @staticmethod
@ -351,8 +332,7 @@ class TestDuplicates(unittest.TestCase):
flat1 = self.generate_fake_flat() flat1 = self.generate_fake_flat()
flat2 = copy.deepcopy(flat1) flat2 = copy.deepcopy(flat1)
score = duplicates.get_duplicate_score( score = duplicates.get_duplicate_score(
flat1, flat2, flat1, flat2, self.IMAGE_CACHE, self.HASH_THRESHOLD
) )
self.assertGreaterEqual(score, self.DUPLICATES_MIN_SCORE_WITHOUT_PHOTOS) self.assertGreaterEqual(score, self.DUPLICATES_MIN_SCORE_WITHOUT_PHOTOS)
@ -365,8 +345,7 @@ class TestDuplicates(unittest.TestCase):
flat2["cost"] += 1000 flat2["cost"] += 1000
score = duplicates.get_duplicate_score( score = duplicates.get_duplicate_score(
flat1, flat2, flat1, flat2, self.IMAGE_CACHE, self.HASH_THRESHOLD
) )
self.assertLess(score, self.DUPLICATES_MIN_SCORE_WITHOUT_PHOTOS) self.assertLess(score, self.DUPLICATES_MIN_SCORE_WITHOUT_PHOTOS)
@ -380,8 +359,7 @@ class TestDuplicates(unittest.TestCase):
flat2["rooms"] += 1 flat2["rooms"] += 1
score = duplicates.get_duplicate_score( score = duplicates.get_duplicate_score(
flat1, flat2, flat1, flat2, self.IMAGE_CACHE, self.HASH_THRESHOLD
) )
self.assertLess(score, self.DUPLICATES_MIN_SCORE_WITHOUT_PHOTOS) self.assertLess(score, self.DUPLICATES_MIN_SCORE_WITHOUT_PHOTOS)
@ -394,8 +372,7 @@ class TestDuplicates(unittest.TestCase):
flat2["area"] += 10 flat2["area"] += 10
score = duplicates.get_duplicate_score( score = duplicates.get_duplicate_score(
flat1, flat2, flat1, flat2, self.IMAGE_CACHE, self.HASH_THRESHOLD
) )
self.assertLess(score, self.DUPLICATES_MIN_SCORE_WITHOUT_PHOTOS) self.assertLess(score, self.DUPLICATES_MIN_SCORE_WITHOUT_PHOTOS)
@ -410,8 +387,7 @@ class TestDuplicates(unittest.TestCase):
flat2["area"] = 50.37 flat2["area"] = 50.37
score = duplicates.get_duplicate_score( score = duplicates.get_duplicate_score(
flat1, flat2, flat1, flat2, self.IMAGE_CACHE, self.HASH_THRESHOLD
) )
self.assertLess(score, self.DUPLICATES_MIN_SCORE_WITHOUT_PHOTOS) self.assertLess(score, self.DUPLICATES_MIN_SCORE_WITHOUT_PHOTOS)
@ -425,8 +401,7 @@ class TestDuplicates(unittest.TestCase):
flat2["phone"] = "0708091011" flat2["phone"] = "0708091011"
score = duplicates.get_duplicate_score( score = duplicates.get_duplicate_score(
flat1, flat2, flat1, flat2, self.IMAGE_CACHE, self.HASH_THRESHOLD
) )
self.assertLess(score, self.DUPLICATES_MIN_SCORE_WITHOUT_PHOTOS) self.assertLess(score, self.DUPLICATES_MIN_SCORE_WITHOUT_PHOTOS)
@ -435,14 +410,10 @@ class TestDuplicates(unittest.TestCase):
Two flats with same price, area and rooms quantity should be detected Two flats with same price, area and rooms quantity should be detected
as duplicates. as duplicates.
""" """
flats = self.load_files( flats = self.load_files("127028739@seloger", "14428129@explorimmo")
score = duplicates.get_duplicate_score( score = duplicates.get_duplicate_score(
flats[0], flats[1], flats[0], flats[1], self.IMAGE_CACHE, self.HASH_THRESHOLD
) )
self.assertGreaterEqual(score, self.DUPLICATES_MIN_SCORE_WITH_PHOTOS) self.assertGreaterEqual(score, self.DUPLICATES_MIN_SCORE_WITH_PHOTOS)
@ -502,8 +473,13 @@ def run():
""" """"Running tests…")"Running tests…")
try: try:
for testsuite in [TestTexts, TestPhoneNumbers, TestImageCache, for testsuite in [
TestDuplicates, TestPhotos]: TestTexts,
suite = unittest.TestLoader().loadTestsFromTestCase(testsuite) suite = unittest.TestLoader().loadTestsFromTestCase(testsuite)
result = unittest.TextTestRunner(verbosity=2).run(suite) result = unittest.TextTestRunner(verbosity=2).run(suite)
assert result.wasSuccessful() assert result.wasSuccessful()

View File

@ -3,9 +3,7 @@
This module contains basic utility functions, such as pretty printing of JSON This module contains basic utility functions, such as pretty printing of JSON
output, checking that a value is within a given interval etc. output, checking that a value is within a given interval etc.
""" """
from __future__ import ( from __future__ import absolute_import, division, print_function, unicode_literals
absolute_import, division, print_function, unicode_literals
import datetime import datetime
import itertools import itertools
@ -61,8 +59,18 @@ def convert_arabic_to_roman(arabic):
return arabic return arabic
to_roman = { to_roman = {
1: 'I', 2: 'II', 3: 'III', 4: 'IV', 5: 'V', 6: 'VI', 7: 'VII', 1: "I",
8: 'VIII', 9: 'IX', 10: 'X', 20: 'XX', 30: 'XXX' 2: "II",
3: "III",
4: "IV",
5: "V",
6: "VI",
7: "VII",
8: "VIII",
9: "IX",
10: "X",
20: "XX",
30: "XXX",
} }
roman_chars_list = [] roman_chars_list = []
count = 1 count = 1
@ -71,7 +79,7 @@ def convert_arabic_to_roman(arabic):
if digit != 0: if digit != 0:
roman_chars_list.append(to_roman[digit * count]) roman_chars_list.append(to_roman[digit * count])
count *= 10 count *= 10
return ''.join(roman_chars_list[::-1]) return "".join(roman_chars_list[::-1])
def convert_arabic_to_roman_in_text(text): def convert_arabic_to_roman_in_text(text):
@ -83,9 +91,7 @@ def convert_arabic_to_roman_in_text(text):
arabic. arabic.
""" """
return re.sub( return re.sub(
r'(\d+)', r"(\d+)", lambda matchobj: convert_arabic_to_roman(, text
lambda matchobj: convert_arabic_to_roman(,
) )
@ -96,11 +102,13 @@ def hash_dict(func):
From From
""" """
class HDict(dict): class HDict(dict):
""" """
Transform mutable dictionary into immutable. Useful to be compatible Transform mutable dictionary into immutable. Useful to be compatible
with lru_cache with lru_cache
""" """
def __hash__(self): def __hash__(self):
return hash(json.dumps(self)) return hash(json.dumps(self))
@ -108,17 +116,10 @@ def hash_dict(func):
""" """
The wrapped function The wrapped function
""" """
args = tuple( args = tuple([HDict(arg) if isinstance(arg, dict) else arg for arg in args])
[ kwargs = {k: HDict(v) if isinstance(v, dict) else v for k, v in kwargs.items()}
HDict(arg) if isinstance(arg, dict) else arg
for arg in args
kwargs = {
k: HDict(v) if isinstance(v, dict) else v
for k, v in kwargs.items()
return func(*args, **kwargs) return func(*args, **kwargs)
return wrapped return wrapped
@ -126,6 +127,7 @@ class DateAwareJSONEncoder(json.JSONEncoder):
""" """
Extend the default JSON encoder to serialize datetimes to iso strings. Extend the default JSON encoder to serialize datetimes to iso strings.
""" """
def default(self, o): # pylint: disable=locally-disabled,E0202 def default(self, o): # pylint: disable=locally-disabled,E0202
if isinstance(o, (, datetime.datetime)): if isinstance(o, (, datetime.datetime)):
return o.isoformat() return o.isoformat()
@ -153,9 +155,9 @@ def pretty_json(data):
"toto": "ok" "toto": "ok"
} }
""" """
return json.dumps(data, cls=DateAwareJSONEncoder, return json.dumps(
indent=4, separators=(',', ': '), data, cls=DateAwareJSONEncoder, indent=4, separators=(",", ": "), sort_keys=True
sort_keys=True) )
def batch(iterable, size): def batch(iterable, size):
@ -295,8 +297,8 @@ def distance(gps1, gps2):
# pylint: disable=locally-disabled,invalid-name # pylint: disable=locally-disabled,invalid-name
a = ( a = (
math.sin((lat2 - lat1) / 2.0)**2 + math.sin((lat2 - lat1) / 2.0) ** 2
math.cos(lat1) * math.cos(lat2) * math.sin((long2 - long1) / 2.0)**2 + math.cos(lat1) * math.cos(lat2) * math.sin((long2 - long1) / 2.0) ** 2
) )
c = 2.0 * math.atan2(math.sqrt(a), math.sqrt(1 - a)) c = 2.0 * math.atan2(math.sqrt(a), math.sqrt(1 - a))
earth_radius = 6371000 earth_radius = 6371000
@ -327,7 +329,9 @@ def merge_dicts(*args):
if len(args) == 1: if len(args) == 1:
return args[0] return args[0]
flat1, flat2 = args[:2] # pylint: disable=locally-disabled,unbalanced-tuple-unpacking,line-too-long flat1, flat2 = args[
] # pylint: disable=locally-disabled,unbalanced-tuple-unpacking,line-too-long
merged_flat = {} merged_flat = {}
for k, value2 in flat2.items(): for k, value2 in flat2.items():
value1 = flat1.get(k, None) value1 = flat1.get(k, None)
@ -385,13 +389,14 @@ def get_travel_time_between(latlng_from, latlng_to, mode, config):
"from": "%s;%s" % (latlng_from[1], latlng_from[0]), "from": "%s;%s" % (latlng_from[1], latlng_from[0]),
"to": "%s;%s" % (latlng_to[1], latlng_to[0]), "to": "%s;%s" % (latlng_to[1], latlng_to[0]),
"datetime": date_from.isoformat(), "datetime": date_from.isoformat(),
"count": 1 "count": 1,
} }
try: try:
# Do the query to Navitia API # Do the query to Navitia API
req = requests.get( req = requests.get(
auth=(config["navitia_api_key"], "") params=payload,
auth=(config["navitia_api_key"], ""),
) )
req.raise_for_status() req.raise_for_status()
@ -400,28 +405,31 @@ def get_travel_time_between(latlng_from, latlng_to, mode, config):
for section in journeys["sections"]: for section in journeys["sections"]:
if section["type"] == "public_transport": if section["type"] == "public_transport":
# Public transport # Public transport
sections.append({ sections.append(
"geojson": section["geojson"], "geojson": section["geojson"],
"color": ( "color": (
section["display_informations"].get("color", None) section["display_informations"].get("color", None)
) )
elif section["type"] == "street_network": elif section["type"] == "street_network":
# Walking # Walking
sections.append({ sections.append({"geojson": section["geojson"], "color": None})
"geojson": section["geojson"],
"color": None
else: else:
# Skip anything else # Skip anything else
continue continue
except (requests.exceptions.RequestException, except (
ValueError, IndexError, KeyError) as exc: requests.exceptions.RequestException,
) as exc:
# Ignore any possible exception # Ignore any possible exception
LOGGER.warning( LOGGER.warning(
"An exception occurred during travel time lookup on " "An exception occurred during travel time lookup on "
"Navitia: %s.", "Navitia: %s.",
str(exc) str(exc),
) )
else: else:
LOGGER.warning( LOGGER.warning(
@ -430,50 +438,45 @@ def get_travel_time_between(latlng_from, latlng_to, mode, config):
) )
elif mode in [TimeToModes.WALK, TimeToModes.BIKE, TimeToModes.CAR]: elif mode in [TimeToModes.WALK, TimeToModes.BIKE, TimeToModes.CAR]:
TimeToModes.WALK: 'mapbox/walking', TimeToModes.WALK: "mapbox/walking",
TimeToModes.BIKE: 'mapbox/cycling', TimeToModes.BIKE: "mapbox/cycling",
TimeToModes.CAR: 'mapbox/driving' TimeToModes.CAR: "mapbox/driving",
} }
# Check that Mapbox API key is available # Check that Mapbox API key is available
if config["mapbox_api_key"]: if config["mapbox_api_key"]:
try: try:
service = mapbox.Directions( service = mapbox.Directions(access_token=config["mapbox_api_key"])
origin = { origin = {
'type': 'Feature', "type": "Feature",
'properties': {'name': 'Start'}, "properties": {"name": "Start"},
'geometry': { "geometry": {
'type': 'Point', "type": "Point",
'coordinates': [latlng_from[1], latlng_from[0]]}} "coordinates": [latlng_from[1], latlng_from[0]],
destination = { destination = {
'type': 'Feature', "type": "Feature",
'properties': {'name': 'End'}, "properties": {"name": "End"},
'geometry': { "geometry": {
'type': 'Point', "type": "Point",
'coordinates': [latlng_to[1], latlng_to[0]]}} "coordinates": [latlng_to[1], latlng_to[0]],
response = service.directions( },
[origin, destination], MAPBOX_MODES[mode] }
) response = service.directions([origin, destination], MAPBOX_MODES[mode])
response.raise_for_status() response.raise_for_status()
route = response.geojson()['features'][0] route = response.geojson()["features"][0]
# Fix longitude/latitude inversion in geojson output # Fix longitude/latitude inversion in geojson output
geometry = route['geometry'] geometry = route["geometry"]
geometry['coordinates'] = [ geometry["coordinates"] = [
(x[1], x[0]) for x in geometry['coordinates'] (x[1], x[0]) for x in geometry["coordinates"]
] ]
sections = [{ sections = [{"geojson": geometry, "color": "000"}]
"geojson": geometry, travel_time = route["properties"]["duration"]
"color": "000" except (requests.exceptions.RequestException, IndexError, KeyError) as exc:
travel_time = route['properties']['duration']
except (requests.exceptions.RequestException,
IndexError, KeyError) as exc:
# Ignore any possible exception # Ignore any possible exception
LOGGER.warning( LOGGER.warning(
"An exception occurred during travel time lookup on " "An exception occurred during travel time lookup on " "Mapbox: %s.",
"Mapbox: %s.", str(exc),
) )
else: else:
LOGGER.warning( LOGGER.warning(
@ -482,10 +485,7 @@ def get_travel_time_between(latlng_from, latlng_to, mode, config):
) )
if travel_time: if travel_time:
return { return {"time": travel_time, "sections": sections}
"time": travel_time,
"sections": sections
return None return None
@ -493,6 +493,7 @@ def timeit(func):
""" """
A decorator that logs how much time was spent in the function. A decorator that logs how much time was spent in the function.
""" """
def wrapped(*args, **kwargs): def wrapped(*args, **kwargs):
""" """
The wrapped function The wrapped function
@ -502,4 +503,5 @@ def timeit(func):
runtime = time.time() - before runtime = time.time() - before"%s -- Execution took %s seconds.", func.__name__, runtime)"%s -- Execution took %s seconds.", func.__name__, runtime)
return res return res
return wrapped return wrapped

View File

@ -2,9 +2,7 @@
""" """
This module contains the definition of the Bottle web app. This module contains the definition of the Bottle web app.
""" """
from __future__ import ( from __future__ import absolute_import, division, print_function, unicode_literals
absolute_import, division, print_function, unicode_literals
import functools import functools
import json import json
@ -25,13 +23,13 @@ class QuietWSGIRefServer(bottle.WSGIRefServer):
Quiet implementation of Bottle built-in WSGIRefServer, as `Canister` is Quiet implementation of Bottle built-in WSGIRefServer, as `Canister` is
handling the logging through standard Python logging. handling the logging through standard Python logging.
""" """
# pylint: disable=locally-disabled,too-few-public-methods # pylint: disable=locally-disabled,too-few-public-methods
quiet = True quiet = True
def run(self, app): def run(self, app):
'Server is now up and ready! Listening on %s:%s.' % "Server is now up and ready! Listening on %s:%s." % (, self.port)
(, self.port)
) )
super(QuietWSGIRefServer, self).run(app) super(QuietWSGIRefServer, self).run(app)
@ -42,12 +40,10 @@ def _serve_static_file(filename):
""" """
return bottle.static_file( return bottle.static_file(
filename, filename,
root=os.path.join( root=os.path.join(os.path.dirname(os.path.realpath(__file__)), "static"),
) )
def get_app(config): def get_app(config):
""" """
Get a Bottle app instance with all the routes set-up. Get a Bottle app instance with all the routes set-up.
@ -72,40 +68,42 @@ def get_app(config):
) )
# Enable CORS # Enable CORS
@app.hook('after_request') @app.hook("after_request")
def enable_cors(): def enable_cors():
""" """
Add CORS headers at each request. Add CORS headers at each request.
""" """
# The str() call is required as we import unicode_literals and WSGI # The str() call is required as we import unicode_literals and WSGI
# headers list should have plain str type. # headers list should have plain str type.
bottle.response.headers[str('Access-Control-Allow-Origin')] = str('*') bottle.response.headers[str("Access-Control-Allow-Origin")] = str("*")
bottle.response.headers[str('Access-Control-Allow-Methods')] = str( bottle.response.headers[str("Access-Control-Allow-Methods")] = str(
) )
bottle.response.headers[str('Access-Control-Allow-Headers')] = str( bottle.response.headers[str("Access-Control-Allow-Headers")] = str(
'Origin, Accept, Content-Type, X-Requested-With, X-CSRF-Token' "Origin, Accept, Content-Type, X-Requested-With, X-CSRF-Token"
) )
# API v1 routes # API v1 routes
app.route("/api/v1", ["GET", "OPTIONS"], api_routes.index_v1) app.route("/api/v1", ["GET", "OPTIONS"], api_routes.index_v1)
app.route("/api/v1/time_to_places", ["GET", "OPTIONS"], app.route(
api_routes.time_to_places_v1) "/api/v1/time_to_places", ["GET", "OPTIONS"], api_routes.time_to_places_v1
app.route("/api/v1/flats", ["GET", "OPTIONS"], api_routes.flats_v1) app.route("/api/v1/flats", ["GET", "OPTIONS"], api_routes.flats_v1)
app.route("/api/v1/flats/:flat_id", ["GET", "OPTIONS"], api_routes.flat_v1) app.route("/api/v1/flats/:flat_id", ["GET", "OPTIONS"], api_routes.flat_v1)
app.route("/api/v1/flats/:flat_id", ["PATCH", "OPTIONS"], app.route("/api/v1/flats/:flat_id", ["PATCH", "OPTIONS"], api_routes.update_flat_v1)
app.route("/api/v1/ics/visits.ics", ["GET", "OPTIONS"], app.route("/api/v1/ics/visits.ics", ["GET", "OPTIONS"], api_routes.ics_feed_v1)
app.route("/api/v1/search", ["POST", "OPTIONS"], api_routes.search_v1) app.route("/api/v1/search", ["POST", "OPTIONS"], api_routes.search_v1)
app.route("/api/v1/opendata", ["GET", "OPTIONS"], api_routes.opendata_index_v1) app.route("/api/v1/opendata", ["GET", "OPTIONS"], api_routes.opendata_index_v1)
app.route("/api/v1/opendata/postal_codes", ["GET", "OPTIONS"], app.route(
api_routes.opendata_postal_codes_v1) "/api/v1/opendata/postal_codes",
app.route("/api/v1/metadata", ["GET", "OPTIONS"], api_routes.metadata_v1) app.route("/api/v1/metadata", ["GET", "OPTIONS"], api_routes.metadata_v1)
@ -113,29 +111,28 @@ def get_app(config):
app.route("/", "GET", lambda: _serve_static_file("index.html")) app.route("/", "GET", lambda: _serve_static_file("index.html"))
# Static files # Static files
app.route("/favicon.ico", "GET", app.route("/favicon.ico", "GET", lambda: _serve_static_file("favicon.ico"))
lambda: _serve_static_file("favicon.ico"))
app.route( app.route(
"/assets/<filename:path>", "GET", "/assets/<filename:path>",
lambda filename: _serve_static_file("/assets/{}".format(filename)) "GET",
lambda filename: _serve_static_file("/assets/{}".format(filename)),
) )
app.route( app.route(
"/img/<filename:path>", "GET", "/img/<filename:path>",
lambda filename: _serve_static_file("/img/{}".format(filename)) "GET",
lambda filename: _serve_static_file("/img/{}".format(filename)),
) )
app.route( app.route(
"/.well-known/<filename:path>", "GET", "/.well-known/<filename:path>",
lambda filename: _serve_static_file("/.well-known/{}".format(filename)) "GET",
lambda filename: _serve_static_file("/.well-known/{}".format(filename)),
) )
app.route( app.route(
"/data/img/<filename:path>", "GET", "/data/img/<filename:path>",
lambda filename: bottle.static_file( lambda filename: bottle.static_file(
filename, filename, root=os.path.join(config["data_directory"], "images")
root=os.path.join( ),
) )
return app return app

View File

@ -7,9 +7,7 @@ This module is heavily based on code from
[Bottle-SQLAlchemy]( which is [Bottle-SQLAlchemy]( which is
licensed under MIT license. licensed under MIT license.
""" """
from __future__ import ( from __future__ import absolute_import, division, print_function, unicode_literals
absolute_import, division, print_function, unicode_literals
import functools import functools
import inspect import inspect
@ -22,7 +20,8 @@ class ConfigPlugin(object):
A Bottle plugin to automatically pass the config object to the routes A Bottle plugin to automatically pass the config object to the routes
specifying they need it. specifying they need it.
""" """
name = 'config'
name = "config"
api = 2 api = 2
KEYWORD = "config" KEYWORD = "config"
@ -41,9 +40,7 @@ class ConfigPlugin(object):
if not isinstance(other, ConfigPlugin): if not isinstance(other, ConfigPlugin):
continue continue
else: else:
raise bottle.PluginError( raise bottle.PluginError("Found another conflicting Config plugin.")
"Found another conflicting Config plugin."
def apply(self, callback, route): def apply(self, callback, route):
""" """

View File

@ -7,9 +7,7 @@ This module is heavily based on code from
[Bottle-SQLAlchemy]( which is [Bottle-SQLAlchemy]( which is
licensed under MIT license. licensed under MIT license.
""" """
from __future__ import ( from __future__ import absolute_import, division, print_function, unicode_literals
absolute_import, division, print_function, unicode_literals
import inspect import inspect
@ -21,7 +19,8 @@ class DatabasePlugin(object):
A Bottle plugin to automatically pass an SQLAlchemy database session object A Bottle plugin to automatically pass an SQLAlchemy database session object
to the routes specifying they need it. to the routes specifying they need it.
""" """
name = 'database'
name = "database"
api = 2 api = 2
KEYWORD = "db" KEYWORD = "db"
@ -41,9 +40,7 @@ class DatabasePlugin(object):
if not isinstance(other, DatabasePlugin): if not isinstance(other, DatabasePlugin):
continue continue
else: else:
raise bottle.PluginError( raise bottle.PluginError("Found another conflicting Database plugin.")
"Found another conflicting Database plugin."
def apply(self, callback, route): def apply(self, callback, route):
""" """
@ -64,6 +61,7 @@ class DatabasePlugin(object):
if self.KEYWORD not in callback_args: if self.KEYWORD not in callback_args:
# If no need for a db session, call the route callback # If no need for a db session, call the route callback
return callback return callback
def wrapper(*args, **kwargs): def wrapper(*args, **kwargs):
""" """
Wrap the callback in a call to get_session. Wrap the callback in a call to get_session.
@ -72,6 +70,7 @@ class DatabasePlugin(object):
# Get a db session and pass it to the callback # Get a db session and pass it to the callback
kwargs[self.KEYWORD] = session kwargs[self.KEYWORD] = session
return callback(*args, **kwargs) return callback(*args, **kwargs)
return wrapper return wrapper

View File

@ -2,9 +2,7 @@
""" """
This module contains the definition of the web app API routes. This module contains the definition of the web app API routes.
""" """
from __future__ import ( from __future__ import absolute_import, division, print_function, unicode_literals
absolute_import, division, print_function, unicode_literals
import datetime import datetime
import itertools import itertools
@ -60,26 +58,24 @@ def _JSONApiSpec(query, model, default_sorting=None):
# Handle pagination according to JSON API spec # Handle pagination according to JSON API spec
page_number, page_size = 0, None page_number, page_size = 0, None
try: try:
if 'page[size]' in query: if "page[size]" in query:
page_size = int(query['page[size]']) page_size = int(query["page[size]"])
assert page_size > 0 assert page_size > 0
if 'page[number]' in query: if "page[number]" in query:
page_number = int(query['page[number]']) page_number = int(query["page[number]"])
assert page_number >= 0 assert page_number >= 0
except (AssertionError, ValueError): except (AssertionError, ValueError):
raise ValueError("Invalid pagination provided.") raise ValueError("Invalid pagination provided.")
# Handle sorting according to JSON API spec # Handle sorting according to JSON API spec
sorting = [] sorting = []
if 'sort' in query: if "sort" in query:
for index in query['sort'].split(','): for index in query["sort"].split(","):
try: try:
sort_field = getattr(model, index.lstrip('-')) sort_field = getattr(model, index.lstrip("-"))
except AttributeError: except AttributeError:
raise ValueError( raise ValueError("Invalid sorting key provided: {}.".format(index))
"Invalid sorting key provided: {}.".format(index) if index.startswith("-"):
if index.startswith('-'):
sort_field = sort_field.desc() sort_field = sort_field.desc()
sorting.append(sort_field) sorting.append(sort_field)
# Default sorting options # Default sorting options
@ -88,9 +84,7 @@ def _JSONApiSpec(query, model, default_sorting=None):
sorting.append(getattr(model, default_sorting)) sorting.append(getattr(model, default_sorting))
except AttributeError: except AttributeError:
raise ValueError( raise ValueError(
"Invalid default sorting key provided: {}.".format( "Invalid default sorting key provided: {}.".format(default_sorting)
) )
return filters, page_number, page_size, sorting return filters, page_number, page_size, sorting
@ -125,7 +119,7 @@ def _serialize_flat(flat, config):
flat["flatisfy_postal_code"] = { flat["flatisfy_postal_code"] = {
"postal_code": flat["flatisfy_postal_code"], "postal_code": flat["flatisfy_postal_code"],
"name":, "name":,
"gps": (, postal_code_data.lng) "gps": (, postal_code_data.lng),
} }
except (AssertionError, StopIteration): except (AssertionError, StopIteration):
flat["flatisfy_postal_code"] = {} flat["flatisfy_postal_code"] = {}
@ -148,7 +142,7 @@ def index_v1():
"search": "/api/v1/search", "search": "/api/v1/search",
"ics": "/api/v1/ics/visits.ics", "ics": "/api/v1/ics/visits.ics",
"time_to_places": "/api/v1/time_to_places", "time_to_places": "/api/v1/time_to_places",
"metadata": "/api/v1/metadata" "metadata": "/api/v1/metadata",
} }
@ -179,36 +173,32 @@ def flats_v1(config, db):
:return: The available flats objects in a JSON ``data`` dict. :return: The available flats objects in a JSON ``data`` dict.
""" """
if bottle.request.method == 'OPTIONS': if bottle.request.method == "OPTIONS":
return '' return ""
try: try:
try: try:
filters, page_number, page_size, sorting = _JSONApiSpec( filters, page_number, page_size, sorting = _JSONApiSpec(
bottle.request.query, bottle.request.query, flat_model.Flat, default_sorting="cost"
) )
except ValueError as exc: except ValueError as exc:
return JSONError(400, str(exc)) return JSONError(400, str(exc))
# Build flat list # Build flat list
db_query = ( db_query = db.query(flat_model.Flat).filter_by(**filters).order_by(*sorting)
flats = [ flats = [
_serialize_flat(flat, config) _serialize_flat(flat, config)
for flat in itertools.islice( for flat in itertools.islice(
db_query, db_query,
page_number * page_size if page_size else None, page_number * page_size if page_size else None,
page_number * page_size + page_size if page_size else None page_number * page_size + page_size if page_size else None,
) )
] ]
return { return {
"data": flats, "data": flats,
"page": page_number, "page": page_number,
"items_per_page": page_size if page_size else len(flats) "items_per_page": page_size if page_size else len(flats),
} }
except Exception as exc: # pylint: disable= broad-except except Exception as exc: # pylint: disable= broad-except
return JSONError(500, str(exc)) return JSONError(500, str(exc))
@ -224,7 +214,7 @@ def flat_v1(flat_id, config, db):
:return: The flat object in a JSON ``data`` dict. :return: The flat object in a JSON ``data`` dict.
""" """
if bottle.request.method == 'OPTIONS': if bottle.request.method == "OPTIONS":
return {} return {}
@ -234,9 +224,7 @@ def flat_v1(flat_id, config, db):
if not flat: if not flat:
return JSONError(404, "No flat with id {}.".format(flat_id)) return JSONError(404, "No flat with id {}.".format(flat_id))
return { return {"data": _serialize_flat(flat, config)}
"data": _serialize_flat(flat, config)
except Exception as exc: # pylint: disable= broad-except except Exception as exc: # pylint: disable= broad-except
return JSONError(500, str(exc)) return JSONError(500, str(exc))
@ -260,7 +248,7 @@ def update_flat_v1(flat_id, config, db):
:return: The new flat object in a JSON ``data`` dict. :return: The new flat object in a JSON ``data`` dict.
""" """
if bottle.request.method == 'OPTIONS': if bottle.request.method == "OPTIONS":
return {} return {}
@ -274,14 +262,9 @@ def update_flat_v1(flat_id, config, db):
for key, value in json_body.items(): for key, value in json_body.items():
setattr(flat, key, value) setattr(flat, key, value)
except ValueError as exc: except ValueError as exc:
return JSONError( return JSONError(400, "Invalid payload provided: {}.".format(str(exc)))
"Invalid payload provided: {}.".format(str(exc))
return { return {"data": _serialize_flat(flat, config)}
"data": _serialize_flat(flat, config)
except Exception as exc: # pylint: disable= broad-except except Exception as exc: # pylint: disable= broad-except
return JSONError(500, str(exc)) return JSONError(500, str(exc))
@ -297,7 +280,7 @@ def time_to_places_v1(config):
:return: The JSON dump of the places to compute time to (dict of places :return: The JSON dump of the places to compute time to (dict of places
names mapped to GPS coordinates). names mapped to GPS coordinates).
""" """
if bottle.request.method == 'OPTIONS': if bottle.request.method == "OPTIONS":
return {} return {}
@ -305,12 +288,9 @@ def time_to_places_v1(config):
places = {} places = {}
for constraint_name, constraint in config["constraints"].items(): for constraint_name, constraint in config["constraints"].items():
places[constraint_name] = { places[constraint_name] = {
k: v["gps"] k: v["gps"] for k, v in constraint["time_to"].items()
for k, v in constraint["time_to"].items()
return {
"data": places
} }
return {"data": places}
except Exception as exc: # pylint: disable= broad-except except Exception as exc: # pylint: disable= broad-except
return JSONError(500, str(exc)) return JSONError(500, str(exc))
@ -345,7 +325,7 @@ def search_v1(db, config):
:return: The matching flat objects in a JSON ``data`` dict. :return: The matching flat objects in a JSON ``data`` dict.
""" """
if bottle.request.method == 'OPTIONS': if bottle.request.method == "OPTIONS":
return {} return {}
@ -357,30 +337,29 @@ def search_v1(db, config):
try: try:
filters, page_number, page_size, sorting = _JSONApiSpec( filters, page_number, page_size, sorting = _JSONApiSpec(
bottle.request.query, bottle.request.query, flat_model.Flat, default_sorting="cost"
) )
except ValueError as exc: except ValueError as exc:
return JSONError(400, str(exc)) return JSONError(400, str(exc))
flats_db_query = (flat_model.Flat flats_db_query = (
.search_query(db, query) flat_model.Flat.search_query(db, query)
.filter_by(**filters) .filter_by(**filters)
.order_by(*sorting)) .order_by(*sorting)
flats = [ flats = [
_serialize_flat(flat, config) _serialize_flat(flat, config)
for flat in itertools.islice( for flat in itertools.islice(
flats_db_query, flats_db_query,
page_number * page_size if page_size else None, page_number * page_size if page_size else None,
page_number * page_size + page_size if page_size else None page_number * page_size + page_size if page_size else None,
) )
] ]
return { return {
"data": flats, "data": flats,
"page": page_number, "page": page_number,
"items_per_page": page_size if page_size else len(flats) "items_per_page": page_size if page_size else len(flats),
} }
except Exception as exc: # pylint: disable= broad-except except Exception as exc: # pylint: disable= broad-except
return JSONError(500, str(exc)) return JSONError(500, str(exc))
@ -396,7 +375,7 @@ def ics_feed_v1(config, db):
:return: The ICS feed for the visits. :return: The ICS feed for the visits.
""" """
if bottle.request.method == 'OPTIONS': if bottle.request.method == "OPTIONS":
return {} return {}
@ -407,24 +386,24 @@ def ics_feed_v1(config, db):
) )
for flat in flats_with_visits: for flat in flats_with_visits:
vevent = cal.add('vevent') vevent = cal.add("vevent")
vevent.add('dtstart').value = flat.visit_date vevent.add("dtstart").value = flat.visit_date
vevent.add('dtend').value = ( vevent.add("dtend").value = flat.visit_date + datetime.timedelta(hours=1)
flat.visit_date + datetime.timedelta(hours=1) vevent.add("summary").value = "Visit - {}".format(flat.title)
vevent.add('summary').value = 'Visit - {}'.format(flat.title)
description = ( description = "{} (area: {}, cost: {} {})\n{}#/flat/{}\n".format(
'{} (area: {}, cost: {} {})\n{}#/flat/{}\n'.format( flat.title,
flat.title, flat.area, flat.cost, flat.currency, flat.area,
config['website_url'], flat.cost,
) )
) description += "\n{}\n".format(flat.text)
description += '\n{}\n'.format(flat.text)
if flat.notes: if flat.notes:
description += '\n{}\n'.format(flat.notes) description += "\n{}\n".format(flat.notes)
vevent.add('description').value = description vevent.add("description").value = description
except Exception: # pylint: disable= broad-except except Exception: # pylint: disable= broad-except
pass pass
@ -439,13 +418,11 @@ def opendata_index_v1():
GET /api/v1/opendata GET /api/v1/opendata
""" """
if bottle.request.method == 'OPTIONS': if bottle.request.method == "OPTIONS":
return {} return {}
return { return {"postal_codes": "/api/v1/opendata/postal_codes"}
"postal_codes": "/api/v1/opendata/postal_codes"
def opendata_postal_codes_v1(db): def opendata_postal_codes_v1(db):
@ -476,36 +453,36 @@ def opendata_postal_codes_v1(db):
:return: The postal codes data from opendata. :return: The postal codes data from opendata.
""" """
if bottle.request.method == 'OPTIONS': if bottle.request.method == "OPTIONS":
return {} return {}
try: try:
try: try:
filters, page_number, page_size, sorting = _JSONApiSpec( filters, page_number, page_size, sorting = _JSONApiSpec(
bottle.request.query, bottle.request.query, PostalCode, default_sorting="postal_code"
) )
except ValueError as exc: except ValueError as exc:
return JSONError(400, str(exc)) return JSONError(400, str(exc))
db_query = db.query(PostalCode).filter_by(**filters).order_by(*sorting) db_query = db.query(PostalCode).filter_by(**filters).order_by(*sorting)
postal_codes = [ postal_codes = [
x.json_api_repr() for x in itertools.islice( x.json_api_repr()
for x in itertools.islice(
db_query, db_query,
page_number * page_size if page_size else None, page_number * page_size if page_size else None,
page_number * page_size + page_size if page_size else None page_number * page_size + page_size if page_size else None,
) )
] ]
return { return {
"data": postal_codes, "data": postal_codes,
"page": page_number, "page": page_number,
"items_per_page": page_size if page_size else len(postal_codes) "items_per_page": page_size if page_size else len(postal_codes),
} }
except Exception as exc: # pylint: disable= broad-except except Exception as exc: # pylint: disable= broad-except
return JSONError(500, str(exc)) return JSONError(500, str(exc))
def metadata_v1(config): def metadata_v1(config):
""" """
API v1 metadata of the application. API v1 metadata of the application.
@ -516,25 +493,18 @@ def metadata_v1(config):
:return: The application metadata. :return: The application metadata.
""" """
if bottle.request.method == 'OPTIONS': if bottle.request.method == "OPTIONS":
return {} return {}
try: try:
last_update = None last_update = None
try: try:
ts_file = os.path.join( ts_file = os.path.join(config["data_directory"], "timestamp")
last_update = os.path.getmtime(ts_file) last_update = os.path.getmtime(ts_file)
except OSError: except OSError:
pass pass
return { return {"data": {"last_update": last_update}}
'data': {
'last_update': last_update
except Exception as exc: # pylint: disable= broad-except except Exception as exc: # pylint: disable= broad-except
return JSONError(500, str(exc)) return JSONError(500, str(exc))

View File

@ -36,8 +36,7 @@ def run_migrations_offline():
""" """
url = config.get_main_option("sqlalchemy.url") url = config.get_main_option("sqlalchemy.url")
context.configure( context.configure(url=url, target_metadata=target_metadata, literal_binds=True)
url=url, target_metadata=target_metadata, literal_binds=True)
with context.begin_transaction(): with context.begin_transaction():
context.run_migrations() context.run_migrations()
@ -52,18 +51,17 @@ def run_migrations_online():
""" """
connectable = engine_from_config( connectable = engine_from_config(
config.get_section(config.config_ini_section), config.get_section(config.config_ini_section),
prefix='sqlalchemy.', prefix="sqlalchemy.",
poolclass=pool.NullPool) poolclass=pool.NullPool,
with connectable.connect() as connection: with connectable.connect() as connection:
context.configure( context.configure(connection=connection, target_metadata=target_metadata)
with context.begin_transaction(): with context.begin_transaction():
context.run_migrations() context.run_migrations()
if context.is_offline_mode(): if context.is_offline_mode():
run_migrations_offline() run_migrations_offline()
else: else:

View File

@ -10,21 +10,15 @@ import sqlalchemy as sa
# revision identifiers, used by Alembic. # revision identifiers, used by Alembic.
revision = '8155b83242eb' revision = "8155b83242eb"
down_revision = None down_revision = None
branch_labels = None branch_labels = None
depends_on = None depends_on = None
def upgrade(): def upgrade():
op.add_column( op.add_column("flats", sa.Column("is_expired", sa.Boolean(), default=False))
sa.Column('is_expired', sa.Boolean(), default=False)
def downgrade(): def downgrade():
op.drop_column( op.drop_column("flats", "is_expired")

View File

@ -12,10 +12,9 @@ import flatisfy.config
from flatisfy.web import app as web_app from flatisfy.web import app as web_app
class Args(): class Args:
config = os.path.join( config = os.path.join(
os.path.dirname(os.path.realpath(__file__)), os.path.dirname(os.path.realpath(__file__)), "config/config.json"
) )
@ -24,9 +23,11 @@ LOGGER = logging.getLogger("flatisfy")
CONFIG = flatisfy.config.load_config(Args()) CONFIG = flatisfy.config.load_config(Args())
if CONFIG is None: if CONFIG is None:
LOGGER.error("Invalid configuration. Exiting. " LOGGER.error(
"Invalid configuration. Exiting. "
"Run init-config before if this is the first time " "Run init-config before if this is the first time "
"you run Flatisfy.") "you run Flatisfy."
sys.exit(1) sys.exit(1)