Browse Source

reformat with black

master
Gautier P 2 years ago
parent
commit
42909bd46f
  1. 88
      doc/conf.py
  2. 113
      flatisfy/__main__.py
  3. 62
      flatisfy/cmds.py
  4. 114
      flatisfy/config.py
  5. 2
      flatisfy/constants.py
  6. 11
      flatisfy/data.py
  7. 120
      flatisfy/data_files/__init__.py
  8. 4
      flatisfy/database/__init__.py
  9. 2
      flatisfy/database/types.py
  10. 28
      flatisfy/database/whooshalchemy.py
  11. 58
      flatisfy/email.py
  12. 1
      flatisfy/exceptions.py
  13. 90
      flatisfy/fetch.py
  14. 93
      flatisfy/filters/__init__.py
  15. 8
      flatisfy/filters/cache.py
  16. 97
      flatisfy/filters/duplicates.py
  17. 4
      flatisfy/filters/images.py
  18. 159
      flatisfy/filters/metadata.py
  19. 61
      flatisfy/models/flat.py
  20. 11
      flatisfy/models/postal_code.py
  21. 5
      flatisfy/models/public_transport.py
  22. 252
      flatisfy/tests.py
  23. 160
      flatisfy/tools.py
  24. 73
      flatisfy/web/app.py
  25. 11
      flatisfy/web/configplugin.py
  26. 13
      flatisfy/web/dbplugin.py
  27. 158
      flatisfy/web/routes/api.py
  28. 14
      migrations/env.py
  29. 12
      migrations/versions/8155b83242eb_add_is_expired.py
  30. 13
      wsgi.py

88
doc/conf.py

@ -18,7 +18,8 @@ @@ -18,7 +18,8 @@
import os
import sys
sys.path.insert(0, os.path.abspath('..'))
sys.path.insert(0, os.path.abspath(".."))
# -- General configuration ------------------------------------------------
@ -30,19 +31,19 @@ sys.path.insert(0, os.path.abspath('..')) @@ -30,19 +31,19 @@ sys.path.insert(0, os.path.abspath('..'))
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = [
'sphinx.ext.autodoc',
'sphinx.ext.viewcode',
"sphinx.ext.autodoc",
"sphinx.ext.viewcode",
]
# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']
templates_path = ["_templates"]
# The suffix(es) of source filenames.
# You can specify multiple suffix as a list of string:
#
source_suffix = ['.rst', '.md']
source_suffix = [".rst", ".md"]
source_parsers = {
'.md': 'recommonmark.parser.CommonMarkParser',
".md": "recommonmark.parser.CommonMarkParser",
}
# The encoding of source files.
@ -50,21 +51,21 @@ source_parsers = { @@ -50,21 +51,21 @@ source_parsers = {
# source_encoding = 'utf-8-sig'
# The master toctree document.
master_doc = 'index'
master_doc = "index"
# General information about the project.
project = u'Flatisfy'
copyright = u'2017, Phyks (Lucas Verney)'
author = u'Phyks (Lucas Verney)'
project = u"Flatisfy"
copyright = u"2017, Phyks (Lucas Verney)"
author = u"Phyks (Lucas Verney)"
# The version info for the project you're documenting, acts as replacement for
# |version| and |release|, also used in various other places throughout the
# built documents.
#
# The short X.Y version.
version = u'0.1'
version = u"0.1"
# The full version, including alpha/beta/rc tags.
release = u'0.1'
release = u"0.1"
# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
@ -85,7 +86,7 @@ language = None @@ -85,7 +86,7 @@ language = None
# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# This patterns also effect to html_static_path and html_extra_path
exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"]
# The reST default role (used for this markup: `text`) to use for all
# documents.
@ -107,7 +108,7 @@ exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] @@ -107,7 +108,7 @@ exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
# show_authors = False
# The name of the Pygments (syntax highlighting) style to use.
pygments_style = 'sphinx'
pygments_style = "sphinx"
# A list of ignored prefixes for module index sorting.
# modindex_common_prefix = []
@ -124,7 +125,7 @@ todo_include_todos = False @@ -124,7 +125,7 @@ todo_include_todos = False
# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
#
html_theme = 'classic'
html_theme = "classic"
# Theme options are theme-specific and customize the look and feel of a theme
# further. For a list of options available for each theme, see the
@ -158,7 +159,7 @@ html_theme = 'classic' @@ -158,7 +159,7 @@ html_theme = 'classic'
# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ['_static']
html_static_path = ["_static"]
# Add any extra paths that contain custom files (such as robots.txt or
# .htaccess) here, relative to this directory. These files are copied
@ -238,34 +239,36 @@ html_static_path = ['_static'] @@ -238,34 +239,36 @@ html_static_path = ['_static']
# html_search_scorer = 'scorer.js'
# Output file base name for HTML help builder.
htmlhelp_basename = 'Flatisfydoc'
htmlhelp_basename = "Flatisfydoc"
# -- Options for LaTeX output ---------------------------------------------
latex_elements = {
# The paper size ('letterpaper' or 'a4paper').
#
# 'papersize': 'letterpaper',
# The font size ('10pt', '11pt' or '12pt').
#
# 'pointsize': '10pt',
# Additional stuff for the LaTeX preamble.
#
# 'preamble': '',
# Latex figure (float) alignment
#
# 'figure_align': 'htbp',
# The paper size ('letterpaper' or 'a4paper').
#
# 'papersize': 'letterpaper',
# The font size ('10pt', '11pt' or '12pt').
#
# 'pointsize': '10pt',
# Additional stuff for the LaTeX preamble.
#
# 'preamble': '',
# Latex figure (float) alignment
#
# 'figure_align': 'htbp',
}
# Grouping the document tree into LaTeX files. List of tuples
# (source start file, target name, title,
# author, documentclass [howto, manual, or own class]).
latex_documents = [
(master_doc, 'Flatisfy.tex', u'Flatisfy Documentation',
u'Phyks (Lucas Verney)', 'manual'),
(
master_doc,
"Flatisfy.tex",
u"Flatisfy Documentation",
u"Phyks (Lucas Verney)",
"manual",
),
]
# The name of an image file (relative to this directory) to place at the top of
@ -305,10 +308,7 @@ latex_documents = [ @@ -305,10 +308,7 @@ latex_documents = [
# One entry per manual page. List of tuples
# (source start file, name, description, authors, manual section).
man_pages = [
(master_doc, 'flatisfy', u'Flatisfy Documentation',
[author], 1)
]
man_pages = [(master_doc, "flatisfy", u"Flatisfy Documentation", [author], 1)]
# If true, show URL addresses after external links.
#
@ -321,9 +321,15 @@ man_pages = [ @@ -321,9 +321,15 @@ man_pages = [
# (source start file, target name, title, author,
# dir menu entry, description, category)
texinfo_documents = [
(master_doc, 'Flatisfy', u'Flatisfy Documentation',
author, 'Flatisfy', 'One line description of project.',
'Miscellaneous'),
(
master_doc,
"Flatisfy",
u"Flatisfy Documentation",
author,
"Flatisfy",
"One line description of project.",
"Miscellaneous",
),
]
# Documents to append as an appendix to all manuals.

113
flatisfy/__main__.py

@ -17,6 +17,7 @@ from flatisfy import data @@ -17,6 +17,7 @@ from flatisfy import data
from flatisfy import fetch
from flatisfy import tools
from flatisfy import tests
# pylint: enable=locally-disabled,wrong-import-position
@ -27,68 +28,59 @@ def parse_args(argv=None): @@ -27,68 +28,59 @@ def parse_args(argv=None):
"""
Create parser and parse arguments.
"""
parser = argparse.ArgumentParser(prog="Flatisfy",
description="Find the perfect flat.")
parser = argparse.ArgumentParser(
prog="Flatisfy", description="Find the perfect flat."
)
# Parent parser containing arguments common to any subcommand
parent_parser = argparse.ArgumentParser(add_help=False)
parent_parser.add_argument(
"--data-dir",
help="Location of Flatisfy data directory."
)
parent_parser.add_argument(
"--config",
help="Configuration file to use."
"--data-dir", help="Location of Flatisfy data directory."
)
parent_parser.add_argument("--config", help="Configuration file to use.")
parent_parser.add_argument(
"--passes", choices=[0, 1, 2, 3], type=int,
help="Number of passes to do on the filtered data."
"--passes",
choices=[0, 1, 2, 3],
type=int,
help="Number of passes to do on the filtered data.",
)
parent_parser.add_argument(
"--max-entries", type=int,
help="Maximum number of entries to fetch."
"--max-entries", type=int, help="Maximum number of entries to fetch."
)
parent_parser.add_argument(
"-v", "--verbose", action="store_true",
help="Verbose logging output."
"-v", "--verbose", action="store_true", help="Verbose logging output."
)
parent_parser.add_argument("-vv", action="store_true", help="Debug logging output.")
parent_parser.add_argument(
"-vv", action="store_true",
help="Debug logging output."
)
parent_parser.add_argument(
"--constraints", type=str,
help="Comma-separated list of constraints to consider."
"--constraints",
type=str,
help="Comma-separated list of constraints to consider.",
)
# Subcommands
subparsers = parser.add_subparsers(
dest="cmd", help="Available subcommands"
)
subparsers = parser.add_subparsers(dest="cmd", help="Available subcommands")
# Build data subcommand
subparsers.add_parser(
"build-data", parents=[parent_parser],
help="Build necessary data"
"build-data", parents=[parent_parser], help="Build necessary data"
)
# Init config subcommand
parser_init_config = subparsers.add_parser(
"init-config", parents=[parent_parser],
help="Initialize empty configuration."
"init-config", parents=[parent_parser], help="Initialize empty configuration."
)
parser_init_config.add_argument(
"output", nargs="?", help="Output config file. Use '-' for stdout."
)
# Fetch subcommand parser
subparsers.add_parser("fetch", parents=[parent_parser],
help="Fetch housings posts")
subparsers.add_parser("fetch", parents=[parent_parser], help="Fetch housings posts")
# Filter subcommand parser
parser_filter = subparsers.add_parser(
"filter", parents=[parent_parser],
help="Filter housings posts according to constraints in config."
"filter",
parents=[parent_parser],
help="Filter housings posts according to constraints in config.",
)
parser_filter.add_argument(
"--input",
@ -97,34 +89,31 @@ def parse_args(argv=None): @@ -97,34 +89,31 @@ def parse_args(argv=None):
"no additional fetching of infos is done, and the script outputs "
"a filtered JSON dump on stdout. If not provided, update status "
"of the flats in the database."
)
),
)
# Import subcommand parser
import_filter = subparsers.add_parser(
"import", parents=[parent_parser],
help="Import housing posts in database.")
"import", parents=[parent_parser], help="Import housing posts in database."
)
import_filter.add_argument(
"--new-only",
action="store_true",
help=(
"Download new housing posts only but do not refresh existing ones"
)
help=("Download new housing posts only but do not refresh existing ones"),
)
# Purge subcommand parser
subparsers.add_parser("purge", parents=[parent_parser],
help="Purge database.")
subparsers.add_parser("purge", parents=[parent_parser], help="Purge database.")
# Serve subcommand parser
parser_serve = subparsers.add_parser("serve", parents=[parent_parser],
help="Serve the web app.")
parser_serve = subparsers.add_parser(
"serve", parents=[parent_parser], help="Serve the web app."
)
parser_serve.add_argument("--port", type=int, help="Port to bind to.")
parser_serve.add_argument("--host", help="Host to listen on.")
# Test subcommand parser
subparsers.add_parser("test", parents=[parent_parser],
help="Unit testing.")
subparsers.add_parser("test", parents=[parent_parser], help="Unit testing.")
return parser.parse_args(argv)
@ -139,15 +128,15 @@ def main(): @@ -139,15 +128,15 @@ def main():
# Set logger
if args.vv:
logging.getLogger('').setLevel(logging.DEBUG)
logging.getLogger('sqlalchemy.engine').setLevel(logging.DEBUG)
logging.getLogger("").setLevel(logging.DEBUG)
logging.getLogger("sqlalchemy.engine").setLevel(logging.DEBUG)
elif args.verbose:
logging.getLogger('').setLevel(logging.INFO)
logging.getLogger("").setLevel(logging.INFO)
# sqlalchemy INFO level is way too loud, just stick with WARNING
logging.getLogger('sqlalchemy.engine').setLevel(logging.WARNING)
logging.getLogger("sqlalchemy.engine").setLevel(logging.WARNING)
else:
logging.getLogger('').setLevel(logging.WARNING)
logging.getLogger('sqlalchemy.engine').setLevel(logging.WARNING)
logging.getLogger("").setLevel(logging.WARNING)
logging.getLogger("sqlalchemy.engine").setLevel(logging.WARNING)
# Init-config command
if args.cmd == "init-config":
@ -161,9 +150,11 @@ def main(): @@ -161,9 +150,11 @@ def main():
else:
config = flatisfy.config.load_config(args, check_with_data=True)
if config is None:
LOGGER.error("Invalid configuration. Exiting. "
"Run init-config before if this is the first time "
"you run Flatisfy.")
LOGGER.error(
"Invalid configuration. Exiting. "
"Run init-config before if this is the first time "
"you run Flatisfy."
)
sys.exit(1)
# Purge command
@ -179,18 +170,16 @@ def main(): @@ -179,18 +170,16 @@ def main():
if args.cmd == "fetch":
# Fetch and filter flats list
fetched_flats = fetch.fetch_flats(config)
fetched_flats = cmds.filter_fetched_flats(config,
fetched_flats=fetched_flats,
fetch_details=True)
fetched_flats = cmds.filter_fetched_flats(
config, fetched_flats=fetched_flats, fetch_details=True
)
# Sort by cost
fetched_flats = {
k: tools.sort_list_of_dicts_by(v["new"], "cost")
for k, v in fetched_flats.items()
}
print(
tools.pretty_json(fetched_flats)
)
print(tools.pretty_json(fetched_flats))
return
# Filter command
elif args.cmd == "filter":
@ -199,9 +188,7 @@ def main(): @@ -199,9 +188,7 @@ def main():
fetched_flats = fetch.load_flats_from_file(args.input, config)
fetched_flats = cmds.filter_fetched_flats(
config,
fetched_flats=fetched_flats,
fetch_details=False
config, fetched_flats=fetched_flats, fetch_details=False
)
# Sort by cost
@ -211,9 +198,7 @@ def main(): @@ -211,9 +198,7 @@ def main():
}
# Output to stdout
print(
tools.pretty_json(fetched_flats)
)
print(tools.pretty_json(fetched_flats))
else:
cmds.import_and_filter(config, load_from_db=True)
return

62
flatisfy/cmds.py

@ -23,7 +23,9 @@ import time @@ -23,7 +23,9 @@ import time
LOGGER = logging.getLogger(__name__)
def filter_flats_list(config, constraint_name, flats_list, fetch_details=True, past_flats=None):
def filter_flats_list(
config, constraint_name, flats_list, fetch_details=True, past_flats=None
):
"""
Filter the available flats list. Then, filter it according to criteria.
@ -45,13 +47,9 @@ def filter_flats_list(config, constraint_name, flats_list, fetch_details=True, p @@ -45,13 +47,9 @@ def filter_flats_list(config, constraint_name, flats_list, fetch_details=True, p
except KeyError:
LOGGER.error(
"Missing constraint %s. Skipping filtering for these posts.",
constraint_name
constraint_name,
)
return {
"new": [],
"duplicate": [],
"ignored": []
}
return {"new": [], "duplicate": [], "ignored": []}
first_pass_result = collections.defaultdict(list)
second_pass_result = collections.defaultdict(list)
@ -59,9 +57,7 @@ def filter_flats_list(config, constraint_name, flats_list, fetch_details=True, p @@ -59,9 +57,7 @@ def filter_flats_list(config, constraint_name, flats_list, fetch_details=True, p
# Do a first pass with the available infos to try to remove as much
# unwanted postings as possible
if config["passes"] > 0:
first_pass_result = flatisfy.filters.first_pass(flats_list,
constraint,
config)
first_pass_result = flatisfy.filters.first_pass(flats_list, constraint, config)
else:
first_pass_result["new"] = flats_list
@ -95,8 +91,7 @@ def filter_flats_list(config, constraint_name, flats_list, fetch_details=True, p @@ -95,8 +91,7 @@ def filter_flats_list(config, constraint_name, flats_list, fetch_details=True, p
# Do a third pass to deduplicate better
if config["passes"] > 2:
third_pass_result = flatisfy.filters.third_pass(
second_pass_result["new"],
config
second_pass_result["new"], config
)
else:
third_pass_result["new"] = second_pass_result["new"]
@ -104,15 +99,15 @@ def filter_flats_list(config, constraint_name, flats_list, fetch_details=True, p @@ -104,15 +99,15 @@ def filter_flats_list(config, constraint_name, flats_list, fetch_details=True, p
return {
"new": third_pass_result["new"],
"duplicate": (
first_pass_result["duplicate"] +
second_pass_result["duplicate"] +
third_pass_result["duplicate"]
first_pass_result["duplicate"]
+ second_pass_result["duplicate"]
+ third_pass_result["duplicate"]
),
"ignored": (
first_pass_result["ignored"] +
second_pass_result["ignored"] +
third_pass_result["ignored"]
)
first_pass_result["ignored"]
+ second_pass_result["ignored"]
+ third_pass_result["ignored"]
),
}
@ -134,7 +129,7 @@ def filter_fetched_flats(config, fetched_flats, fetch_details=True, past_flats={ @@ -134,7 +129,7 @@ def filter_fetched_flats(config, fetched_flats, fetch_details=True, past_flats={
constraint_name,
flats_list,
fetch_details,
past_flats.get(constraint_name, None)
past_flats.get(constraint_name, None),
)
return fetched_flats
@ -156,9 +151,12 @@ def import_and_filter(config, load_from_db=False, new_only=False): @@ -156,9 +151,12 @@ def import_and_filter(config, load_from_db=False, new_only=False):
else:
fetched_flats = fetch.fetch_flats(config)
# Do not fetch additional details if we loaded data from the db.
flats_by_status = filter_fetched_flats(config, fetched_flats=fetched_flats,
fetch_details=(not load_from_db),
past_flats=past_flats if new_only else {})
flats_by_status = filter_fetched_flats(
config,
fetched_flats=fetched_flats,
fetch_details=(not load_from_db),
past_flats=past_flats if new_only else {},
)
# Create database connection
get_session = database.init_db(config["database"], config["search_index"])
@ -175,7 +173,7 @@ def import_and_filter(config, load_from_db=False, new_only=False): @@ -175,7 +173,7 @@ def import_and_filter(config, load_from_db=False, new_only=False):
# Set is_expired to true for all existing flats.
# This will be set back to false if we find them during importing.
for flat in session.query(flat_model.Flat).all():
flat.is_expired = True;
flat.is_expired = True
for status, flats_list in flatten_flats_by_status.items():
# Build SQLAlchemy Flat model objects for every available flat
@ -195,9 +193,7 @@ def import_and_filter(config, load_from_db=False, new_only=False): @@ -195,9 +193,7 @@ def import_and_filter(config, load_from_db=False, new_only=False):
# status if the user defined it
flat_object = flats_objects[each.id]
if each.status in flat_model.AUTOMATED_STATUSES:
flat_object.status = getattr(
flat_model.FlatStatus, status
)
flat_object.status = getattr(flat_model.FlatStatus, status)
else:
flat_object.status = each.status
@ -223,11 +219,8 @@ def import_and_filter(config, load_from_db=False, new_only=False): @@ -223,11 +219,8 @@ def import_and_filter(config, load_from_db=False, new_only=False):
LOGGER.info(f"Found {len(new_flats)} new flats.")
# Touch a file to indicate last update timestamp
ts_file = os.path.join(
config["data_directory"],
"timestamp"
)
with open(ts_file, 'w'):
ts_file = os.path.join(config["data_directory"], "timestamp")
with open(ts_file, "w"):
os.utime(ts_file, None)
LOGGER.info("Done!")
@ -270,5 +263,8 @@ def serve(config): @@ -270,5 +263,8 @@ def serve(config):
# standard logging
server = web_app.QuietWSGIRefServer
print("Launching web viewer running on http://%s:%s" % (config["host"], config["port"]))
print(
"Launching web viewer running on http://%s:%s"
% (config["host"], config["port"])
)
app.run(host=config["host"], port=config["port"], server=server)

114
flatisfy/config.py

@ -30,7 +30,7 @@ DEFAULT_CONFIG = { @@ -30,7 +30,7 @@ DEFAULT_CONFIG = {
"default": {
"type": None, # RENT, SALE, SHARING
"house_types": [], # List of house types, must be in APART, HOUSE,
# PARKING, LAND, OTHER or UNKNOWN
# PARKING, LAND, OTHER or UNKNOWN
"postal_codes": [], # List of postal codes
"area": (None, None), # (min, max) in m^2
"cost": (None, None), # (min, max) in currency unit
@ -42,12 +42,12 @@ DEFAULT_CONFIG = { @@ -42,12 +42,12 @@ DEFAULT_CONFIG = {
"vendu",
"Vendu",
"VENDU",
"recherche"
"recherche",
],
"time_to": {} # Dict mapping names to {"gps": [lat, lng],
# "time": (min, max),
# "mode": Valid mode }
# Time is in seconds
# "time": (min, max),
# "mode": Valid mode }
# Time is in seconds
}
},
# Whether or not to store personal data from housing posts (phone number
@ -91,7 +91,7 @@ DEFAULT_CONFIG = { @@ -91,7 +91,7 @@ DEFAULT_CONFIG = {
"backends": None,
# Should email notifications be sent?
"send_email": False,
"smtp_server": 'localhost',
"smtp_server": "localhost",
"smtp_port": 25,
"smtp_username": None,
"smtp_password": None,
@ -115,6 +115,7 @@ def validate_config(config, check_with_data): @@ -115,6 +115,7 @@ def validate_config(config, check_with_data):
check the config values.
:return: ``True`` if the configuration is valid, ``False`` otherwise.
"""
def _check_constraints_bounds(bounds):
"""
Check the bounds for numeric constraints.
@ -122,12 +123,7 @@ def validate_config(config, check_with_data): @@ -122,12 +123,7 @@ def validate_config(config, check_with_data):
assert isinstance(bounds, list)
assert len(bounds) == 2
assert all(
x is None or
(
isinstance(x, (float, int)) and
x >= 0
)
for x in bounds
x is None or (isinstance(x, (float, int)) and x >= 0) for x in bounds
)
if bounds[0] is not None and bounds[1] is not None:
assert bounds[1] > bounds[0]
@ -140,25 +136,45 @@ def validate_config(config, check_with_data): @@ -140,25 +136,45 @@ def validate_config(config, check_with_data):
# pylint: disable=locally-disabled,line-too-long
assert config["passes"] in [0, 1, 2, 3]
assert config["max_entries"] is None or (isinstance(config["max_entries"], int) and config["max_entries"] > 0) # noqa: E501
assert config["max_entries"] is None or (
isinstance(config["max_entries"], int) and config["max_entries"] > 0
) # noqa: E501
assert config["data_directory"] is None or isinstance(config["data_directory"], str) # noqa: E501
assert config["data_directory"] is None or isinstance(
config["data_directory"], str
) # noqa: E501
assert os.path.isdir(config["data_directory"])
assert isinstance(config["search_index"], str)
assert config["modules_path"] is None or isinstance(config["modules_path"], str) # noqa: E501
assert config["modules_path"] is None or isinstance(
config["modules_path"], str
) # noqa: E501
assert config["database"] is None or isinstance(config["database"], str) # noqa: E501
assert config["database"] is None or isinstance(
config["database"], str
) # noqa: E501
assert isinstance(config["port"], int)
assert isinstance(config["host"], str)
assert config["webserver"] is None or isinstance(config["webserver"], str) # noqa: E501
assert config["backends"] is None or isinstance(config["backends"], list) # noqa: E501
assert config["webserver"] is None or isinstance(
config["webserver"], str
) # noqa: E501
assert config["backends"] is None or isinstance(
config["backends"], list
) # noqa: E501
assert isinstance(config["send_email"], bool)
assert config["smtp_server"] is None or isinstance(config["smtp_server"], str) # noqa: E501
assert config["smtp_port"] is None or isinstance(config["smtp_port"], int) # noqa: E501
assert config["smtp_username"] is None or isinstance(config["smtp_username"], str) # noqa: E501
assert config["smtp_password"] is None or isinstance(config["smtp_password"], str) # noqa: E501
assert config["smtp_server"] is None or isinstance(
config["smtp_server"], str
) # noqa: E501
assert config["smtp_port"] is None or isinstance(
config["smtp_port"], int
) # noqa: E501
assert config["smtp_username"] is None or isinstance(
config["smtp_username"], str
) # noqa: E501
assert config["smtp_password"] is None or isinstance(
config["smtp_password"], str
) # noqa: E501
assert config["smtp_to"] is None or isinstance(config["smtp_to"], list)
assert isinstance(config["store_personal_data"], bool)
@ -167,10 +183,16 @@ def validate_config(config, check_with_data): @@ -167,10 +183,16 @@ def validate_config(config, check_with_data):
assert isinstance(config["duplicate_image_hash_threshold"], int)
# API keys
assert config["navitia_api_key"] is None or isinstance(config["navitia_api_key"], str) # noqa: E501
assert config["mapbox_api_key"] is None or isinstance(config["mapbox_api_key"], str) # noqa: E501
assert config["navitia_api_key"] is None or isinstance(
config["navitia_api_key"], str
) # noqa: E501
assert config["mapbox_api_key"] is None or isinstance(
config["mapbox_api_key"], str
) # noqa: E501
assert config["ignore_station"] is None or isinstance(config["ignore_station"], bool) # noqa: E501
assert config["ignore_station"] is None or isinstance(
config["ignore_station"], bool
) # noqa: E501
# Ensure constraints are ok
assert config["constraints"]
@ -191,8 +213,7 @@ def validate_config(config, check_with_data): @@ -191,8 +213,7 @@ def validate_config(config, check_with_data):
assert isinstance(term, str)
assert "description_should_not_contain" in constraint
assert isinstance(constraint["description_should_not_contain"],
list)
assert isinstance(constraint["description_should_not_contain"], list)
if constraint["description_should_not_contain"]:
for term in constraint["description_should_not_contain"]:
assert isinstance(term, str)
@ -269,20 +290,19 @@ def load_config(args=None, check_with_data=True): @@ -269,20 +290,19 @@ def load_config(args=None, check_with_data=True):
LOGGER.error(
"Unable to load configuration from file, "
"using default configuration: %s.",
exc
exc,
)
# Overload config with arguments
if args and getattr(args, "passes", None) is not None:
LOGGER.debug(
"Overloading number of passes from CLI arguments: %d.",
args.passes
"Overloading number of passes from CLI arguments: %d.", args.passes
)
config_data["passes"] = args.passes
if args and getattr(args, "max_entries", None) is not None:
LOGGER.debug(
"Overloading maximum number of entries from CLI arguments: %d.",
args.max_entries
args.max_entries,
)
config_data["max_entries"] = args.max_entries
if args and getattr(args, "port", None) is not None:
@ -297,37 +317,37 @@ def load_config(args=None, check_with_data=True): @@ -297,37 +317,37 @@ def load_config(args=None, check_with_data=True):
LOGGER.debug("Overloading data directory from CLI arguments.")
config_data["data_directory"] = args.data_dir
elif config_data["data_directory"] is None:
config_data["data_directory"] = appdirs.user_data_dir(
"flatisfy",
"flatisfy"
config_data["data_directory"] = appdirs.user_data_dir("flatisfy", "flatisfy")
LOGGER.debug(
"Using default XDG data directory: %s.", config_data["data_directory"]
)
LOGGER.debug("Using default XDG data directory: %s.",
config_data["data_directory"])
if not os.path.isdir(config_data["data_directory"]):
LOGGER.info("Creating data directory according to config: %s",
config_data["data_directory"])
LOGGER.info(
"Creating data directory according to config: %s",
config_data["data_directory"],
)
os.makedirs(config_data["data_directory"])
os.makedirs(os.path.join(config_data["data_directory"], "images"))
if config_data["database"] is None:
config_data["database"] = "sqlite:///" + os.path.join(
config_data["data_directory"],
"flatisfy.db"
config_data["data_directory"], "flatisfy.db"
)
if config_data["search_index"] is None:
config_data["search_index"] = os.path.join(
config_data["data_directory"],
"search_index"
config_data["data_directory"], "search_index"
)
# Handle constraints filtering
if args and getattr(args, "constraints", None) is not None:
LOGGER.info(
("Filtering constraints from config according to CLI argument. "
"Using only the following constraints: %s."),
args.constraints.replace(",", ", ")
(
"Filtering constraints from config according to CLI argument. "
"Using only the following constraints: %s."
),
args.constraints.replace(",", ", "),
)
constraints_filter = args.constraints.split(",")
config_data["constraints"] = {
@ -338,8 +358,8 @@ def load_config(args=None, check_with_data=True): @@ -338,8 +358,8 @@ def load_config(args=None, check_with_data=True):
# Sanitize website url
if config_data["website_url"] is not None:
if config_data["website_url"][-1] != '/':
config_data["website_url"] += '/'
if config_data["website_url"][-1] != "/":
config_data["website_url"] += "/"
config_validation = validate_config(config_data, check_with_data)
if config_validation is True:

2
flatisfy/constants.py

@ -16,7 +16,7 @@ BACKENDS_BY_PRECEDENCE = [ @@ -16,7 +16,7 @@ BACKENDS_BY_PRECEDENCE = [
"pap",
"leboncoin",
"explorimmo",
"logicimmo"
"logicimmo",
]

11
flatisfy/data.py

@ -24,11 +24,13 @@ except ImportError: @@ -24,11 +24,13 @@ except ImportError:
try:
from functools32 import lru_cache
except ImportError:
def lru_cache(maxsize=None): # pylint: disable=unused-argument
"""
Identity implementation of ``lru_cache`` for fallback.
"""
return lambda func: func
LOGGER.warning(
"`functools.lru_cache` is not available on your system. Consider "
"installing `functools32` Python module if using Python2 for "
@ -49,8 +51,8 @@ def preprocess_data(config, force=False): @@ -49,8 +51,8 @@ def preprocess_data(config, force=False):
get_session = database.init_db(config["database"], config["search_index"])
with get_session() as session:
is_built = (
session.query(PublicTransport).count() > 0 and
session.query(PostalCode).count() > 0
session.query(PublicTransport).count() > 0
and session.query(PostalCode).count() > 0
)
if is_built and not force:
# No need to rebuild the database, skip
@ -96,10 +98,7 @@ def load_data(model, constraint, config): @@ -96,10 +98,7 @@ def load_data(model, constraint, config):
# Load data for each area
areas = list(set(areas))
for area in areas:
results.extend(
session.query(model)
.filter(model.area == area).all()
)
results.extend(session.query(model).filter(model.area == area).all())
# Expunge loaded data from the session to be able to use them
# afterwards
session.expunge_all()

120
flatisfy/data_files/__init__.py

@ -24,8 +24,8 @@ MODULE_DIR = os.path.dirname(os.path.realpath(__file__)) @@ -24,8 +24,8 @@ MODULE_DIR = os.path.dirname(os.path.realpath(__file__))
titlecase.set_small_word_list(
# Add French small words
r"l|d|un|une|et|à|a|sur|ou|le|la|de|lès|les|" +
titlecase.SMALL
r"l|d|un|une|et|à|a|sur|ou|le|la|de|lès|les|"
+ titlecase.SMALL
)
TRANSPORT_DATA_FILES = {
@ -33,7 +33,7 @@ TRANSPORT_DATA_FILES = { @@ -33,7 +33,7 @@ TRANSPORT_DATA_FILES = {
"FR-NW": "stops_fr-nw.txt",
"FR-NE": "stops_fr-ne.txt",
"FR-SW": "stops_fr-sw.txt",
"FR-SE": "stops_fr-se.txt"
"FR-SE": "stops_fr-se.txt",
}
@ -51,8 +51,20 @@ def french_postal_codes_to_quarter(postal_code): @@ -51,8 +51,20 @@ def french_postal_codes_to_quarter(postal_code):
# French departements
# Taken from Wikipedia data.
department_to_subdivision = {
"FR-ARA": ["01", "03", "07", "15", "26", "38", "42", "43", "63", "69",
"73", "74"],
"FR-ARA": [
"01",
"03",
"07",
"15",
"26",
"38",
"42",
"43",
"63",
"69",
"73",
"74",
],
"FR-BFC": ["21", "25", "39", "58", "70", "71", "89", "90"],
"FR-BRE": ["22", "29", "35", "44", "56"],
"FR-CVL": ["18", "28", "36", "37", "41", "45"],
@ -61,19 +73,44 @@ def french_postal_codes_to_quarter(postal_code): @@ -61,19 +73,44 @@ def french_postal_codes_to_quarter(postal_code):
"FR-HDF": ["02", "59", "60", "62", "80"],
"FR-IDF": ["75", "77", "78", "91", "92", "93", "94", "95"],
"FR-NOR": ["14", "27", "50", "61", "76"],
"FR-NAQ": ["16", "17", "19", "23", "24", "33", "40", "47", "64", "79",
"86", "87"],
"FR-OCC": ["09", "11", "12", "30", "31", "32", "34", "46", "48", "65",
"66", "81", "82"],
"FR-NAQ": [
"16",
"17",
"19",
"23",
"24",
"33",
"40",
"47",
"64",
"79",
"86",
"87",
],
"FR-OCC": [
"09",
"11",
"12",
"30",
"31",
"32",
"34",
"46",
"48",
"65",
"66",
"81",
"82",
],
"FR-PDL": ["44", "49", "53", "72", "85"],
"FR-PAC": ["04", "05", "06", "13", "83", "84"]
"FR-PAC": ["04", "05", "06", "13", "83", "84"],
}
subdivision_to_quarters = {
'FR-IDF': ['FR-IDF'],
'FR-NW': ['FR-BRE', 'FR-CVL', 'FR-NOR', 'FR-PDL'],
'FR-NE': ['FR-BFC', 'FR-GES', 'FR-HDF'],
'FR-SE': ['FR-ARA', 'FR-COR', 'FR-PAC', 'FR-OCC'],
'FR-SW': ['FR-NAQ']
"FR-IDF": ["FR-IDF"],
"FR-NW": ["FR-BRE", "FR-CVL", "FR-NOR", "FR-PDL"],
"FR-NE": ["FR-BFC", "FR-GES", "FR-HDF"],
"FR-SE": ["FR-ARA", "FR-COR", "FR-PAC", "FR-OCC"],
"FR-SW": ["FR-NAQ"],
}
subdivision = next(
@ -82,7 +119,7 @@ def french_postal_codes_to_quarter(postal_code): @@ -82,7 +119,7 @@ def french_postal_codes_to_quarter(postal_code):
for i, departments in department_to_subdivision.items()
if departement in departments
),
None
None,
)
return next(
(
@ -90,7 +127,7 @@ def french_postal_codes_to_quarter(postal_code): @@ -90,7 +127,7 @@ def french_postal_codes_to_quarter(postal_code):
for i, subdivisions in subdivision_to_quarters.items()
if subdivision in subdivisions
),
None
None,
)
@ -106,9 +143,7 @@ def _preprocess_laposte(): @@ -106,9 +143,7 @@ def _preprocess_laposte():
raw_laposte_data = []
# Load opendata file
try:
with io.open(
os.path.join(MODULE_DIR, data_file), "r", encoding='utf-8'
) as fh:
with io.open(os.path.join(MODULE_DIR, data_file), "r", encoding="utf-8") as fh:
raw_laposte_data = json.load(fh)
except (IOError, ValueError):
LOGGER.error("Invalid raw LaPoste opendata file.")
@ -126,29 +161,31 @@ def _preprocess_laposte(): @@ -126,29 +161,31 @@ def _preprocess_laposte():
if area is None:
LOGGER.info(
"No matching area found for postal code %s, skipping it.",
fields["code_postal"]
fields["code_postal"],
)
continue
name = normalize_string(
titlecase.titlecase(fields["nom_de_la_commune"]),
lowercase=False
titlecase.titlecase(fields["nom_de_la_commune"]), lowercase=False
)
if (fields["code_postal"], name) in seen_postal_codes:
continue
seen_postal_codes.append((fields["code_postal"], name))
postal_codes_data.append(PostalCode(
area=area,
postal_code=fields["code_postal"],
name=name,
lat=fields["coordonnees_gps"][0],
lng=fields["coordonnees_gps"][1]
))
postal_codes_data.append(
PostalCode(
area=area,
postal_code=fields["code_postal"],
name=name,
lat=fields["coordonnees_gps"][0],
lng=fields["coordonnees_gps"][1],
)
)
except KeyError:
LOGGER.info("Missing data for postal code %s, skipping it.",
fields["code_postal"])
LOGGER.info(
"Missing data for postal code %s, skipping it.", fields["code_postal"]
)
return postal_codes_data
@ -164,17 +201,15 @@ def _preprocess_public_transport(): @@ -164,17 +201,15 @@ def _preprocess_public_transport():
for area, data_file in TRANSPORT_DATA_FILES.items():
LOGGER.info("Building from public transport data %s.", data_file)
try:
with io.open(os.path.join(MODULE_DIR, data_file), "r",
encoding='utf-8') as fh:
with io.open(
os.path.join(MODULE_DIR, data_file), "r", encoding="utf-8"
) as fh:
filereader = csv.reader(fh)
next(filereader, None) # Skip first row (headers)
for row in filereader:
public_transport_data.append(PublicTransport(
name=row[2],
area=area,
lat=row[3],
lng=row[4]
))
public_transport_data.append(
PublicTransport(name=row[2], area=area, lat=row[3], lng=row[4])
)
except (IOError, IndexError):
LOGGER.error("Invalid raw opendata file: %s.", data_file)
return []
@ -183,7 +218,4 @@ def _preprocess_public_transport(): @@ -183,7 +218,4 @@ def _preprocess_public_transport():
# List of all the available preprocessing functions. Order can be important.
PREPROCESSING_FUNCTIONS = [
_preprocess_laposte,
_preprocess_public_transport
]
PREPROCESSING_FUNCTIONS = [_preprocess_laposte, _preprocess_public_transport]

4
flatisfy/database/__init__.py

@ -47,9 +47,7 @@ def init_db(database_uri=None, search_db_uri=None): @@ -47,9 +47,7 @@ def init_db(database_uri=None, search_db_uri=None):
Session = sessionmaker(bind=engine) # pylint: disable=locally-disabled,invalid-name
if search_db_uri:
index_service = IndexService(
whoosh_base=search_db_uri
)
index_service = IndexService(whoosh_base=search_db_uri)
index_service.register_class(flatisfy.models.flat.Flat)
@contextmanager

2
flatisfy/database/types.py

@ -50,4 +50,4 @@ class StringyJSON(types.TypeDecorator): @@ -50,4 +50,4 @@ class StringyJSON(types.TypeDecorator):
# TypeEngine.with_variant says "use StringyJSON instead when
# connecting to 'sqlite'"
# pylint: disable=locally-disabled,invalid-name
MagicJSON = types.JSON().with_variant(StringyJSON, 'sqlite')
MagicJSON = types.JSON().with_variant(StringyJSON, "sqlite")

28
flatisfy/database/whooshalchemy.py

@ -30,7 +30,6 @@ from whoosh.qparser import MultifieldParser @@ -30,7 +30,6 @@ from whoosh.qparser import MultifieldParser
class IndexService(object):
def __init__(self, config=None, whoosh_base=None):
if not whoosh_base and config:
whoosh_base = config.get("WHOOSH_BASE")
@ -84,8 +83,7 @@ class IndexService(object): @@ -84,8 +83,7 @@ class IndexService(object):
primary = field.name
continue
if field.name in model_class.__searchable__:
schema[field.name] = whoosh.fields.TEXT(
analyzer=StemmingAnalyzer())
schema[field.name] = whoosh.fields.TEXT(analyzer=StemmingAnalyzer())
return Schema(**schema), primary
def before_commit(self, session):
@ -93,21 +91,24 @@ class IndexService(object): @@ -93,21 +91,24 @@ class IndexService(object):
for model in session.new:
model_class = model.__class__
if hasattr(model_class, '__searchable__'):
if hasattr(model_class, "__searchable__"):
self.to_update.setdefault(model_class.__name__, []).append(
("new", model))
("new", model)
)
for model in session.deleted:
model_class = model.__class__
if hasattr(model_class, '__searchable__'):
if hasattr(model_class, "__searchable__"):
self.to_update.setdefault(model_class.__name__, []).append(
("deleted", model))
("deleted", model)
)
for model in session.dirty:
model_class = model.__class__
if hasattr(model_class, '__searchable__'):
if hasattr(model_class, "__searchable__"):
self.to_update.setdefault(model_class.__name__, []).append(
("changed", model))
("changed", model)
)
def after_commit(self, session):
"""
@ -129,11 +130,11 @@ class IndexService(object): @@ -129,11 +130,11 @@ class IndexService(object):
# update.
writer.delete_by_term(
primary_field, text_type(getattr(model, primary_field)))
primary_field, text_type(getattr(model, primary_field))
)
if change_type in ("new", "changed"):
attrs = dict((key, getattr(model, key))
for key in searchable)
attrs = dict((key, getattr(model, key)) for key in searchable)
attrs = {
attr: text_type(getattr(model, attr))
for attr in attrs.keys()
@ -158,8 +159,7 @@ class Searcher(object): @@ -158,8 +159,7 @@ class Searcher(object):
self.parser = MultifieldParser(list(fields), index.schema)
def __call__(self, session, query, limit=None):
results = self.index.searcher().search(
self.parser.parse(query), limit=limit)
results = self.index.searcher().search(self.parser.parse(query), limit=limit)
keys = [x[self.primary] for x in results]
primary_column = getattr(self.model_class, self.primary)

58
flatisfy/email.py

@ -16,7 +16,9 @@ from email.utils import formatdate, make_msgid @@ -16,7 +16,9 @@ from email.utils import formatdate, make_msgid
LOGGER = logging.getLogger(__name__)
def send_email(server, port, subject, _from, _to, txt, html, username=None, password=None):
def send_email(
server, port, subject, _from, _to, txt, html, username=None, password=None
):
"""
Send an email
@ -36,15 +38,15 @@ def send_email(server, port, subject, _from, _to, txt, html, username=None, pass @@ -36,15 +38,15 @@ def send_email(server, port, subject, _from, _to, txt, html, username=None, pass
if username or password:
server.login(username or "", password or "")
msg = MIMEMultipart('alternative')
msg['Subject'] = subject
msg['From'] = _from
msg['To'] = ', '.join(_to)
msg['Date'] = formatdate()
msg['Message-ID'] = make_msgid()
msg = MIMEMultipart("alternative")
msg["Subject"] = subject
msg["From"] = _from
msg["To"] = ", ".join(_to)
msg["Date"] = formatdate()
msg["Message-ID"] = make_msgid()
msg.attach(MIMEText(txt, 'plain', 'utf-8'))
msg.attach(MIMEText(html, 'html', 'utf-8'))
msg.attach(MIMEText(txt, "plain", "utf-8"))
msg.attach(MIMEText(html, "html", "utf-8"))
server.sendmail(_from, _to, msg.as_string())
server.quit()
@ -61,7 +63,7 @@ def send_notification(config, flats): @@ -61,7 +63,7 @@ def send_notification(config, flats):
if not flats:
return
txt = u'Hello dear user,\n\nThe following new flats have been found:\n\n'
txt = "Hello dear user,\n\nThe following new flats have been found:\n\n"
html = """
<html>
<head></head>
@ -81,10 +83,8 @@ def send_notification(config, flats): @@ -81,10 +83,8 @@ def send_notification(config, flats):
cost = str(flat.cost)
currency = str(flat.currency)
txt += (
'- {}: {}#/flat/{} (area: {}, cost: {} {})\n'.format(
title, website_url, flat_id, area, cost, currency
)
txt += "- {}: {}#/flat/{} (area: {}, cost: {} {})\n".format(
title, website_url, flat_id, area, cost, currency
)
html += """
@ -92,26 +92,28 @@ def send_notification(config, flats): @@ -92,26 +92,28 @@ def send_notification(config, flats):
<a href="{}#/flat/{}">{}</a>
(area: {}, cost: {} {})
</li>
""".format(website_url, flat_id, title, area, cost, currency)
""".format(
website_url, flat_id, title, area, cost, currency
)
html += "</ul>"
signature = (
u"\nHope you'll find what you were looking for.\n\nBye!\nFlatisfy"
)
signature = "\nHope you'll find what you were looking for.\n\nBye!\nFlatisfy"
txt += signature
html += signature.replace('\n', '<br>')
html += signature.replace("\n", "<br>")
html += """</p>
</body>
</html>"""
send_email(config["smtp_server"],
config["smtp_port"],
"New flats found!",
config["smtp_from"],
config["smtp_to"],
txt,
html,
config.get("smtp_username"),
config.get("smtp_password"))
send_email(
config["smtp_server"],
config["smtp_port"],
"New flats found!",
config["smtp_from"],
config["smtp_to"],
txt,
html,
config.get("smtp_username"),
config.get("smtp_password"),
)

1
flatisfy/exceptions.py

@ -10,4 +10,5 @@ class DataBuildError(Exception): @@ -10,4 +10,5 @@ class DataBuildError(Exception):
"""
Error occurring on building a data file.
"""
pass

90
flatisfy/fetch.py

@ -24,8 +24,9 @@ try: @@ -24,8 +24,9 @@ try:
from weboob.core.ouiboube import WebNip
from weboob.tools.json import WeboobEncoder
except ImportError:
LOGGER.error("Weboob is not available on your system. Make sure you "
"installed it.")
LOGGER.error(
"Weboob is not available on your system. Make sure you " "installed it."
)
raise