Initial commit

parent f060324bae
commit d7012e3834
407 .ci/pylintrc Normal file
@@ -0,0 +1,407 @@
[MASTER]

# Specify a configuration file.
#rcfile=

# Python code to execute, usually for sys.path manipulation such as
# pygtk.require().
#init-hook=

# Add files or directories to the blacklist. They should be base names, not
# paths.
ignore=CVS

# Add files or directories matching the regex patterns to the blacklist. The
# regex matches against base names, not paths.
ignore-patterns=

# Pickle collected data for later comparisons.
persistent=yes

# List of plugins (as comma separated values of python module names) to load,
# usually to register additional checkers.
load-plugins=

# Use multiple processes to speed up Pylint.
jobs=1

# Allow loading of arbitrary C extensions. Extensions are imported into the
# active Python interpreter and may run arbitrary code.
unsafe-load-any-extension=no

# A comma-separated list of package or module names from where C extensions may
# be loaded. Extensions are loaded into the active Python interpreter and may
# run arbitrary code.
extension-pkg-whitelist=

# Allow optimization of some AST trees. This will activate a peephole AST
# optimizer, which will apply various small optimizations. For instance, it can
# be used to obtain the result of joining multiple strings with the addition
# operator. Joining a lot of strings can lead to a maximum recursion error in
# Pylint and this flag can prevent that. It has one side effect, the resulting
# AST will be different than the one from reality. This option is deprecated
# and it will be removed in Pylint 2.0.
optimize-ast=no


[MESSAGES CONTROL]

# Only show warnings with the listed confidence levels. Leave empty to show
# all. Valid levels: HIGH, INFERENCE, INFERENCE_FAILURE, UNDEFINED
confidence=

# Enable the message, report, category or checker with the given id(s). You can
# either give multiple identifiers separated by comma (,) or put this option
# multiple times (only on the command line, not in the configuration file where
# it should appear only once). See also the "--disable" option for examples.
#enable=

# Disable the message, report, category or checker with the given id(s). You
# can either give multiple identifiers separated by comma (,) or put this
# option multiple times (only on the command line, not in the configuration
# file where it should appear only once). You can also use "--disable=all" to
# disable everything first and then reenable specific checks. For example, if
# you want to run only the similarities checker, you can use "--disable=all
# --enable=similarities". If you want to run only the classes checker, but have
# no Warning level messages displayed, use "--disable=all --enable=classes
# --disable=W"
disable=import-star-module-level,old-octal-literal,oct-method,print-statement,unpacking-in-except,parameter-unpacking,backtick,old-raise-syntax,old-ne-operator,long-suffix,dict-view-method,dict-iter-method,metaclass-assignment,next-method-called,raising-string,indexing-exception,raw_input-builtin,long-builtin,file-builtin,execfile-builtin,coerce-builtin,cmp-builtin,buffer-builtin,basestring-builtin,apply-builtin,filter-builtin-not-iterating,using-cmp-argument,useless-suppression,range-builtin-not-iterating,suppressed-message,no-absolute-import,old-division,cmp-method,reload-builtin,zip-builtin-not-iterating,intern-builtin,unichr-builtin,reduce-builtin,standarderror-builtin,unicode-builtin,xrange-builtin,coerce-method,delslice-method,getslice-method,setslice-method,input-builtin,round-builtin,hex-method,nonzero-method,map-builtin-not-iterating


[REPORTS]

# Set the output format. Available formats are text, parseable, colorized, msvs
# (visual studio) and html. You can also give a reporter class, e.g.
# mypackage.mymodule.MyReporterClass.
output-format=text

# Put messages in a separate file for each module / package specified on the
# command line instead of printing them on stdout. Reports (if any) will be
# written in a file named "pylint_global.[txt|html]". This option is deprecated
# and it will be removed in Pylint 2.0.
files-output=no

# Tells whether to display a full report or only the messages
reports=yes

# Python expression which should return a note less than 10 (10 is the highest
# note). You have access to the variables errors, warning, refactor, convention
# and statement, which respectively contain the number of messages of each kind
# and the total number of statements analyzed. This is used by the global
# evaluation report (RP0004).
evaluation=10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10)

# Template used to display messages. This is a python new-style format string
# used to format the message information. See doc for all details
#msg-template=


[BASIC]

# Good variable names which should always be accepted, separated by a comma
good-names=i,j,k,ex,Run,_,fh

# Bad variable names which should always be refused, separated by a comma
bad-names=foo,bar,baz,toto,tutu,tata

# Colon-delimited sets of names that determine each other's naming style when
# the name regexes allow several styles.
name-group=

# Include a hint for the correct naming format with invalid-name
include-naming-hint=no

# List of decorators that produce properties, such as abc.abstractproperty. Add
# to this list to register other decorators that produce valid properties.
property-classes=abc.abstractproperty

# Regular expression matching correct function names
function-rgx=[a-z_][a-z0-9_]{2,30}$

# Naming hint for function names
function-name-hint=[a-z_][a-z0-9_]{2,30}$

# Regular expression matching correct variable names
variable-rgx=[a-z_][a-z0-9_]{2,30}$

# Naming hint for variable names
variable-name-hint=[a-z_][a-z0-9_]{2,30}$

# Regular expression matching correct constant names
const-rgx=(([A-Z_][A-Z0-9_]*)|(__.*__))$

# Naming hint for constant names
const-name-hint=(([A-Z_][A-Z0-9_]*)|(__.*__))$

# Regular expression matching correct attribute names
attr-rgx=[a-z_][a-z0-9_]{2,30}$

# Naming hint for attribute names
attr-name-hint=[a-z_][a-z0-9_]{2,30}$

# Regular expression matching correct argument names
argument-rgx=[a-z_][a-z0-9_]{2,30}$

# Naming hint for argument names
argument-name-hint=[a-z_][a-z0-9_]{2,30}$

# Regular expression matching correct class attribute names
class-attribute-rgx=([A-Za-z_][A-Za-z0-9_]{2,30}|(__.*__))$

# Naming hint for class attribute names
class-attribute-name-hint=([A-Za-z_][A-Za-z0-9_]{2,30}|(__.*__))$

# Regular expression matching correct inline iteration names
inlinevar-rgx=[A-Za-z_][A-Za-z0-9_]*$

# Naming hint for inline iteration names
inlinevar-name-hint=[A-Za-z_][A-Za-z0-9_]*$

# Regular expression matching correct class names
class-rgx=[A-Z_][a-zA-Z0-9]+$

# Naming hint for class names
class-name-hint=[A-Z_][a-zA-Z0-9]+$

# Regular expression matching correct module names
module-rgx=(([a-z_][a-z0-9_]*)|([A-Z][a-zA-Z0-9]+))$

# Naming hint for module names
module-name-hint=(([a-z_][a-z0-9_]*)|([A-Z][a-zA-Z0-9]+))$

# Regular expression matching correct method names
method-rgx=[a-z_][a-z0-9_]{2,30}$

# Naming hint for method names
method-name-hint=[a-z_][a-z0-9_]{2,30}$

# Regular expression which should only match function or class names that do
# not require a docstring.
no-docstring-rgx=^_

# Minimum line length for functions/classes that require docstrings, shorter
# ones are exempt.
docstring-min-length=-1


[ELIF]

# Maximum number of nested blocks for function / method body
max-nested-blocks=5


[FORMAT]

# Maximum number of characters on a single line.
max-line-length=100

# Regexp for a line that is allowed to be longer than the limit.
ignore-long-lines=^\s*(# )?<?https?://\S+>?$

# Allow the body of an if to be on the same line as the test if there is no
# else.
single-line-if-stmt=no

# List of optional constructs for which whitespace checking is disabled. `dict-
# separator` is used to allow tabulation in dicts, etc.: {1 : 1,\n222: 2}.
# `trailing-comma` allows a space between comma and closing bracket: (a, ).
# `empty-line` allows space-only lines.
no-space-check=trailing-comma,dict-separator

# Maximum number of lines in a module
max-module-lines=1000

# String used as indentation unit. This is usually "    " (4 spaces) or "\t" (1
# tab).
indent-string='    '

# Number of spaces of indent required inside a hanging or continued line.
indent-after-paren=4

# Expected format of line ending, e.g. empty (any line ending), LF or CRLF.
expected-line-ending-format=


[LOGGING]

# Logging modules to check that the string format arguments are in logging
# function parameter format
logging-modules=logging


[MISCELLANEOUS]

# List of note tags to take into consideration, separated by a comma.
notes=FIXME,XXX,TODO


[SIMILARITIES]

# Minimum lines number of a similarity.
min-similarity-lines=4

# Ignore comments when computing similarities.
ignore-comments=yes

# Ignore docstrings when computing similarities.
ignore-docstrings=yes

# Ignore imports when computing similarities.
ignore-imports=no


[SPELLING]

# Spelling dictionary name. Available dictionaries: none. To make it work,
# install the python-enchant package.
spelling-dict=

# List of comma separated words that should not be checked.
spelling-ignore-words=

# A path to a file that contains private dictionary; one word per line.
spelling-private-dict-file=

# Tells whether to store unknown words to indicated private dictionary in
# --spelling-private-dict-file option instead of raising a message.
spelling-store-unknown-words=no


[TYPECHECK]

# Tells whether missing members accessed in mixin class should be ignored. A
# mixin class is detected if its name ends with "mixin" (case insensitive).
ignore-mixin-members=yes

# List of module names for which member attributes should not be checked
# (useful for modules/projects where namespaces are manipulated during runtime
# and thus existing member attributes cannot be deduced by static analysis).
# It supports qualified module names, as well as Unix pattern matching.
ignored-modules=

# List of class names for which member attributes should not be checked (useful
# for classes with dynamically set attributes). This supports the use of
# qualified names.
ignored-classes=optparse.Values,thread._local,_thread._local

# List of members which are set dynamically and missed by pylint inference
# system, and so shouldn't trigger E1101 when accessed. Python regular
# expressions are accepted.
generated-members=

# List of decorators that produce context managers, such as
# contextlib.contextmanager. Add to this list to register other decorators that
# produce valid context managers.
contextmanager-decorators=contextlib.contextmanager


[VARIABLES]

# Tells whether we should check for unused import in __init__ files.
init-import=no

# A regular expression matching the name of dummy variables (i.e. expectedly
# not used).
dummy-variables-rgx=(_+[a-zA-Z0-9]*?$)|dummy

# List of additional names supposed to be defined in builtins. Remember that
# you should avoid defining new builtins when possible.
additional-builtins=

# List of strings which can identify a callback function by name. A callback
# name must start or end with one of those strings.
callbacks=cb_,_cb

# List of qualified module names which can have objects that can redefine
# builtins.
redefining-builtins-modules=six.moves,future.builtins,builtins


[CLASSES]

# List of method names used to declare (i.e. assign) instance attributes.
defining-attr-methods=__init__,__new__,setUp

# List of valid names for the first argument in a class method.
valid-classmethod-first-arg=cls

# List of valid names for the first argument in a metaclass class method.
valid-metaclass-classmethod-first-arg=mcs

# List of member names, which should be excluded from the protected access
# warning.
exclude-protected=_asdict,_fields,_replace,_source,_make


[DESIGN]

# Maximum number of arguments for function / method
max-args=5

# Argument names that match this expression will be ignored. Default to name
# with leading underscore
ignored-argument-names=_.*

# Maximum number of locals for function / method body
max-locals=15

# Maximum number of return / yield for function / method body
max-returns=6

# Maximum number of branches for function / method body
max-branches=12

# Maximum number of statements in function / method body
max-statements=50

# Maximum number of parents for a class (see R0901).
max-parents=7

# Maximum number of attributes for a class (see R0902).
max-attributes=7

# Minimum number of public methods for a class (see R0903).
min-public-methods=2

# Maximum number of public methods for a class (see R0904).
max-public-methods=20

# Maximum number of boolean expressions in an if statement
max-bool-expr=5


[IMPORTS]

# Deprecated modules which should not be used, separated by a comma
deprecated-modules=regsub,TERMIOS,Bastion,rexec

# Create a graph of every (i.e. internal and external) dependency in the
# given file (report RP0402 must not be disabled)
import-graph=

# Create a graph of external dependencies in the given file (report RP0402 must
# not be disabled)
ext-import-graph=

# Create a graph of internal dependencies in the given file (report RP0402 must
# not be disabled)
int-import-graph=

# Force import order to recognize a module as part of the standard
# compatibility libraries.
known-standard-library=

# Force import order to recognize a module as part of a third party library.
known-third-party=enchant

# Analyse import fallback blocks. This can be used to support both Python 2 and
# 3 compatible code, which means that the block might have code that exists
# only in one or another interpreter, leading to false positives when analysed.
analyse-fallback-blocks=no


[EXCEPTIONS]

# Exceptions that will emit a warning when being caught. Defaults to
# "Exception"
overgeneral-exceptions=Exception
5 .gitignore vendored
@@ -1,3 +1,6 @@
build
*.json
config.py
*.pyc
*.swp
*.swo
*.db
21 LICENSE.md Normal file
@@ -0,0 +1,21 @@
The MIT License (MIT)

Copyright (c) 2017 Phyks (Lucas Verney)

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
115 README.md Normal file
@@ -0,0 +1,115 @@
Flatisfy
========

Flatisfy is your new companion to ease your search for new housing :)

It uses [Weboob](http://weboob.org/) to fetch housing posts from most of the
websites offering them, and then runs a bunch of pipelines to filter and
deduplicate the fetched housings.

It can be used as a command-line utility, but also exposes a web API and
visualisation to browse through the results.

_Note_: It is targeted at French users (due to the currently supported
websites), and in particular at people living close to Paris, as I developed
it for my personal use, and am currently living in Paris :) Any feedback and
merge requests to better support other countries / cities are more than
welcome!

_Note_: In this repository and across the code, I am using the name "flat". I
use it as a placeholder for "housing" and consider both interchangeable. This
code is not restricted to handling flats only!

## Getting started

1. Clone the repository.
2. Install required Python modules: `pip install -r requirements.txt`.
3. Init a configuration file: `python -m flatisfy init-config > config.json`.
   Edit it according to your needs (see below).
4. Build the required data files:
   `python -m flatisfy build-data --config config.json`.
5. Use it to `fetch` (and output a filtered JSON list of flats) or `import`
   (into an SQLite database, for the web visualization) a list of flats
   matching your criteria.
6. Use `python -m flatisfy serve --config config.json` to serve the web app.
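The same pipeline can also be driven from Python rather than through the CLI.
A minimal sketch based on the modules in this commit (the `argparse.Namespace`
merely stands in for the parsed CLI arguments; a valid `config.json` is
assumed to exist):

```
import argparse

import flatisfy.config
from flatisfy import cmds, data, tools

# Load defaults + config.json, as the CLI entry point does.
args = argparse.Namespace(config="config.json")
config = flatisfy.config.load_config(args)

data.preprocess_data(config)  # Build the data files if they are missing
flats_list, ignored = cmds.fetch_and_filter(config)
flats_list = tools.sort_list_of_dicts_by(flats_list, "cost")
print(tools.pretty_json(flats_list))
```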
## Configuration
|
||||
|
||||
List of configuration options:
|
||||
|
||||
* `data_directory` is the directory in which you want data files to be stored.
|
||||
`null` is the default value and means default `XDG` location (typically
|
||||
`~/.local/share/flatisfy/`)
|
||||
* `max_entries` is the maximum number of entries to fetch **per Weboob
|
||||
backend** (that is per housing website).
|
||||
* `passes` is the number of passes to run on the data. First pass is a basic
|
||||
filtering and using only the informations from the housings list page.
|
||||
Second pass loads any possible information about the filtered flats and does
|
||||
better filtering.
|
||||
* `queries` is a list of queries defined in `flatboob` that should be fetched.
|
||||
* `database` is an SQLAlchemy URI to a database file. Defaults to `null` which
|
||||
means that it will store the database in the default location, in
|
||||
`data_directory`.
|
||||
* `navitia_api_key` is an API token for [Navitia](https://www.navitia.io/)
|
||||
which is required to compute travel times.
|
||||
|
||||
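For instance, a minimal `config.json` could look like this (all values are
illustrative; `constraints` are detailed in the next section):

```
{
    "queries": ["my_flatboob_query"],
    "passes": 2,
    "max_entries": 50,
    "data_directory": null,
    "database": null,
    "navitia_api_key": null,
    "constraints": {
        "postal_codes": ["75010", "75011"],
        "area": [30, null],
        "cost": [null, 1500],
        "rooms": [2, null],
        "bedrooms": [null, null],
        "time_to": {}
    }
}
```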
### Constraints

You can specify constraints under the `constraints` key. The available
constraints are:

* `area` (in m²), `bedrooms`, `cost` (in currency units), `rooms`: each of
  these is a tuple of `(min, max)` values, defining an interval in which the
  value should lie. A `null` bound means the interval is open on that side.
* `postal_codes` is a list of allowed postal codes. You should include any
  postal code you want, and especially the postal codes close to the precise
  location you want. You MUST provide some postal codes.
* `time_to` is a dictionary of places to compute travel times to. Typically,

```
"time_to": {
    "foobar": {
        "gps": [LAT, LNG],
        "time": [min, max]
    }
}
```

means that the housings must be between the `min` and `max` bounds (possibly
`null`) from the place identified by the GPS coordinates `LAT` and `LNG`
(latitude and longitude); `foobar` is the human-readable name we give this
place. Beware that `time` constraints are in **seconds**.

## OpenData

I am using the following datasets, available under `flatisfy/data_files`,
which cover Paris. If you want to run the script for some other location, you
might have to replace these files with matching datasets.

* [LaPoste Hexasmal](https://datanova.legroupe.laposte.fr/explore/dataset/laposte_hexasmal/?disjunctive.code_commune_insee&disjunctive.nom_de_la_commune&disjunctive.code_postal&disjunctive.libell_d_acheminement&disjunctive.ligne_5) for the list of cities and postal codes in France.
* [RATP stations](https://data.ratp.fr/explore/dataset/positions-geographiques-des-stations-du-reseau-ratp/table/?disjunctive.stop_name&disjunctive.code_postal&disjunctive.departement) for the list of subway stations with their positions in Paris and nearby areas.

Both datasets are licensed under the Open Data Commons Open Database License
(ODbL): https://opendatacommons.org/licenses/odbl/.

## License

The content of this repository is licensed under an MIT license, unless
explicitly mentioned otherwise.

## Thanks

* [Weboob](http://weboob.org/)
* The OpenData providers listed above!
* Navitia for their really cool public transportation API.
* Lots of Python modules, required for this script (see `requirements.txt`).
* [Kresus](https://framagit.org/bnjbvr/kresus), which gave me part of the
  original idea (and at least proved to me that scraping-based software of
  this kind can reach a high level of quality :)
130 flat.py
@@ -1,130 +0,0 @@
#!/usr/bin/env python3
# coding: utf-8
import json
import os
import subprocess
import sys

from fuzzywuzzy import process as fuzzyprocess

import config


def pretty_json(json_str):
    return json.dumps(json_str, indent=4, separators=(',', ': '),
                      sort_keys=True)


def preprocess_data():
    if not os.path.isdir("build"):
        os.mkdir("build")

    if not os.path.isfile("build/ratp.json"):
        ratp_data = []
        with open("data/ratp.json", "r") as fh:
            ratp_data = json.load(fh)
        ratp_data = sorted(
            list(set(
                x["fields"]["stop_name"].lower() for x in ratp_data
            ))
        )
        with open("build/ratp.json", "w") as fh:
            fh.write(pretty_json(ratp_data))


def fetch_flats_list():
    flats_list = []
    for query in config.QUERIES:
        flatboob_output = subprocess.check_output(
            ["flatboob", "-n", "0", "-f", "json", "load", query]
        )
        flats_list.extend(json.loads(flatboob_output))
    return flats_list


def remove_duplicates(flats_list):
    unique_flats_list = []
    ids = []
    for flat in flats_list:
        if flat["id"] in ids:
            continue
        ids.append(flat["id"])
        unique_flats_list.append(flat)
    return unique_flats_list


def sort_by(flats_list, key="cost"):
    return sorted(flats_list, key=lambda x: x[key])


def refine_params(flats_list):
    def filter_conditions(x):
        is_ok = True
        if "cost" in x:
            cost = x["cost"]
            is_ok = (
                is_ok and
                (cost < config.PARAMS["max_cost"] and
                 cost > config.PARAMS["min_cost"])
            )
        if "area" in x:
            area = x["area"]
            is_ok = (
                is_ok and
                (area < config.PARAMS["max_area"] and
                 area > config.PARAMS["min_area"])
            )
        return is_ok

    return filter(filter_conditions, flats_list)


def match_ratp(flats_list):
    ratp_stations = []
    with open("build/ratp.json", "r") as fh:
        ratp_stations = json.load(fh)

    for flat in flats_list:
        if "station" in flat and flat["station"]:
            # There is some station fetched by flatboob, try to match it
            flat["ratp_station"] = fuzzyprocess.extractOne(
                flat["station"], ratp_stations
            )
            # TODO: Cross-check station location to choose the best fit

    return flats_list


def main(dumpfile=None):
    if dumpfile is None:
        flats_list = fetch_flats_list()
    else:
        with open(dumpfile, "r") as fh:
            flats_list = json.load(fh)

    # First pass
    flats_list = remove_duplicates(flats_list)
    flats_list = sort_by(flats_list, "cost")
    flats_list = refine_params(flats_list)

    # TODO: flats_list = match_ratp(flats_list)

    # TODO: Second pass, loading additional infos for each entry

    return flats_list


if __name__ == "__main__":
    if len(sys.argv) > 1:
        dumpfile = sys.argv[1]
    else:
        dumpfile = None

    try:
        preprocess_data()
        flats_list = main(dumpfile)
        print(
            pretty_json(flats_list)
        )
    except KeyboardInterrupt:
        pass
5 flatisfy/__init__.py Normal file
@@ -0,0 +1,5 @@
# coding: utf-8
"""
``Flatisfy`` is a tool to help you find a new housing based on some criteria.
"""
__version__ = "0.1"
176 flatisfy/__main__.py Normal file
@@ -0,0 +1,176 @@
# coding: utf-8
"""
Main entry point of the Flatisfy code.
"""
from __future__ import absolute_import, print_function, unicode_literals

import argparse
import logging
import sys

import flatisfy.config
import flatisfy.exceptions
from flatisfy import cmds
from flatisfy import data
from flatisfy import tools


LOGGER = logging.getLogger("flatisfy")


def parse_args(argv=None):
    """
    Create parser and parse arguments.
    """
    parser = argparse.ArgumentParser(prog="Flatisfy",
                                     description="Find the perfect flat.")

    # Parent parser containing arguments common to any subcommand
    parent_parser = argparse.ArgumentParser(add_help=False)
    parent_parser.add_argument(
        "--data-dir",
        help="Location of Flatisfy data directory."
    )
    parent_parser.add_argument(
        "--config",
        help="Configuration file to use."
    )
    parent_parser.add_argument(
        "--passes", choices=[0, 1, 2], type=int,
        help="Number of passes to do on the filtered data."
    )
    parent_parser.add_argument(
        "--max-entries", type=int,
        help="Maximum number of entries to fetch."
    )
    parent_parser.add_argument(
        "-v", "--verbose", action="store_true",
        help="Verbose logging output."
    )
    parent_parser.add_argument(
        "-vv", action="store_true",
        help="Debug logging output."
    )

    # Subcommands
    subparsers = parser.add_subparsers(
        dest="cmd", help="Available subcommands"
    )

    # Build data subcommand
    subparsers.add_parser(
        "build-data", parents=[parent_parser],
        help="Build necessary data"
    )

    # Init config subcommand
    parser_init_config = subparsers.add_parser(
        "init-config", parents=[parent_parser],
        help="Initialize empty configuration."
    )
    parser_init_config.add_argument(
        "output", nargs="?", help="Output config file. Use '-' for stdout."
    )

    # Fetch subcommand parser
    subparsers.add_parser("fetch", parents=[parent_parser],
                          help="Fetch housing posts")

    # Filter subcommand parser
    parser_filter = subparsers.add_parser("filter", parents=[parent_parser],
                                          help=(
                                              "Filter housing posts. No "
                                              "fetching of additional infos "
                                              "is done."))
    parser_filter.add_argument(
        "input",
        help="JSON dump of the housing posts to filter."
    )

    # Import subcommand parser
    subparsers.add_parser("import", parents=[parent_parser],
                          help="Import housing posts in database.")

    # Serve subcommand parser
    parser_serve = subparsers.add_parser("serve", parents=[parent_parser],
                                         help="Serve the web app.")
    parser_serve.add_argument("--port", type=int, help="Port to bind to.")
    parser_serve.add_argument("--host", help="Host to listen on.")

    return parser.parse_args(argv)


def main():
    """
    Main module code.
    """
    # Parse arguments
    args = parse_args()

    # Set logger
    if args.vv:
        logging.basicConfig(level=logging.DEBUG)
        logging.getLogger('sqlalchemy.engine').setLevel(logging.DEBUG)
    elif args.verbose:
        logging.basicConfig(level=logging.INFO)
        # sqlalchemy INFO level is way too loud, just stick with WARNING
        logging.getLogger('sqlalchemy.engine').setLevel(logging.WARNING)
    else:
        logging.basicConfig(level=logging.WARNING)
        logging.getLogger('sqlalchemy.engine').setLevel(logging.WARNING)

    # Init-config command
    if args.cmd == "init-config":
        flatisfy.config.init_config(args.output)
        sys.exit(0)
    else:
        # Load config
        config = flatisfy.config.load_config(args)
        if config is None:
            LOGGER.error("Invalid configuration. Exiting. "
                         "Run init-config before if this is the first time "
                         "you run Flatisfy.")
            sys.exit(1)

    # Build data files
    try:
        if args.cmd == "build-data":
            data.preprocess_data(config, force=True)
            sys.exit(0)
        else:
            data.preprocess_data(config)
    except flatisfy.exceptions.DataBuildError:
        sys.exit(1)

    # Fetch command
    if args.cmd == "fetch":
        # Fetch and filter flats list
        flats_list, _ = cmds.fetch_and_filter(config)
        # Sort by cost
        flats_list = tools.sort_list_of_dicts_by(flats_list, "cost")

        print(
            tools.pretty_json(flats_list)
        )
    # Filter command
    elif args.cmd == "filter":
        # Load and filter flats list
        flats_list, _ = cmds.load_and_filter(args.input, config)
        # Sort by cost
        flats_list = tools.sort_list_of_dicts_by(flats_list, "cost")

        print(
            tools.pretty_json(flats_list)
        )
    # Import command
    elif args.cmd == "import":
        cmds.import_and_filter(config)
    # Serve command
    elif args.cmd == "serve":
        cmds.serve(config)


if __name__ == "__main__":
    try:
        main()
    except KeyboardInterrupt:
        pass
110 flatisfy/cmds.py Normal file
@@ -0,0 +1,110 @@
# coding: utf-8
"""
Main commands available for flatisfy.
"""
from __future__ import absolute_import, print_function, unicode_literals

import flatisfy.filters
from flatisfy import database
from flatisfy.models import flat as flat_model
from flatisfy import fetch
from flatisfy import tools
from flatisfy.web import app as web_app


def fetch_and_filter(config):
    """
    Fetch the available flats list. Then, filter it according to criteria.

    :param config: A config dict.
    :return: A tuple of the list of all matching flats and the list of ignored
    flats.
    """
    # TODO: Reduce load on housings listing websites
    # Fetch flats list with flatboob
    flats_list = fetch.fetch_flats_list(config)
    ignored_flats = []

    # Do a first pass with the available infos to try to remove as many
    # unwanted postings as possible
    if config["passes"] > 0:
        flats_list, ignored_flats = flatisfy.filters.first_pass(flats_list,
                                                                config)

    # Do a second pass to consolidate all the infos we found and make use of
    # additional infos
    if config["passes"] > 1:
        # Load additional infos
        for flat in flats_list:
            details = fetch.fetch_details(flat["id"])
            flat = tools.merge_dicts(flat, details)

        flats_list, extra_ignored_flats = flatisfy.filters.second_pass(
            flats_list, config
        )
        ignored_flats.extend(extra_ignored_flats)

    return flats_list, ignored_flats


def load_and_filter(housing_file, config):
    """
    Load the dumped flats list. Then, filter it according to criteria.

    :param housing_file: The JSON file to load flats from.
    :param config: A config dict.
    :return: A tuple of the list of all matching flats and the list of ignored
    flats.
    """
    # Load flats list
    flats_list = fetch.load_flats_list(housing_file)
    ignored_flats = []

    # Do a first pass with the available infos to try to remove as many
    # unwanted postings as possible
    if config["passes"] > 0:
        flats_list, ignored_flats = flatisfy.filters.first_pass(flats_list,
                                                                config)

    # Do a second pass to consolidate all the infos we found
    if config["passes"] > 1:
        flats_list, extra_ignored_flats = flatisfy.filters.second_pass(
            flats_list, config
        )
        ignored_flats.extend(extra_ignored_flats)

    return flats_list, ignored_flats


def import_and_filter(config):
    """
    Fetch the available flats list. Then, filter it according to criteria.
    Finally, store it in the database.

    :param config: A config dict.
    :return: ``None``.
    """
    # Fetch and filter flats list
    flats_list, purged_list = fetch_and_filter(config)
    # Create database connection
    get_session = database.init_db(config["database"])

    with get_session() as session:
        for flat_dict in flats_list:
            flat = flat_model.Flat.from_dict(flat_dict)
            session.merge(flat)

        for flat_dict in purged_list:
            flat = flat_model.Flat.from_dict(flat_dict)
            flat.status = flat_model.FlatStatus.purged
            session.merge(flat)


def serve(config):
    """
    Serve the web app.

    :param config: A config dict.
    :return: ``None``, long-running process.
    """
    app = web_app.get_app(config)
    # TODO: Make Bottle use logging module
    app.run(host=config["host"], port=config["port"])
208 flatisfy/config.py Normal file
@@ -0,0 +1,208 @@
# coding: utf-8
"""
This module handles the configuration management for Flatisfy.

It loads the default configuration, then overloads it with the provided config
file and then overloads it with command-line options.
"""
from __future__ import absolute_import, print_function, unicode_literals
from builtins import str

import json
import logging
import os
import sys
import traceback

import appdirs

from flatisfy import tools


# Default configuration
DEFAULT_CONFIG = {
    # Flatboob queries to fetch
    "queries": [],
    # Constraints to match
    "constraints": {
        "postal_codes": [],  # List of postal codes
        "area": (None, None),  # (min, max) in m^2
        "cost": (None, None),  # (min, max) in currency unit
        "rooms": (None, None),  # (min, max)
        "bedrooms": (None, None),  # (min, max)
        "time_to": {}  # Dict mapping names to {"gps": [lat, lng],
                       #                        "time": (min, max)}.
                       # Time is in seconds.
    },
    # Navitia API key
    "navitia_api_key": None,
    # Number of filtering passes to run
    "passes": 2,
    # Maximum number of entries to fetch
    "max_entries": None,
    # Directory in which data will be put. ``None`` is XDG default location.
    "data_directory": None,
    # SQLAlchemy URI to the database to use
    "database": None,
    # Web app port
    "port": 8080,
    # Web app host to listen on
    "host": "127.0.0.1"
}

LOGGER = logging.getLogger(__name__)


def validate_config(config):
    """
    Check that the config passed as argument is a valid configuration.

    :param config: A config dictionary to check.
    :return: ``True`` if the configuration is valid, the text of the failing
    assertion otherwise.
    """
    def _check_constraints_bounds(bounds):
        """
        Check the bounds for numeric constraints.
        """
        assert len(bounds) == 2
        assert all(
            x is None or
            (
                (isinstance(x, int) or isinstance(x, float)) and
                x >= 0
            )
            for x in bounds
        )
        if bounds[0] is not None and bounds[1] is not None:
            assert bounds[1] > bounds[0]

    try:
        # Note: The traceback fetching code only handles single-line asserts.
        # Then, we disable the line-too-long pylint check and E501 flake8
        # checks and use long lines whenever needed, in order to have the full
        # assert message in the log output.
        # pylint: disable=line-too-long
        assert "postal_codes" in config["constraints"]
        assert len(config["constraints"]["postal_codes"]) > 0

        assert "area" in config["constraints"]
        _check_constraints_bounds(config["constraints"]["area"])

        assert "cost" in config["constraints"]
        _check_constraints_bounds(config["constraints"]["cost"])

        assert "rooms" in config["constraints"]
        _check_constraints_bounds(config["constraints"]["rooms"])

        assert "bedrooms" in config["constraints"]
        _check_constraints_bounds(config["constraints"]["bedrooms"])

        assert "time_to" in config["constraints"]
        assert isinstance(config["constraints"]["time_to"], dict)
        for name, item in config["constraints"]["time_to"].items():
            assert isinstance(name, str)
            assert "gps" in item
            assert isinstance(item["gps"], list)
            assert len(item["gps"]) == 2
            assert "time" in item
            _check_constraints_bounds(item["time"])

        assert config["passes"] in [0, 1, 2]
        assert config["max_entries"] is None or (isinstance(config["max_entries"], int) and config["max_entries"] > 0)  # noqa: E501

        assert config["data_directory"] is None or isinstance(config["data_directory"], str)  # noqa: E501

        assert config["database"] is None or isinstance(config["database"], str)  # noqa: E501

        assert isinstance(config["port"], int)
        assert isinstance(config["host"], str)

        return True
    except (AssertionError, KeyError):
        _, _, exc_traceback = sys.exc_info()
        return traceback.extract_tb(exc_traceback)[-1][-1]


def load_config(args=None):
    """
    Load the configuration from file.

    :param args: An argparse args structure.
    :return: The loaded config dict.
    """
    LOGGER.info("Initializing configuration...")
    # Default configuration
    config_data = DEFAULT_CONFIG.copy()

    # Load config from specified JSON
    if args and getattr(args, "config", None):
        LOGGER.debug("Loading configuration from %s.", args.config)
        try:
            with open(args.config, "r") as fh:
                config_data.update(json.load(fh))
        except (IOError, ValueError):
            LOGGER.error(
                "Unable to load configuration from file, "
                "using default configuration."
            )

    # Overload config with arguments
    if args and getattr(args, "passes", None) is not None:
        LOGGER.debug(
            "Overloading number of passes from CLI arguments: %d.",
            args.passes
        )
        config_data["passes"] = args.passes
    if args and getattr(args, "max_entries", None) is not None:
        LOGGER.debug(
            "Overloading maximum number of entries from CLI arguments: %d.",
            args.max_entries
        )
        config_data["max_entries"] = args.max_entries
    if args and getattr(args, "port", None) is not None:
        LOGGER.debug("Overloading web app port: %d.", args.port)
        config_data["port"] = args.port
    if args and getattr(args, "host", None) is not None:
        LOGGER.debug("Overloading web app host: %s.", args.host)
        config_data["host"] = str(args.host)

    # Handle data_directory option
    if args and getattr(args, "data_dir", None) is not None:
        LOGGER.debug("Overloading data directory from CLI arguments.")
        config_data["data_directory"] = args.data_dir
    elif config_data["data_directory"] is None:
        config_data["data_directory"] = appdirs.user_data_dir(
            "flatisfy",
            "flatisfy"
        )
        LOGGER.debug("Using default XDG data directory: %s.",
                     config_data["data_directory"])

    if config_data["database"] is None:
        config_data["database"] = "sqlite:///" + os.path.join(
            config_data["data_directory"],
            "flatisfy.db"
        )

    config_validation = validate_config(config_data)
    if config_validation is True:
        LOGGER.info("Config has been fully initialized.")
        return config_data
    else:
        LOGGER.error("Error in configuration: %s.", config_validation)
        return None


def init_config(output=None):
    """
    Initialize an empty configuration file.

    :param output: File to output content to. Defaults to ``stdout``.
    """
    config_data = DEFAULT_CONFIG.copy()

    if output and output != "-":
        with open(output, "w") as fh:
            fh.write(tools.pretty_json(config_data))
    else:
        print(tools.pretty_json(config_data))
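One detail worth noting in `validate_config` above: on failure it does not
return `False` but the source line of the failing `assert`, extracted from the
traceback, which `load_config` then logs. A small illustration (the bounds are
deliberately invalid):

```
import flatisfy.config

config = dict(flatisfy.config.DEFAULT_CONFIG)
config["constraints"] = dict(config["constraints"],
                             postal_codes=["75010"],
                             cost=(1500, 800))  # max < min, invalid on purpose

result = flatisfy.config.validate_config(config)
# result is not True: it is the text of the failing assert, here
# "assert bounds[1] > bounds[0]" from _check_constraints_bounds().
print(result)
```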
163 flatisfy/data.py Normal file
@@ -0,0 +1,163 @@
# coding: utf-8
"""
This module contains all the code related to building necessary data files
from the source opendata files.
"""
from __future__ import absolute_import, print_function, unicode_literals

import collections
import json
import logging
import os

import flatisfy.exceptions


LOGGER = logging.getLogger(__name__)
MODULE_DIR = os.path.dirname(os.path.realpath(__file__))


def _preprocess_ratp(output_dir):
    """
    Build the RATP file from the RATP data.

    :param output_dir: Directory in which the output file should reside.
    :return: ``True`` on successful build, ``False`` otherwise.
    """
    ratp_data_raw = []
    # Load opendata file
    try:
        with open(os.path.join(MODULE_DIR, "data_files/ratp.json"), "r") as fh:
            ratp_data_raw = json.load(fh)
    except (IOError, ValueError):
        LOGGER.error("Invalid raw RATP opendata file.")
        return False

    # Process it
    ratp_data = collections.defaultdict(list)
    for item in ratp_data_raw:
        stop_name = item["fields"]["stop_name"].lower()
        ratp_data[stop_name].append(item["fields"]["coord"])

    # Output it
    with open(os.path.join(output_dir, "ratp.json"), "w") as fh:
        json.dump(ratp_data, fh)

    return True


def _preprocess_laposte(output_dir):
    """
    Build JSON files from the postal codes data.

    :param output_dir: Directory in which the output files should reside.
    :return: ``True`` on successful build, ``False`` otherwise.
    """
    raw_laposte_data = []
    # Load opendata file
    try:
        with open(
            os.path.join(MODULE_DIR, "data_files/laposte.json"), "r"
        ) as fh:
            raw_laposte_data = json.load(fh)
    except (IOError, ValueError):
        LOGGER.error("Invalid raw LaPoste opendata file.")
        return False

    # Build postal codes to other infos file
    postal_codes_data = {}
    for item in raw_laposte_data:
        try:
            postal_codes_data[item["fields"]["code_postal"]] = {
                "gps": item["fields"]["coordonnees_gps"],
                "nom": item["fields"]["nom_de_la_commune"].title()
            }
        except KeyError:
            LOGGER.info("Missing data for postal code %s, skipping it.",
                        item["fields"]["code_postal"])
    with open(os.path.join(output_dir, "postal_codes.json"), "w") as fh:
        json.dump(postal_codes_data, fh)

    # Build city name to postal codes and other infos file
    cities_data = {}
    for item in raw_laposte_data:
        try:
            cities_data[item["fields"]["nom_de_la_commune"].title()] = {
                "gps": item["fields"]["coordonnees_gps"],
                "postal_code": item["fields"]["code_postal"]
            }
        except KeyError:
            LOGGER.info("Missing data for city %s, skipping it.",
                        item["fields"]["nom_de_la_commune"])
    with open(os.path.join(output_dir, "cities.json"), "w") as fh:
        json.dump(cities_data, fh)

    return True


def preprocess_data(config, force=False):
    """
    Ensure that all the necessary data files have been built from the raw
    opendata files.

    :param config: A config dictionary.
    :param force: Whether to force a rebuild or not.
    """
    LOGGER.debug("Data directory is %s.", config["data_directory"])
    opendata_directory = os.path.join(config["data_directory"], "opendata")
    try:
        LOGGER.info("Ensuring the data directory exists.")
        os.makedirs(opendata_directory)
        LOGGER.debug("Created opendata directory at %s.", opendata_directory)
    except OSError:
        LOGGER.debug("Opendata directory already existed, doing nothing.")

    is_built_ratp = os.path.isfile(
        os.path.join(opendata_directory, "ratp.json")
    )
    if not is_built_ratp or force:
        LOGGER.info("Building from RATP data.")
        if not _preprocess_ratp(opendata_directory):
            raise flatisfy.exceptions.DataBuildError("Error with RATP data.")

    is_built_laposte = (
        os.path.isfile(os.path.join(opendata_directory, "cities.json")) and
        os.path.isfile(os.path.join(opendata_directory, "postal_codes.json"))
    )
    if not is_built_laposte or force:
        LOGGER.info("Building from LaPoste data.")
        if not _preprocess_laposte(opendata_directory):
            raise flatisfy.exceptions.DataBuildError(
                "Error with LaPoste data."
            )


def load_data(data_type, config):
    """
    Load a given built data file.

    :param data_type: A valid data identifier.
    :param config: A config dictionary.
    :return: The loaded data. ``None`` if the query is incorrect.
    """
    if data_type not in ["postal_codes", "cities", "ratp"]:
        LOGGER.error("Invalid request. No %s data file.", data_type)
        return None

    opendata_directory = os.path.join(config["data_directory"], "opendata")
    datafile_path = os.path.join(opendata_directory, "%s.json" % data_type)
    data = {}
    try:
        with open(datafile_path, "r") as fh:
            data = json.load(fh)
    except IOError:
        LOGGER.error("No such data file: %s.", datafile_path)
        return None
    except ValueError:
        LOGGER.error("Invalid JSON data file: %s.", datafile_path)
        return None

    if len(data) == 0:
        LOGGER.warning("Loading empty data for %s.", data_type)

    return data
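Once `build-data` has run, the derived files can be read back with
`load_data`. A short usage sketch (a loaded `config` is assumed, and the
postal code key is illustrative):

```
from flatisfy import data

# Maps each postal code to its city name and GPS coordinates.
postal_codes = data.load_data("postal_codes", config)
print(postal_codes.get("75010"))  # e.g. {"gps": [...], "nom": "Paris"}

# Maps lowercased RATP stop names to lists of GPS coordinates.
ratp_stations = data.load_data("ratp", config)
```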
1 flatisfy/data_files/laposte.json Normal file
File diff suppressed because one or more lines are too long
64 flatisfy/database/__init__.py Normal file
@@ -0,0 +1,64 @@
# coding: utf-8
"""
This module contains functions related to the database.
"""
from __future__ import absolute_import, print_function, unicode_literals

import sqlite3

from contextlib import contextmanager

from sqlalchemy import event, create_engine
from sqlalchemy.engine import Engine
from sqlalchemy.orm import sessionmaker

import flatisfy.models.flat  # noqa: F401
from flatisfy.database.base import BASE


@event.listens_for(Engine, "connect")
def set_sqlite_pragma(dbapi_connection, _):
    """
    Auto enable foreign keys for SQLite.
    """
    # Play well with other DB backends
    if isinstance(dbapi_connection, sqlite3.Connection):
        cursor = dbapi_connection.cursor()
        cursor.execute("PRAGMA foreign_keys=ON")
        cursor.close()


def init_db(database_uri=None):
    """
    Initialize the database, ensuring tables exist etc.

    :param database_uri: A URI describing an engine to use. Defaults to an
    in-memory SQLite database.
    :return: A context-manager factory yielding SQLAlchemy sessions.
    """
    if database_uri is None:
        database_uri = "sqlite:///:memory:"

    engine = create_engine(database_uri)
    BASE.metadata.create_all(engine, checkfirst=True)
    Session = sessionmaker(bind=engine)  # pylint: disable=invalid-name

    @contextmanager
    def get_session():
        """
        Provide a transactional scope around a series of operations.

        From [1].
        [1]: http://docs.sqlalchemy.org/en/latest/orm/session_basics.html#when-do-i-construct-a-session-when-do-i-commit-it-and-when-do-i-close-it.
        """
        session = Session()
        try:
            yield session
            session.commit()
        except:
            session.rollback()
            raise
        finally:
            session.close()

    return get_session
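`init_db` hands back the `get_session` context manager rather than a raw
session, so callers get commit/rollback/close handling for free. A sketch of
typical use, mirroring `import_and_filter` in `flatisfy/cmds.py` above (a
loaded `config` is assumed):

```
from flatisfy import cmds, database
from flatisfy.models import flat as flat_model

get_session = database.init_db("sqlite:///flatisfy-example.db")
flats_list, _ = cmds.fetch_and_filter(config)

with get_session() as session:
    # Committed on a clean exit, rolled back if the block raises,
    # and always closed.
    for flat_dict in flats_list:
        session.merge(flat_model.Flat.from_dict(flat_dict))
```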
10 flatisfy/database/base.py Normal file
@@ -0,0 +1,10 @@
# coding: utf-8
"""
This module contains the definition of the declarative SQLAlchemy base.
"""
from __future__ import absolute_import, print_function, unicode_literals

from sqlalchemy.ext.declarative import declarative_base


BASE = declarative_base()
48 flatisfy/database/types.py Normal file
@@ -0,0 +1,48 @@
# coding: utf-8
"""
This module implements custom types in SQLAlchemy.
"""
from __future__ import absolute_import, print_function, unicode_literals

import json

import sqlalchemy.types as types


class StringyJSON(types.TypeDecorator):
    """
    Stores and retrieves JSON as TEXT for SQLite.

    From
    https://avacariu.me/articles/2016/compiling-json-as-text-for-sqlite-with-sqlalchemy.

    .. note :: The associated field is immutable. That is, changes to the data
    (typically, changing the value of a dict field) will not trigger an update
    on the SQL side upon ``commit`` as the reference to the object will not
    have been updated. One should force the update by forcing an update of the
    reference (by performing a ``copy`` operation on the dict for instance).
    """

    impl = types.TEXT

    def process_bind_param(self, value, dialect):
        """
        Serialize the value to a JSON string before storing it.
        """
        if value is not None:
            value = json.dumps(value)
        return value

    def process_result_value(self, value, dialect):
        """
        Deserialize the stored JSON string back into a Python object.
        """
        if value is not None:
            value = json.loads(value)
        return value


# TypeEngine.with_variant says "use StringyJSON instead when
# connecting to 'sqlite'"
# pylint: disable=invalid-name
MagicJSON = types.JSON().with_variant(StringyJSON, 'sqlite')
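The immutability caveat in the note above is easy to trip over. A sketch of
the copy-and-reassign workaround it suggests (the `flat` object and its
`details` JSON column are assumed here purely for illustration):

```
# In-place mutation: the attribute still points at the same object, so
# SQLAlchemy does not detect a change and nothing is written on commit.
flat.details["station"] = "gare de lyon"

# Workaround: rebind the attribute to a fresh copy, so the change is
# detected and flushed on the next commit.
details = dict(flat.details)
details["station"] = "gare de lyon"
flat.details = details
```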
13 flatisfy/exceptions.py Normal file
@@ -0,0 +1,13 @@
# coding: utf-8
"""
This module contains all the exception definitions for the Flatisfy-specific
exceptions.
"""
from __future__ import absolute_import, print_function, unicode_literals


class DataBuildError(Exception):
    """
    Error occurring on building a data file.
    """
    pass
76
flatisfy/fetch.py
Normal file
76
flatisfy/fetch.py
Normal file
@ -0,0 +1,76 @@
# coding: utf-8
"""
This module contains all the code related to fetching and loading flats lists.
"""
from __future__ import absolute_import, print_function, unicode_literals

import json
import logging
import subprocess


LOGGER = logging.getLogger(__name__)


def fetch_flats_list(config):
    """
    Fetch the available flats using the Flatboob / Weboob config.

    :param config: A config dict.
    :return: A list of all available flats.
    """
    flats_list = []
    for query in config["queries"]:
        max_entries = config["max_entries"]
        if max_entries is None:
            max_entries = 0

        LOGGER.info("Loading flats from query %s.", query)
        flatboob_output = subprocess.check_output(
            ["../weboob/tools/local_run.sh", "../weboob/scripts/flatboob",
             "-n", str(max_entries), "-f", "json", "load", query]
        )
        query_flats_list = json.loads(flatboob_output)
        LOGGER.info("Fetched %d flats.", len(query_flats_list))
        flats_list.extend(query_flats_list)
    LOGGER.info("Fetched a total of %d flats.", len(flats_list))
    return flats_list


def fetch_details(flat_id):
    """
    Fetch the additional details for a flat using Flatboob / Weboob.

    :param flat_id: ID of the flat to fetch details for.
    :return: A flat dict with all the available data.
    """
    LOGGER.info("Loading additional details for flat %s.", flat_id)
    flatboob_output = subprocess.check_output(
        ["../weboob/tools/local_run.sh", "../weboob/scripts/flatboob",
         "-f", "json", "info", flat_id]
    )
    flat_details = json.loads(flatboob_output)
    LOGGER.info("Fetched details for flat %s.", flat_id)

    if flat_details:
        flat_details = flat_details[0]

    return flat_details


def load_flats_list(json_file):
    """
    Load a dumped flats list from a JSON file.

    :param json_file: The file to load the housings list from.
    :return: A list of all the flats in the dump file.
    """
    flats_list = []
    try:
        LOGGER.info("Loading flats list from file %s.", json_file)
        with open(json_file, "r") as fh:
            flats_list = json.load(fh)
        LOGGER.info("Found %d flats.", len(flats_list))
    except (IOError, ValueError):
        LOGGER.error("File %s is not a valid dump file.", json_file)
    return flats_list
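A hedged usage sketch for this module: the ``queries`` and ``max_entries`` keys are the ones read above, but the query name and file name are made-up examples.

# Hypothetical config; "my_paris_query" would be a Flatboob query name
# defined in the Weboob configuration.
config = {
    "queries": ["my_paris_query"],
    "max_entries": 50,
}
flats = fetch_flats_list(config)

# Alternatively, reload a previously dumped list:
flats = load_flats_list("dump.json")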
153
flatisfy/filters/__init__.py
Normal file
@@ -0,0 +1,153 @@
# coding: utf-8
"""
This module contains all the filtering functions. It exposes ``first_pass``
and ``second_pass`` functions, which are the sets of filters applied during
the first pass and the second pass.
"""
from __future__ import absolute_import, print_function, unicode_literals

import logging

from flatisfy import tools
from flatisfy.filters import duplicates
from flatisfy.filters import metadata


LOGGER = logging.getLogger(__name__)


def refine_with_housing_criteria(flats_list, config):
    """
    Filter a list of flats according to criteria.

    Housing posts websites tend to return broader results than what was
    actually asked for. Hence, we should filter the list to match the user
    criteria, and avoid exposing unwanted flats.

    :param flats_list: A list of flats dicts to filter.
    :param config: A config dict.
    :return: A tuple of flats to keep and flats to delete.
    """
    # For each flat, the associated `is_ok` value indicates whether it should
    # be kept or discarded.
    is_ok = [True for _ in flats_list]

    for i, flat in enumerate(flats_list):
        # Check postal code
        postal_code = flat["flatisfy"].get("postal_code", None)
        if (
                postal_code and
                postal_code not in config["constraints"]["postal_codes"]
        ):
            LOGGER.info("Postal code for flat %s is out of range.", flat["id"])
            is_ok[i] = False

        # Check time_to
        for place_name, time in flat["flatisfy"].get("time_to", {}).items():
            is_within_interval = tools.is_within_interval(
                time,
                *(config["constraints"]["time_to"][place_name]["time"])
            )
            if not is_within_interval:
                LOGGER.info("Flat %s is too far from place %s.",
                            flat["id"], place_name)
            is_ok[i] = is_ok[i] and is_within_interval

        # Check the other numeric fields
        for field in ["area", "cost", "rooms", "bedrooms"]:
            interval = config["constraints"][field]
            is_within_interval = tools.is_within_interval(
                flat.get(field, None),
                *interval
            )
            if not is_within_interval:
                LOGGER.info("%s for flat %s is out of range.",
                            field.capitalize(), flat["id"])
            is_ok[i] = is_ok[i] and is_within_interval

    return (
        [
            flat
            for i, flat in enumerate(flats_list)
            if is_ok[i]
        ],
        [
            flat
            for i, flat in enumerate(flats_list)
            if not is_ok[i]
        ]
    )
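For reference, a sketch of the ``config["constraints"]`` shape read by this function, inferred from the lookups above; all concrete values are made-up examples.

# Hypothetical constraints dict matching the keys accessed above.
config = {
    "constraints": {
        "postal_codes": ["75013", "75014"],
        "time_to": {
            # Each place has a GPS position and a (min, max) time interval,
            # in seconds.
            "work": {"gps": (48.8571, 2.3414), "time": (None, 30 * 60)},
        },
        "area": (20, None),    # (min, max); None disables a bound
        "cost": (None, 1500),
        "rooms": (1, None),
        "bedrooms": (None, None),
    }
}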
def first_pass(flats_list, config):
    """
    First filtering pass.

    Flatboob only fetches data from the listing of the available housings.
    Hence, we should do a first pass to filter based on the already available
    data and only request more data for the remaining housings.

    :param flats_list: A list of flats dicts to filter.
    :param config: A config dict.
    :return: A tuple of processed flats and purged flats.
    """
    LOGGER.info("Running first filtering pass.")
    # Handle duplicates based on ids
    # Just remove them (no merge) as they should be the exact same object.
    flats_list = duplicates.detect(
        flats_list, key="id", merge=False
    )
    # Also merge duplicates based on url (these may come from different
    # flatboob backends)
    # This is especially useful as some websites such as entreparticuliers
    # contain a lot of leboncoin housing posts.
    flats_list = duplicates.detect(
        flats_list, key="url", merge=True
    )

    # Add the flatisfy metadata entry
    flats_list = metadata.init(flats_list)
    # Guess the postal codes
    flats_list = metadata.guess_postal_code(flats_list, config)
    # Try to match with stations
    flats_list = metadata.guess_stations(flats_list, config)
    # Remove returned housing posts that do not match criteria
    flats_list, purged_list = refine_with_housing_criteria(flats_list, config)

    return (flats_list, purged_list)


def second_pass(flats_list, config):
    """
    Second filtering pass.

    This pass is expected to have as much information as possible on the
    available housings. Plus, it runs after the first pass, which already
    consolidated data.

    It should consolidate everything and try to extract as much data as
    possible from the fetched housings.

    :param flats_list: A list of flats dicts to filter.
    :param config: A config dict.
    :return: A tuple of processed flats and purged flats.
    """
    LOGGER.info("Running second filtering pass.")
    # Assumed to run after the first pass, so there should be no obvious
    # duplicates left and we already tried to find postal codes and nearby
    # stations.

    # Confirm postal code
    flats_list = metadata.guess_postal_code(flats_list, config)

    # TODO: Guess the address

    # Better match with stations (confirm and refine)
    flats_list = metadata.guess_stations(flats_list, config)

    # Compute travel time to specified points
    flats_list = metadata.compute_travel_times(flats_list, config)

    # Remove returned housing posts that do not match criteria
    flats_list, purged_list = refine_with_housing_criteria(flats_list, config)

    return (flats_list, purged_list)
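A sketch of how the two passes chain together with the fetching step; the detail-fetching loop in the middle is elided, and ``config`` is assumed to carry the keys shown earlier.

from flatisfy import fetch
from flatisfy import filters

flats = fetch.fetch_flats_list(config)
flats, purged = filters.first_pass(flats, config)
# ... fetch additional details for the remaining flats here ...
flats, also_purged = filters.second_pass(flats, config)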
56
flatisfy/filters/duplicates.py
Normal file
@@ -0,0 +1,56 @@
# coding: utf-8
"""
Filtering functions to detect and merge duplicates.
"""
from __future__ import absolute_import, print_function, unicode_literals

import collections

from flatisfy import tools


def detect(flats_list, key="id", merge=True):
    """
    Detect obvious duplicates within a given list of flats.

    There may be duplicates found, as some queries could overlap (especially
    since, when asking for a given place, websites tend to return housings in
    nearby locations as well). We need to handle them, by either deleting the
    duplicates (``merge=False``) or merging them together into a single flat
    object.

    :param flats_list: A list of flats dicts.
    :param key: The flat dicts key on which the duplicate detection should be
        done.
    :param merge: Whether the found duplicates should be merged or whether we
        should only keep one of them.

    :return: A deduplicated list of flat dicts.
    """
    # TODO: Keep track of found duplicates?
    # ``seen`` is a dict aggregating the flats by the deduplication key. We
    # basically make buckets of flats for every key value. Flats in the same
    # bucket should be merged together afterwards.
    seen = collections.defaultdict(list)
    for flat in flats_list:
        seen[flat.get(key, None)].append(flat)

    # Generate the unique flats list based on these buckets
    unique_flats_list = []
    for flat_key, matching_flats in seen.items():
        if flat_key is None:
            # If the key is None, it means Weboob could not load the data. In
            # this case, we consider every matching item as being independent
            # of the others, to avoid over-deduplication.
            unique_flats_list.extend(matching_flats)
        else:
            # Otherwise, check the policy
            if merge:
                # If a merge is requested, do the merge
                unique_flats_list.append(
                    tools.merge_dicts(*matching_flats)
                )
            else:
                # Otherwise, just keep any one of them
                unique_flats_list.append(matching_flats[0])
    return unique_flats_list
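A quick illustration of the two policies, on made-up minimal flat dicts:

# Two posts for the same housing, fetched by two different backends.
flats = [
    {"id": "a@backend1", "url": "http://example.com/1", "cost": 1200},
    {"id": "a@backend2", "url": "http://example.com/1", "area": 30},
]
detect(flats, key="id", merge=False)  # Both kept: the ids differ.
detect(flats, key="url", merge=True)  # Merged into a single flat dict.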
349
flatisfy/filters/metadata.py
Normal file
@@ -0,0 +1,349 @@
# coding: utf-8
"""
Filtering functions to handle flatisfy-specific metadata.

This includes functions to guess metadata (postal codes, stations) from the
actual fetched data.
"""
# ``division`` is needed so that the confidence ratio in ``fuzzy_match``
# does not truncate to zero under Python 2.
from __future__ import (
    absolute_import, division, print_function, unicode_literals
)

import logging
import re

from flatisfy import data
from flatisfy import tools


LOGGER = logging.getLogger(__name__)


def init(flats_list):
    """
    Create a ``flatisfy`` key containing a dict of metadata fetched by
    flatisfy for each flat in the list.

    :param flats_list: A list of flats dicts.
    :return: The updated list.
    """
    for flat in flats_list:
        if "flatisfy" not in flat:
            flat["flatisfy"] = {}
    return flats_list


def fuzzy_match(query, choices, limit=3, threshold=75):
    """
    Custom search for the best elements in ``choices`` matching the query.

    :param query: The string to match.
    :param choices: The list of strings to match with.
    :param limit: The maximum number of items to return.
    :param threshold: The score threshold to use.

    :return: Tuples of matching items and associated confidence.

    .. note :: This function works by removing any fancy character from the
        ``query`` and ``choices`` strings (replacing any non-alphabetic and
        non-numeric characters by space), converting to lower case and
        normalizing them (collapsing multiple spaces, etc.). It then looks
        for the longest string in ``choices`` which is a substring of
        ``query``. The longest one gets a confidence of 100. The shorter ones
        get a confidence proportional to their length.

    .. seealso :: flatisfy.tools.normalize_string

    .. todo :: Is there a better confidence measure?

    :Example:

    >>> fuzzy_match("Paris 14ème", ["Ris", "ris", "Paris 14"], limit=1)
    [('Paris 14', 100)]

    >>> fuzzy_match( \
            "Saint-Jacques, Denfert-Rochereau (Colonel Rol-Tanguy), " \
            "Mouton-Duvernet", \
            ["saint-jacques", "denfert rochereau", "duvernet", "toto"], \
            limit=4 \
        )
    [('denfert rochereau', 100), ('saint-jacques', 76)]
    """
    # Make sure choices is indexable (callers may pass in dict keys)
    choices = list(choices)
    normalized_query = tools.normalize_string(query)
    normalized_choices = [tools.normalize_string(choice) for choice in choices]

    # Remove duplicates in the choices list
    unique_normalized_choices = tools.uniqify(normalized_choices)

    # Get the matches (normalized strings)
    # Keep only ``limit`` matches.
    matches = sorted(
        [
            (choice, len(choice))
            for choice in unique_normalized_choices
            if choice in normalized_query
        ],
        key=lambda x: x[1],
        reverse=True
    )[:limit]

    # Update confidence
    if matches:
        max_confidence = max(match[1] for match in matches)
        matches = [
            (x[0], int(x[1] / max_confidence * 100))
            for x in matches
        ]

    # Convert back matches to original strings
    # Also filter out matches below threshold
    matches = [
        (choices[normalized_choices.index(x[0])], x[1])
        for x in matches
        if x[1] >= threshold
    ]

    return matches


def guess_postal_code(flats_list, config, distance_threshold=20000):
    """
    Try to guess the postal code from the location of the flats.

    :param flats_list: A list of flats dicts.
    :param config: A config dict.
    :param distance_threshold: Maximum distance in meters between the
        constraint postal codes (from config) and the one found by this
        function, to avoid bad fuzzy matching. Can be ``None`` to disable
        thresholding.

    :return: An updated list of flats dicts with guessed postal codes.
    """
    opendata = {
        "cities": data.load_data("cities", config),
        "postal_codes": data.load_data("postal_codes", config)
    }

    for flat in flats_list:
        location = flat.get("location", None)
        if not location:
            # Skip everything if empty location
            LOGGER.info(
                (
                    "No location field for flat %s, skipping postal "
                    "code lookup."
                ),
                flat["id"]
            )
            continue

        postal_code = None
        # Try to find a postal code directly
        try:
            postal_code = re.search(r"[0-9]{5}", location)
            assert postal_code is not None
            postal_code = postal_code.group(0)

            # Check that the postal code is within the db
            assert postal_code in opendata["postal_codes"]

            LOGGER.info(
                "Found postal code in location field for flat %s: %s.",
                flat["id"], postal_code
            )
        except AssertionError:
            postal_code = None

        # If not found, try to find a city
        if not postal_code:
            matched_city = fuzzy_match(
                location,
                opendata["cities"].keys(),
                limit=1
            )
            if matched_city:
                # Store the matching postal code
                matched_city = matched_city[0]
                matched_city_name = matched_city[0]
                postal_code = (
                    opendata["cities"][matched_city_name]["postal_code"]
                )
                LOGGER.info(
                    ("Found postal code in location field through city "
                     "lookup for flat %s: %s."),
                    flat["id"], postal_code
                )

        # Check that the postal code is not too far from the ones listed in
        # the config, to limit bad fuzzy matching
        if postal_code and distance_threshold:
            distance = min(
                tools.distance(
                    opendata["postal_codes"][postal_code]["gps"],
                    opendata["postal_codes"][constraint]["gps"],
                )
                for constraint in config["constraints"]["postal_codes"]
            )

            if distance > distance_threshold:
                LOGGER.info(
                    ("Postal code %s found for flat %s is off-constraints. "
                     "Min distance is %f."),
                    postal_code, flat["id"], distance
                )
                postal_code = None

        # Store it
        if postal_code:
            existing_postal_code = flat["flatisfy"].get("postal_code", None)
            if existing_postal_code and existing_postal_code != postal_code:
                LOGGER.warning(
                    "Replacing previous postal code %s by %s for flat %s.",
                    existing_postal_code, postal_code, flat["id"]
                )
            flat["flatisfy"]["postal_code"] = postal_code
        else:
            LOGGER.info("No postal code found for flat %s.", flat["id"])

    return flats_list


def guess_stations(flats_list, config, distance_threshold=1500):
    """
    Try to match the station field with a list of available stations nearby.

    :param flats_list: A list of flats dicts.
    :param config: A config dict.
    :param distance_threshold: Maximum distance (in meters) between the
        center of the postal code and the station to consider it ok.

    :return: An updated list of flats dicts with guessed nearby stations.
    """
    opendata = {
        "postal_codes": data.load_data("postal_codes", config),
        "stations": data.load_data("ratp", config)
    }

    for flat in flats_list:
        flat_station = flat.get("station", None)
        # TODO: Use flat location field as well?

        if not flat_station:
            # Skip everything if empty station
            LOGGER.info(
                "No station field for flat %s, skipping stations lookup.",
                flat["id"]
            )
            continue

        matched_stations = fuzzy_match(
            flat_station,
            opendata["stations"].keys(),
            limit=10,
            threshold=50
        )

        # Filter out the stations that are obviously too far and badly
        # guessed
        good_matched_stations = []
        postal_code = flat["flatisfy"].get("postal_code", None)
        if postal_code:
            # If there is a postal code, check that the matched station is
            # close to it
            postal_code_gps = opendata["postal_codes"][postal_code]["gps"]
            for station in matched_stations:
                # opendata["stations"] is a dict mapping station names to
                # lists of coordinates, for efficiency. Note that multiple
                # stations with the same name can exist in a city, hence the
                # list of coordinates.
                for station_gps in opendata["stations"][station[0]]:
                    distance = tools.distance(station_gps, postal_code_gps)
                    if distance < distance_threshold:
                        # If at least one of the coordinates for a given
                        # station is close enough, that's ok and we can add
                        # the station
                        good_matched_stations.append({
                            "name": station[0],
                            "confidence": station[1],
                            "gps": station_gps
                        })
                        break
                else:
                    # No coordinate within the threshold, discard the station
                    LOGGER.debug(
                        "Station %s is too far from flat %s, discarding it.",
                        station[0], flat["id"]
                    )
        else:
            LOGGER.info(
                ("No postal code for flat %s, keeping all the matched "
                 "stations with half confidence."),
                flat["id"]
            )
            # Otherwise, we keep every matching station, but with half
            # confidence
            good_matched_stations = [
                {
                    "name": station[0],
                    "confidence": station[1] * 0.5,
                    "gps": station_gps
                }
                for station in matched_stations
                for station_gps in opendata["stations"][station[0]]
            ]

        # Store matched stations and the associated confidence
        LOGGER.info(
            "Found stations for flat %s: %s.",
            flat["id"],
            ", ".join(x["name"] for x in good_matched_stations)
        )
        # TODO: Handle update (second pass)
        flat["flatisfy"]["matched_stations"] = good_matched_stations

    return flats_list


def compute_travel_times(flats_list, config):
    """
    Compute the travel time between each flat and the points listed in the
    constraints.

    :param flats_list: A list of flats dicts.
    :param config: A config dict.

    :return: An updated list of flats dicts with computed travel times.

    .. note :: Requires a Navitia or CityMapper API key in the config.
    """
    for flat in flats_list:
        if not flat["flatisfy"].get("matched_stations", []):
            # Skip any flat without matched stations
            LOGGER.info(
                "Skipping travel time computation for flat %s. No matched "
                "stations.",
                flat["id"]
            )
            continue

        if "time_to" not in flat["flatisfy"]:
            # Ensure the time_to key is initialized
            flat["flatisfy"]["time_to"] = {}

        # For each place, loop over the stations close to the flat, and find
        # the minimum travel time.
        for place_name, place in config["constraints"]["time_to"].items():
            time_to_place = None
            for station in flat["flatisfy"]["matched_stations"]:
                time_from_station = tools.get_travel_time_between(
                    station["gps"],
                    place["gps"],
                    config
                )
                # Check for ``None`` first, so that the comparison below is
                # never done against a ``None`` value
                if time_from_station and (time_to_place is None or
                                          time_from_station < time_to_place):
                    time_to_place = time_from_station

            if time_to_place:
                LOGGER.info(
                    "Travel time between %s and flat %s is %ds.",
                    place_name, flat["id"], time_to_place
                )
                flat["flatisfy"]["time_to"][place_name] = time_to_place
    return flats_list
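To summarize this module, a sketch of the ``flatisfy`` metadata entry a flat carries once the three guessing steps have run; all values are illustrative.

# Hypothetical metadata entry after init, guess_postal_code, guess_stations
# and compute_travel_times have run on a flat.
flat["flatisfy"] = {
    "postal_code": "75014",
    "matched_stations": [
        {"name": "denfert rochereau", "confidence": 100,
         "gps": (48.8339, 2.3327)},
    ],
    "time_to": {"work": 1200},  # in seconds
}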
0
flatisfy/models/__init__.py
Normal file
101
flatisfy/models/flat.py
Normal file
@@ -0,0 +1,101 @@
# coding: utf-8
"""
This module defines an SQLAlchemy ORM model for a flat.
"""
# pylint: disable=invalid-name,too-few-public-methods
from __future__ import absolute_import, print_function, unicode_literals

import enum

from sqlalchemy import Column, DateTime, Enum, Float, String, Text

from flatisfy.database.base import BASE
from flatisfy.database.types import MagicJSON


class FlatStatus(enum.Enum):
    """
    An enum of the possible statuses for a flat entry.
    """
    purged = -10
    new = 0
    contacted = 10
    answer_no = 20
    answer_yes = 21


class Flat(BASE):
    """
    SQLAlchemy ORM model to store a flat.
    """
    __tablename__ = "flats"

    # Weboob data
    id = Column(String, primary_key=True)
    area = Column(Float)
    bedrooms = Column(Float)
    cost = Column(Float)
    currency = Column(String)
    date = Column(DateTime)
    details = Column(MagicJSON)
    location = Column(String)
    phone = Column(String)
    photos = Column(MagicJSON)
    rooms = Column(Float)
    station = Column(String)
    text = Column(Text)
    title = Column(String)
    url = Column(String)

    # Flatisfy data
    # TODO: Should be in another table with relationships
    flatisfy_stations = Column(MagicJSON)
    flatisfy_postal_code = Column(String)
    flatisfy_time_to = Column(MagicJSON)

    # Status
    status = Column(Enum(FlatStatus), default=FlatStatus.new)

    @staticmethod
    def from_dict(flat_dict):
        """
        Create a Flat object from a flat dict as manipulated by the filtering
        pass.
        """
        # Handle flatisfy metadata
        flat_dict = flat_dict.copy()
        flat_dict["flatisfy_stations"] = (
            flat_dict["flatisfy"].get("matched_stations", None)
        )
        flat_dict["flatisfy_postal_code"] = (
            flat_dict["flatisfy"].get("postal_code", None)
        )
        flat_dict["flatisfy_time_to"] = (
            flat_dict["flatisfy"].get("time_to", None)
        )
        del flat_dict["flatisfy"]

        # Handle date field
        flat_dict["date"] = None  # TODO

        flat_object = Flat()
        flat_object.__dict__.update(flat_dict)
        return flat_object

    def __repr__(self):
        return "<Flat(id=%s, url=%s)>" % (self.id, self.url)

    def json_api_repr(self):
        """
        Return a dict representation of this flat object that is JSON
        serializable.
        """
        flat_repr = {
            k: v
            for k, v in self.__dict__.items()
            if not k.startswith("_")
        }
        flat_repr["status"] = str(flat_repr["status"])

        return flat_repr
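A small sketch of the conversion, using a made-up flat dict shaped like the output of the filtering passes:

# Hypothetical flat dict, shaped like the filtering pass output.
flat_dict = {
    "id": "a@backend1",
    "url": "http://example.com/1",
    "cost": 1200.0,
    "flatisfy": {"postal_code": "75014", "time_to": {"work": 1200}},
}
flat = Flat.from_dict(flat_dict)
print(flat)  # <Flat(id=a@backend1, url=http://example.com/1)>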
239
flatisfy/tools.py
Normal file
@@ -0,0 +1,239 @@
# coding: utf-8
"""
This module contains basic utility functions, such as pretty printing of
JSON output, checking that a value is within a given interval, etc.
"""
from __future__ import (
    absolute_import, division, print_function, unicode_literals
)

import datetime
import json
import logging
import math
import re

import requests
import unidecode


LOGGER = logging.getLogger(__name__)


def pretty_json(data):
    """
    Pretty JSON output.

    :param data: The data to dump as pretty JSON.
    :return: The pretty printed JSON dump.

    :Example:

    >>> print(pretty_json({"toto": "ok", "foo": "bar"}))
    {
        "foo": "bar",
        "toto": "ok"
    }
    """
    return json.dumps(data, indent=4, separators=(',', ': '),
                      sort_keys=True)


def is_within_interval(value, min_value=None, max_value=None):
    """
    Check whether a variable is within a given interval. A value is assumed
    to always be ok with respect to a ``None`` bound. If the ``value`` is
    ``None``, it is always within the bounds.

    :param value: The value to check. Can be ``None``.
    :param min_value: The lower bound.
    :param max_value: The upper bound.
    :return: ``True`` if the value is within the given interval (in
        particular if the value or a bound is ``None``), ``False`` otherwise.

    .. note:: A value is always within a ``None`` bound.

    :Example:

    >>> is_within_interval(None)
    True
    >>> is_within_interval(None, 0, 10)
    True
    >>> is_within_interval(2, None, None)
    True
    >>> is_within_interval(2, None, 3)
    True
    >>> is_within_interval(2, 1, None)
    True
    >>> is_within_interval(2, 1, 3)
    True
    >>> is_within_interval(2, 4, 7)
    False
    >>> is_within_interval(2, 4, 1)
    False
    """
    checks = []
    # Use explicit ``is not None`` checks, so that legitimate zero values are
    # not mistaken for missing ones.
    if value is not None and min_value is not None:
        checks.append(value >= min_value)
    if value is not None and max_value is not None:
        checks.append(value <= max_value)
    return all(checks)


def normalize_string(string):
    """
    Normalize the given string for matching.

    .. todo :: Convert Roman numerals to decimal.

    :Example:

    >>> normalize_string("tétéà 14ème-XIV, foobar")
    'tetea 14eme xiv, foobar'
    """
    # ASCIIfy the string
    string = unidecode.unidecode(string)

    # Replace any non-alphanumeric character by space
    # Keep some basic punctuation to keep syntactic units
    string = re.sub(r"[^a-zA-Z0-9,;:]", " ", string)

    # Convert to lowercase
    string = string.lower()

    # Collapse multiple spaces, replace tabulations and newlines by spaces
    string = re.sub(r"\s+", " ", string)

    return string


def uniqify(some_list):
    """
    Filter out duplicates from a given list, keeping the first occurrence of
    each item.

    :Example:

    >>> uniqify([1, 2, 2, 3])
    [1, 2, 3]
    """
    # Keep the list order deterministic (a plain ``set`` would not)
    seen = set()
    unique_list = []
    for item in some_list:
        if item not in seen:
            seen.add(item)
            unique_list.append(item)
    return unique_list


def distance(gps1, gps2):
    """
    Compute the distance between two tuples of latitude and longitude.

    :param gps1: First tuple of (latitude, longitude).
    :param gps2: Second tuple of (latitude, longitude).
    :return: The distance in meters.

    :Example:

    >>> int(distance([48.86786647303717, 2.19368117495212], \
                     [48.95314107920405, 2.3368043817358464]))
    14117
    """
    lat1 = math.radians(gps1[0])
    long1 = math.radians(gps1[1])

    lat2 = math.radians(gps2[0])
    long2 = math.radians(gps2[1])

    # Haversine formula on a spherical Earth
    # pylint: disable=invalid-name
    a = (
        math.sin((lat2 - lat1) / 2.0)**2 +
        math.cos(lat1) * math.cos(lat2) * math.sin((long2 - long1) / 2.0)**2
    )
    c = 2.0 * math.atan2(math.sqrt(a), math.sqrt(1 - a))
    earth_radius = 6371000  # Mean Earth radius, in meters

    return earth_radius * c


def sort_list_of_dicts_by(flats_list, key):
    """
    Sort a list of dicts according to a given field common to all the dicts.

    :param flats_list: List of dicts to sort.
    :param key: The key of the dict items to sort on.
    :return: A sorted list.

    :Example:

    >>> sort_list_of_dicts_by([{1: 2}, {1: 1}], 1)
    [{1: 1}, {1: 2}]
    """
    return sorted(flats_list, key=lambda x: x[key])


def merge_dicts(*args):
    """
    Merge an arbitrary number of flat dicts, two at a time, into a single
    flat dict object.
    """
    if len(args) == 1:
        return args[0]
    else:
        flat1, flat2 = args[:2]
        # Start from flat1, so that fields only present in flat1 are kept too
        merged_flat = dict(flat1)
        for k, value2 in flat2.items():
            value1 = flat1.get(k, None)
            if value1 is None:
                # flat1 has an empty matching field, just keep the flat2
                # field
                merged_flat[k] = value2
            elif value2 is None:
                # flat2 field is empty, just keep the flat1 field
                merged_flat[k] = value1
            else:
                # In any other case, we should merge
                # TODO: Do the merge
                merged_flat[k] = value1
        return merge_dicts(merged_flat, *args[2:])
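A quick illustration of the merge policy (non-``None`` values from the first dict win, missing fields are filled in from the second); values are made up.

merge_dicts(
    {"id": "a@backend1", "cost": 1200, "area": None},
    {"url": "http://example.com/1", "cost": 1300, "area": 30},
)
# -> {"id": "a@backend1", "url": "http://example.com/1",
#     "cost": 1200, "area": 30}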


def get_travel_time_between(latlng_from, latlng_to, config):
    """
    Query the Navitia API to get the travel time between two points
    identified by their latitude and longitude.

    :param latlng_from: A tuple of (latitude, longitude) for the starting
        point.
    :param latlng_to: A tuple of (latitude, longitude) for the destination.
    :param config: A config dict.
    :return: The travel time in seconds. Returns ``None`` if it could not be
        fetched.

    .. note :: Uses the Navitia API. Requires a ``navitia_api_key`` field to
        be filled in in the ``config``.
    """
    NAVITIA_ENDPOINT = "https://api.navitia.io/v1/coverage/fr-idf/journeys"
    time = None

    # Check that the Navitia API key is available
    if config["navitia_api_key"]:
        # Navitia expects coordinates as "longitude;latitude"
        payload = {
            "from": "%s;%s" % (latlng_from[1], latlng_from[0]),
            "to": "%s;%s" % (latlng_to[1], latlng_to[0]),
            "datetime": datetime.datetime.now().isoformat(),
            "count": 1
        }
        try:
            # Do the query to the Navitia API
            req = requests.get(
                NAVITIA_ENDPOINT, params=payload,
                auth=(config["navitia_api_key"], "")
            )
            req.raise_for_status()
            time = req.json()["journeys"][0]["durations"]["total"]
        except (requests.exceptions.RequestException,
                ValueError, IndexError, KeyError) as e:
            # Ignore any possible exception
            LOGGER.warning(
                "An exception occurred during travel time lookup on "
                "Navitia: %s.",
                str(e)
            )
    else:
        LOGGER.warning(
            "No API key available for travel time lookup. Please provide "
            "a Navitia API key. Skipping travel time lookup."
        )
    return time
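A hedged example call; the coordinates reuse the illustrative Paris-area points from the ``distance`` doctest, and the API key is a placeholder.

config = {"navitia_api_key": "your-api-key-here"}  # placeholder key
seconds = get_travel_time_between(
    (48.8678, 2.1936),  # from: (latitude, longitude)
    (48.9531, 2.3368),  # to: (latitude, longitude)
    config
)
print(seconds)  # A duration in seconds, or None if the lookup failed.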
0
flatisfy/web/__init__.py
Normal file
53
flatisfy/web/app.py
Normal file
@@ -0,0 +1,53 @@
# coding: utf-8
"""
This module contains the definition of the Bottle web app.
"""
from __future__ import (
    absolute_import, division, print_function, unicode_literals
)

import os

import bottle

from flatisfy import database
from flatisfy.web.routes import api as api_routes
from flatisfy.web.dbplugin import DatabasePlugin


def _serve_static_file(filename):
    """
    Helper function to serve a static file.
    """
    return bottle.static_file(
        filename,
        root=os.path.join(
            os.path.dirname(os.path.realpath(__file__)),
            "static"
        )
    )


def get_app(config):
    """
    Get a Bottle app instance with all the routes set up.

    :param config: A config dict.
    :return: The built Bottle app.
    """
    get_session = database.init_db(config["database"])

    app = bottle.default_app()
    app.install(DatabasePlugin(get_session))

    # API v1 routes
    app.route("/api/v1/", "GET", api_routes.index_v1)
    app.route("/api/v1/flats", "GET", api_routes.flats_v1)
    app.route("/api/v1/flat/:id", "GET", api_routes.flat_v1)

    # Index
    app.route("/", "GET", lambda: _serve_static_file("index.html"))

    # Static files
    app.route("/static/<filename:path>", "GET", _serve_static_file)

    return app
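A sketch of serving the app; the shape of ``config["database"]`` is whatever ``database.init_db`` expects (an SQLite URI is assumed here for illustration), and host and port are arbitrary.

config = {"database": "sqlite:///flatisfy.db"}  # assumed URI format
app = get_app(config)
bottle.run(app, host="localhost", port=8080)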
58
flatisfy/web/dbplugin.py
Normal file
@@ -0,0 +1,58 @@
# coding: utf-8
"""
This module contains a Bottle plugin to pass the database argument to any
route which needs it.
"""
from __future__ import (
    absolute_import, division, print_function, unicode_literals
)

import inspect

import bottle


class DatabasePlugin(object):
    """
    Bottle plugin injecting an SQLAlchemy session into the routes that ask
    for it through a ``db`` keyword argument.
    """
    name = 'database'
    api = 2
    KEYWORD = "db"

    def __init__(self, get_session):
        """
        :param get_session: A context-manager factory yielding an SQLAlchemy
            session, used to inject a database session in the routes.
        """
        self.get_session = get_session

    def setup(self, app):
        """
        Make sure that no other installed plugin affects the same keyword
        argument.
        """
        for other in app.plugins:
            if isinstance(other, DatabasePlugin):
                raise bottle.PluginError(
                    "Found another conflicting Database plugin."
                )

    def apply(self, callback, route):
        try:
            callback_args = inspect.signature(route.callback).parameters
        except AttributeError:
            # inspect.signature does not exist on older Pythons
            callback_args = inspect.getargspec(route.callback).args

        if self.KEYWORD not in callback_args:
            # The route does not expect a db session, leave it untouched
            return callback

        def wrapper(*args, **kwargs):
            # Open a session for the duration of the request only, so that
            # it is still alive when the callback actually runs
            with self.get_session() as session:
                kwargs[self.KEYWORD] = session
                return callback(*args, **kwargs)
        return wrapper


Plugin = DatabasePlugin
0
flatisfy/web/routes/__init__.py
Normal file
47
flatisfy/web/routes/api.py
Normal file
@@ -0,0 +1,47 @@
# coding: utf-8
"""
This module contains the definition of the web app API routes.
"""
from __future__ import (
    absolute_import, division, print_function, unicode_literals
)

import bottle

from flatisfy.models import flat as flat_model


def index_v1():
    """
    API v1 index route:

        GET /api/v1/
    """
    return {
        "flats": "/api/v1/flats"
    }


def flats_v1(db):
    """
    API v1 flats route:

        GET /api/v1/flats
    """
    flats = [
        flat.json_api_repr()
        for flat in db.query(flat_model.Flat).all()
    ]
    return {
        "data": flats
    }


def flat_v1(id, db):  # pylint: disable=redefined-builtin
    """
    API v1 flat route:

        GET /api/v1/flat/:id
    """
    flat = db.query(flat_model.Flat).filter_by(id=id).first()
    if flat is None:
        # Return a 404 instead of crashing on an unknown id
        return bottle.abort(404, "No flat with id {}.".format(id))
    return {
        "data": flat.json_api_repr()
    }
30
flatisfy/web/static/index.html
Normal file
@@ -0,0 +1,30 @@
<!doctype html>
<html lang="fr">
    <head>
        <meta charset="utf-8">
        <title>Flatisfy</title>
        <script src="https://unpkg.com/vue"></script>
    </head>
    <body>
        <div id="app">
            <h1>Flatisfy</h1>
            <table>
                <thead>
                    <tr>
                        <th>Titre</th>
                        <th>Lien</th>
                    </tr>
                </thead>
                <tbody>
                </tbody>
            </table>
        </div>
        <script type="text/javascript">
            var app = new Vue({
                el: '#app',
                data: {
                }
            })
        </script>
    </body>
</html>
3
hooks/pre-commit
Executable file
@@ -0,0 +1,3 @@
#!/bin/sh

pylint --rcfile=.ci/pylintrc flatisfy
8
requirements.txt
Normal file
@@ -0,0 +1,8 @@
appdirs
bottle
bottle-sqlalchemy
enum34
future
requests
sqlalchemy
unidecode