Initial commit
This commit is contained in:
parent
f060324bae
commit
d7012e3834
407
.ci/pylintrc
Normal file
407
.ci/pylintrc
Normal file
@ -0,0 +1,407 @@
|
||||
[MASTER]
|
||||
|
||||
# Specify a configuration file.
|
||||
#rcfile=
|
||||
|
||||
# Python code to execute, usually for sys.path manipulation such as
|
||||
# pygtk.require().
|
||||
#init-hook=
|
||||
|
||||
# Add files or directories to the blacklist. They should be base names, not
|
||||
# paths.
|
||||
ignore=CVS
|
||||
|
||||
# Add files or directories matching the regex patterns to the blacklist. The
|
||||
# regex matches against base names, not paths.
|
||||
ignore-patterns=
|
||||
|
||||
# Pickle collected data for later comparisons.
|
||||
persistent=yes
|
||||
|
||||
# List of plugins (as comma separated values of python modules names) to load,
|
||||
# usually to register additional checkers.
|
||||
load-plugins=
|
||||
|
||||
# Use multiple processes to speed up Pylint.
|
||||
jobs=1
|
||||
|
||||
# Allow loading of arbitrary C extensions. Extensions are imported into the
|
||||
# active Python interpreter and may run arbitrary code.
|
||||
unsafe-load-any-extension=no
|
||||
|
||||
# A comma-separated list of package or module names from where C extensions may
|
||||
# be loaded. Extensions are loading into the active Python interpreter and may
|
||||
# run arbitrary code
|
||||
extension-pkg-whitelist=
|
||||
|
||||
# Allow optimization of some AST trees. This will activate a peephole AST
|
||||
# optimizer, which will apply various small optimizations. For instance, it can
|
||||
# be used to obtain the result of joining multiple strings with the addition
|
||||
# operator. Joining a lot of strings can lead to a maximum recursion error in
|
||||
# Pylint and this flag can prevent that. It has one side effect, the resulting
|
||||
# AST will be different than the one from reality. This option is deprecated
|
||||
# and it will be removed in Pylint 2.0.
|
||||
optimize-ast=no
|
||||
|
||||
|
||||
[MESSAGES CONTROL]
|
||||
|
||||
# Only show warnings with the listed confidence levels. Leave empty to show
|
||||
# all. Valid levels: HIGH, INFERENCE, INFERENCE_FAILURE, UNDEFINED
|
||||
confidence=
|
||||
|
||||
# Enable the message, report, category or checker with the given id(s). You can
|
||||
# either give multiple identifier separated by comma (,) or put this option
|
||||
# multiple time (only on the command line, not in the configuration file where
|
||||
# it should appear only once). See also the "--disable" option for examples.
|
||||
#enable=
|
||||
|
||||
# Disable the message, report, category or checker with the given id(s). You
|
||||
# can either give multiple identifiers separated by comma (,) or put this
|
||||
# option multiple times (only on the command line, not in the configuration
|
||||
# file where it should appear only once).You can also use "--disable=all" to
|
||||
# disable everything first and then reenable specific checks. For example, if
|
||||
# you want to run only the similarities checker, you can use "--disable=all
|
||||
# --enable=similarities". If you want to run only the classes checker, but have
|
||||
# no Warning level messages displayed, use"--disable=all --enable=classes
|
||||
# --disable=W"
|
||||
disable=import-star-module-level,old-octal-literal,oct-method,print-statement,unpacking-in-except,parameter-unpacking,backtick,old-raise-syntax,old-ne-operator,long-suffix,dict-view-method,dict-iter-method,metaclass-assignment,next-method-called,raising-string,indexing-exception,raw_input-builtin,long-builtin,file-builtin,execfile-builtin,coerce-builtin,cmp-builtin,buffer-builtin,basestring-builtin,apply-builtin,filter-builtin-not-iterating,using-cmp-argument,useless-suppression,range-builtin-not-iterating,suppressed-message,no-absolute-import,old-division,cmp-method,reload-builtin,zip-builtin-not-iterating,intern-builtin,unichr-builtin,reduce-builtin,standarderror-builtin,unicode-builtin,xrange-builtin,coerce-method,delslice-method,getslice-method,setslice-method,input-builtin,round-builtin,hex-method,nonzero-method,map-builtin-not-iterating
|
||||
|
||||
|
||||
[REPORTS]
|
||||
|
||||
# Set the output format. Available formats are text, parseable, colorized, msvs
|
||||
# (visual studio) and html. You can also give a reporter class, eg
|
||||
# mypackage.mymodule.MyReporterClass.
|
||||
output-format=text
|
||||
|
||||
# Put messages in a separate file for each module / package specified on the
|
||||
# command line instead of printing them on stdout. Reports (if any) will be
|
||||
# written in a file name "pylint_global.[txt|html]". This option is deprecated
|
||||
# and it will be removed in Pylint 2.0.
|
||||
files-output=no
|
||||
|
||||
# Tells whether to display a full report or only the messages
|
||||
reports=yes
|
||||
|
||||
# Python expression which should return a note less than 10 (10 is the highest
|
||||
# note). You have access to the variables errors warning, statement which
|
||||
# respectively contain the number of errors / warnings messages and the total
|
||||
# number of statements analyzed. This is used by the global evaluation report
|
||||
# (RP0004).
|
||||
evaluation=10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10)
|
||||
|
||||
# Template used to display messages. This is a python new-style format string
|
||||
# used to format the message information. See doc for all details
|
||||
#msg-template=
|
||||
|
||||
|
||||
[BASIC]
|
||||
|
||||
# Good variable names which should always be accepted, separated by a comma
|
||||
good-names=i,j,k,ex,Run,_,fh
|
||||
|
||||
# Bad variable names which should always be refused, separated by a comma
|
||||
bad-names=foo,bar,baz,toto,tutu,tata
|
||||
|
||||
# Colon-delimited sets of names that determine each other's naming style when
|
||||
# the name regexes allow several styles.
|
||||
name-group=
|
||||
|
||||
# Include a hint for the correct naming format with invalid-name
|
||||
include-naming-hint=no
|
||||
|
||||
# List of decorators that produce properties, such as abc.abstractproperty. Add
|
||||
# to this list to register other decorators that produce valid properties.
|
||||
property-classes=abc.abstractproperty
|
||||
|
||||
# Regular expression matching correct function names
|
||||
function-rgx=[a-z_][a-z0-9_]{2,30}$
|
||||
|
||||
# Naming hint for function names
|
||||
function-name-hint=[a-z_][a-z0-9_]{2,30}$
|
||||
|
||||
# Regular expression matching correct variable names
|
||||
variable-rgx=[a-z_][a-z0-9_]{2,30}$
|
||||
|
||||
# Naming hint for variable names
|
||||
variable-name-hint=[a-z_][a-z0-9_]{2,30}$
|
||||
|
||||
# Regular expression matching correct constant names
|
||||
const-rgx=(([A-Z_][A-Z0-9_]*)|(__.*__))$
|
||||
|
||||
# Naming hint for constant names
|
||||
const-name-hint=(([A-Z_][A-Z0-9_]*)|(__.*__))$
|
||||
|
||||
# Regular expression matching correct attribute names
|
||||
attr-rgx=[a-z_][a-z0-9_]{2,30}$
|
||||
|
||||
# Naming hint for attribute names
|
||||
attr-name-hint=[a-z_][a-z0-9_]{2,30}$
|
||||
|
||||
# Regular expression matching correct argument names
|
||||
argument-rgx=[a-z_][a-z0-9_]{2,30}$
|
||||
|
||||
# Naming hint for argument names
|
||||
argument-name-hint=[a-z_][a-z0-9_]{2,30}$
|
||||
|
||||
# Regular expression matching correct class attribute names
|
||||
class-attribute-rgx=([A-Za-z_][A-Za-z0-9_]{2,30}|(__.*__))$
|
||||
|
||||
# Naming hint for class attribute names
|
||||
class-attribute-name-hint=([A-Za-z_][A-Za-z0-9_]{2,30}|(__.*__))$
|
||||
|
||||
# Regular expression matching correct inline iteration names
|
||||
inlinevar-rgx=[A-Za-z_][A-Za-z0-9_]*$
|
||||
|
||||
# Naming hint for inline iteration names
|
||||
inlinevar-name-hint=[A-Za-z_][A-Za-z0-9_]*$
|
||||
|
||||
# Regular expression matching correct class names
|
||||
class-rgx=[A-Z_][a-zA-Z0-9]+$
|
||||
|
||||
# Naming hint for class names
|
||||
class-name-hint=[A-Z_][a-zA-Z0-9]+$
|
||||
|
||||
# Regular expression matching correct module names
|
||||
module-rgx=(([a-z_][a-z0-9_]*)|([A-Z][a-zA-Z0-9]+))$
|
||||
|
||||
# Naming hint for module names
|
||||
module-name-hint=(([a-z_][a-z0-9_]*)|([A-Z][a-zA-Z0-9]+))$
|
||||
|
||||
# Regular expression matching correct method names
|
||||
method-rgx=[a-z_][a-z0-9_]{2,30}$
|
||||
|
||||
# Naming hint for method names
|
||||
method-name-hint=[a-z_][a-z0-9_]{2,30}$
|
||||
|
||||
# Regular expression which should only match function or class names that do
|
||||
# not require a docstring.
|
||||
no-docstring-rgx=^_
|
||||
|
||||
# Minimum line length for functions/classes that require docstrings, shorter
|
||||
# ones are exempt.
|
||||
docstring-min-length=-1
|
||||
|
||||
|
||||
[ELIF]
|
||||
|
||||
# Maximum number of nested blocks for function / method body
|
||||
max-nested-blocks=5
|
||||
|
||||
|
||||
[FORMAT]
|
||||
|
||||
# Maximum number of characters on a single line.
|
||||
max-line-length=100
|
||||
|
||||
# Regexp for a line that is allowed to be longer than the limit.
|
||||
ignore-long-lines=^\s*(# )?<?https?://\S+>?$
|
||||
|
||||
# Allow the body of an if to be on the same line as the test if there is no
|
||||
# else.
|
||||
single-line-if-stmt=no
|
||||
|
||||
# List of optional constructs for which whitespace checking is disabled. `dict-
|
||||
# separator` is used to allow tabulation in dicts, etc.: {1 : 1,\n222: 2}.
|
||||
# `trailing-comma` allows a space between comma and closing bracket: (a, ).
|
||||
# `empty-line` allows space-only lines.
|
||||
no-space-check=trailing-comma,dict-separator
|
||||
|
||||
# Maximum number of lines in a module
|
||||
max-module-lines=1000
|
||||
|
||||
# String used as indentation unit. This is usually " " (4 spaces) or "\t" (1
|
||||
# tab).
|
||||
indent-string=' '
|
||||
|
||||
# Number of spaces of indent required inside a hanging or continued line.
|
||||
indent-after-paren=4
|
||||
|
||||
# Expected format of line ending, e.g. empty (any line ending), LF or CRLF.
|
||||
expected-line-ending-format=
|
||||
|
||||
|
||||
[LOGGING]
|
||||
|
||||
# Logging modules to check that the string format arguments are in logging
|
||||
# function parameter format
|
||||
logging-modules=logging
|
||||
|
||||
|
||||
[MISCELLANEOUS]
|
||||
|
||||
# List of note tags to take in consideration, separated by a comma.
|
||||
notes=FIXME,XXX,TODO
|
||||
|
||||
|
||||
[SIMILARITIES]
|
||||
|
||||
# Minimum lines number of a similarity.
|
||||
min-similarity-lines=4
|
||||
|
||||
# Ignore comments when computing similarities.
|
||||
ignore-comments=yes
|
||||
|
||||
# Ignore docstrings when computing similarities.
|
||||
ignore-docstrings=yes
|
||||
|
||||
# Ignore imports when computing similarities.
|
||||
ignore-imports=no
|
||||
|
||||
|
||||
[SPELLING]
|
||||
|
||||
# Spelling dictionary name. Available dictionaries: none. To make it work,
|
||||
# install python-enchant package.
|
||||
spelling-dict=
|
||||
|
||||
# List of comma separated words that should not be checked.
|
||||
spelling-ignore-words=
|
||||
|
||||
# A path to a file that contains private dictionary; one word per line.
|
||||
spelling-private-dict-file=
|
||||
|
||||
# Tells whether to store unknown words to indicated private dictionary in
|
||||
# --spelling-private-dict-file option instead of raising a message.
|
||||
spelling-store-unknown-words=no
|
||||
|
||||
|
||||
[TYPECHECK]
|
||||
|
||||
# Tells whether missing members accessed in mixin class should be ignored. A
|
||||
# mixin class is detected if its name ends with "mixin" (case insensitive).
|
||||
ignore-mixin-members=yes
|
||||
|
||||
# List of module names for which member attributes should not be checked
|
||||
# (useful for modules/projects where namespaces are manipulated during runtime
|
||||
# and thus existing member attributes cannot be deduced by static analysis. It
|
||||
# supports qualified module names, as well as Unix pattern matching.
|
||||
ignored-modules=
|
||||
|
||||
# List of class names for which member attributes should not be checked (useful
|
||||
# for classes with dynamically set attributes). This supports the use of
|
||||
# qualified names.
|
||||
ignored-classes=optparse.Values,thread._local,_thread._local
|
||||
|
||||
# List of members which are set dynamically and missed by pylint inference
|
||||
# system, and so shouldn't trigger E1101 when accessed. Python regular
|
||||
# expressions are accepted.
|
||||
generated-members=
|
||||
|
||||
# List of decorators that produce context managers, such as
|
||||
# contextlib.contextmanager. Add to this list to register other decorators that
|
||||
# produce valid context managers.
|
||||
contextmanager-decorators=contextlib.contextmanager
|
||||
|
||||
|
||||
[VARIABLES]
|
||||
|
||||
# Tells whether we should check for unused import in __init__ files.
|
||||
init-import=no
|
||||
|
||||
# A regular expression matching the name of dummy variables (i.e. expectedly
|
||||
# not used).
|
||||
dummy-variables-rgx=(_+[a-zA-Z0-9]*?$)|dummy
|
||||
|
||||
# List of additional names supposed to be defined in builtins. Remember that
|
||||
# you should avoid to define new builtins when possible.
|
||||
additional-builtins=
|
||||
|
||||
# List of strings which can identify a callback function by name. A callback
|
||||
# name must start or end with one of those strings.
|
||||
callbacks=cb_,_cb
|
||||
|
||||
# List of qualified module names which can have objects that can redefine
|
||||
# builtins.
|
||||
redefining-builtins-modules=six.moves,future.builtins,builtins
|
||||
|
||||
|
||||
[CLASSES]
|
||||
|
||||
# List of method names used to declare (i.e. assign) instance attributes.
|
||||
defining-attr-methods=__init__,__new__,setUp
|
||||
|
||||
# List of valid names for the first argument in a class method.
|
||||
valid-classmethod-first-arg=cls
|
||||
|
||||
# List of valid names for the first argument in a metaclass class method.
|
||||
valid-metaclass-classmethod-first-arg=mcs
|
||||
|
||||
# List of member names, which should be excluded from the protected access
|
||||
# warning.
|
||||
exclude-protected=_asdict,_fields,_replace,_source,_make
|
||||
|
||||
|
||||
[DESIGN]
|
||||
|
||||
# Maximum number of arguments for function / method
|
||||
max-args=5
|
||||
|
||||
# Argument names that match this expression will be ignored. Default to name
|
||||
# with leading underscore
|
||||
ignored-argument-names=_.*
|
||||
|
||||
# Maximum number of locals for function / method body
|
||||
max-locals=15
|
||||
|
||||
# Maximum number of return / yield for function / method body
|
||||
max-returns=6
|
||||
|
||||
# Maximum number of branch for function / method body
|
||||
max-branches=12
|
||||
|
||||
# Maximum number of statements in function / method body
|
||||
max-statements=50
|
||||
|
||||
# Maximum number of parents for a class (see R0901).
|
||||
max-parents=7
|
||||
|
||||
# Maximum number of attributes for a class (see R0902).
|
||||
max-attributes=7
|
||||
|
||||
# Minimum number of public methods for a class (see R0903).
|
||||
min-public-methods=2
|
||||
|
||||
# Maximum number of public methods for a class (see R0904).
|
||||
max-public-methods=20
|
||||
|
||||
# Maximum number of boolean expressions in a if statement
|
||||
max-bool-expr=5
|
||||
|
||||
|
||||
[IMPORTS]
|
||||
|
||||
# Deprecated modules which should not be used, separated by a comma
|
||||
deprecated-modules=regsub,TERMIOS,Bastion,rexec
|
||||
|
||||
# Create a graph of every (i.e. internal and external) dependencies in the
|
||||
# given file (report RP0402 must not be disabled)
|
||||
import-graph=
|
||||
|
||||
# Create a graph of external dependencies in the given file (report RP0402 must
|
||||
# not be disabled)
|
||||
ext-import-graph=
|
||||
|
||||
# Create a graph of internal dependencies in the given file (report RP0402 must
|
||||
# not be disabled)
|
||||
int-import-graph=
|
||||
|
||||
# Force import order to recognize a module as part of the standard
|
||||
# compatibility libraries.
|
||||
known-standard-library=
|
||||
|
||||
# Force import order to recognize a module as part of a third party library.
|
||||
known-third-party=enchant
|
||||
|
||||
# Analyse import fallback blocks. This can be used to support both Python 2 and
|
||||
# 3 compatible code, which means that the block might have code that exists
|
||||
# only in one or another interpreter, leading to false positives when analysed.
|
||||
analyse-fallback-blocks=no
|
||||
|
||||
|
||||
[EXCEPTIONS]
|
||||
|
||||
# Exceptions that will emit a warning when being caught. Defaults to
|
||||
# "Exception"
|
||||
overgeneral-exceptions=Exception
|
5
.gitignore
vendored
5
.gitignore
vendored
@ -1,3 +1,6 @@
|
||||
build
|
||||
*.json
|
||||
config.py
|
||||
*.pyc
|
||||
*.swp
|
||||
*.swo
|
||||
*.db
|
||||
|
21
LICENSE.md
Normal file
21
LICENSE.md
Normal file
@ -0,0 +1,21 @@
|
||||
The MIT License (MIT)
|
||||
|
||||
Copyright (c) 2017 Phyks (Lucas Verney)
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
115
README.md
Normal file
115
README.md
Normal file
@ -0,0 +1,115 @@
|
||||
Flatisfy
|
||||
========
|
||||
|
||||
Flatisfy is your new companion to ease your search of a new housing :)
|
||||
|
||||
|
||||
It uses [Weboob](http://weboob.org/) to get all the housing posts on most of
|
||||
the websites offering housings posts, and then offers a bunch of pipelines to
|
||||
filter and deduplicate the fetched housings.
|
||||
|
||||
|
||||
It can be used as a command-line utility, but also exposes a web API and
|
||||
visualisation, to browse through the results.
|
||||
|
||||
|
||||
_Note_: It is targeted at French users (due to the currently supported
|
||||
websites), and in particular at people living close to Paris, as I developed
|
||||
it for my personal use, and am currently living in Paris :) Any feedback and
|
||||
merge requests to better support other countries / cities are more than
|
||||
welcome!
|
||||
|
||||
_Note_: In this repository and across the code, I am using the name "flat". I
|
||||
use it as a placeholder for "housing" and consider both are interchangeable.
|
||||
This code is not restricted to handling flats only!
|
||||
|
||||
|
||||
## Getting started
|
||||
|
||||
1. Clone the repository.
|
||||
2. Install required Python modules: `pip install -r requirements.txt`.
|
||||
3. Init a configuration file: `python -m flatisfy init-config > config.json`.
|
||||
Edit it according to your needs (see below).
|
||||
4. Build the required data files:
|
||||
`python -m flatisfy build-data --config config.json`.
|
||||
5. Use it to `fetch` (and output a filtered JSON list of flats) or `import`
|
||||
(into an SQLite database, for the web visualization) a list of flats
|
||||
matching your criteria.
|
||||
6. Use `python -m flatisfy serve --config config.json` to serve the web app.
|
||||
|
||||
|
||||
## Configuration
|
||||
|
||||
List of configuration options:
|
||||
|
||||
* `data_directory` is the directory in which you want data files to be stored.
|
||||
`null` is the default value and means default `XDG` location (typically
|
||||
`~/.local/share/flatisfy/`)
|
||||
* `max_entries` is the maximum number of entries to fetch **per Weboob
|
||||
backend** (that is per housing website).
|
||||
* `passes` is the number of passes to run on the data. First pass is a basic
|
||||
filtering and using only the information from the housings list page.
|
||||
Second pass loads any possible information about the filtered flats and does
|
||||
better filtering.
|
||||
* `queries` is a list of queries defined in `flatboob` that should be fetched.
|
||||
* `database` is an SQLAlchemy URI to a database file. Defaults to `null` which
|
||||
means that it will store the database in the default location, in
|
||||
`data_directory`.
|
||||
* `navitia_api_key` is an API token for [Navitia](https://www.navitia.io/)
|
||||
which is required to compute travel times.
|
||||
|
||||
### Constraints
|
||||
|
||||
You can specify constraints, under the `constraints` key. The available
|
||||
constraints are:
|
||||
|
||||
* `area` (in m²), `bedrooms`, `cost` (in currency unit), `rooms`: this is a
|
||||
tuple of `(min, max)` values, defining an interval in which the value should
|
||||
lie. A `null` value means that any value is within this bound.
|
||||
* `postal_codes` is a list of allowed postal codes. You should include any
|
||||
postal code you want, and especially the postal codes close to the precise
|
||||
location you want. You MUST provide some postal codes.
|
||||
* `time_to` is a dictionary of places to compute travel time to them.
|
||||
Typically,
|
||||
```
|
||||
"time_to": {
|
||||
"foobar": {
|
||||
"gps": [LAT, LNG],
|
||||
"time": [min, max]
|
||||
}
|
||||
}
|
||||
```
|
||||
means that the housings must be between the `min` and `max` bounds (possibly
|
||||
`null`) from the place identified by the GPS coordinates `LAT` and `LNG`
|
||||
(latitude and longitude), and we call this place `foobar` in human-readable
|
||||
form. Beware that `time` constraints are in **seconds**.
|
||||
|
||||
|
||||
## OpenData
|
||||
|
||||
I am using the following datasets, available under `flatisfy/data_files`,
|
||||
which covers Paris. If you want to run the script using some other location,
|
||||
you might have to change these files by matching datasets.
|
||||
|
||||
* [LaPoste Hexasmal](https://datanova.legroupe.laposte.fr/explore/dataset/laposte_hexasmal/?disjunctive.code_commune_insee&disjunctive.nom_de_la_commune&disjunctive.code_postal&disjunctive.libell_d_acheminement&disjunctive.ligne_5) for the list of cities and postal codes in France.
|
||||
* [RATP stations](https://data.ratp.fr/explore/dataset/positions-geographiques-des-stations-du-reseau-ratp/table/?disjunctive.stop_name&disjunctive.code_postal&disjunctive.departement) for the list of subway stations with their positions in Paris and nearby areas.
|
||||
|
||||
Both datasets are licensed under the Open Data Commons Open Database License
|
||||
(ODbL): https://opendatacommons.org/licenses/odbl/.
|
||||
|
||||
|
||||
## License
|
||||
|
||||
The content of this repository is licensed under an MIT license, unless
|
||||
explicitly mentioned otherwise.
|
||||
|
||||
|
||||
## Thanks
|
||||
|
||||
* [Weboob](http://weboob.org/)
|
||||
* The OpenData providers listed above!
|
||||
* Navitia for their really cool public transportation API.
|
||||
* Lots of Python modules, required for this script (see `requirements.txt`).
|
||||
* [Kresus](https://framagit.org/bnjbvr/kresus) which gave me part of the
|
||||
original idea (at least proved me such software based on scraping can
|
||||
achieve a high quality level :)
|
130
flat.py
130
flat.py
@ -1,130 +0,0 @@
|
||||
# coding: utf-8
|
||||
#!/usr/bin/env python3
|
||||
import json
|
||||
import os
|
||||
import subprocess
|
||||
import sys
|
||||
|
||||
from fuzzywuzzy import process as fuzzyprocess
|
||||
|
||||
import config
|
||||
|
||||
|
||||
def pretty_json(json_str):
    """Serialize the given object to a human-readable JSON string.

    Output is indented, keys are sorted, and separators are normalized,
    which keeps the dumps stable and diff-friendly.
    """
    return json.dumps(
        json_str,
        indent=4,
        separators=(',', ': '),
        sort_keys=True,
    )
|
||||
|
||||
|
||||
def preprocess_data():
    """Build the preprocessed data files under ``build/``.

    Converts the raw RATP stations dump (``data/ratp.json``) into a
    sorted, deduplicated list of lowercase station names written to
    ``build/ratp.json``. The conversion is skipped when the output
    file already exists.
    """
    if not os.path.isdir("build"):
        os.mkdir("build")

    if os.path.isfile("build/ratp.json"):
        # Already built; nothing to do.
        return

    with open("data/ratp.json", "r") as fh:
        raw_stations = json.load(fh)
    # Deduplicate case-insensitively, then order alphabetically.
    station_names = sorted(
        {entry["fields"]["stop_name"].lower() for entry in raw_stations}
    )
    with open("build/ratp.json", "w") as fh:
        fh.write(pretty_json(station_names))
|
||||
|
||||
|
||||
def fetch_flats_list():
    """Fetch flat posts through the ``flatboob`` CLI.

    Runs every query listed in ``config.QUERIES`` with JSON output and
    no result limit, and concatenates all parsed posts into one list.
    """
    flats_list = []
    for query in config.QUERIES:
        raw_output = subprocess.check_output(
            ["flatboob", "-n", "0", "-f", "json", "load", query]
        )
        flats_list += json.loads(raw_output)
    return flats_list
|
||||
|
||||
|
||||
def remove_duplicates(flats_list):
    """Deduplicate flats on their ``id`` field, keeping first occurrences.

    :param flats_list: A list of flat dicts, each carrying an ``"id"`` key.
    :return: A new list containing only the first flat seen for each id.
    """
    unique_flats_list = []
    seen_ids = set()  # set gives O(1) membership tests
    for flat in flats_list:
        if flat["id"] in seen_ids:
            continue
        # BUG FIX: the original did ``ids.append(id)``, storing the
        # *builtin* ``id`` function instead of ``flat["id"]``, so no
        # duplicate was ever actually removed.
        seen_ids.add(flat["id"])
        unique_flats_list.append(flat)
    return unique_flats_list
|
||||
|
||||
|
||||
def sort_by(flats_list, key="cost"):
    """Return *flats_list* sorted in ascending order of *key*.

    BUG FIX: the original ignored the ``key`` parameter and always
    sorted on ``x["cost"]``; the parameter is now honored (default
    behavior is unchanged).

    :param flats_list: A list of flat dicts.
    :param key: The dict key to sort on (defaults to ``"cost"``).
    :return: A new sorted list.
    """
    return sorted(flats_list, key=lambda flat: flat[key])
|
||||
|
||||
|
||||
def refine_params(flats_list):
    """Filter out flats whose cost or area falls outside configured bounds.

    Bounds are read from ``config.PARAMS`` (``min_cost``/``max_cost``
    and ``min_area``/``max_area``, strict inequalities). A flat missing
    a field is not filtered on that field.

    :return: A lazy ``filter`` object over the matching flats.
    """
    def _within_bounds(flat):
        # Each known field must lie strictly inside its (min, max) range.
        if "cost" in flat:
            cost = flat["cost"]
            if not config.PARAMS["min_cost"] < cost < config.PARAMS["max_cost"]:
                return False
        if "area" in flat:
            area = flat["area"]
            if not config.PARAMS["min_area"] < area < config.PARAMS["max_area"]:
                return False
        return True

    return filter(_within_bounds, flats_list)
|
||||
|
||||
|
||||
def match_ratp(flats_list):
    """Annotate flats with their best-matching RATP station name.

    Loads the preprocessed station list from ``build/ratp.json`` and,
    for every flat carrying a non-empty ``station`` field, stores the
    closest fuzzy match under ``flat["ratp_station"]``.

    :return: The same (mutated) flats list.
    """
    with open("build/ratp.json", "r") as fh:
        ratp_stations = json.load(fh)

    for flat in flats_list:
        station = flat.get("station")
        if station:
            # A station was fetched by flatboob; fuzzy-match it against
            # the known RATP station names.
            flat["ratp_station"] = fuzzyprocess.extractOne(
                station, ratp_stations
            )
            # TODO: Cross-check station location to choose the best fit

    return flats_list
|
||||
|
||||
|
||||
def main(dumpfile=None):
    """Run the first filtering pass over the flats list.

    :param dumpfile: Optional path to a JSON dump of flats. When
        ``None``, the list is fetched live through ``flatboob``.
    :return: The filtered flats list.
    """
    if dumpfile is not None:
        with open(dumpfile, "r") as fh:
            flats_list = json.load(fh)
    else:
        flats_list = fetch_flats_list()

    # First pass: deduplicate, order by price, apply bound filters.
    flats_list = remove_duplicates(flats_list)
    flats_list = sort_by(flats_list, "cost")
    flats_list = refine_params(flats_list)

    # TODO: flats_list = match_ratp(flats_list)

    # TODO: Second pass, loading additional infos for each entry

    return flats_list
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Optional single CLI argument: path to a JSON dump of flats.
    dumpfile = sys.argv[1] if len(sys.argv) > 1 else None

    try:
        preprocess_data()
        flats_list = main(dumpfile)
        print(pretty_json(flats_list))
    except KeyboardInterrupt:
        # Exit quietly on Ctrl-C.
        pass
|
5
flatisfy/__init__.py
Normal file
5
flatisfy/__init__.py
Normal file
@ -0,0 +1,5 @@
|
||||
# coding: utf-8
|
||||
"""
|
||||
``Flatisfy`` is a tool to help you find a new housing based on some criteria.
|
||||
"""
|
||||
__version__ = "0.1"
|
176
flatisfy/__main__.py
Normal file
176
flatisfy/__main__.py
Normal file
@ -0,0 +1,176 @@
|
||||
# coding: utf-8
|
||||
"""
|
||||
Main entry point of the Flatisfy code.
|
||||
"""
|
||||
from __future__ import absolute_import, print_function, unicode_literals
|
||||
|
||||
import argparse
|
||||
import logging
|
||||
import sys
|
||||
|
||||
import flatisfy.config
|
||||
from flatisfy import cmds
|
||||
from flatisfy import data
|
||||
from flatisfy import tools
|
||||
|
||||
|
||||
LOGGER = logging.getLogger("flatisfy")
|
||||
|
||||
|
||||
def parse_args(argv=None):
    """
    Build the Flatisfy CLI argument parser and parse *argv*.

    :param argv: Optional argument list (defaults to ``sys.argv``).
    :return: The parsed arguments namespace.
    """
    parser = argparse.ArgumentParser(
        prog="Flatisfy", description="Find the perfect flat."
    )

    # Options shared by every subcommand.
    parent_parser = argparse.ArgumentParser(add_help=False)
    parent_parser.add_argument(
        "--data-dir", help="Location of Flatisfy data directory.")
    parent_parser.add_argument(
        "--config", help="Configuration file to use.")
    parent_parser.add_argument(
        "--passes", choices=[0, 1, 2], type=int,
        help="Number of passes to do on the filtered data.")
    parent_parser.add_argument(
        "--max-entries", type=int,
        help="Maximum number of entries to fetch.")
    parent_parser.add_argument(
        "-v", "--verbose", action="store_true",
        help="Verbose logging output.")
    parent_parser.add_argument(
        "-vv", action="store_true",
        help="Debug logging output.")

    subparsers = parser.add_subparsers(
        dest="cmd", help="Available subcommands"
    )

    # build-data: rebuild the preprocessed data files.
    subparsers.add_parser(
        "build-data", parents=[parent_parser],
        help="Build necessary data")

    # init-config: write an empty configuration file.
    parser_init_config = subparsers.add_parser(
        "init-config", parents=[parent_parser],
        help="Initialize empty configuration.")
    parser_init_config.add_argument(
        "output", nargs="?", help="Output config file. Use '-' for stdout.")

    # fetch: fetch and filter housing posts.
    subparsers.add_parser(
        "fetch", parents=[parent_parser],
        help="Fetch housings posts")

    # filter: filter a previously dumped list, without refetching.
    parser_filter = subparsers.add_parser(
        "filter", parents=[parent_parser],
        help=("Filter housings posts. No fetching of additional infos "
              "is done."))
    parser_filter.add_argument(
        "input",
        help="JSON dump of the housings post to filter.")

    # import: store housing posts in the database.
    subparsers.add_parser(
        "import", parents=[parent_parser],
        help="Import housing posts in database.")

    # serve: run the web application.
    parser_serve = subparsers.add_parser(
        "serve", parents=[parent_parser],
        help="Serve the web app.")
    parser_serve.add_argument("--port", type=int, help="Port to bind to.")
    parser_serve.add_argument("--host", help="Host to listen on.")

    return parser.parse_args(argv)
|
||||
|
||||
|
||||
def main():
    """
    Main module code: dispatch the parsed CLI subcommand.

    Configures logging verbosity, loads (or initializes) the
    configuration, ensures the data files are built, then runs the
    requested command.
    """
    # BUG FIX: this module referenced ``flatisfy.exceptions`` in the
    # ``except`` clause below without ever importing it (only
    # ``flatisfy.config`` is imported at the top of the file), which can
    # make the except clause itself raise AttributeError. Import the
    # submodule explicitly here.
    from flatisfy import exceptions

    # Parse arguments
    args = parse_args()

    # Set logger
    if args.vv:
        logging.basicConfig(level=logging.DEBUG)
        logging.getLogger('sqlalchemy.engine').setLevel(logging.DEBUG)
    elif args.verbose:
        logging.basicConfig(level=logging.INFO)
        # sqlalchemy INFO level is way too loud, just stick with WARNING
        logging.getLogger('sqlalchemy.engine').setLevel(logging.WARNING)
    else:
        logging.basicConfig(level=logging.WARNING)
        logging.getLogger('sqlalchemy.engine').setLevel(logging.WARNING)

    # Init-config command: write the config and exit before any loading.
    if args.cmd == "init-config":
        flatisfy.config.init_config(args.output)
        sys.exit(0)
    else:
        # Load config
        config = flatisfy.config.load_config(args)
        if config is None:
            LOGGER.error("Invalid configuration. Exiting. "
                         "Run init-config before if this is the first time "
                         "you run Flatisfy.")
            sys.exit(1)

    # Build data files (forced rebuild for the build-data command).
    try:
        if args.cmd == "build-data":
            data.preprocess_data(config, force=True)
            sys.exit(0)
        else:
            data.preprocess_data(config)
    except exceptions.DataBuildError:
        sys.exit(1)

    # Fetch command
    if args.cmd == "fetch":
        # Fetch and filter flats list
        flats_list, _ = cmds.fetch_and_filter(config)
        # Sort by cost
        flats_list = tools.sort_list_of_dicts_by(flats_list, "cost")

        print(
            tools.pretty_json(flats_list)
        )
    # Filter command
    elif args.cmd == "filter":
        # Load and filter flats list
        flats_list = cmds.load_and_filter(args.input, config)
        # Sort by cost
        flats_list = tools.sort_list_of_dicts_by(flats_list, "cost")

        print(
            tools.pretty_json(flats_list)
        )
    # Import command
    elif args.cmd == "import":
        cmds.import_and_filter(config)
    # Serve command
    elif args.cmd == "serve":
        cmds.serve(config)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Swallow Ctrl-C so interrupting the tool exits quietly.
    try:
        main()
    except KeyboardInterrupt:
        pass
|
110
flatisfy/cmds.py
Normal file
110
flatisfy/cmds.py
Normal file
@ -0,0 +1,110 @@
|
||||
# coding: utf-8
|
||||
"""
|
||||
Main commands available for flatisfy.
|
||||
"""
|
||||
from __future__ import absolute_import, print_function, unicode_literals
|
||||
|
||||
import flatisfy.filters
|
||||
from flatisfy import database
|
||||
from flatisfy.models import flat as flat_model
|
||||
from flatisfy import fetch
|
||||
from flatisfy import tools
|
||||
from flatisfy.web import app as web_app
|
||||
|
||||
|
||||
def fetch_and_filter(config):
    """
    Fetch the available flats list. Then, filter it according to criteria.

    :param config: A config dict.
    :return: A tuple of the list of all matching flats and the list of ignored
    flats.
    """
    # TODO: Reduce load on housings listing websites
    # Fetch flats list with flatboobs
    flats_list = fetch.fetch_flats_list(config)
    # Nothing has been filtered out yet; ensure the variable exists even when
    # zero filtering passes are requested (previously raised NameError).
    ignored_flats = []

    # Do a first pass with the available infos to try to remove as much
    # unwanted postings as possible
    if config["passes"] > 0:
        flats_list, ignored_flats = flatisfy.filters.first_pass(flats_list,
                                                                config)

    # Do a second pass to consolidate all the infos we found and make use of
    # additional infos
    if config["passes"] > 1:
        # Load additional infos
        for i, flat in enumerate(flats_list):
            details = fetch.fetch_details(flat["id"])
            # Store the merged flat back into the list: merge_dicts may return
            # a new dict, in which case rebinding the loop variable alone
            # would silently drop the fetched details.
            flats_list[i] = tools.merge_dicts(flat, details)

        flats_list, extra_ignored_flats = flatisfy.filters.second_pass(
            flats_list, config
        )
        ignored_flats.extend(extra_ignored_flats)

    return flats_list, ignored_flats
|
||||
|
||||
|
||||
def load_and_filter(housing_file, config):
    """
    Load the dumped flats list. Then, filter it according to criteria.

    :param housing_file: The JSON file to load flats from.
    :param config: A config dict.
    :return: A tuple of the list of all matching flats and the list of ignored
    flats.
    """
    # Load flats list
    flats_list = fetch.load_flats_list(housing_file)
    # Nothing has been filtered out yet; ensure the variable exists even when
    # zero filtering passes are requested (previously raised NameError).
    ignored_flats = []

    # Do a first pass with the available infos to try to remove as much
    # unwanted postings as possible
    if config["passes"] > 0:
        flats_list, ignored_flats = flatisfy.filters.first_pass(flats_list,
                                                                config)

    # Do a second pass to consolidate all the infos we found
    if config["passes"] > 1:
        flats_list, extra_ignored_flats = flatisfy.filters.second_pass(
            flats_list, config
        )
        ignored_flats.extend(extra_ignored_flats)

    return flats_list, ignored_flats
|
||||
|
||||
|
||||
def import_and_filter(config):
    """
    Fetch the available flats list. Then, filter it according to criteria.
    Finally, store it in the database.

    :param config: A config dict.
    :return: ``None``.
    """
    # Fetch and filter the flats
    kept_flats, purged_flats = fetch_and_filter(config)

    # Open a database session and persist everything within one transaction
    get_session = database.init_db(config["database"])
    with get_session() as session:
        # Flats that passed the filters are stored as-is
        for flat_dict in kept_flats:
            session.merge(flat_model.Flat.from_dict(flat_dict))

        # Filtered-out flats are kept as well, but flagged as purged
        for flat_dict in purged_flats:
            purged_flat = flat_model.Flat.from_dict(flat_dict)
            purged_flat.status = flat_model.FlatStatus.purged
            session.merge(purged_flat)
|
||||
|
||||
|
||||
def serve(config):
    """
    Serve the web app.

    :param config: A config dict.
    :return: ``None``, long-running process.
    """
    bottle_app = web_app.get_app(config)
    # TODO: Make Bottle use logging module
    bottle_app.run(host=config["host"], port=config["port"])
|
208
flatisfy/config.py
Normal file
208
flatisfy/config.py
Normal file
@ -0,0 +1,208 @@
|
||||
# coding: utf-8
|
||||
"""
|
||||
This module handles the configuration management for Flatisfy.
|
||||
|
||||
It loads the default configuration, then overloads it with the provided config
|
||||
file and then overloads it with command-line options.
|
||||
"""
|
||||
from __future__ import absolute_import, print_function, unicode_literals
|
||||
from builtins import str
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
import traceback
|
||||
|
||||
import appdirs
|
||||
|
||||
from flatisfy import tools
|
||||
|
||||
|
||||
# Default configuration, overloaded first by the user's JSON config file and
# then by CLI arguments (see load_config below).
DEFAULT_CONFIG = {
    # Flatboob queries to fetch
    "queries": [],
    # Constraints to match
    "constraints": {
        "postal_codes": [],  # List of postal codes
        "area": (None, None),  # (min, max) in m^2
        "cost": (None, None),  # (min, max) in currency unit
        "rooms": (None, None),  # (min, max)
        "bedrooms": (None, None),  # (min, max)
        "time_to": {}  # Dict mapping names to {"gps": [lat, lng],
                       #                        "time": (min, max) }
                       # Time is in seconds
    },
    # Navitia API key
    "navitia_api_key": None,
    # Number of filtering passes to run
    "passes": 2,
    # Maximum number of entries to fetch
    "max_entries": None,
    # Directory in which data will be put. ``None`` is XDG default location.
    "data_directory": None,
    # SQLAlchemy URI to the database to use
    "database": None,
    # Web app port
    "port": 8080,
    # Web app host to listen on
    "host": "127.0.0.1"
}

LOGGER = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def validate_config(config):
    """
    Check that the config passed as argument is a valid configuration.

    :param config: A config dictionary to fetch.
    :return: ``True`` if the configuration is valid; otherwise the source
        line of the failing check (extracted from the traceback), to be
        reported to the user.
    """
    def _check_constraints_bounds(bounds):
        """
        Check the bounds for numeric constraints.

        Each bound must be ``None`` or a non-negative number; when both are
        set, the upper bound must be strictly greater than the lower one.
        """
        assert len(bounds) == 2
        assert all(
            x is None or
            (
                (isinstance(x, int) or isinstance(x, float)) and
                x >= 0
            )
            for x in bounds
        )
        if bounds[0] is not None and bounds[1] is not None:
            assert bounds[1] > bounds[0]

    try:
        # Note: The traceback fetching code only handle single line asserts.
        # Then, we disable line-too-long pylint check and E501 flake8 checks
        # and use long lines whenever needed, in order to have the full assert
        # message in the log output.
        # pylint: disable=line-too-long
        assert "postal_codes" in config["constraints"]
        assert len(config["constraints"]["postal_codes"]) > 0

        assert "area" in config["constraints"]
        _check_constraints_bounds(config["constraints"]["area"])

        assert "cost" in config["constraints"]
        _check_constraints_bounds(config["constraints"]["cost"])

        assert "rooms" in config["constraints"]
        _check_constraints_bounds(config["constraints"]["rooms"])

        assert "bedrooms" in config["constraints"]
        _check_constraints_bounds(config["constraints"]["bedrooms"])

        assert "time_to" in config["constraints"]
        assert isinstance(config["constraints"]["time_to"], dict)
        for name, item in config["constraints"]["time_to"].items():
            assert isinstance(name, str)
            assert "gps" in item
            assert isinstance(item["gps"], list)
            assert len(item["gps"]) == 2
            assert "time" in item
            _check_constraints_bounds(item["time"])

        assert config["passes"] in [0, 1, 2]
        assert config["max_entries"] is None or (isinstance(config["max_entries"], int) and config["max_entries"] > 0)  # noqa: E501

        assert config["data_directory"] is None or isinstance(config["data_directory"], str)  # noqa: E501

        assert config["database"] is None or isinstance(config["database"], str)  # noqa: E501

        assert isinstance(config["port"], int)
        assert isinstance(config["host"], str)

        return True
    except (AssertionError, KeyError):
        # Return the source line of the failed assertion so the caller can
        # log a meaningful error message.
        _, _, exc_traceback = sys.exc_info()
        return traceback.extract_tb(exc_traceback)[-1][-1]
|
||||
|
||||
|
||||
def load_config(args=None):
    """
    Load the configuration from file.

    :param args: An argparse args structure.
    :return: The loaded config dict.
    """
    LOGGER.info("Initializing configuration...")
    # Start from the default configuration
    config_data = DEFAULT_CONFIG.copy()
    # NOTE(review): shallow copy — nested dicts (e.g. "constraints") are
    # shared with DEFAULT_CONFIG unless overridden by the JSON file.

    def _cli_option(name):
        """Return the CLI override for ``name``, or ``None`` when absent."""
        return getattr(args, name, None) if args else None

    # Load config from specified JSON
    if _cli_option("config"):
        LOGGER.debug("Loading configuration from %s.", args.config)
        try:
            with open(args.config, "r") as config_fh:
                config_data.update(json.load(config_fh))
        except (IOError, ValueError):
            LOGGER.error(
                "Unable to load configuration from file, "
                "using default configuration."
            )

    # Overload config with arguments
    if _cli_option("passes") is not None:
        LOGGER.debug(
            "Overloading number of passes from CLI arguments: %d.",
            args.passes
        )
        config_data["passes"] = args.passes
    if _cli_option("max_entries") is not None:
        LOGGER.debug(
            "Overloading maximum number of entries from CLI arguments: %d.",
            args.max_entries
        )
        config_data["max_entries"] = args.max_entries
    if _cli_option("port") is not None:
        LOGGER.debug("Overloading web app port: %d.", args.port)
        config_data["port"] = args.port
    if _cli_option("host") is not None:
        LOGGER.debug("Overloading web app host: %s.", args.host)
        config_data["host"] = str(args.host)

    # Handle data_directory option: CLI wins over config file, which wins
    # over the XDG default location
    if _cli_option("data_dir") is not None:
        LOGGER.debug("Overloading data directory from CLI arguments.")
        config_data["data_directory"] = args.data_dir
    elif config_data["data_directory"] is None:
        config_data["data_directory"] = appdirs.user_data_dir(
            "flatisfy",
            "flatisfy"
        )
        LOGGER.debug("Using default XDG data directory: %s.",
                     config_data["data_directory"])

    # Default to an SQLite database stored alongside the data files
    if config_data["database"] is None:
        config_data["database"] = "sqlite:///" + os.path.join(
            config_data["data_directory"],
            "flatisfy.db"
        )

    config_validation = validate_config(config_data)
    if config_validation is True:
        LOGGER.info("Config has been fully initialized.")
        return config_data
    LOGGER.error("Error in configuration: %s.", config_validation)
    return None
|
||||
|
||||
|
||||
def init_config(output=None):
    """
    Initialize an empty configuration file.

    :param output: File to output content to. Defaults to ``stdin``.
    """
    pretty_config = tools.pretty_json(DEFAULT_CONFIG.copy())

    if not output or output == "-":
        # "-" (or no output file at all) means standard output
        print(pretty_config)
    else:
        with open(output, "w") as config_fh:
            config_fh.write(pretty_config)
|
163
flatisfy/data.py
Normal file
163
flatisfy/data.py
Normal file
@ -0,0 +1,163 @@
|
||||
# coding: utf-8
|
||||
"""
|
||||
This module contains all the code related to building necessary data files from
|
||||
the source opendata files.
|
||||
"""
|
||||
from __future__ import absolute_import, print_function, unicode_literals
|
||||
|
||||
import collections
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
|
||||
import flatisfy.exceptions
|
||||
|
||||
|
||||
LOGGER = logging.getLogger(__name__)
|
||||
MODULE_DIR = os.path.dirname(os.path.realpath(__file__))
|
||||
|
||||
|
||||
def _preprocess_ratp(output_dir):
    """
    Build RATP file from the RATP data.

    :param output_dir: Directory in which the output file should reside.
    :return: ``True`` on successful build, ``False`` otherwise.
    """
    # Load the raw opendata file shipped with the module
    try:
        with open(os.path.join(MODULE_DIR, "data_files/ratp.json"), "r") as fh:
            ratp_data_raw = json.load(fh)
    except (IOError, ValueError):
        LOGGER.error("Invalid raw RATP opendata file.")
        return False

    # Group coordinates under the (lowercased) stop name
    ratp_data = collections.defaultdict(list)
    for station in ratp_data_raw:
        ratp_data[station["fields"]["stop_name"].lower()].append(
            station["fields"]["coord"]
        )

    # Dump the processed mapping next to the other built data files
    with open(os.path.join(output_dir, "ratp.json"), "w") as fh:
        json.dump(ratp_data, fh)

    return True
|
||||
|
||||
|
||||
def _preprocess_laposte(output_dir):
    """
    Build JSON files from the postal codes data.

    :param output_dir: Directory in which the output file should reside.
    :return: ``True`` on successful build, ``False`` otherwise.
    """
    raw_laposte_data = []
    # Load opendata file
    try:
        with open(
            os.path.join(MODULE_DIR, "data_files/laposte.json"), "r"
        ) as fh:
            raw_laposte_data = json.load(fh)
    except (IOError, ValueError):
        LOGGER.error("Invalid raw LaPoste opendata file.")
        return False

    # Build postal codes to other infos file
    postal_codes_data = {}
    for item in raw_laposte_data:
        fields = item["fields"]
        try:
            postal_codes_data[fields["code_postal"]] = {
                "gps": fields["coordonnees_gps"],
                "nom": fields["nom_de_la_commune"].title()
            }
        except KeyError:
            # Use .get() here: the handler itself used to re-raise KeyError
            # when the missing key was the one being logged.
            LOGGER.info("Missing data for postal code %s, skipping it.",
                        fields.get("code_postal"))
    with open(os.path.join(output_dir, "postal_codes.json"), "w") as fh:
        json.dump(postal_codes_data, fh)

    # Build city name to postal codes and other infos file
    cities_data = {}
    for item in raw_laposte_data:
        fields = item["fields"]
        try:
            cities_data[fields["nom_de_la_commune"].title()] = {
                "gps": fields["coordonnees_gps"],
                "postal_code": fields["code_postal"]
            }
        except KeyError:
            # Same as above: never let the log call raise KeyError itself.
            LOGGER.info("Missing data for city %s, skipping it.",
                        fields.get("nom_de_la_commune"))
    with open(os.path.join(output_dir, "cities.json"), "w") as fh:
        json.dump(cities_data, fh)

    return True
|
||||
|
||||
|
||||
def preprocess_data(config, force=False):
    """
    Ensures that all the necessary data files have been built from the raw
    opendata files.

    :params config: A config dictionary.
    :params force: Whether to force rebuild or not.
    :raise flatisfy.exceptions.DataBuildError: If a data file could not be
        built.
    """
    LOGGER.debug("Data directory is %s.", config["data_directory"])
    opendata_directory = os.path.join(config["data_directory"], "opendata")
    try:
        LOGGER.info("Ensuring the data directory exists.")
        os.makedirs(opendata_directory)
        LOGGER.debug("Created opendata directory at %s.", opendata_directory)
    except OSError:
        # os.makedirs raises OSError both when the directory already exists
        # and on real failures (e.g. permission denied). Only swallow the
        # former; re-raise anything that left the directory missing.
        if not os.path.isdir(opendata_directory):
            raise
        LOGGER.debug("Opendata directory already existed, doing nothing.")

    is_built_ratp = os.path.isfile(
        os.path.join(opendata_directory, "ratp.json")
    )
    if not is_built_ratp or force:
        LOGGER.info("Building from RATP data.")
        if not _preprocess_ratp(opendata_directory):
            raise flatisfy.exceptions.DataBuildError("Error with RATP data.")

    is_built_laposte = (
        os.path.isfile(os.path.join(opendata_directory, "cities.json")) and
        os.path.isfile(os.path.join(opendata_directory, "postal_codes.json"))
    )
    if not is_built_laposte or force:
        LOGGER.info("Building from LaPoste data.")
        if not _preprocess_laposte(opendata_directory):
            raise flatisfy.exceptions.DataBuildError(
                "Error with LaPoste data."
            )
|
||||
|
||||
|
||||
def load_data(data_type, config):
    """
    Load a given built data file.

    :param data_type: A valid data identifier.
    :param config: A config dictionary.
    :return: The loaded data. ``None`` if the query is incorrect.
    """
    if data_type not in ("postal_codes", "cities", "ratp"):
        LOGGER.error("Invalid request. No %s data file.", data_type)
        return None

    datafile_path = os.path.join(
        config["data_directory"], "opendata", "%s.json" % data_type
    )
    try:
        with open(datafile_path, "r") as data_fh:
            data = json.load(data_fh)
    except IOError:
        LOGGER.error("No such data file: %s.", datafile_path)
        return None
    except ValueError:
        LOGGER.error("Invalid JSON data file: %s.", datafile_path)
        return None

    if not data:
        LOGGER.warning("Loading empty data for %s.", data_type)

    return data
|
1
flatisfy/data_files/laposte.json
Normal file
1
flatisfy/data_files/laposte.json
Normal file
File diff suppressed because one or more lines are too long
64
flatisfy/database/__init__.py
Normal file
64
flatisfy/database/__init__.py
Normal file
@ -0,0 +1,64 @@
|
||||
# coding: utf-8
|
||||
"""
|
||||
This module contains functions related to the database.
|
||||
"""
|
||||
from __future__ import absolute_import, print_function, unicode_literals
|
||||
|
||||
import sqlite3
|
||||
|
||||
from contextlib import contextmanager
|
||||
|
||||
from sqlalchemy import event, create_engine
|
||||
from sqlalchemy.engine import Engine
|
||||
from sqlalchemy.orm import sessionmaker
|
||||
|
||||
import flatisfy.models.flat # noqa: F401
|
||||
from flatisfy.database.base import BASE
|
||||
|
||||
|
||||
@event.listens_for(Engine, "connect")
def set_sqlite_pragma(dbapi_connection, _):
    """
    Auto enable foreign keys for SQLite connections.
    """
    if not isinstance(dbapi_connection, sqlite3.Connection):
        # Play well with other DB backends
        return
    cursor = dbapi_connection.cursor()
    cursor.execute("PRAGMA foreign_keys=ON")
    cursor.close()
|
||||
|
||||
|
||||
def init_db(database_uri=None):
    """
    Initialize the database, ensuring tables exist etc.

    :param database_uri: An URI describing an engine to use. Defaults to
    in-memory SQLite database.
    :return: A session maker: a context manager yielding a transactional
        SQLAlchemy session. (The docstring previously claimed a tuple was
        returned; only the session maker is.)
    """
    # Default to an in-memory SQLite database
    if database_uri is None:
        database_uri = "sqlite:///:memory:"

    engine = create_engine(database_uri)
    BASE.metadata.create_all(engine, checkfirst=True)
    Session = sessionmaker(bind=engine)  # pylint: disable=invalid-name

    @contextmanager
    def get_session():
        """
        Provide a transactional scope around a series of operations.

        From [1].
        [1]: http://docs.sqlalchemy.org/en/latest/orm/session_basics.html#when-do-i-construct-a-session-when-do-i-commit-it-and-when-do-i-close-it.
        """
        session = Session()
        try:
            yield session
            session.commit()
        # Roll back on *any* error, including SystemExit/KeyboardInterrupt;
        # explicit BaseException instead of a bare ``except:`` (flake8 E722),
        # with identical behavior.
        except BaseException:
            session.rollback()
            raise
        finally:
            session.close()

    return get_session
|
10
flatisfy/database/base.py
Normal file
10
flatisfy/database/base.py
Normal file
@ -0,0 +1,10 @@
|
||||
# coding: utf-8
|
||||
"""
|
||||
This module contains the definition of the declarative SQLAlchemy base.
|
||||
"""
|
||||
from __future__ import absolute_import, print_function, unicode_literals
|
||||
|
||||
from sqlalchemy.ext.declarative import declarative_base
|
||||
|
||||
|
||||
# Single declarative base shared by all ORM models in the project.
BASE = declarative_base()
|
48
flatisfy/database/types.py
Normal file
48
flatisfy/database/types.py
Normal file
@ -0,0 +1,48 @@
|
||||
# coding: utf-8
|
||||
"""
|
||||
This modules implements custom types in SQLAlchemy.
|
||||
"""
|
||||
from __future__ import absolute_import, print_function, unicode_literals
|
||||
|
||||
import json
|
||||
|
||||
import sqlalchemy.types as types
|
||||
|
||||
|
||||
class StringyJSON(types.TypeDecorator):
    """
    Stores and retrieves JSON as TEXT for SQLite.

    From
    https://avacariu.me/articles/2016/compiling-json-as-text-for-sqlite-with-sqlalchemy.

    .. note :: The associated field is immutable. That is, changes to the data
    (typically, changing the value of a dict field) will not trigger an update
    on the SQL side upon ``commit`` as the reference to the object will not
    have been updated. One should force the update by forcing an update of the
    reference (by performing a ``copy`` operation on the dict for instance).
    """

    impl = types.TEXT

    def process_bind_param(self, value, dialect):
        """
        Serialize the Python value to a JSON string before it is stored.
        ``None`` is passed through untouched so SQL NULL is preserved.
        """
        if value is not None:
            value = json.dumps(value)
        return value

    def process_result_value(self, value, dialect):
        """
        Deserialize the JSON string read from the database back into a
        Python value. ``None`` (SQL NULL) is passed through untouched.
        """
        if value is not None:
            value = json.loads(value)
        return value
|
||||
|
||||
|
||||
# TypeEngine.with_variant says "use StringyJSON instead when
# connecting to 'sqlite'"; on other backends the native JSON type is used.
# pylint: disable=invalid-name
MagicJSON = types.JSON().with_variant(StringyJSON, 'sqlite')
|
13
flatisfy/exceptions.py
Normal file
13
flatisfy/exceptions.py
Normal file
@ -0,0 +1,13 @@
|
||||
# coding: utf-8
|
||||
"""
|
||||
This module contains all the exceptions definitions for the Flatisfy-specific
|
||||
exceptions.
|
||||
"""
|
||||
from __future__ import absolute_import, print_function, unicode_literals
|
||||
|
||||
|
||||
class DataBuildError(Exception):
    """
    Exception raised when the build of a data file fails.
    """
|
76
flatisfy/fetch.py
Normal file
76
flatisfy/fetch.py
Normal file
@ -0,0 +1,76 @@
|
||||
# coding: utf-8
|
||||
"""
|
||||
This module contains all the code related to fetching and loading flats lists.
|
||||
"""
|
||||
from __future__ import absolute_import, print_function, unicode_literals
|
||||
|
||||
import json
|
||||
import logging
|
||||
import subprocess
|
||||
|
||||
|
||||
LOGGER = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def fetch_flats_list(config):
|
||||
"""
|
||||
Fetch the available flats using the Flatboob / Weboob config.
|
||||
|
||||
:param config: A config dict.
|
||||
:return: A list of all available flats.
|
||||
"""
|
||||
flats_list = []
|
||||
for query in config["queries"]:
|
||||
max_entries = config["max_entries"]
|
||||
if max_entries is None:
|
||||
max_entries = 0
|
||||
|
||||
LOGGER.info("Loading flats from query %s.", query)
|
||||
flatboob_output = subprocess.check_output(
|
||||
["../weboob/tools/local_run.sh", |