31 changed files with 2518 additions and 131 deletions
@@ -0,0 +1,407 @@
[MASTER]

# Specify a configuration file.
#rcfile=

# Python code to execute, usually for sys.path manipulation such as
# pygtk.require().
#init-hook=

# Add files or directories to the blacklist. They should be base names, not
# paths.
ignore=CVS

# Add files or directories matching the regex patterns to the blacklist. The
# regex matches against base names, not paths.
ignore-patterns=

# Pickle collected data for later comparisons.
persistent=yes

# List of plugins (as comma-separated values of python module names) to load,
# usually to register additional checkers.
load-plugins=

# Use multiple processes to speed up Pylint.
jobs=1

# Allow loading of arbitrary C extensions. Extensions are imported into the
# active Python interpreter and may run arbitrary code.
unsafe-load-any-extension=no

# A comma-separated list of package or module names from where C extensions may
# be loaded. Extensions are loaded into the active Python interpreter and may
# run arbitrary code.
extension-pkg-whitelist=

# Allow optimization of some AST trees. This will activate a peephole AST
# optimizer, which will apply various small optimizations. For instance, it can
# be used to obtain the result of joining multiple strings with the addition
# operator. Joining a lot of strings can lead to a maximum recursion error in
# Pylint and this flag can prevent that. It has one side effect: the resulting
# AST will be different from the real one. This option is deprecated and will
# be removed in Pylint 2.0.
optimize-ast=no


[MESSAGES CONTROL]

# Only show warnings with the listed confidence levels. Leave empty to show
# all. Valid levels: HIGH, INFERENCE, INFERENCE_FAILURE, UNDEFINED
confidence=

# Enable the message, report, category or checker with the given id(s). You can
# either give multiple identifiers separated by comma (,) or put this option
# multiple times (only on the command line, not in the configuration file where
# it should appear only once). See also the "--disable" option for examples.
#enable=

# Disable the message, report, category or checker with the given id(s). You
# can either give multiple identifiers separated by comma (,) or put this
# option multiple times (only on the command line, not in the configuration
# file where it should appear only once). You can also use "--disable=all" to
# disable everything first and then reenable specific checks. For example, if
# you want to run only the similarities checker, you can use "--disable=all
# --enable=similarities". If you want to run only the classes checker, but have
# no Warning level messages displayed, use "--disable=all --enable=classes
# --disable=W".
disable=import-star-module-level,old-octal-literal,oct-method,print-statement,unpacking-in-except,parameter-unpacking,backtick,old-raise-syntax,old-ne-operator,long-suffix,dict-view-method,dict-iter-method,metaclass-assignment,next-method-called,raising-string,indexing-exception,raw_input-builtin,long-builtin,file-builtin,execfile-builtin,coerce-builtin,cmp-builtin,buffer-builtin,basestring-builtin,apply-builtin,filter-builtin-not-iterating,using-cmp-argument,useless-suppression,range-builtin-not-iterating,suppressed-message,no-absolute-import,old-division,cmp-method,reload-builtin,zip-builtin-not-iterating,intern-builtin,unichr-builtin,reduce-builtin,standarderror-builtin,unicode-builtin,xrange-builtin,coerce-method,delslice-method,getslice-method,setslice-method,input-builtin,round-builtin,hex-method,nonzero-method,map-builtin-not-iterating


[REPORTS]

# Set the output format. Available formats are text, parseable, colorized, msvs
# (visual studio) and html. You can also give a reporter class, e.g.
# mypackage.mymodule.MyReporterClass.
output-format=text

# Put messages in a separate file for each module / package specified on the
# command line instead of printing them on stdout. Reports (if any) will be
# written in a file named "pylint_global.[txt|html]". This option is deprecated
# and will be removed in Pylint 2.0.
files-output=no

# Tells whether to display a full report or only the messages.
reports=yes

# Python expression which should return a note less than 10 (10 is the highest
# note). You have access to the variables error, warning, refactor, convention
# and statement, which respectively contain the number of messages in each
# category and the total number of statements analyzed. This is used by the
# global evaluation report (RP0004).
evaluation=10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10)

# Template used to display messages. This is a python new-style format string
# used to format the message information. See doc for all details.
#msg-template=


[BASIC]

# Good variable names which should always be accepted, separated by a comma.
good-names=i,j,k,ex,Run,_,fh

# Bad variable names which should always be refused, separated by a comma.
bad-names=foo,bar,baz,toto,tutu,tata

# Colon-delimited sets of names that determine each other's naming style when
# the name regexes allow several styles.
name-group=

# Include a hint for the correct naming format with invalid-name.
include-naming-hint=no

# List of decorators that produce properties, such as abc.abstractproperty. Add
# to this list to register other decorators that produce valid properties.
property-classes=abc.abstractproperty

# Regular expression matching correct function names
function-rgx=[a-z_][a-z0-9_]{2,30}$

# Naming hint for function names
function-name-hint=[a-z_][a-z0-9_]{2,30}$

# Regular expression matching correct variable names
variable-rgx=[a-z_][a-z0-9_]{2,30}$

# Naming hint for variable names
variable-name-hint=[a-z_][a-z0-9_]{2,30}$

# Regular expression matching correct constant names
const-rgx=(([A-Z_][A-Z0-9_]*)|(__.*__))$

# Naming hint for constant names
const-name-hint=(([A-Z_][A-Z0-9_]*)|(__.*__))$

# Regular expression matching correct attribute names
attr-rgx=[a-z_][a-z0-9_]{2,30}$

# Naming hint for attribute names
attr-name-hint=[a-z_][a-z0-9_]{2,30}$

# Regular expression matching correct argument names
argument-rgx=[a-z_][a-z0-9_]{2,30}$

# Naming hint for argument names
argument-name-hint=[a-z_][a-z0-9_]{2,30}$

# Regular expression matching correct class attribute names
class-attribute-rgx=([A-Za-z_][A-Za-z0-9_]{2,30}|(__.*__))$

# Naming hint for class attribute names
class-attribute-name-hint=([A-Za-z_][A-Za-z0-9_]{2,30}|(__.*__))$

# Regular expression matching correct inline iteration names
inlinevar-rgx=[A-Za-z_][A-Za-z0-9_]*$

# Naming hint for inline iteration names
inlinevar-name-hint=[A-Za-z_][A-Za-z0-9_]*$

# Regular expression matching correct class names
class-rgx=[A-Z_][a-zA-Z0-9]+$

# Naming hint for class names
class-name-hint=[A-Z_][a-zA-Z0-9]+$

# Regular expression matching correct module names
module-rgx=(([a-z_][a-z0-9_]*)|([A-Z][a-zA-Z0-9]+))$

# Naming hint for module names
module-name-hint=(([a-z_][a-z0-9_]*)|([A-Z][a-zA-Z0-9]+))$

# Regular expression matching correct method names
method-rgx=[a-z_][a-z0-9_]{2,30}$

# Naming hint for method names
method-name-hint=[a-z_][a-z0-9_]{2,30}$

# Regular expression which should only match function or class names that do
# not require a docstring.
no-docstring-rgx=^_

# Minimum line length for functions/classes that require docstrings; shorter
# ones are exempt.
docstring-min-length=-1


[ELIF]

# Maximum number of nested blocks for function / method body
max-nested-blocks=5


[FORMAT]

# Maximum number of characters on a single line.
max-line-length=100

# Regexp for a line that is allowed to be longer than the limit.
ignore-long-lines=^\s*(# )?<?https?://\S+>?$

# Allow the body of an if to be on the same line as the test if there is no
# else.
single-line-if-stmt=no

# List of optional constructs for which whitespace checking is disabled. `dict-
# separator` is used to allow tabulation in dicts, etc.: {1 : 1,\n222: 2}.
# `trailing-comma` allows a space between comma and closing bracket: (a, ).
# `empty-line` allows space-only lines.
no-space-check=trailing-comma,dict-separator

# Maximum number of lines in a module.
max-module-lines=1000

# String used as indentation unit. This is usually "    " (4 spaces) or "\t" (1
# tab).
indent-string='    '

# Number of spaces of indent required inside a hanging or continued line.
indent-after-paren=4

# Expected format of line ending, e.g. empty (any line ending), LF or CRLF.
expected-line-ending-format=


[LOGGING]

# Logging modules to check that the string format arguments are in logging
# function parameter format.
logging-modules=logging


[MISCELLANEOUS]

# List of note tags to take in consideration, separated by a comma.
notes=FIXME,XXX,TODO


[SIMILARITIES]

# Minimum lines number of a similarity.
min-similarity-lines=4

# Ignore comments when computing similarities.
ignore-comments=yes

# Ignore docstrings when computing similarities.
ignore-docstrings=yes

# Ignore imports when computing similarities.
ignore-imports=no


[SPELLING]

# Spelling dictionary name. Available dictionaries: none. To make it work,
# install the python-enchant package.
spelling-dict=

# List of comma separated words that should not be checked.
spelling-ignore-words=

# A path to a file that contains a private dictionary; one word per line.
spelling-private-dict-file=

# Tells whether to store unknown words to the private dictionary indicated by
# the --spelling-private-dict-file option instead of raising a message.
spelling-store-unknown-words=no


[TYPECHECK]

# Tells whether missing members accessed in mixin class should be ignored. A
# mixin class is detected if its name ends with "mixin" (case insensitive).
ignore-mixin-members=yes

# List of module names for which member attributes should not be checked
# (useful for modules/projects where namespaces are manipulated during runtime
# and thus existing member attributes cannot be deduced by static analysis). It
# supports qualified module names, as well as Unix pattern matching.
ignored-modules=

# List of class names for which member attributes should not be checked (useful
# for classes with dynamically set attributes). This supports the use of
# qualified names.
ignored-classes=optparse.Values,thread._local,_thread._local

# List of members which are set dynamically and missed by pylint inference
# system, and so shouldn't trigger E1101 when accessed. Python regular
# expressions are accepted.
generated-members=

# List of decorators that produce context managers, such as
# contextlib.contextmanager. Add to this list to register other decorators that
# produce valid context managers.
contextmanager-decorators=contextlib.contextmanager


[VARIABLES]

# Tells whether we should check for unused import in __init__ files.
init-import=no

# A regular expression matching the name of dummy variables (i.e. expectedly
# not used).
dummy-variables-rgx=(_+[a-zA-Z0-9]*?$)|dummy

# List of additional names supposed to be defined in builtins. Remember that
# you should avoid defining new builtins when possible.
additional-builtins=

# List of strings which can identify a callback function by name. A callback
# name must start or end with one of those strings.
callbacks=cb_,_cb

# List of qualified module names which can have objects that can redefine
# builtins.
redefining-builtins-modules=six.moves,future.builtins,builtins


[CLASSES]

# List of method names used to declare (i.e. assign) instance attributes.
defining-attr-methods=__init__,__new__,setUp

# List of valid names for the first argument in a class method.
valid-classmethod-first-arg=cls

# List of valid names for the first argument in a metaclass class method.
valid-metaclass-classmethod-first-arg=mcs

# List of member names, which should be excluded from the protected access
# warning.
exclude-protected=_asdict,_fields,_replace,_source,_make


[DESIGN]

# Maximum number of arguments for function / method
max-args=5

# Argument names that match this expression will be ignored. Defaults to names
# with a leading underscore.
ignored-argument-names=_.*

# Maximum number of locals for function / method body
max-locals=15

# Maximum number of return / yield for function / method body
max-returns=6

# Maximum number of branches for function / method body
max-branches=12

# Maximum number of statements in function / method body
max-statements=50

# Maximum number of parents for a class (see R0901).
max-parents=7

# Maximum number of attributes for a class (see R0902).
max-attributes=7

# Minimum number of public methods for a class (see R0903).
min-public-methods=2

# Maximum number of public methods for a class (see R0904).
max-public-methods=20

# Maximum number of boolean expressions in an if statement.
max-bool-expr=5


[IMPORTS]

# Deprecated modules which should not be used, separated by a comma.
deprecated-modules=regsub,TERMIOS,Bastion,rexec

# Create a graph of all (i.e. internal and external) dependencies in the
# given file (report RP0402 must not be disabled).
import-graph=

# Create a graph of external dependencies in the given file (report RP0402 must
# not be disabled).
ext-import-graph=

# Create a graph of internal dependencies in the given file (report RP0402 must
# not be disabled).
int-import-graph=

# Force import order to recognize a module as part of the standard
# compatibility libraries.
known-standard-library=

# Force import order to recognize a module as part of a third party library.
known-third-party=enchant

# Analyse import fallback blocks. This can be used to support both Python 2 and
# 3 compatible code, which means that the block might have code that exists
# only in one or another interpreter, leading to false positives when analysed.
analyse-fallback-blocks=no


[EXCEPTIONS]

# Exceptions that will emit a warning when being caught. Defaults to
# "Exception".
overgeneral-exceptions=Exception
@@ -1,3 +1,6 @@
build
*.json
config.py
*.pyc
*.swp
*.swo
*.db
@@ -0,0 +1,21 @@
The MIT License (MIT)

Copyright (c) 2017 Phyks (Lucas Verney)

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
@@ -0,0 +1,115 @@
Flatisfy
========

Flatisfy is your new companion to ease your search for new housing :)


It uses [Weboob](http://weboob.org/) to fetch housing posts from most of the
websites offering them, and then runs a bunch of pipelines to filter and
deduplicate the fetched housings.


It can be used as a command-line utility, but it also exposes a web API and a
web visualisation to browse through the results.


_Note_: It is targeted at French users (due to the currently supported
websites), and in particular at people living close to Paris, as I developed
it for my personal use, and am currently living in Paris :) Any feedback and
merge requests to better support other countries / cities are more than
welcome!

_Note_: In this repository and across the code, I am using the word "flat" as
a placeholder for "housing", and consider the two interchangeable. This code
is not restricted to handling flats only!

## Getting started

1. Clone the repository.
2. Install the required Python modules: `pip install -r requirements.txt`.
3. Init a configuration file: `python -m flatisfy init-config > config.json`.
   Edit it according to your needs (see below).
4. Build the required data files:
   `python -m flatisfy build-data --config config.json`.
5. Use `fetch` to output a filtered JSON list of flats matching your
   criteria, or `import` to store them in an SQLite database for the web
   visualization.
6. Use `python -m flatisfy serve --config config.json` to serve the web app.

## Configuration

List of configuration options:

* `data_directory` is the directory in which you want data files to be stored.
  `null` is the default value and means the default `XDG` location (typically
  `~/.local/share/flatisfy/`).
* `max_entries` is the maximum number of entries to fetch **per Weboob
  backend** (that is, per housing website).
* `passes` is the number of passes to run on the data. The first pass does a
  basic filtering, using only the information from the housing list page. The
  second pass loads every available detail about the filtered flats and
  filters more precisely.
* `queries` is a list of queries defined in `flatboob` that should be fetched.
* `database` is an SQLAlchemy URI to a database file. Defaults to `null`,
  which means the database will be stored in the default location, in
  `data_directory`.
* `navitia_api_key` is an API token for [Navitia](https://www.navitia.io/),
  which is required to compute travel times.
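
To make these options concrete, below is a minimal sketch that generates such a
`config.json` (the query name `my-flatboob-query` and the postal codes are
made-up placeholders; the other values mirror the defaults produced by
`python -m flatisfy init-config`):

```
# Minimal sketch of a config.json generator; "my-flatboob-query" and the
# postal codes below are hypothetical placeholders.
import json

config = {
    "data_directory": None,         # null: default XDG location
    "max_entries": None,            # null: no limit (per Weboob backend)
    "passes": 2,
    "queries": ["my-flatboob-query"],
    "database": None,               # null: SQLite file in data_directory
    "navitia_api_key": None,        # required to compute travel times
    "constraints": {
        "postal_codes": ["75013", "75014"],
        "area": [30, None],         # at least 30 m², no upper bound
        "cost": [None, 1500],       # at most 1500 (currency units)
        "rooms": [2, None],
        "bedrooms": [None, None],
        "time_to": {}
    }
}

with open("config.json", "w") as fh:
    json.dump(config, fh, indent=4, sort_keys=True)
```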

### Constraints

You can specify constraints under the `constraints` key. The available
constraints are:

* `area` (in m²), `bedrooms`, `cost` (in currency units), `rooms`: each is a
  tuple of `(min, max)` values, defining an interval in which the value should
  lie. A `null` value means that there is no bound on that side.
* `postal_codes` is a list of allowed postal codes. Include every postal code
  you are interested in, especially the ones close to the precise location you
  want. You MUST provide at least one postal code.
* `time_to` is a dictionary of places for which travel times should be
  computed. Typically,

```
"time_to": {
    "foobar": {
        "gps": [LAT, LNG],
        "time": [min, max]
    }
}
```

means that the travel time from a housing to the place located at the GPS
coordinates `LAT` and `LNG` (latitude and longitude) must lie between the
`min` and `max` bounds (either possibly `null`); `foobar` is a human-readable
name for this place. Beware that `time` constraints are in **seconds**.


## OpenData

I am using the following datasets, available under `flatisfy/data_files`,
which cover Paris. If you want to run the script for some other location, you
might have to replace these files with matching datasets.

* [LaPoste Hexasmal](https://datanova.legroupe.laposte.fr/explore/dataset/laposte_hexasmal/?disjunctive.code_commune_insee&disjunctive.nom_de_la_commune&disjunctive.code_postal&disjunctive.libell_d_acheminement&disjunctive.ligne_5) for the list of cities and postal codes in France.
* [RATP stations](https://data.ratp.fr/explore/dataset/positions-geographiques-des-stations-du-reseau-ratp/table/?disjunctive.stop_name&disjunctive.code_postal&disjunctive.departement) for the list of subway stations with their positions in Paris and nearby areas.

Both datasets are licensed under the Open Data Commons Open Database License
(ODbL): https://opendatacommons.org/licenses/odbl/.


## License

The content of this repository is licensed under an MIT license, unless
explicitly mentioned otherwise.


## Thanks

* [Weboob](http://weboob.org/)
* The OpenData providers listed above!
* Navitia for their really cool public transportation API.
* A lot of Python modules, required for this script (see `requirements.txt`).
* [Kresus](https://framagit.org/bnjbvr/kresus), which gave me part of the
  original idea (at least it proved to me that such scraping-based software
  can achieve a high level of quality :)
@@ -1,130 +0,0 @@
|
# coding: utf-8
#!/usr/bin/env python3
import json
import os
import subprocess
import sys

from fuzzywuzzy import process as fuzzyprocess

import config


def pretty_json(json_str):
    return json.dumps(json_str, indent=4, separators=(',', ': '),
                      sort_keys=True)


def preprocess_data():
    if not os.path.isdir("build"):
        os.mkdir("build")

    if not os.path.isfile("build/ratp.json"):
        ratp_data = []
        with open("data/ratp.json", "r") as fh:
            ratp_data = json.load(fh)
        ratp_data = sorted(
            list(set(
                x["fields"]["stop_name"].lower() for x in ratp_data
            ))
        )
        with open("build/ratp.json", "w") as fh:
            fh.write(pretty_json(ratp_data))


def fetch_flats_list():
    flats_list = []
    for query in config.QUERIES:
        flatboob_output = subprocess.check_output(
            ["flatboob", "-n", "0", "-f", "json", "load", query]
        )
        flats_list.extend(json.loads(flatboob_output))
    return flats_list


def remove_duplicates(flats_list):
    unique_flats_list = []
    ids = []
    for flat in flats_list:
        if flat["id"] in ids:
            continue
        ids.append(id)
        unique_flats_list.append(flat)
    return unique_flats_list


def sort_by(flats_list, key="cost"):
    return sorted(flats_list, key=lambda x: x["cost"])


def refine_params(flats_list):
    def filter_conditions(x):
        is_ok = True
        if "cost" in x:
            cost = x["cost"]
            is_ok = (
                is_ok and
                (cost < config.PARAMS["max_cost"] and
                 cost > config.PARAMS["min_cost"])
            )
        if "area" in x:
            area = x["area"]
            is_ok = (
                is_ok and
                (area < config.PARAMS["max_area"] and
                 area > config.PARAMS["min_area"])
            )
        return is_ok

    return filter(filter_conditions, flats_list)


def match_ratp(flats_list):
    ratp_stations = []
    with open("build/ratp.json", "r") as fh:
        ratp_stations = json.load(fh)

    for flat in flats_list:
        if "station" in flat and flat["station"]:
            # There is some station fetched by flatboob, try to match it
            flat["ratp_station"] = fuzzyprocess.extractOne(
                flat["station"], ratp_stations
            )
            # TODO: Cross-check station location to choose the best fit

    return flats_list


def main(dumpfile=None):
    if dumpfile is None:
        flats_list = fetch_flats_list()
    else:
        with open(dumpfile, "r") as fh:
            flats_list = json.load(fh)

    # First pass
    flats_list = remove_duplicates(flats_list)
    flats_list = sort_by(flats_list, "cost")
    flats_list = refine_params(flats_list)

    # TODO: flats_list = match_ratp(flats_list)

    # TODO: Second pass, loading additional infos for each entry

    return flats_list


if __name__ == "__main__":
    if len(sys.argv) > 1:
        dumpfile = sys.argv[1]
    else:
        dumpfile = None

    try:
        preprocess_data()
        flats_list = main(dumpfile)
        print(
            pretty_json(flats_list)
        )
    except KeyboardInterrupt:
        pass
@@ -0,0 +1,5 @@
# coding: utf-8
"""
``Flatisfy`` is a tool to help you find new housing based on some criteria.
"""
__version__ = "0.1"
@@ -0,0 +1,176 @@
# coding: utf-8
"""
Main entry point of the Flatisfy code.
"""
from __future__ import absolute_import, print_function, unicode_literals

import argparse
import logging
import sys

import flatisfy.config
import flatisfy.exceptions
from flatisfy import cmds
from flatisfy import data
from flatisfy import tools


LOGGER = logging.getLogger("flatisfy")


def parse_args(argv=None):
    """
    Create parser and parse arguments.
    """
    parser = argparse.ArgumentParser(prog="Flatisfy",
                                     description="Find the perfect flat.")

    # Parent parser containing arguments common to any subcommand
    parent_parser = argparse.ArgumentParser(add_help=False)
    parent_parser.add_argument(
        "--data-dir",
        help="Location of Flatisfy data directory."
    )
    parent_parser.add_argument(
        "--config",
        help="Configuration file to use."
    )
    parent_parser.add_argument(
        "--passes", choices=[0, 1, 2], type=int,
        help="Number of passes to do on the filtered data."
    )
    parent_parser.add_argument(
        "--max-entries", type=int,
        help="Maximum number of entries to fetch."
    )
    parent_parser.add_argument(
        "-v", "--verbose", action="store_true",
        help="Verbose logging output."
    )
    parent_parser.add_argument(
        "-vv", action="store_true",
        help="Debug logging output."
    )

    # Subcommands
    subparsers = parser.add_subparsers(
        dest="cmd", help="Available subcommands"
    )

    # Build data subcommand
    subparsers.add_parser(
        "build-data", parents=[parent_parser],
        help="Build necessary data."
    )

    # Init config subcommand
    parser_init_config = subparsers.add_parser(
        "init-config", parents=[parent_parser],
        help="Initialize an empty configuration."
    )
    parser_init_config.add_argument(
        "output", nargs="?", help="Output config file. Use '-' for stdout."
    )

    # Fetch subcommand parser
    subparsers.add_parser("fetch", parents=[parent_parser],
                          help="Fetch housing posts.")

    # Filter subcommand parser
    parser_filter = subparsers.add_parser("filter", parents=[parent_parser],
                                          help=(
                                              "Filter housing posts. No "
                                              "fetching of additional infos "
                                              "is done."))
    parser_filter.add_argument(
        "input",
        help="JSON dump of the housing posts to filter."
    )

    # Import subcommand parser
    subparsers.add_parser("import", parents=[parent_parser],
                          help="Import housing posts into the database.")

    # Serve subcommand parser
    parser_serve = subparsers.add_parser("serve", parents=[parent_parser],
                                         help="Serve the web app.")
    parser_serve.add_argument("--port", type=int, help="Port to bind to.")
    parser_serve.add_argument("--host", help="Host to listen on.")

    return parser.parse_args(argv)


def main():
    """
    Main module code.
    """
    # Parse arguments
    args = parse_args()

    # Set up logging
    if args.vv:
        logging.basicConfig(level=logging.DEBUG)
        logging.getLogger('sqlalchemy.engine').setLevel(logging.DEBUG)
    elif args.verbose:
        logging.basicConfig(level=logging.INFO)
        # sqlalchemy INFO level is way too loud, just stick with WARNING
        logging.getLogger('sqlalchemy.engine').setLevel(logging.WARNING)
    else:
        logging.basicConfig(level=logging.WARNING)
        logging.getLogger('sqlalchemy.engine').setLevel(logging.WARNING)

    # Init-config command
    if args.cmd == "init-config":
        flatisfy.config.init_config(args.output)
        sys.exit(0)
    else:
        # Load config
        config = flatisfy.config.load_config(args)
        if config is None:
            LOGGER.error("Invalid configuration. Exiting. "
                         "Run init-config before if this is the first time "
                         "you run Flatisfy.")
            sys.exit(1)

    # Build data files
    try:
        if args.cmd == "build-data":
            data.preprocess_data(config, force=True)
            sys.exit(0)
        else:
            data.preprocess_data(config)
    except flatisfy.exceptions.DataBuildError:
        sys.exit(1)

    # Fetch command
    if args.cmd == "fetch":
        # Fetch and filter flats list
        flats_list, _ = cmds.fetch_and_filter(config)
        # Sort by cost
        flats_list = tools.sort_list_of_dicts_by(flats_list, "cost")

        print(
            tools.pretty_json(flats_list)
        )
    # Filter command
    elif args.cmd == "filter":
        # Load and filter flats list
        flats_list, _ = cmds.load_and_filter(args.input, config)
        # Sort by cost
        flats_list = tools.sort_list_of_dicts_by(flats_list, "cost")

        print(
            tools.pretty_json(flats_list)
        )
    # Import command
    elif args.cmd == "import":
        cmds.import_and_filter(config)
    # Serve command
    elif args.cmd == "serve":
        cmds.serve(config)


if __name__ == "__main__":
    try:
        main()
    except KeyboardInterrupt:
        pass
@@ -0,0 +1,110 @@
# coding: utf-8
"""
Main commands available for flatisfy.
"""
from __future__ import absolute_import, print_function, unicode_literals

import flatisfy.filters
from flatisfy import database
from flatisfy.models import flat as flat_model
from flatisfy import fetch
from flatisfy import tools
from flatisfy.web import app as web_app


def fetch_and_filter(config):
    """
    Fetch the available flats list. Then, filter it according to criteria.

    :param config: A config dict.
    :return: A tuple of the list of all matching flats and the list of
    ignored flats.
    """
    # TODO: Reduce load on housings listing websites
    # Fetch flats list with flatboob
    flats_list = fetch.fetch_flats_list(config)
    # Flats filtered out by the passes below
    ignored_flats = []

    # Do a first pass with the available infos to try to remove as many
    # unwanted postings as possible
    if config["passes"] > 0:
        flats_list, ignored_flats = flatisfy.filters.first_pass(flats_list,
                                                                config)

    # Do a second pass to consolidate all the infos we found and make use of
    # additional infos
    if config["passes"] > 1:
        # Load additional infos
        for flat in flats_list:
            details = fetch.fetch_details(flat["id"])
            flat = tools.merge_dicts(flat, details)

        flats_list, extra_ignored_flats = flatisfy.filters.second_pass(
            flats_list, config
        )
        ignored_flats.extend(extra_ignored_flats)

    return flats_list, ignored_flats


def load_and_filter(housing_file, config):
    """
    Load the dumped flats list. Then, filter it according to criteria.

    :param housing_file: The JSON file to load flats from.
    :param config: A config dict.
    :return: A tuple of the list of all matching flats and the list of
    ignored flats.
    """
    # Load flats list
    flats_list = fetch.load_flats_list(housing_file)
    # Flats filtered out by the passes below
    ignored_flats = []

    # Do a first pass with the available infos to try to remove as many
    # unwanted postings as possible
    if config["passes"] > 0:
        flats_list, ignored_flats = flatisfy.filters.first_pass(flats_list,
                                                                config)

    # Do a second pass to consolidate all the infos we found
    if config["passes"] > 1:
        flats_list, extra_ignored_flats = flatisfy.filters.second_pass(
            flats_list, config
        )
        ignored_flats.extend(extra_ignored_flats)

    return flats_list, ignored_flats


def import_and_filter(config):
    """
    Fetch the available flats list. Then, filter it according to criteria.
    Finally, store it in the database.

    :param config: A config dict.
    :return: ``None``.
    """
    # Fetch and filter flats list
    flats_list, purged_list = fetch_and_filter(config)
    # Create database connection
    get_session = database.init_db(config["database"])

    with get_session() as session:
        for flat_dict in flats_list:
            flat = flat_model.Flat.from_dict(flat_dict)
            session.merge(flat)

        for flat_dict in purged_list:
            flat = flat_model.Flat.from_dict(flat_dict)
            flat.status = flat_model.FlatStatus.purged
            session.merge(flat)


def serve(config):
    """
    Serve the web app.

    :param config: A config dict.
    :return: ``None``, long-running process.
    """
    app = web_app.get_app(config)
    # TODO: Make Bottle use the logging module
    app.run(host=config["host"], port=config["port"])
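
These commands can also be driven programmatically. A hedged sketch, assuming a
valid `config.json` in the working directory and `flatisfy` on the Python path:

```
# Hedged sketch: running the fetch + filter pipeline from Python, assuming
# a valid config.json in the working directory.
import argparse

import flatisfy.config
from flatisfy import cmds, data, tools

# load_config reads missing CLI options via getattr(..., None), so a bare
# Namespace with only `config` set is enough here.
args = argparse.Namespace(config="config.json")
config = flatisfy.config.load_config(args)

data.preprocess_data(config)  # ensure the opendata files are built

flats_list, ignored_flats = cmds.fetch_and_filter(config)
print(tools.pretty_json(tools.sort_list_of_dicts_by(flats_list, "cost")))
```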
@@ -0,0 +1,208 @@
# coding: utf-8
"""
This module handles the configuration management for Flatisfy.

It loads the default configuration, then overloads it with the provided config
file and finally overloads it with command-line options.
"""
from __future__ import absolute_import, print_function, unicode_literals
from builtins import str

import json
import logging
import os
import sys
import traceback

import appdirs

from flatisfy import tools


# Default configuration
DEFAULT_CONFIG = {
    # Flatboob queries to fetch
    "queries": [],
    # Constraints to match
    "constraints": {
        "postal_codes": [],  # List of postal codes
        "area": (None, None),  # (min, max) in m^2
        "cost": (None, None),  # (min, max) in currency unit
        "rooms": (None, None),  # (min, max)
        "bedrooms": (None, None),  # (min, max)
        "time_to": {}  # Dict mapping names to {"gps": [lat, lng],
                       #                        "time": (min, max)}
                       # Time is in seconds
    },
    # Navitia API key
    "navitia_api_key": None,
    # Number of filtering passes to run
    "passes": 2,
    # Maximum number of entries to fetch
    "max_entries": None,
    # Directory in which data will be put. ``None`` is the default XDG
    # location.
    "data_directory": None,
    # SQLAlchemy URI to the database to use
    "database": None,
    # Web app port
    "port": 8080,
    # Web app host to listen on
    "host": "127.0.0.1"
}

LOGGER = logging.getLogger(__name__)


def validate_config(config):
    """
    Check that the config passed as argument is a valid configuration.

    :param config: A config dictionary to validate.
    :return: ``True`` if the configuration is valid, the source line of the
    failing check otherwise.
    """
    def _check_constraints_bounds(bounds):
        """
        Check the bounds for numeric constraints.
        """
        assert len(bounds) == 2
        assert all(
            x is None or
            (
                (isinstance(x, int) or isinstance(x, float)) and
                x >= 0
            )
            for x in bounds
        )
        if bounds[0] is not None and bounds[1] is not None:
            assert bounds[1] > bounds[0]

    try:
        # Note: The traceback fetching code below only handles single-line
        # asserts. Hence, we disable the line-too-long pylint check and the
        # E501 flake8 check and use long lines whenever needed, in order to
        # have the full assert message in the log output.
        # pylint: disable=line-too-long
        assert "postal_codes" in config["constraints"]
        assert len(config["constraints"]["postal_codes"]) > 0

        assert "area" in config["constraints"]
        _check_constraints_bounds(config["constraints"]["area"])

        assert "cost" in config["constraints"]
        _check_constraints_bounds(config["constraints"]["cost"])

        assert "rooms" in config["constraints"]
        _check_constraints_bounds(config["constraints"]["rooms"])

        assert "bedrooms" in config["constraints"]
        _check_constraints_bounds(config["constraints"]["bedrooms"])

        assert "time_to" in config["constraints"]
        assert isinstance(config["constraints"]["time_to"], dict)
        for name, item in config["constraints"]["time_to"].items():
            assert isinstance(name, str)
            assert "gps" in item
            assert isinstance(item["gps"], list)
            assert len(item["gps"]) == 2
            assert "time" in item
            _check_constraints_bounds(item["time"])

        assert config["passes"] in [0, 1, 2]
        assert config["max_entries"] is None or (isinstance(config["max_entries"], int) and config["max_entries"] > 0)  # noqa: E501

        assert config["data_directory"] is None or isinstance(config["data_directory"], str)  # noqa: E501

        assert config["database"] is None or isinstance(config["database"], str)  # noqa: E501

        assert isinstance(config["port"], int)
        assert isinstance(config["host"], str)

        return True
    except (AssertionError, KeyError):
        _, _, exc_traceback = sys.exc_info()
        return traceback.extract_tb(exc_traceback)[-1][-1]


def load_config(args=None):
    """
    Load the configuration from file.

    :param args: An argparse args structure.
    :return: The loaded config dict, or ``None`` if the config is invalid.
    """
    LOGGER.info("Initializing configuration...")
    # Default configuration
    config_data = DEFAULT_CONFIG.copy()

    # Load config from specified JSON
    if args and getattr(args, "config", None):
        LOGGER.debug("Loading configuration from %s.", args.config)
        try:
            with open(args.config, "r") as fh:
                config_data.update(json.load(fh))
        except (IOError, ValueError):
            LOGGER.error(
                "Unable to load configuration from file, "
                "using default configuration."
            )

    # Overload config with arguments
    if args and getattr(args, "passes", None) is not None:
        LOGGER.debug(
            "Overloading number of passes from CLI arguments: %d.",
            args.passes
        )
        config_data["passes"] = args.passes
    if args and getattr(args, "max_entries", None) is not None:
        LOGGER.debug(
            "Overloading maximum number of entries from CLI arguments: %d.",
            args.max_entries
        )
        config_data["max_entries"] = args.max_entries
    if args and getattr(args, "port", None) is not None:
        LOGGER.debug("Overloading web app port: %d.", args.port)
        config_data["port"] = args.port
    if args and getattr(args, "host", None) is not None:
        LOGGER.debug("Overloading web app host: %s.", args.host)
        config_data["host"] = str(args.host)

    # Handle data_directory option
    if args and getattr(args, "data_dir", None) is not None:
        LOGGER.debug("Overloading data directory from CLI arguments.")
        config_data["data_directory"] = args.data_dir
    elif config_data["data_directory"] is None:
        config_data["data_directory"] = appdirs.user_data_dir(
            "flatisfy",
            "flatisfy"
        )
        LOGGER.debug("Using default XDG data directory: %s.",
                     config_data["data_directory"])

    if config_data["database"] is None:
        config_data["database"] = "sqlite:///" + os.path.join(
            config_data["data_directory"],
            "flatisfy.db"
        )

    config_validation = validate_config(config_data)
    if config_validation is True:
        LOGGER.info("Config has been fully initialized.")
        return config_data
    else:
        LOGGER.error("Error in configuration: %s.", config_validation)
        return None


def init_config(output=None):
    """
    Initialize an empty configuration file.

    :param output: File to output content to. Defaults to ``stdout``.
    """
    config_data = DEFAULT_CONFIG.copy()

    if output and output != "-":
        with open(output, "w") as fh:
            fh.write(tools.pretty_json(config_data))
    else:
        print(tools.pretty_json(config_data))
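
Note the return convention of `validate_config`: it returns either `True` or
the source line of the failing check, which `load_config` then logs. A small
sketch, assuming `flatisfy` is importable:

```
# Hedged sketch of validate_config's return convention. DEFAULT_CONFIG has
# an empty postal_codes list, so validation fails and the source line of
# the failing assert is returned (and logged by load_config).
import flatisfy.config

config = flatisfy.config.DEFAULT_CONFIG.copy()
result = flatisfy.config.validate_config(config)
print(result)
# assert len(config["constraints"]["postal_codes"]) > 0
```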
@@ -0,0 +1,163 @@
# coding: utf-8
"""
This module contains all the code related to building the necessary data files
from the source opendata files.
"""
from __future__ import absolute_import, print_function, unicode_literals

import collections
import json
import logging
import os

import flatisfy.exceptions


LOGGER = logging.getLogger(__name__)
MODULE_DIR = os.path.dirname(os.path.realpath(__file__))


def _preprocess_ratp(output_dir):
    """
    Build the RATP file from the raw RATP data.

    :param output_dir: Directory in which the output file should reside.
    :return: ``True`` on successful build, ``False`` otherwise.
    """
    ratp_data_raw = []
    # Load opendata file
    try:
        with open(os.path.join(MODULE_DIR, "data_files/ratp.json"), "r") as fh:
            ratp_data_raw = json.load(fh)
    except (IOError, ValueError):
        LOGGER.error("Invalid raw RATP opendata file.")
        return False

    # Process it
    ratp_data = collections.defaultdict(list)
    for item in ratp_data_raw:
        stop_name = item["fields"]["stop_name"].lower()
        ratp_data[stop_name].append(item["fields"]["coord"])

    # Output it
    with open(os.path.join(output_dir, "ratp.json"), "w") as fh:
        json.dump(ratp_data, fh)

    return True


def _preprocess_laposte(output_dir):
    """
    Build JSON files from the postal codes data.

    :param output_dir: Directory in which the output files should reside.
    :return: ``True`` on successful build, ``False`` otherwise.
    """
    raw_laposte_data = []
    # Load opendata file
    try:
        with open(
            os.path.join(MODULE_DIR, "data_files/laposte.json"), "r"
        ) as fh:
            raw_laposte_data = json.load(fh)
    except (IOError, ValueError):
        LOGGER.error("Invalid raw LaPoste opendata file.")
        return False

    # Build a "postal code -> other infos" file
    postal_codes_data = {}
    for item in raw_laposte_data:
        try:
            postal_codes_data[item["fields"]["code_postal"]] = {
                "gps": item["fields"]["coordonnees_gps"],
                "nom": item["fields"]["nom_de_la_commune"].title()
            }
        except KeyError:
            LOGGER.info("Missing data for postal code %s, skipping it.",
                        item["fields"]["code_postal"])
    with open(os.path.join(output_dir, "postal_codes.json"), "w") as fh:
        json.dump(postal_codes_data, fh)

    # Build a "city name -> postal code and other infos" file
    cities_data = {}
    for item in raw_laposte_data:
        try:
            cities_data[item["fields"]["nom_de_la_commune"].title()] = {
                "gps": item["fields"]["coordonnees_gps"],
                "postal_code": item["fields"]["code_postal"]
            }
        except KeyError:
            LOGGER.info("Missing data for city %s, skipping it.",
                        item["fields"]["nom_de_la_commune"])
    with open(os.path.join(output_dir, "cities.json"), "w") as fh:
        json.dump(cities_data, fh)

    return True


def preprocess_data(config, force=False):
    """
    Ensure that all the necessary data files have been built from the raw
    opendata files.

    :param config: A config dictionary.
    :param force: Whether to force a rebuild or not.
    """
    LOGGER.debug("Data directory is %s.", config["data_directory"])
    opendata_directory = os.path.join(config["data_directory"], "opendata")
    try:
        LOGGER.info("Ensuring the data directory exists.")
        os.makedirs(opendata_directory)
        LOGGER.debug("Created opendata directory at %s.", opendata_directory)
    except OSError:
        LOGGER.debug("Opendata directory already existed, doing nothing.")

    is_built_ratp = os.path.isfile(
        os.path.join(opendata_directory, "ratp.json")
    )
    if not is_built_ratp or force:
        LOGGER.info("Building from RATP data.")
        if not _preprocess_ratp(opendata_directory):
            raise flatisfy.exceptions.DataBuildError("Error with RATP data.")

    is_built_laposte = (
        os.path.isfile(os.path.join(opendata_directory, "cities.json")) and
        os.path.isfile(os.path.join(opendata_directory, "postal_codes.json"))
    )
    if not is_built_laposte or force:
        LOGGER.info("Building from LaPoste data.")
        if not _preprocess_laposte(opendata_directory):
            raise flatisfy.exceptions.DataBuildError(
                "Error with LaPoste data."
            )


def load_data(data_type, config):
    """
    Load a given built data file.

    :param data_type: A valid data identifier.
    :param config: A config dictionary.
    :return: The loaded data. ``None`` if the query is incorrect.
    """
    if data_type not in ["postal_codes", "cities", "ratp"]:
        LOGGER.error("Invalid request. No %s data file.", data_type)
        return None

    opendata_directory = os.path.join(config["data_directory"], "opendata")
    datafile_path = os.path.join(opendata_directory, "%s.json" % data_type)
    data = {}
    try:
        with open(datafile_path, "r") as fh:
            data = json.load(fh)
    except IOError:
        LOGGER.error("No such data file: %s.", datafile_path)
        return None
    except ValueError:
        LOGGER.error("Invalid JSON data file: %s.", datafile_path)
        return None

    if len(data) == 0:
        LOGGER.warning("Loading empty data for %s.", data_type)

    return data
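
As an illustration, here is a hedged sketch of loading one of the built files
with `load_data`; the postal code `75014` is a made-up example key, and the
dict layout follows `_preprocess_laposte` above:

```
# Hedged usage sketch for load_data, assuming the data files have been
# built (python -m flatisfy build-data --config config.json) and `config`
# is a loaded config dict. "75014" is a hypothetical example key.
from flatisfy import data

postal_codes = data.load_data("postal_codes", config)
if postal_codes is not None:
    # Each entry maps a postal code to its GPS position and city name,
    # e.g. {"gps": [...], "nom": "Paris"}.
    print(postal_codes.get("75014"))
```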
File diff suppressed because one or more lines are too long
@@ -0,0 +1,64 @@
# coding: utf-8
"""
This module contains functions related to the database.
"""
from __future__ import absolute_import, print_function, unicode_literals

import sqlite3

from contextlib import contextmanager

from sqlalchemy import event, create_engine
from sqlalchemy.engine import Engine
from sqlalchemy.orm import sessionmaker

import flatisfy.models.flat  # noqa: F401
from flatisfy.database.base import BASE


@event.listens_for(Engine, "connect")
def set_sqlite_pragma(dbapi_connection, _):
    """
    Automatically enable foreign keys for SQLite.
    """
    # Play well with other DB backends
    if isinstance(dbapi_connection, sqlite3.Connection):
        cursor = dbapi_connection.cursor()
        cursor.execute("PRAGMA foreign_keys=ON")
        cursor.close()


def init_db(database_uri=None):
    """
    Initialize the database, ensuring tables exist etc.

    :param database_uri: A URI describing an engine to use. Defaults to an
    in-memory SQLite database.
    :return: A ``get_session`` context manager factory providing transactional
    database sessions.
    """
    if database_uri is None:
        database_uri = "sqlite:///:memory:"

    engine = create_engine(database_uri)
    BASE.metadata.create_all(engine, checkfirst=True)
    Session = sessionmaker(bind=engine)  # pylint: disable=invalid-name

    @contextmanager
    def get_session():
        """
        Provide a transactional scope around a series of operations.

        From [1].
        [1]: http://docs.sqlalchemy.org/en/latest/orm/session_basics.html#when-do-i-construct-a-session-when-do-i-commit-it-and-when-do-i-close-it
        """
        session = Session()
        try:
            yield session
            session.commit()
        except:
            session.rollback()
            raise
        finally:
            session.close()

    return get_session
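
A hedged sketch of how the returned session factory is meant to be used,
mirroring `import_and_filter` in `flatisfy/cmds.py`:

```
# Hedged usage sketch of the session factory returned by init_db, using an
# in-memory SQLite database.
from flatisfy import database
from flatisfy.models import flat as flat_model

get_session = database.init_db("sqlite:///:memory:")

with get_session() as session:
    # Within the block, `session` is a plain SQLAlchemy session; commit,
    # rollback and close are handled by the context manager.
    print(session.query(flat_model.Flat).count())
```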
@@ -0,0 +1,10 @@
# coding: utf-8
"""
This module contains the definition of the declarative SQLAlchemy base.
"""
from __future__ import absolute_import, print_function, unicode_literals

from sqlalchemy.ext.declarative import declarative_base


BASE = declarative_base()
@@ -0,0 +1,48 @@
# coding: utf-8
"""
This module implements custom types in SQLAlchemy.
"""
from __future__ import absolute_import, print_function, unicode_literals

import json

import sqlalchemy.types as types


class StringyJSON(types.TypeDecorator):
    """
    Stores and retrieves JSON as TEXT for SQLite.

    From
    https://avacariu.me/articles/2016/compiling-json-as-text-for-sqlite-with-sqlalchemy.

    .. note :: The associated field is immutable. That is, changes to the data
    (typically, changing the value of a dict field) will not trigger an update
    on the SQL side upon ``commit`` as the reference to the object will not
    have been updated. One should force the update by forcing an update of the
    reference (by performing a ``copy`` operation on the dict for instance).
    """

    impl = types.TEXT

    def process_bind_param(self, value, dialect):
        """
        Dump the value to a JSON string when storing it.
        """
        if value is not None:
            value = json.dumps(value)
        return value

    def process_result_value(self, value, dialect):
        """
        Load the value back from its JSON string representation.
        """
        if value is not None:
            value = json.loads(value)
        return value


# TypeEngine.with_variant says "use StringyJSON instead when
# connecting to 'sqlite'"
# pylint: disable=invalid-name
MagicJSON = types.JSON().with_variant(StringyJSON, 'sqlite')
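
The immutability caveat in the `StringyJSON` docstring is worth spelling out; a
hedged sketch, assuming a mapped object with a JSON-typed column named
`details` (hypothetical — the actual column names live in
`flatisfy/models/flat.py`):

```
# Hedged sketch of the caveat above; `flat.details` is a hypothetical
# JSON-typed column on a mapped object within an open session.

# In-place mutation is NOT tracked: no UPDATE is emitted on commit.
flat.details["new_key"] = "value"

# Rebinding the attribute to a copied dict does trigger the update.
details = dict(flat.details)
details["new_key"] = "value"
flat.details = details
```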
@@ -0,0 +1,13 @@
# coding: utf-8
"""
This module contains the definitions of the Flatisfy-specific exceptions.
"""
from __future__ import absolute_import, print_function, unicode_literals


class DataBuildError(Exception):
    """
    Error occurring while building a data file.
    """
    pass
@@ -0,0 +1,76 @@
# coding: utf-8
"""
This module contains all the code related to fetching and loading flats lists.
"""
from __future__ import absolute_import, print_function, unicode_literals

import json
import logging
import subprocess


LOGGER = logging.getLogger(__name__)


def fetch_flats_list(config):
    """
    Fetch the available flats using the Flatboob / Weboob config.

    :param config: A config dict.
    :return: A list of all the available flats.
    """
    flats_list = []
    for query in config["queries"]:
        max_entries = config["max_entries"]
        if max_entries is None:
            max_entries = 0

        LOGGER.info("Loading flats from query %s.", query)
        flatboob_output = subprocess.check_output(
            ["../weboob/tools/local_run.sh", "../weboob/scripts/flatboob",
             "-n", str(max_entries), "-f", "json", "load", query]
        )
        query_flats_list = json.loads(flatboob_output)
        LOGGER.info("Fetched %d flats.", len(query_flats_list))
        flats_list.extend(query_flats_list)
    LOGGER.info("Fetched a total of %d flats.", len(flats_list))
    return flats_list


def fetch_details(flat_id):
    """
    Fetch the additional details for a flat using Flatboob / Weboob.

    :param flat_id: ID of the flat to fetch details for.
    :return: A flat dict with all the available data.
    """
    LOGGER.info("Loading additional details for flat %s.", flat_id)
    flatboob_output = subprocess.check_output(
        ["../weboob/tools/local_run.sh", "../weboob/scripts/flatboob",
         "-f", "json", "info", flat_id]
    )
    flat_details = json.loads(flatboob_output)
    LOGGER.info("Fetched details for flat %s.", flat_id)