Support other transport modes, fix #62

This commit is contained in:
Lucas Verney 2018-09-07 19:28:15 +02:00
parent 9e71b946e3
commit e0e04317ca
6 changed files with 168 additions and 54 deletions

View File

@ -112,7 +112,10 @@ List of configuration options:
means that it will store the database in the default location, in means that it will store the database in the default location, in
`data_directory`. `data_directory`.
* `navitia_api_key` is an API token for [Navitia](https://www.navitia.io/) * `navitia_api_key` is an API token for [Navitia](https://www.navitia.io/)
which is required to compute travel times. which is required to compute travel times for `PUBLIC_TRANSPORT` mode.
* `mapbox_api_key` is an API token for [Mapbox](http://mapbox.com/)
which is required to compute travel times for `WALK`, `BIKE` and `CAR`
modes.
* `modules_path` is the path to the WebOOB modules. It can be `null` if you * `modules_path` is the path to the WebOOB modules. It can be `null` if you
want WebOOB to use the locally installed [WebOOB want WebOOB to use the locally installed [WebOOB
modules](https://git.weboob.org/weboob/modules), which you should install modules](https://git.weboob.org/weboob/modules), which you should install
@ -136,6 +139,10 @@ List of configuration options:
`1500`). This is useful to avoid false-positive. `1500`). This is useful to avoid false-positive.
* `duplicate_threshold` is the minimum score in the deep duplicate detection * `duplicate_threshold` is the minimum score in the deep duplicate detection
step to consider two flats as being duplicates (defaults to `15`). step to consider two flats as being duplicates (defaults to `15`).
* `serve_images_locally` lets you download all the images from the housing
websites when importing the posts. Your Flatisfy instance then works standalone,
serving its local copy of the images instead of fetching them from the
remote websites every time you look through the fetched housing posts.
_Note:_ In production, you can either use the `serve` command with a reliable _Note:_ In production, you can either use the `serve` command with a reliable
webserver instead of the default Bottle webserver (specifying a `webserver` webserver instead of the default Bottle webserver (specifying a `webserver`
@ -160,14 +167,14 @@ under the `constraints` key. The available constraints are:
* `postal_codes` (as strings) is a list of postal codes. You should include any postal code * `postal_codes` (as strings) is a list of postal codes. You should include any postal code
you want, and especially the postal codes close to the precise location you you want, and especially the postal codes close to the precise location you
want. want.
* `time_to` is a dictionary of places to compute travel time to them (using * `time_to` is a dictionary of places to compute travel time to them.
public transport, relies on [Navitia API](http://navitia.io/)).
Typically, Typically,
``` ```
"time_to": { "time_to": {
"foobar": { "foobar": {
"gps": [LAT, LNG], "gps": [LAT, LNG],
"mode": A transport mode,
"time": [min, max] "time": [min, max]
} }
} }
@ -176,7 +183,10 @@ under the `constraints` key. The available constraints are:
means that the housings must be between the `min` and `max` bounds (possibly means that the housings must be between the `min` and `max` bounds (possibly
`null`) from the place identified by the GPS coordinates `LAT` and `LNG` `null`) from the place identified by the GPS coordinates `LAT` and `LNG`
(latitude and longitude), and we call this place `foobar` in human-readable (latitude and longitude), and we call this place `foobar` in human-readable
form. Beware that `time` constraints are in **seconds**. form. `mode` should be either `PUBLIC_TRANSPORT`, `WALK`, `BIKE` or `CAR`.
Beware that `time` constraints are in **seconds**. You should allow
some margin, as the travel time is computed from the public transport stations
found near the flat, which are only a rough estimate of the flat's position.
* `minimum_nb_photos` lets you filter out posts with less than this number of * `minimum_nb_photos` lets you filter out posts with less than this number of
photos. photos.
* `description_should_contain` lets you specify a list of terms that should * `description_should_contain` lets you specify a list of terms that should

View File

@ -19,6 +19,7 @@ from weboob.capabilities.housing import POSTS_TYPES, HOUSE_TYPES
from flatisfy import data from flatisfy import data
from flatisfy import tools from flatisfy import tools
from flatisfy.constants import TimeToModes
from flatisfy.models.postal_code import PostalCode from flatisfy.models.postal_code import PostalCode
@ -38,7 +39,8 @@ DEFAULT_CONFIG = {
"minimum_nb_photos": None, # min number of photos "minimum_nb_photos": None, # min number of photos
"description_should_contain": [], # list of terms "description_should_contain": [], # list of terms
"time_to": {} # Dict mapping names to {"gps": [lat, lng], "time_to": {} # Dict mapping names to {"gps": [lat, lng],
# "time": (min, max) } # "time": (min, max),
# "mode": Valid mode }
# Time is in seconds # Time is in seconds
} }
}, },
@ -56,6 +58,8 @@ DEFAULT_CONFIG = {
"serve_images_locally": True, "serve_images_locally": True,
# Navitia API key # Navitia API key
"navitia_api_key": None, "navitia_api_key": None,
# Mapbox API key
"mapbox_api_key": None,
# Number of filtering passes to run # Number of filtering passes to run
"passes": 3, "passes": 3,
# Maximum number of entries to fetch # Maximum number of entries to fetch
@ -151,6 +155,10 @@ def validate_config(config, check_with_data):
assert isinstance(config["duplicate_threshold"], int) assert isinstance(config["duplicate_threshold"], int)
assert isinstance(config["duplicate_image_hash_threshold"], int) assert isinstance(config["duplicate_image_hash_threshold"], int)
# API keys
assert config["navitia_api_key"] is None or isinstance(config["navitia_api_key"], str) # noqa: E501
assert config["mapbox_api_key"] is None or isinstance(config["mapbox_api_key"], str) # noqa: E501
# Ensure constraints are ok # Ensure constraints are ok
assert config["constraints"] assert config["constraints"]
for constraint in config["constraints"].values(): for constraint in config["constraints"].values():
@ -209,6 +217,8 @@ def validate_config(config, check_with_data):
assert len(item["gps"]) == 2 assert len(item["gps"]) == 2
assert "time" in item assert "time" in item
_check_constraints_bounds(item["time"]) _check_constraints_bounds(item["time"])
if "mode" in item:
TimeToModes[item["mode"]]
return True return True
except (AssertionError, KeyError): except (AssertionError, KeyError):

View File

@ -4,6 +4,8 @@ Constants used across the app.
""" """
from __future__ import absolute_import, print_function, unicode_literals from __future__ import absolute_import, print_function, unicode_literals
from enum import Enum
# Some backends give more infos than others. Here is the precedence we want to # Some backends give more infos than others. Here is the precedence we want to
# use. First is most important one, last is the one that will always be # use. First is most important one, last is the one that will always be
# considered as less trustable if two backends have similar info about a # considered as less trustable if two backends have similar info about a
@ -16,3 +18,10 @@ BACKENDS_BY_PRECEDENCE = [
"explorimmo", "explorimmo",
"logicimmo" "logicimmo"
] ]
class TimeToModes(Enum):
    """
    Transport modes that can be requested for a ``time_to`` constraint.

    Members are meant to be looked up by name, e.g.
    ``TimeToModes["PUBLIC_TRANSPORT"]``; an unknown name raises ``KeyError``.
    """
    PUBLIC_TRANSPORT = -1
    WALK = 1
    BIKE = 2
    CAR = 3

View File

@ -12,6 +12,7 @@ import re
from flatisfy import data from flatisfy import data
from flatisfy import tools from flatisfy import tools
from flatisfy.constants import TimeToModes
from flatisfy.models.postal_code import PostalCode from flatisfy.models.postal_code import PostalCode
from flatisfy.models.public_transport import PublicTransport from flatisfy.models.public_transport import PublicTransport
@ -417,12 +418,14 @@ def compute_travel_times(flats_list, constraint, config):
# For each place, loop over the stations close to the flat, and find # For each place, loop over the stations close to the flat, and find
# the minimum travel time. # the minimum travel time.
for place_name, place in constraint["time_to"].items(): for place_name, place in constraint["time_to"].items():
mode = place.get("mode", "PUBLIC_TRANSPORT")
time_to_place_dict = None time_to_place_dict = None
for station in flat["flatisfy"]["matched_stations"]: for station in flat["flatisfy"]["matched_stations"]:
# Time from station is a dict with time and route # Time from station is a dict with time and route
time_from_station_dict = tools.get_travel_time_between( time_from_station_dict = tools.get_travel_time_between(
station["gps"], station["gps"],
place["gps"], place["gps"],
TimeToModes[mode],
config config
) )
if ( if (
@ -436,8 +439,8 @@ def compute_travel_times(flats_list, constraint, config):
if time_to_place_dict: if time_to_place_dict:
LOGGER.info( LOGGER.info(
"Travel time between %s and flat %s is %ds.", "Travel time between %s and flat %s by %s is %ds.",
place_name, flat["id"], time_to_place_dict["time"] place_name, flat["id"], mode, time_to_place_dict["time"]
) )
flat["flatisfy"]["time_to"][place_name] = time_to_place_dict flat["flatisfy"]["time_to"][place_name] = time_to_place_dict
return flats_list return flats_list

View File

@ -15,9 +15,12 @@ import math
import re import re
import time import time
import mapbox
import requests import requests
import unidecode import unidecode
from flatisfy.constants import TimeToModes
LOGGER = logging.getLogger(__name__) LOGGER = logging.getLogger(__name__)
@ -25,6 +28,23 @@ LOGGER = logging.getLogger(__name__)
NAVITIA_ENDPOINT = "https://api.navitia.io/v1/coverage/fr-idf/journeys" NAVITIA_ENDPOINT = "https://api.navitia.io/v1/coverage/fr-idf/journeys"
def next_weekday(d, weekday):
    """
    Find the datetime object for the next occurrence of a given weekday.

    The result is always strictly after ``d``: if ``d`` already falls on the
    requested weekday, the same weekday of the following week is returned.
    Only the date is shifted, the time of day of ``d`` is kept as is.

    From
    https://stackoverflow.com/questions/6558535/find-the-date-for-the-first-monday-after-a-given-a-date.

    :param d: Datetime to search from.
    :param weekday: Weekday (0 for Monday, etc)
    :returns: The datetime object for the next given weekday.
    """
    days_ahead = weekday - d.weekday()
    if days_ahead <= 0:  # Target day already happened this week
        days_ahead += 7
    return d + datetime.timedelta(days=days_ahead)
def convert_arabic_to_roman(arabic): def convert_arabic_to_roman(arabic):
""" """
Convert an arabic literal to a roman one. Limits to 39, which is a rough Convert an arabic literal to a roman one. Limits to 39, which is a rough
@ -322,7 +342,7 @@ def merge_dicts(*args):
return merge_dicts(merged_flat, *args[2:]) return merge_dicts(merged_flat, *args[2:])
def get_travel_time_between(latlng_from, latlng_to, config): def get_travel_time_between(latlng_from, latlng_to, mode, config):
""" """
Query the Navitia API to get the travel time between two points identified Query the Navitia API to get the travel time between two points identified
by their latitude and longitude. by their latitude and longitude.
@ -330,6 +350,7 @@ def get_travel_time_between(latlng_from, latlng_to, config):
:param latlng_from: A tuple of (latitude, longitude) for the starting :param latlng_from: A tuple of (latitude, longitude) for the starting
point. point.
:param latlng_to: A tuple of (latitude, longitude) for the destination. :param latlng_to: A tuple of (latitude, longitude) for the destination.
:param mode: A TimeToModes enum value for the mode of transportation to use.
:return: A dict of the travel time in seconds and sections of the journey :return: A dict of the travel time in seconds and sections of the journey
with GeoJSON paths. Returns ``None`` if it could not fetch it. with GeoJSON paths. Returns ``None`` if it could not fetch it.
@ -338,58 +359,118 @@ def get_travel_time_between(latlng_from, latlng_to, config):
Uses the Navitia API. Requires a ``navitia_api_key`` field to be Uses the Navitia API. Requires a ``navitia_api_key`` field to be
filled-in in the ``config``. filled-in in the ``config``.
""" """
sections = []
travel_time = None travel_time = None
# Check that Navitia API key is available if mode == TimeToModes.PUBLIC_TRANSPORT:
if config["navitia_api_key"]: # Check that Navitia API key is available
payload = { if config["navitia_api_key"]:
"from": "%s;%s" % (latlng_from[1], latlng_from[0]), # Search route for next Monday at 8am to avoid looking for a route
"to": "%s;%s" % (latlng_to[1], latlng_to[0]), # in the middle of the night if the fetch is done by night.
"datetime": datetime.datetime.now().isoformat(), date_from = next_weekday(datetime.datetime.now(), 0).replace(
"count": 1 hour=8,
} minute=0,
try:
# Do the query to Navitia API
req = requests.get(
NAVITIA_ENDPOINT, params=payload,
auth=(config["navitia_api_key"], "")
) )
req.raise_for_status() payload = {
"from": "%s;%s" % (latlng_from[1], latlng_from[0]),
"to": "%s;%s" % (latlng_to[1], latlng_to[0]),
"datetime": date_from.isoformat(),
"count": 1
}
try:
# Do the query to Navitia API
req = requests.get(
NAVITIA_ENDPOINT, params=payload,
auth=(config["navitia_api_key"], "")
)
req.raise_for_status()
journeys = req.json()["journeys"][0] journeys = req.json()["journeys"][0]
travel_time = journeys["durations"]["total"] travel_time = journeys["durations"]["total"]
sections = [] for section in journeys["sections"]:
for section in journeys["sections"]: if section["type"] == "public_transport":
if section["type"] == "public_transport": # Public transport
# Public transport sections.append({
sections.append({ "geojson": section["geojson"],
"geojson": section["geojson"], "color": (
"color": ( section["display_informations"].get("color", None)
section["display_informations"].get("color", None) )
) })
}) elif section["type"] == "street_network":
elif section["type"] == "street_network": # Walking
# Walking sections.append({
sections.append({ "geojson": section["geojson"],
"geojson": section["geojson"], "color": None
"color": None })
}) else:
else: # Skip anything else
# Skip anything else continue
continue except (requests.exceptions.RequestException,
except (requests.exceptions.RequestException, ValueError, IndexError, KeyError) as exc:
ValueError, IndexError, KeyError) as exc: # Ignore any possible exception
# Ignore any possible exception LOGGER.warning(
"An exception occurred during travel time lookup on "
"Navitia: %s.",
str(exc)
)
else:
LOGGER.warning( LOGGER.warning(
"An exception occurred during travel time lookup on " "No API key available for travel time lookup. Please provide "
"Navitia: %s.", "a Navitia API key. Skipping travel time lookup."
str(exc)
) )
else: elif mode in [TimeToModes.WALK, TimeToModes.BIKE, TimeToModes.CAR]:
LOGGER.warning( MAPBOX_MODES = {
"No API key available for travel time lookup. Please provide " TimeToModes.WALK: 'mapbox/walking',
"a Navitia API key. Skipping travel time lookup." TimeToModes.BIKE: 'mapbox/cycling',
) TimeToModes.CAR: 'mapbox/driving'
}
# Check that Mapbox API key is available
if config["mapbox_api_key"]:
try:
service = mapbox.Directions(
access_token=config['mapbox_api_key']
)
origin = {
'type': 'Feature',
'properties': {'name': 'Start'},
'geometry': {
'type': 'Point',
'coordinates': [latlng_from[1], latlng_from[0]]}}
destination = {
'type': 'Feature',
'properties': {'name': 'End'},
'geometry': {
'type': 'Point',
'coordinates': [latlng_to[1], latlng_to[0]]}}
response = service.directions(
[origin, destination], MAPBOX_MODES[mode]
)
response.raise_for_status()
route = response.geojson()['features'][0]
# Fix longitude/latitude inversion in geojson output
geometry = route['geometry']
geometry['coordinates'] = [
(x[1], x[0]) for x in geometry['coordinates']
]
sections = [{
"geojson": geometry,
"color": "000"
}]
travel_time = route['properties']['duration']
except (requests.exceptions.RequestException,
IndexError, KeyError) as exc:
# Ignore any possible exception
LOGGER.warning(
"An exception occurred during travel time lookup on "
"Mapbox: %s.",
str(exc)
)
else:
LOGGER.warning(
"No API key available for travel time lookup. Please provide "
"a Mapbox API key. Skipping travel time lookup."
)
if travel_time: if travel_time:
return { return {
"time": travel_time, "time": travel_time,

View File

@ -8,6 +8,7 @@ enum34; python_version < '3.4'
functools32; python_version < '3.2.3' functools32; python_version < '3.2.3'
future future
imagehash imagehash
mapbox
pillow pillow
requests requests
requests_mock requests_mock