Support other transport modes, fix #62

This commit is contained in:
Lucas Verney 2018-09-07 19:28:15 +02:00
parent 9e71b946e3
commit e0e04317ca
6 changed files with 168 additions and 54 deletions

View File

@ -112,7 +112,10 @@ List of configuration options:
means that it will store the database in the default location, in
`data_directory`.
* `navitia_api_key` is an API token for [Navitia](https://www.navitia.io/)
which is required to compute travel times.
which is required to compute travel times for `PUBLIC_TRANSPORT` mode.
* `mapbox_api_key` is an API token for [Mapbox](http://mapbox.com/)
which is required to compute travel times for `WALK`, `BIKE` and `CAR`
modes.
* `modules_path` is the path to the WebOOB modules. It can be `null` if you
want WebOOB to use the locally installed [WebOOB
modules](https://git.weboob.org/weboob/modules), which you should install
@ -136,6 +139,10 @@ List of configuration options:
`1500`). This is useful to avoid false positives.
* `duplicate_threshold` is the minimum score in the deep duplicate detection
step to consider two flats as being duplicates (defaults to `15`).
* `serve_images_locally` lets you download all the images from the housing
websites when importing the posts. Your Flatisfy instance then works standalone,
serving the local copies of the images instead of fetching them from the
remote websites every time you look through the fetched housing posts.
_Note:_ In production, you can either use the `serve` command with a reliable
webserver instead of the default Bottle webserver (specifying a `webserver`
@ -160,14 +167,14 @@ under the `constraints` key. The available constraints are:
* `postal_codes` (as strings) is a list of postal codes. You should include any postal code
you want, and especially the postal codes close to the precise location you
want.
* `time_to` is a dictionary of places to compute travel time to them (using
public transport, relies on [Navitia API](http://navitia.io/)).
* `time_to` is a dictionary of places to which travel times should be computed.
Typically,
```
"time_to": {
"foobar": {
"gps": [LAT, LNG],
"mode": A transport mode,
"time": [min, max]
}
}
@ -176,7 +183,10 @@ under the `constraints` key. The available constraints are:
means that the housings must be between the `min` and `max` bounds (possibly
`null`) from the place identified by the GPS coordinates `LAT` and `LNG`
(latitude and longitude), and we call this place `foobar` in human-readable
form. Beware that `time` constraints are in **seconds**.
form. `mode` should be one of `PUBLIC_TRANSPORT`, `WALK`, `BIKE` or `CAR`
(it defaults to `PUBLIC_TRANSPORT` when omitted).
Beware that `time` constraints are in **seconds**. You should allow
some margin, as travel times are computed from the public transport stations
found near the flat, which only gives a rough estimate of the flat position.
* `minimum_nb_photos` lets you filter out posts with fewer than this number of
photos.
* `description_should_contain` lets you specify a list of terms that should

View File

@ -19,6 +19,7 @@ from weboob.capabilities.housing import POSTS_TYPES, HOUSE_TYPES
from flatisfy import data
from flatisfy import tools
from flatisfy.constants import TimeToModes
from flatisfy.models.postal_code import PostalCode
@ -38,7 +39,8 @@ DEFAULT_CONFIG = {
"minimum_nb_photos": None, # min number of photos
"description_should_contain": [], # list of terms
"time_to": {} # Dict mapping names to {"gps": [lat, lng],
# "time": (min, max) }
# "time": (min, max),
# "mode": Valid mode }
# Time is in seconds
}
},
@ -56,6 +58,8 @@ DEFAULT_CONFIG = {
"serve_images_locally": True,
# Navitia API key
"navitia_api_key": None,
# Mapbox API key
"mapbox_api_key": None,
# Number of filtering passes to run
"passes": 3,
# Maximum number of entries to fetch
@ -151,6 +155,10 @@ def validate_config(config, check_with_data):
assert isinstance(config["duplicate_threshold"], int)
assert isinstance(config["duplicate_image_hash_threshold"], int)
# API keys
assert config["navitia_api_key"] is None or isinstance(config["navitia_api_key"], str) # noqa: E501
assert config["mapbox_api_key"] is None or isinstance(config["mapbox_api_key"], str) # noqa: E501
# Ensure constraints are ok
assert config["constraints"]
for constraint in config["constraints"].values():
@ -209,6 +217,8 @@ def validate_config(config, check_with_data):
assert len(item["gps"]) == 2
assert "time" in item
_check_constraints_bounds(item["time"])
if "mode" in item:
TimeToModes[item["mode"]]
return True
except (AssertionError, KeyError):

View File

@ -4,6 +4,8 @@ Constants used across the app.
"""
from __future__ import absolute_import, print_function, unicode_literals
from enum import Enum
# Some backends give more infos than others. Here is the precedence we want to
# use. First is most important one, last is the one that will always be
# considered as less trustable if two backends have similar info about a
@ -16,3 +18,10 @@ BACKENDS_BY_PRECEDENCE = [
"explorimmo",
"logicimmo"
]
class TimeToModes(Enum):
    """
    Transport modes accepted for the ``mode`` key of a ``time_to`` constraint.

    Members are meant to be looked up by name from the configuration, e.g.
    ``TimeToModes["WALK"]``; an unknown name raises ``KeyError``.
    """
    # Public transport is the only mode with a negative value.
    # NOTE(review): the numeric values look arbitrary; confirm nothing relies
    # on them before renumbering.
    PUBLIC_TRANSPORT = -1
    WALK = 1
    BIKE = 2
    CAR = 3

View File

@ -12,6 +12,7 @@ import re
from flatisfy import data
from flatisfy import tools
from flatisfy.constants import TimeToModes
from flatisfy.models.postal_code import PostalCode
from flatisfy.models.public_transport import PublicTransport
@ -417,12 +418,14 @@ def compute_travel_times(flats_list, constraint, config):
# For each place, loop over the stations close to the flat, and find
# the minimum travel time.
for place_name, place in constraint["time_to"].items():
mode = place.get("mode", "PUBLIC_TRANSPORT")
time_to_place_dict = None
for station in flat["flatisfy"]["matched_stations"]:
# Time from station is a dict with time and route
time_from_station_dict = tools.get_travel_time_between(
station["gps"],
place["gps"],
TimeToModes[mode],
config
)
if (
@ -436,8 +439,8 @@ def compute_travel_times(flats_list, constraint, config):
if time_to_place_dict:
LOGGER.info(
"Travel time between %s and flat %s is %ds.",
place_name, flat["id"], time_to_place_dict["time"]
"Travel time between %s and flat %s by %s is %ds.",
place_name, flat["id"], mode, time_to_place_dict["time"]
)
flat["flatisfy"]["time_to"][place_name] = time_to_place_dict
return flats_list

View File

@ -15,9 +15,12 @@ import math
import re
import time
import mapbox
import requests
import unidecode
from flatisfy.constants import TimeToModes
LOGGER = logging.getLogger(__name__)
@ -25,6 +28,23 @@ LOGGER = logging.getLogger(__name__)
NAVITIA_ENDPOINT = "https://api.navitia.io/v1/coverage/fr-idf/journeys"
def next_weekday(d, weekday):
    """
    Return the next occurrence of a given weekday, strictly after ``d``.

    Adapted from
    https://stackoverflow.com/questions/6558535/find-the-date-for-the-first-monday-after-a-given-a-date.

    :param d: Date or datetime to search from. Its time-of-day part, if any,
        is carried over unchanged to the result.
    :param weekday: Target weekday (0 for Monday, 6 for Sunday).
    :returns: ``d`` shifted forward to the next given weekday. If ``d``
        already falls on that weekday, the result is one full week later.
    """
    offset = weekday - d.weekday()
    # A zero or negative offset means the target day is today or already
    # passed this week, so jump to the same weekday of the following week.
    shift = offset + 7 if offset <= 0 else offset
    return d + datetime.timedelta(days=shift)
def convert_arabic_to_roman(arabic):
"""
Convert an arabic literal to a roman one. Limits to 39, which is a rough
@ -322,7 +342,7 @@ def merge_dicts(*args):
return merge_dicts(merged_flat, *args[2:])
def get_travel_time_between(latlng_from, latlng_to, config):
def get_travel_time_between(latlng_from, latlng_to, mode, config):
"""
Query the Navitia API to get the travel time between two points identified
by their latitude and longitude.
@ -330,6 +350,7 @@ def get_travel_time_between(latlng_from, latlng_to, config):
:param latlng_from: A tuple of (latitude, longitude) for the starting
point.
:param latlng_to: A tuple of (latitude, longitude) for the destination.
:param mode: A TimeToMode enum value for the mode of transportation to use.
:return: A dict of the travel time in seconds and sections of the journey
with GeoJSON paths. Returns ``None`` if it could not fetch it.
@ -338,58 +359,118 @@ def get_travel_time_between(latlng_from, latlng_to, config):
Uses the Navitia API. Requires a ``navitia_api_key`` field to be
filled-in in the ``config``.
"""
sections = []
travel_time = None
# Check that Navitia API key is available
if config["navitia_api_key"]:
payload = {
"from": "%s;%s" % (latlng_from[1], latlng_from[0]),
"to": "%s;%s" % (latlng_to[1], latlng_to[0]),
"datetime": datetime.datetime.now().isoformat(),
"count": 1
}
try:
# Do the query to Navitia API
req = requests.get(
NAVITIA_ENDPOINT, params=payload,
auth=(config["navitia_api_key"], "")
if mode == TimeToModes.PUBLIC_TRANSPORT:
# Check that Navitia API key is available
if config["navitia_api_key"]:
# Search route for next Monday at 8am to avoid looking for a route
# in the middle of the night if the fetch is done by night.
date_from = next_weekday(datetime.datetime.now(), 0).replace(
hour=8,
minute=0,
)
req.raise_for_status()
payload = {
"from": "%s;%s" % (latlng_from[1], latlng_from[0]),
"to": "%s;%s" % (latlng_to[1], latlng_to[0]),
"datetime": date_from.isoformat(),
"count": 1
}
try:
# Do the query to Navitia API
req = requests.get(
NAVITIA_ENDPOINT, params=payload,
auth=(config["navitia_api_key"], "")
)
req.raise_for_status()
journeys = req.json()["journeys"][0]
travel_time = journeys["durations"]["total"]
sections = []
for section in journeys["sections"]:
if section["type"] == "public_transport":
# Public transport
sections.append({
"geojson": section["geojson"],
"color": (
section["display_informations"].get("color", None)
)
})
elif section["type"] == "street_network":
# Walking
sections.append({
"geojson": section["geojson"],
"color": None
})
else:
# Skip anything else
continue
except (requests.exceptions.RequestException,
ValueError, IndexError, KeyError) as exc:
# Ignore any possible exception
journeys = req.json()["journeys"][0]
travel_time = journeys["durations"]["total"]
for section in journeys["sections"]:
if section["type"] == "public_transport":
# Public transport
sections.append({
"geojson": section["geojson"],
"color": (
section["display_informations"].get("color", None)
)
})
elif section["type"] == "street_network":
# Walking
sections.append({
"geojson": section["geojson"],
"color": None
})
else:
# Skip anything else
continue
except (requests.exceptions.RequestException,
ValueError, IndexError, KeyError) as exc:
# Ignore any possible exception
LOGGER.warning(
"An exception occurred during travel time lookup on "
"Navitia: %s.",
str(exc)
)
else:
LOGGER.warning(
"An exception occurred during travel time lookup on "
"Navitia: %s.",
str(exc)
"No API key available for travel time lookup. Please provide "
"a Navitia API key. Skipping travel time lookup."
)
else:
LOGGER.warning(
"No API key available for travel time lookup. Please provide "
"a Navitia API key. Skipping travel time lookup."
)
elif mode in [TimeToModes.WALK, TimeToModes.BIKE, TimeToModes.CAR]:
MAPBOX_MODES = {
TimeToModes.WALK: 'mapbox/walking',
TimeToModes.BIKE: 'mapbox/cycling',
TimeToModes.CAR: 'mapbox/driving'
}
# Check that Mapbox API key is available
if config["mapbox_api_key"]:
try:
service = mapbox.Directions(
access_token=config['mapbox_api_key']
)
origin = {
'type': 'Feature',
'properties': {'name': 'Start'},
'geometry': {
'type': 'Point',
'coordinates': [latlng_from[1], latlng_from[0]]}}
destination = {
'type': 'Feature',
'properties': {'name': 'End'},
'geometry': {
'type': 'Point',
'coordinates': [latlng_to[1], latlng_to[0]]}}
response = service.directions(
[origin, destination], MAPBOX_MODES[mode]
)
response.raise_for_status()
route = response.geojson()['features'][0]
# Fix longitude/latitude inversion in geojson output
geometry = route['geometry']
geometry['coordinates'] = [
(x[1], x[0]) for x in geometry['coordinates']
]
sections = [{
"geojson": geometry,
"color": "000"
}]
travel_time = route['properties']['duration']
except (requests.exceptions.RequestException,
IndexError, KeyError) as exc:
# Ignore any possible exception
LOGGER.warning(
"An exception occurred during travel time lookup on "
"Mapbox: %s.",
str(exc)
)
else:
LOGGER.warning(
"No API key available for travel time lookup. Please provide "
"a Mapbox API key. Skipping travel time lookup."
)
if travel_time:
return {
"time": travel_time,

View File

@ -8,6 +8,7 @@ enum34; python_version < '3.4'
functools32; python_version < '3.2.3'
future
imagehash
mapbox
pillow
requests
requests_mock