From e0e04317ca124086a12c83909a561597a84d278f Mon Sep 17 00:00:00 2001 From: "Phyks (Lucas Verney)" Date: Fri, 7 Sep 2018 19:28:15 +0200 Subject: [PATCH] Support other transport modes, fix #62 --- doc/0.getting_started.md | 18 +++- flatisfy/config.py | 12 ++- flatisfy/constants.py | 9 ++ flatisfy/filters/metadata.py | 7 +- flatisfy/tools.py | 175 +++++++++++++++++++++++++---------- requirements.txt | 1 + 6 files changed, 168 insertions(+), 54 deletions(-) diff --git a/doc/0.getting_started.md b/doc/0.getting_started.md index 50c3dd5..81b9704 100644 --- a/doc/0.getting_started.md +++ b/doc/0.getting_started.md @@ -112,7 +112,10 @@ List of configuration options: means that it will store the database in the default location, in `data_directory`. * `navitia_api_key` is an API token for [Navitia](https://www.navitia.io/) - which is required to compute travel times. + which is required to compute travel times for `PUBLIC_TRANSPORT` mode. +* `mapbox_api_key` is an API token for [Mapbox](http://mapbox.com/) + which is required to compute travel times for `WALK`, `BIKE` and `CAR` + modes. * `modules_path` is the path to the WebOOB modules. It can be `null` if you want WebOOB to use the locally installed [WebOOB modules](https://git.weboob.org/weboob/modules), which you should install @@ -136,6 +139,10 @@ List of configuration options: `1500`). This is useful to avoid false-positive. * `duplicate_threshold` is the minimum score in the deep duplicate detection step to consider two flats as being duplicates (defaults to `15`). +* `serve_images_locally` lets you download all the images from the housing + websites when importing the posts. Flatisfy then works standalone, + serving the local copies of the images instead of fetching them from the + remote websites every time you look through the fetched housing posts. 
_Note:_ In production, you can either use the `serve` command with a reliable webserver instead of the default Bottle webserver (specifying a `webserver` @@ -160,14 +167,14 @@ under the `constraints` key. The available constraints are: * `postal_codes` (as strings) is a list of postal codes. You should include any postal code you want, and especially the postal codes close to the precise location you want. -* `time_to` is a dictionary of places to compute travel time to them (using - public transport, relies on [Navitia API](http://navitia.io/)). +* `time_to` is a dictionary of places to compute travel time to them. Typically, ``` "time_to": { "foobar": { "gps": [LAT, LNG], + "mode": A transport mode, "time": [min, max] } } @@ -176,7 +183,10 @@ under the `constraints` key. The available constraints are: means that the housings must be between the `min` and `max` bounds (possibly `null`) from the place identified by the GPS coordinates `LAT` and `LNG` (latitude and longitude), and we call this place `foobar` in human-readable - form. Beware that `time` constraints are in **seconds**. + form. `mode` should be either `PUBLIC_TRANSPORT` (the default when omitted), `WALK`, `BIKE` or `CAR`. + Beware that `time` constraints are in **seconds**. You should allow + some margin, as travel times are computed from the public transport stations + found near the flat, which only roughly approximate the flat's position. * `minimum_nb_photos` lets you filter out posts with less than this number of photos. 
* `description_should_contain` lets you specify a list of terms that should diff --git a/flatisfy/config.py b/flatisfy/config.py index 802711b..b1f6c34 100644 --- a/flatisfy/config.py +++ b/flatisfy/config.py @@ -19,6 +19,7 @@ from weboob.capabilities.housing import POSTS_TYPES, HOUSE_TYPES from flatisfy import data from flatisfy import tools +from flatisfy.constants import TimeToModes from flatisfy.models.postal_code import PostalCode @@ -38,7 +39,8 @@ DEFAULT_CONFIG = { "minimum_nb_photos": None, # min number of photos "description_should_contain": [], # list of terms "time_to": {} # Dict mapping names to {"gps": [lat, lng], - # "time": (min, max) } + # "time": (min, max), + # "mode": Valid mode } # Time is in seconds } }, @@ -56,6 +58,8 @@ DEFAULT_CONFIG = { "serve_images_locally": True, # Navitia API key "navitia_api_key": None, + # Mapbox API key + "mapbox_api_key": None, # Number of filtering passes to run "passes": 3, # Maximum number of entries to fetch @@ -151,6 +155,10 @@ def validate_config(config, check_with_data): assert isinstance(config["duplicate_threshold"], int) assert isinstance(config["duplicate_image_hash_threshold"], int) + # API keys + assert config["navitia_api_key"] is None or isinstance(config["navitia_api_key"], str) # noqa: E501 + assert config["mapbox_api_key"] is None or isinstance(config["mapbox_api_key"], str) # noqa: E501 + # Ensure constraints are ok assert config["constraints"] for constraint in config["constraints"].values(): @@ -209,6 +217,8 @@ def validate_config(config, check_with_data): assert len(item["gps"]) == 2 assert "time" in item _check_constraints_bounds(item["time"]) + if "mode" in item: + TimeToModes[item["mode"]] return True except (AssertionError, KeyError): diff --git a/flatisfy/constants.py b/flatisfy/constants.py index 43c4f41..056c949 100644 --- a/flatisfy/constants.py +++ b/flatisfy/constants.py @@ -4,6 +4,8 @@ Constants used across the app. 
""" from __future__ import absolute_import, print_function, unicode_literals +from enum import Enum + # Some backends give more infos than others. Here is the precedence we want to # use. First is most important one, last is the one that will always be # considered as less trustable if two backends have similar info about a @@ -16,3 +18,10 @@ BACKENDS_BY_PRECEDENCE = [ "explorimmo", "logicimmo" ] + + +class TimeToModes(Enum): + PUBLIC_TRANSPORT = -1 + WALK = 1 + BIKE = 2 + CAR = 3 diff --git a/flatisfy/filters/metadata.py b/flatisfy/filters/metadata.py index 5e0b56b..daeecc2 100644 --- a/flatisfy/filters/metadata.py +++ b/flatisfy/filters/metadata.py @@ -12,6 +12,7 @@ import re from flatisfy import data from flatisfy import tools +from flatisfy.constants import TimeToModes from flatisfy.models.postal_code import PostalCode from flatisfy.models.public_transport import PublicTransport @@ -417,12 +418,14 @@ def compute_travel_times(flats_list, constraint, config): # For each place, loop over the stations close to the flat, and find # the minimum travel time. 
for place_name, place in constraint["time_to"].items(): + mode = place.get("mode", "PUBLIC_TRANSPORT") time_to_place_dict = None for station in flat["flatisfy"]["matched_stations"]: # Time from station is a dict with time and route time_from_station_dict = tools.get_travel_time_between( station["gps"], place["gps"], + TimeToModes[mode], config ) if ( @@ -436,8 +439,8 @@ def compute_travel_times(flats_list, constraint, config): if time_to_place_dict: LOGGER.info( - "Travel time between %s and flat %s is %ds.", - place_name, flat["id"], time_to_place_dict["time"] + "Travel time between %s and flat %s by %s is %ds.", + place_name, flat["id"], mode, time_to_place_dict["time"] ) flat["flatisfy"]["time_to"][place_name] = time_to_place_dict return flats_list diff --git a/flatisfy/tools.py b/flatisfy/tools.py index 3c500cb..53e449c 100644 --- a/flatisfy/tools.py +++ b/flatisfy/tools.py @@ -15,9 +15,12 @@ import math import re import time +import mapbox import requests import unidecode +from flatisfy.constants import TimeToModes + LOGGER = logging.getLogger(__name__) @@ -25,6 +28,23 @@ LOGGER = logging.getLogger(__name__) NAVITIA_ENDPOINT = "https://api.navitia.io/v1/coverage/fr-idf/journeys" +def next_weekday(d, weekday): + """ + Find datetime object for next given weekday. + + From + https://stackoverflow.com/questions/6558535/find-the-date-for-the-first-monday-after-a-given-a-date. + + :param d: Datetime to search from. + :param weekday: Weekday (0 for Monday, etc) + :returns: The datetime object for the next given weekday. + """ + days_ahead = weekday - d.weekday() + if days_ahead <= 0: # Target day already happened this week + days_ahead += 7 + return d + datetime.timedelta(days_ahead) + + def convert_arabic_to_roman(arabic): """ Convert an arabic literal to a roman one. 
Limits to 39, which is a rough @@ -322,7 +342,7 @@ def merge_dicts(*args): return merge_dicts(merged_flat, *args[2:]) -def get_travel_time_between(latlng_from, latlng_to, config): +def get_travel_time_between(latlng_from, latlng_to, mode, config): """ Query the Navitia API to get the travel time between two points identified by their latitude and longitude. @@ -330,6 +350,7 @@ def get_travel_time_between(latlng_from, latlng_to, config): :param latlng_from: A tuple of (latitude, longitude) for the starting point. :param latlng_to: A tuple of (latitude, longitude) for the destination. + :param mode: A TimeToMode enum value for the mode of transportation to use. :return: A dict of the travel time in seconds and sections of the journey with GeoJSON paths. Returns ``None`` if it could not fetch it. @@ -338,58 +359,118 @@ def get_travel_time_between(latlng_from, latlng_to, config): Uses the Navitia API. Requires a ``navitia_api_key`` field to be filled-in in the ``config``. """ + sections = [] travel_time = None - # Check that Navitia API key is available - if config["navitia_api_key"]: - payload = { - "from": "%s;%s" % (latlng_from[1], latlng_from[0]), - "to": "%s;%s" % (latlng_to[1], latlng_to[0]), - "datetime": datetime.datetime.now().isoformat(), - "count": 1 - } - try: - # Do the query to Navitia API - req = requests.get( - NAVITIA_ENDPOINT, params=payload, - auth=(config["navitia_api_key"], "") + if mode == TimeToModes.PUBLIC_TRANSPORT: + # Check that Navitia API key is available + if config["navitia_api_key"]: + # Search route for next Monday at 8am to avoid looking for a route + # in the middle of the night if the fetch is done by night. 
+ date_from = next_weekday(datetime.datetime.now(), 0).replace( + hour=8, + minute=0, ) - req.raise_for_status() + payload = { + "from": "%s;%s" % (latlng_from[1], latlng_from[0]), + "to": "%s;%s" % (latlng_to[1], latlng_to[0]), + "datetime": date_from.isoformat(), + "count": 1 + } + try: + # Do the query to Navitia API + req = requests.get( + NAVITIA_ENDPOINT, params=payload, + auth=(config["navitia_api_key"], "") + ) + req.raise_for_status() - journeys = req.json()["journeys"][0] - travel_time = journeys["durations"]["total"] - sections = [] - for section in journeys["sections"]: - if section["type"] == "public_transport": - # Public transport - sections.append({ - "geojson": section["geojson"], - "color": ( - section["display_informations"].get("color", None) - ) - }) - elif section["type"] == "street_network": - # Walking - sections.append({ - "geojson": section["geojson"], - "color": None - }) - else: - # Skip anything else - continue - except (requests.exceptions.RequestException, - ValueError, IndexError, KeyError) as exc: - # Ignore any possible exception + journeys = req.json()["journeys"][0] + travel_time = journeys["durations"]["total"] + for section in journeys["sections"]: + if section["type"] == "public_transport": + # Public transport + sections.append({ + "geojson": section["geojson"], + "color": ( + section["display_informations"].get("color", None) + ) + }) + elif section["type"] == "street_network": + # Walking + sections.append({ + "geojson": section["geojson"], + "color": None + }) + else: + # Skip anything else + continue + except (requests.exceptions.RequestException, + ValueError, IndexError, KeyError) as exc: + # Ignore any possible exception + LOGGER.warning( + "An exception occurred during travel time lookup on " + "Navitia: %s.", + str(exc) + ) + else: LOGGER.warning( - "An exception occurred during travel time lookup on " - "Navitia: %s.", - str(exc) + "No API key available for travel time lookup. Please provide " + "a Navitia API key. 
Skipping travel time lookup." ) - else: - LOGGER.warning( - "No API key available for travel time lookup. Please provide " - "a Navitia API key. Skipping travel time lookup." - ) + elif mode in [TimeToModes.WALK, TimeToModes.BIKE, TimeToModes.CAR]: + MAPBOX_MODES = { + TimeToModes.WALK: 'mapbox/walking', + TimeToModes.BIKE: 'mapbox/cycling', + TimeToModes.CAR: 'mapbox/driving' + } + # Check that Mapbox API key is available + if config["mapbox_api_key"]: + try: + service = mapbox.Directions( + access_token=config['mapbox_api_key'] + ) + origin = { + 'type': 'Feature', + 'properties': {'name': 'Start'}, + 'geometry': { + 'type': 'Point', + 'coordinates': [latlng_from[1], latlng_from[0]]}} + destination = { + 'type': 'Feature', + 'properties': {'name': 'End'}, + 'geometry': { + 'type': 'Point', + 'coordinates': [latlng_to[1], latlng_to[0]]}} + response = service.directions( + [origin, destination], MAPBOX_MODES[mode] + ) + response.raise_for_status() + route = response.geojson()['features'][0] + # Fix longitude/latitude inversion in geojson output + geometry = route['geometry'] + geometry['coordinates'] = [ + (x[1], x[0]) for x in geometry['coordinates'] + ] + sections = [{ + "geojson": geometry, + "color": "000" + }] + travel_time = route['properties']['duration'] + except (requests.exceptions.RequestException, + IndexError, KeyError) as exc: + # Ignore any possible exception + LOGGER.warning( + "An exception occurred during travel time lookup on " + "Mapbox: %s.", + str(exc) + ) + else: + LOGGER.warning( + "No API key available for travel time lookup. Please provide " + "a Mapbox API key. Skipping travel time lookup." + ) + if travel_time: return { "time": travel_time, diff --git a/requirements.txt b/requirements.txt index e42027d..30bc50b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -8,6 +8,7 @@ enum34; python_version < '3.4' functools32; python_version < '3.2.3' future imagehash +mapbox pillow requests requests_mock