#!/usr/bin/env python
# coding: utf-8
"""
Import French opendata about traffic on roads.

Fetches road-traffic records from supported opendata portals (currently
Bordeaux Métropole), filters out free-flowing/unknown segments, and inserts
the remaining congestion records as ``traffic`` reports in the database,
skipping locations where a similar active report already exists.
"""
import io
import json
import logging
import os
import sys
import zipfile

SCRIPT_DIRECTORY = os.path.dirname(os.path.realpath(__file__))
sys.path.append(os.path.abspath(os.path.join(SCRIPT_DIRECTORY, '..', '..')))

import arrow
import pyproj
import requests

from functools import partial
from lxml import etree

from shapely.geometry import Point
from shapely.geometry import mapping, shape
from shapely.ops import transform

from server.models import db, Report
from server.tools import UTC_now

level = logging.WARN
RAISE_ON_EXCEPT = False
if 'DEBUG' in os.environ:
    level = logging.INFO
if 'RAISE_ON_EXCEPT' in os.environ:
    RAISE_ON_EXCEPT = True
logging.basicConfig(level=level)

# Same as in src/constants.js
REPORT_DOWNVOTES_THRESHOLD = 1


def preprocess_bordeaux(kmz_url):
    """
    Fetch and parse the Bordeaux Métropole traffic KMZ feed.

    :param kmz_url: URL of the KMZ archive (a zip containing ``doc.kml``).
    :return: A list of dicts with ``fields``, ``geometry`` (GeoJSON
        LineString), ``recordid`` and ``source`` keys. Empty list if the
        archive contains no KML payload.
    """
    KML_NAMESPACES = {
        'kml': 'http://www.opengis.net/kml/2.2',
    }
    # Map KML ExtendedData attribute names to our normalized field names,
    # with an optional value transformation.
    EXTENDED_DATA_TO_FIELDS = {
        'GID': {
            'key': 'id',
            'transform': lambda x: x,
        },
        'HEURE': {
            'key': 'datetime',
            'transform': lambda x: arrow.get(x, 'YYYYMMDDHHmmss').isoformat(),
        },
        'TYPEVOIE': {
            'key': 'way',
            'transform': lambda x: x,
        },
        'ETAT': {
            'key': 'state',
            'transform': lambda x: x,
        },
    }

    items = []

    kml = None
    r = requests.get(kmz_url)
    # A KMZ file is a zip archive wrapping a single "doc.kml" document.
    with zipfile.ZipFile(io.BytesIO(r.content), 'r') as fh:
        kml = fh.read('doc.kml')

    if not kml:
        return []

    kml = etree.fromstring(kml)
    folder = kml.find('.//kml:Folder', KML_NAMESPACES)

    for placemark in folder.findall('.//kml:Placemark', KML_NAMESPACES):
        fields = {}

        extended_data = placemark.findall(
            'kml:ExtendedData//kml:SimpleData', KML_NAMESPACES
        )
        for ed in extended_data:
            name = ed.get('name')
            if name in EXTENDED_DATA_TO_FIELDS:
                key_transform = EXTENDED_DATA_TO_FIELDS[name]
                fields[key_transform['key']] = (
                    key_transform['transform'](ed.text)
                )

        linestring = placemark.find(
            'kml:LineString//kml:coordinates', KML_NAMESPACES
        ).text
        # KML coordinates are "lng,lat,alt" triplets separated by
        # whitespace; drop the altitude component.
        lnglats = [
            [float(y) for y in x.split(',')[:-1]] for x in linestring.split()
        ]
        items.append({
            'fields': fields,
            'geometry': {
                'type': 'LineString',
                'coordinates': lnglats,
            },
            'recordid': fields.get('id'),
            'source': 'opendata-bordeaux',
        })

    return items


MIN_DISTANCE_REPORT_DETAILS = 40  # Distance in meters, same as in constants.js
OPENDATA_URLS = {
    # Traffic in Bordeaux
    # http://data.bordeaux-metropole.fr/data.php?layer=CI_TRAFI_L
    # Licence ODbL : https://data.bordeaux-metropole.fr/pdf/ODbL_fr.pdf
    "bordeaux": {
        "preprocess": preprocess_bordeaux,
        "url": "https://data.bordeaux-metropole.fr/files.php?layer=CI_TRAFI_L&ext=KMZ",
    },
}
REPORT_TYPE = 'traffic'

# Projection from WGS84 (GPS) to Lambert93 (for easy buffering of polygons)
project = partial(
    pyproj.transform,
    pyproj.Proj(init='epsg:4326'),  # source coordinates system
    pyproj.Proj(init='epsg:2154')  # destination coordinates system
)


def process_opendata(name, data, report_type=REPORT_TYPE):
    """
    Insert opendata records as reports in the database.

    Records whose position (or surrounding buffer of
    ``MIN_DISTANCE_REPORT_DETAILS`` meters) already contains an active
    report of the same type are skipped.

    :param name: Human-readable name of the opendata source (for logs).
    :param data: List of records as returned by a ``preprocess_*`` helper.
    :param report_type: Type of the reports to create, defaults to
        ``REPORT_TYPE``.
    """
    # Coordinates of current reports in db, as shapely Points in Lambert93
    current_reports_points = []
    active_reports_from_db = Report.select().where(
        # Load reports from db of the current type
        (Report.type == report_type) &
        (
            # Either with an expiration_datetime in the future.
            # NOTE: use peewee's is_null() here; a Python "is (not) None"
            # test on the field object would be evaluated at query build
            # time and silently break the filter.
            (
                Report.expiration_datetime.is_null(False) &
                (Report.expiration_datetime > UTC_now())
            ) |
            # Or without expiration_datetime but which are still active
            # (shown on the map)
            (
                Report.expiration_datetime.is_null(True) &
                (Report.downvotes < REPORT_DOWNVOTES_THRESHOLD)
            )
        )
    )
    for report in active_reports_from_db:
        current_reports_points.append(
            transform(project, Point(report.lng, report.lat))
        )

    # TODO: Remove reports which are no longer valid

    # Filter out unknown states and roads without traffic
    data = [
        x
        for x in data
        if x['fields'].get('state') not in ['FLUIDE', 'INCONNU']
    ]

    for item in data:
        try:
            fields = item['fields']

            # Get geometry and position
            geometry = shape(item['geometry'])
            position = geometry.centroid
            lng, lat = position.x, position.y

            # Check if this precise position is already in the database
            projected_position = transform(project, position)
            if projected_position in current_reports_points:
                logging.info(
                    ('Ignoring record %s, a similar report is already in '
                     'the database.'),
                    item['recordid']
                )
                continue
            # Check no similar reports is within the area of the report, up
            # to the report distance.
            overlap_area = transform(project, geometry).buffer(
                MIN_DISTANCE_REPORT_DETAILS
            )
            is_already_inserted = False
            for report_point in current_reports_points:
                if report_point.within(overlap_area):
                    # A similar report is already there
                    is_already_inserted = True
                    logging.info(
                        ('Ignoring record %s, a similar report is already '
                         'in the database.'),
                        item['recordid']
                    )
                    break
            if is_already_inserted:
                # Skip this report if a similar one is nearby
                continue

            # Expires 24 hours after the opendata timestamp
            expiration_datetime = (
                # TODO: Check the datetime value in the opendata file
                arrow.get(fields['datetime']).shift(hours=+24).naive
            )

            # Add the report to the db
            logging.info('Adding record %s to the database.',
                         item['recordid'])
            Report.create(
                type=report_type,
                expiration_datetime=expiration_datetime,
                lat=lat,
                lng=lng,
                source=item['source'],
                shape_geojson=json.dumps(mapping(geometry))
            )
            # Remember this position so that duplicate records within the
            # same feed are not inserted twice.
            current_reports_points.append(projected_position)
        except KeyError as exc:
            if RAISE_ON_EXCEPT:
                raise
            logging.warning(
                'Invalid record %s in %s, missing key: %s',
                item.get('recordid', '?'),
                name,
                exc
            )


if __name__ == '__main__':
    db.connect()
    for name, item in OPENDATA_URLS.items():
        logging.info('Processing opendata from %s', name)
        try:
            if item['preprocess']:
                data = item['preprocess'](item['url'])
            else:
                r = requests.get(item['url'])
                data = r.json()
        except (requests.RequestException, ValueError) as exc:
            logging.warning('Error while fetching data for %s: %s.',
                            name, exc)
            continue

        process_opendata(name, data)