Use a single common data source for public transports stops
Now makes use of Navitia opendata dumps to cover France. Fixes #65.
This commit is contained in:
parent
376b327379
commit
aa0e1fd965
|
@ -74,8 +74,7 @@ which covers Paris. If you want to run the script using some other location,
|
||||||
you might have to change these files by matching datasets.
|
you might have to change these files by matching datasets.
|
||||||
|
|
||||||
* [LaPoste Hexasmal](https://datanova.legroupe.laposte.fr/explore/dataset/laposte_hexasmal/?disjunctive.code_commune_insee&disjunctive.nom_de_la_commune&disjunctive.code_postal&disjunctive.libell_d_acheminement&disjunctive.ligne_5) for the list of cities and postal codes in France.
|
* [LaPoste Hexasmal](https://datanova.legroupe.laposte.fr/explore/dataset/laposte_hexasmal/?disjunctive.code_commune_insee&disjunctive.nom_de_la_commune&disjunctive.code_postal&disjunctive.libell_d_acheminement&disjunctive.ligne_5) for the list of cities and postal codes in France.
|
||||||
* [RATP (Paris) stations](https://data.ratp.fr/explore/dataset/positions-geographiques-des-stations-du-reseau-ratp/table/?disjunctive.stop_name&disjunctive.code_postal&disjunctive.departement) for the list of subway/tram/bus stations with their positions in Paris and nearby areas.
|
* [Navitia public transport datasets](https://navitia.opendatasoft.com/explore/?sort=modified&refine.geographicarea=France) for the list of subway/tram/bus stations with their positions in France. These are the `stops_fr-*.txt` files, extracted from the `NTFS` datasets for each region.
|
||||||
* [Tcl (Lyon) stations](https://download.data.grandlyon.com/wfs/rdata?SERVICE=WFS&VERSION=2.0.0&outputformat=GEOJSON&maxfeatures=4601&request=GetFeature&typename=tcl_sytral.tclarret&SRSNAME=urn:ogc:def:crs:EPSG::4326) for the list of subway/tram/bus stations with their positions in Lyon and nearby areas.
|
|
||||||
|
|
||||||
Both datasets are licensed under the Open Data Commons Open Database License
|
These datasets are licensed under the Open Data Commons Open Database License
|
||||||
(ODbL): https://opendatacommons.org/licenses/odbl/.
|
(ODbL): https://opendatacommons.org/licenses/odbl/.
|
||||||
|
|
|
@ -164,8 +164,8 @@ def main():
|
||||||
if args.cmd == "build-data":
|
if args.cmd == "build-data":
|
||||||
force = True
|
force = True
|
||||||
|
|
||||||
data.preprocess_data(config, force=force)
|
if data.preprocess_data(config, force=force):
|
||||||
LOGGER.info("Done building data!")
|
LOGGER.info("Done building data!")
|
||||||
|
|
||||||
if args.cmd == "build-data":
|
if args.cmd == "build-data":
|
||||||
sys.exit(0)
|
sys.exit(0)
|
||||||
|
|
|
@ -43,6 +43,7 @@ def preprocess_data(config, force=False):
|
||||||
|
|
||||||
:params config: A config dictionary.
|
:params config: A config dictionary.
|
||||||
:params force: Whether to force rebuild or not.
|
:params force: Whether to force rebuild or not.
|
||||||
|
:return bool: Whether data have been built or not.
|
||||||
"""
|
"""
|
||||||
# Check if a build is required
|
# Check if a build is required
|
||||||
get_session = database.init_db(config["database"], config["search_index"])
|
get_session = database.init_db(config["database"], config["search_index"])
|
||||||
|
@ -53,7 +54,7 @@ def preprocess_data(config, force=False):
|
||||||
)
|
)
|
||||||
if is_built and not force:
|
if is_built and not force:
|
||||||
# No need to rebuild the database, skip
|
# No need to rebuild the database, skip
|
||||||
return
|
return False
|
||||||
# Otherwise, purge all existing data
|
# Otherwise, purge all existing data
|
||||||
session.query(PublicTransport).delete()
|
session.query(PublicTransport).delete()
|
||||||
session.query(PostalCode).delete()
|
session.query(PostalCode).delete()
|
||||||
|
@ -67,6 +68,7 @@ def preprocess_data(config, force=False):
|
||||||
)
|
)
|
||||||
with get_session() as session:
|
with get_session() as session:
|
||||||
session.add_all(data_objects)
|
session.add_all(data_objects)
|
||||||
|
return True
|
||||||
|
|
||||||
|
|
||||||
@hash_dict
|
@hash_dict
|
||||||
|
@ -88,7 +90,7 @@ def load_data(model, constraint, config):
|
||||||
areas = []
|
areas = []
|
||||||
# Get areas to fetch from, using postal codes
|
# Get areas to fetch from, using postal codes
|
||||||
for postal_code in constraint["postal_codes"]:
|
for postal_code in constraint["postal_codes"]:
|
||||||
areas.append(data_files.french_postal_codes_to_iso_3166(postal_code))
|
areas.append(data_files.french_postal_codes_to_quarter(postal_code))
|
||||||
# Load data for each area
|
# Load data for each area
|
||||||
areas = list(set(areas))
|
areas = list(set(areas))
|
||||||
for area in areas:
|
for area in areas:
|
||||||
|
|
|
@ -3,10 +3,14 @@
|
||||||
Preprocessing functions to convert input opendata files into SQLAlchemy objects
|
Preprocessing functions to convert input opendata files into SQLAlchemy objects
|
||||||
ready to be stored in the database.
|
ready to be stored in the database.
|
||||||
"""
|
"""
|
||||||
|
import csv
|
||||||
|
import io
|
||||||
import json
|
import json
|
||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
|
|
||||||
|
from backports import csv
|
||||||
|
|
||||||
from flatisfy.models.postal_code import PostalCode
|
from flatisfy.models.postal_code import PostalCode
|
||||||
from flatisfy.models.public_transport import PublicTransport
|
from flatisfy.models.public_transport import PublicTransport
|
||||||
|
|
||||||
|
@ -15,18 +19,20 @@ LOGGER = logging.getLogger(__name__)
|
||||||
MODULE_DIR = os.path.dirname(os.path.realpath(__file__))
|
MODULE_DIR = os.path.dirname(os.path.realpath(__file__))
|
||||||
|
|
||||||
|
|
||||||
def french_postal_codes_to_iso_3166(postal_code):
|
def french_postal_codes_to_quarter(postal_code):
|
||||||
"""
|
"""
|
||||||
Convert a French postal code to the main subdivision in French this postal
|
Convert a French postal code to the main quarter in France this postal
|
||||||
code belongs to (ISO 3166-2 code).
|
code belongs to.
|
||||||
|
|
||||||
:param postal_code: The postal code to convert.
|
:param postal_code: The postal code to convert.
|
||||||
:returns: The ISO 3166-2 code of the subdivision or ``None``.
|
:returns: The quarter of France or ``None``.
|
||||||
"""
|
"""
|
||||||
|
departement = postal_code[:2]
|
||||||
|
|
||||||
# Mapping between areas (main subdivisions in French, ISO 3166-2) and
|
# Mapping between areas (main subdivisions in French, ISO 3166-2) and
|
||||||
# French departements
|
# French departements
|
||||||
# Taken from Wikipedia data.
|
# Taken from Wikipedia data.
|
||||||
area_to_departement = {
|
department_to_subdivision = {
|
||||||
"FR-ARA": ["01", "03", "07", "15", "26", "38", "42", "43", "63", "69",
|
"FR-ARA": ["01", "03", "07", "15", "26", "38", "42", "43", "63", "69",
|
||||||
"73", "74"],
|
"73", "74"],
|
||||||
"FR-BFC": ["21", "25", "39", "58", "70", "71", "89", "90"],
|
"FR-BFC": ["21", "25", "39", "58", "70", "71", "89", "90"],
|
||||||
|
@ -44,17 +50,30 @@ def french_postal_codes_to_iso_3166(postal_code):
|
||||||
"FR-PDL": ["44", "49", "53", "72", "85"],
|
"FR-PDL": ["44", "49", "53", "72", "85"],
|
||||||
"FR-PAC": ["04", "05", "06", "13", "83", "84"]
|
"FR-PAC": ["04", "05", "06", "13", "83", "84"]
|
||||||
}
|
}
|
||||||
|
subdivision_to_quarters = {
|
||||||
|
'FR-IDF': ['FR-IDF'],
|
||||||
|
'FR-NW': ['FR-BRE', 'FR-CVL', 'FR-NOR', 'FR-PDL'],
|
||||||
|
'FR-NE': ['FR-BFC', 'FR-GES', 'FR-HDF'],
|
||||||
|
'FR-SE': ['FR-ARA', 'FR-COR', 'FR-PAC', 'FR-OCC'],
|
||||||
|
'FR-SW': ['FR-NAQ']
|
||||||
|
}
|
||||||
|
|
||||||
departement = postal_code[:2]
|
subdivision = next(
|
||||||
return next(
|
|
||||||
(
|
(
|
||||||
i
|
i
|
||||||
for i in area_to_departement
|
for i, departments in department_to_subdivision.items()
|
||||||
if departement in area_to_departement[i]
|
if departement in departments
|
||||||
|
),
|
||||||
|
None
|
||||||
|
)
|
||||||
|
return next(
|
||||||
|
(
|
||||||
|
i
|
||||||
|
for i, subdivisions in subdivision_to_quarters.items()
|
||||||
|
if subdivision in subdivisions
|
||||||
),
|
),
|
||||||
None
|
None
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def _preprocess_laposte():
|
def _preprocess_laposte():
|
||||||
|
@ -69,8 +88,8 @@ def _preprocess_laposte():
|
||||||
raw_laposte_data = []
|
raw_laposte_data = []
|
||||||
# Load opendata file
|
# Load opendata file
|
||||||
try:
|
try:
|
||||||
with open(
|
with io.open(
|
||||||
os.path.join(MODULE_DIR, data_file), "r"
|
os.path.join(MODULE_DIR, data_file), "r", encoding='utf-8'
|
||||||
) as fh:
|
) as fh:
|
||||||
raw_laposte_data = json.load(fh)
|
raw_laposte_data = json.load(fh)
|
||||||
except (IOError, ValueError):
|
except (IOError, ValueError):
|
||||||
|
@ -82,7 +101,7 @@ def _preprocess_laposte():
|
||||||
for item in raw_laposte_data:
|
for item in raw_laposte_data:
|
||||||
fields = item["fields"]
|
fields = item["fields"]
|
||||||
try:
|
try:
|
||||||
area = french_postal_codes_to_iso_3166(fields["code_postal"])
|
area = french_postal_codes_to_quarter(fields["code_postal"])
|
||||||
if area is None:
|
if area is None:
|
||||||
LOGGER.info(
|
LOGGER.info(
|
||||||
"No matching area found for postal code %s, skipping it.",
|
"No matching area found for postal code %s, skipping it.",
|
||||||
|
@ -104,72 +123,45 @@ def _preprocess_laposte():
|
||||||
return postal_codes_data
|
return postal_codes_data
|
||||||
|
|
||||||
|
|
||||||
def _preprocess_ratp():
|
def _preprocess_public_transport():
|
||||||
"""
|
"""
|
||||||
Build SQLAlchemy objects from the RATP data (public transport in Paris,
|
Build SQLAlchemy objects from the Navitia public transport data.
|
||||||
France).
|
|
||||||
|
|
||||||
:return: A list of ``PublicTransport`` objects to be inserted in database.
|
:return: A list of ``PublicTransport`` objects to be inserted in database.
|
||||||
"""
|
"""
|
||||||
data_file = "ratp.json"
|
DATA_FILES = {
|
||||||
LOGGER.info("Building from %s data.", data_file)
|
"FR-IDF": "stops_fr-idf.txt",
|
||||||
|
"FR-NW": "stops_fr-nw.txt",
|
||||||
|
"FR-NE": "stops_fr-ne.txt",
|
||||||
|
"FR-SW": "stops_fr-sw.txt",
|
||||||
|
"FR-SE": "stops_fr-se.txt"
|
||||||
|
}
|
||||||
|
|
||||||
ratp_data_raw = []
|
public_transport_data = []
|
||||||
# Load opendata file
|
# Load opendata file
|
||||||
try:
|
for area, data_file in DATA_FILES.items():
|
||||||
with open(os.path.join(MODULE_DIR, data_file), "r") as fh:
|
LOGGER.info("Building from public transport data %s." % data_file)
|
||||||
ratp_data_raw = json.load(fh)
|
try:
|
||||||
except (IOError, ValueError):
|
with io.open(os.path.join(MODULE_DIR, data_file), "r",
|
||||||
LOGGER.error("Invalid raw RATP opendata file.")
|
encoding='utf-8') as fh:
|
||||||
return []
|
filereader = csv.reader(fh)
|
||||||
|
next(filereader, None) # Skip first row (headers)
|
||||||
|
for row in filereader:
|
||||||
|
public_transport_data.append(PublicTransport(
|
||||||
|
name=row[2],
|
||||||
|
area=area,
|
||||||
|
lat=row[3],
|
||||||
|
lng=row[4]
|
||||||
|
))
|
||||||
|
except (IOError, IndexError):
|
||||||
|
LOGGER.error("Invalid raw opendata file: %s." % data_file)
|
||||||
|
return []
|
||||||
|
|
||||||
# Process it
|
return public_transport_data
|
||||||
ratp_data = []
|
|
||||||
for item in ratp_data_raw:
|
|
||||||
fields = item["fields"]
|
|
||||||
ratp_data.append(PublicTransport(
|
|
||||||
name=fields["stop_name"],
|
|
||||||
area="FR-IDF",
|
|
||||||
lat=fields["coord"][0],
|
|
||||||
lng=fields["coord"][1]
|
|
||||||
))
|
|
||||||
return ratp_data
|
|
||||||
|
|
||||||
|
|
||||||
def _preprocess_tcl():
|
|
||||||
"""
|
|
||||||
Build SQLAlchemy objects from the Tcl data (public transport in Lyon,
|
|
||||||
France).
|
|
||||||
|
|
||||||
:return: A list of ``PublicTransport`` objects to be inserted in database.
|
|
||||||
"""
|
|
||||||
data_file = "tcl.json"
|
|
||||||
LOGGER.info("Building from %s data.", data_file)
|
|
||||||
|
|
||||||
tcl_data_raw = []
|
|
||||||
# Load opendata file
|
|
||||||
try:
|
|
||||||
with open(os.path.join(MODULE_DIR, data_file), "r") as fh:
|
|
||||||
tcl_data_raw = json.load(fh)
|
|
||||||
except (IOError, ValueError):
|
|
||||||
LOGGER.error("Invalid raw Tcl opendata file.")
|
|
||||||
return []
|
|
||||||
|
|
||||||
# Process it
|
|
||||||
tcl_data = []
|
|
||||||
for item in tcl_data_raw["features"]:
|
|
||||||
tcl_data.append(PublicTransport(
|
|
||||||
name=item["properties"]["nom"],
|
|
||||||
area="FR-ARA",
|
|
||||||
lat=item["geometry"]["coordinates"][1],
|
|
||||||
lng=item["geometry"]["coordinates"][0]
|
|
||||||
))
|
|
||||||
return tcl_data
|
|
||||||
|
|
||||||
|
|
||||||
# List of all the available preprocessing functions. Order can be important.
|
# List of all the available preprocessing functions. Order can be important.
|
||||||
PREPROCESSING_FUNCTIONS = [
|
PREPROCESSING_FUNCTIONS = [
|
||||||
_preprocess_laposte,
|
_preprocess_laposte,
|
||||||
_preprocess_ratp,
|
_preprocess_public_transport
|
||||||
_preprocess_tcl
|
|
||||||
]
|
]
|
||||||
|
|
File diff suppressed because one or more lines are too long
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -1,5 +1,6 @@
|
||||||
appdirs
|
appdirs
|
||||||
arrow
|
arrow
|
||||||
|
backports.csv
|
||||||
bottle
|
bottle
|
||||||
bottle-sqlalchemy
|
bottle-sqlalchemy
|
||||||
canister
|
canister
|
||||||
|
|
Loading…
Reference in New Issue