Use a single common data source for public transport stops
Now makes use of Navitia opendata dumps to cover France. Fixes #65.
This commit is contained in:
parent
376b327379
commit
aa0e1fd965
@ -74,8 +74,7 @@ which covers Paris. If you want to run the script using some other location,
|
|||||||
you might have to change these files by matching datasets.
|
you might have to change these files by matching datasets.
|
||||||
|
|
||||||
* [LaPoste Hexasmal](https://datanova.legroupe.laposte.fr/explore/dataset/laposte_hexasmal/?disjunctive.code_commune_insee&disjunctive.nom_de_la_commune&disjunctive.code_postal&disjunctive.libell_d_acheminement&disjunctive.ligne_5) for the list of cities and postal codes in France.
|
* [LaPoste Hexasmal](https://datanova.legroupe.laposte.fr/explore/dataset/laposte_hexasmal/?disjunctive.code_commune_insee&disjunctive.nom_de_la_commune&disjunctive.code_postal&disjunctive.libell_d_acheminement&disjunctive.ligne_5) for the list of cities and postal codes in France.
|
||||||
* [RATP (Paris) stations](https://data.ratp.fr/explore/dataset/positions-geographiques-des-stations-du-reseau-ratp/table/?disjunctive.stop_name&disjunctive.code_postal&disjunctive.departement) for the list of subway/tram/bus stations with their positions in Paris and nearby areas.
|
* [Navitia public transport datasets](https://navitia.opendatasoft.com/explore/?sort=modified&refine.geographicarea=France) for the list of subway/tram/bus stations with their positions in France. These are the `stops_fr-*.txt` files, extracted from the `NTFS` datasets for each region.
|
||||||
* [Tcl (Lyon) stations](https://download.data.grandlyon.com/wfs/rdata?SERVICE=WFS&VERSION=2.0.0&outputformat=GEOJSON&maxfeatures=4601&request=GetFeature&typename=tcl_sytral.tclarret&SRSNAME=urn:ogc:def:crs:EPSG::4326) for the list of subway/tram/bus stations with their positions in Paris and nearby areas.
|
|
||||||
|
|
||||||
Both datasets are licensed under the Open Data Commons Open Database License
|
Both datasets are licensed under the Open Data Commons Open Database License
|
||||||
(ODbL): https://opendatacommons.org/licenses/odbl/.
|
(ODbL): https://opendatacommons.org/licenses/odbl/.
|
||||||
|
@ -164,8 +164,8 @@ def main():
|
|||||||
if args.cmd == "build-data":
|
if args.cmd == "build-data":
|
||||||
force = True
|
force = True
|
||||||
|
|
||||||
data.preprocess_data(config, force=force)
|
if data.preprocess_data(config, force=force):
|
||||||
LOGGER.info("Done building data!")
|
LOGGER.info("Done building data!")
|
||||||
|
|
||||||
if args.cmd == "build-data":
|
if args.cmd == "build-data":
|
||||||
sys.exit(0)
|
sys.exit(0)
|
||||||
|
@ -43,6 +43,7 @@ def preprocess_data(config, force=False):
|
|||||||
|
|
||||||
:param config: A config dictionary.
|
:param config: A config dictionary.
|
||||||
:param force: Whether to force rebuild or not.
|
:param force: Whether to force rebuild or not.
|
||||||
|
:return bool: Whether data have been built or not.
|
||||||
"""
|
"""
|
||||||
# Check if a build is required
|
# Check if a build is required
|
||||||
get_session = database.init_db(config["database"], config["search_index"])
|
get_session = database.init_db(config["database"], config["search_index"])
|
||||||
@ -53,7 +54,7 @@ def preprocess_data(config, force=False):
|
|||||||
)
|
)
|
||||||
if is_built and not force:
|
if is_built and not force:
|
||||||
# No need to rebuild the database, skip
|
# No need to rebuild the database, skip
|
||||||
return
|
return False
|
||||||
# Otherwise, purge all existing data
|
# Otherwise, purge all existing data
|
||||||
session.query(PublicTransport).delete()
|
session.query(PublicTransport).delete()
|
||||||
session.query(PostalCode).delete()
|
session.query(PostalCode).delete()
|
||||||
@ -67,6 +68,7 @@ def preprocess_data(config, force=False):
|
|||||||
)
|
)
|
||||||
with get_session() as session:
|
with get_session() as session:
|
||||||
session.add_all(data_objects)
|
session.add_all(data_objects)
|
||||||
|
return True
|
||||||
|
|
||||||
|
|
||||||
@hash_dict
|
@hash_dict
|
||||||
@ -88,7 +90,7 @@ def load_data(model, constraint, config):
|
|||||||
areas = []
|
areas = []
|
||||||
# Get areas to fetch from, using postal codes
|
# Get areas to fetch from, using postal codes
|
||||||
for postal_code in constraint["postal_codes"]:
|
for postal_code in constraint["postal_codes"]:
|
||||||
areas.append(data_files.french_postal_codes_to_iso_3166(postal_code))
|
areas.append(data_files.french_postal_codes_to_quarter(postal_code))
|
||||||
# Load data for each area
|
# Load data for each area
|
||||||
areas = list(set(areas))
|
areas = list(set(areas))
|
||||||
for area in areas:
|
for area in areas:
|
||||||
|
@ -3,10 +3,14 @@
|
|||||||
Preprocessing functions to convert input opendata files into SQLAlchemy objects
|
Preprocessing functions to convert input opendata files into SQLAlchemy objects
|
||||||
ready to be stored in the database.
|
ready to be stored in the database.
|
||||||
"""
|
"""
|
||||||
|
import csv
|
||||||
|
import io
|
||||||
import json
|
import json
|
||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
|
|
||||||
|
from backports import csv
|
||||||
|
|
||||||
from flatisfy.models.postal_code import PostalCode
|
from flatisfy.models.postal_code import PostalCode
|
||||||
from flatisfy.models.public_transport import PublicTransport
|
from flatisfy.models.public_transport import PublicTransport
|
||||||
|
|
||||||
@ -15,18 +19,20 @@ LOGGER = logging.getLogger(__name__)
|
|||||||
MODULE_DIR = os.path.dirname(os.path.realpath(__file__))
|
MODULE_DIR = os.path.dirname(os.path.realpath(__file__))
|
||||||
|
|
||||||
|
|
||||||
def french_postal_codes_to_iso_3166(postal_code):
|
def french_postal_codes_to_quarter(postal_code):
|
||||||
"""
|
"""
|
||||||
Convert a French postal code to the main subdivision in French this postal
|
Convert a French postal code to the quarter of France that this postal
|
||||||
code belongs to (ISO 3166-2 code).
|
code belongs to.
|
||||||
|
|
||||||
:param postal_code: The postal code to convert.
|
:param postal_code: The postal code to convert.
|
||||||
:returns: The ISO 3166-2 code of the subdivision or ``None``.
|
:returns: The quarter of France or ``None``.
|
||||||
"""
|
"""
|
||||||
|
departement = postal_code[:2]
|
||||||
|
|
||||||
# Mapping between areas (main subdivisions in France, ISO 3166-2) and
|
# Mapping between areas (main subdivisions in France, ISO 3166-2) and
|
||||||
# French departements
|
# French departements
|
||||||
# Taken from Wikipedia data.
|
# Taken from Wikipedia data.
|
||||||
area_to_departement = {
|
department_to_subdivision = {
|
||||||
"FR-ARA": ["01", "03", "07", "15", "26", "38", "42", "43", "63", "69",
|
"FR-ARA": ["01", "03", "07", "15", "26", "38", "42", "43", "63", "69",
|
||||||
"73", "74"],
|
"73", "74"],
|
||||||
"FR-BFC": ["21", "25", "39", "58", "70", "71", "89", "90"],
|
"FR-BFC": ["21", "25", "39", "58", "70", "71", "89", "90"],
|
||||||
@ -44,17 +50,30 @@ def french_postal_codes_to_iso_3166(postal_code):
|
|||||||
"FR-PDL": ["44", "49", "53", "72", "85"],
|
"FR-PDL": ["44", "49", "53", "72", "85"],
|
||||||
"FR-PAC": ["04", "05", "06", "13", "83", "84"]
|
"FR-PAC": ["04", "05", "06", "13", "83", "84"]
|
||||||
}
|
}
|
||||||
|
subdivision_to_quarters = {
|
||||||
|
'FR-IDF': ['FR-IDF'],
|
||||||
|
'FR-NW': ['FR-BRE', 'FR-CVL', 'FR-NOR', 'FR-PDL'],
|
||||||
|
'FR-NE': ['FR-BFC', 'FR-GES', 'FR-HDF'],
|
||||||
|
'FR-SE': ['FR-ARA', 'FR-COR', 'FR-PAC', 'FR-OCC'],
|
||||||
|
'FR-SW': ['FR-NAQ']
|
||||||
|
}
|
||||||
|
|
||||||
departement = postal_code[:2]
|
subdivision = next(
|
||||||
return next(
|
|
||||||
(
|
(
|
||||||
i
|
i
|
||||||
for i in area_to_departement
|
for i, departments in department_to_subdivision.items()
|
||||||
if departement in area_to_departement[i]
|
if departement in departments
|
||||||
|
),
|
||||||
|
None
|
||||||
|
)
|
||||||
|
return next(
|
||||||
|
(
|
||||||
|
i
|
||||||
|
for i, subdivisions in subdivision_to_quarters.items()
|
||||||
|
if subdivision in subdivisions
|
||||||
),
|
),
|
||||||
None
|
None
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def _preprocess_laposte():
|
def _preprocess_laposte():
|
||||||
@ -69,8 +88,8 @@ def _preprocess_laposte():
|
|||||||
raw_laposte_data = []
|
raw_laposte_data = []
|
||||||
# Load opendata file
|
# Load opendata file
|
||||||
try:
|
try:
|
||||||
with open(
|
with io.open(
|
||||||
os.path.join(MODULE_DIR, data_file), "r"
|
os.path.join(MODULE_DIR, data_file), "r", encoding='utf-8'
|
||||||
) as fh:
|
) as fh:
|
||||||
raw_laposte_data = json.load(fh)
|
raw_laposte_data = json.load(fh)
|
||||||
except (IOError, ValueError):
|
except (IOError, ValueError):
|
||||||
@ -82,7 +101,7 @@ def _preprocess_laposte():
|
|||||||
for item in raw_laposte_data:
|
for item in raw_laposte_data:
|
||||||
fields = item["fields"]
|
fields = item["fields"]
|
||||||
try:
|
try:
|
||||||
area = french_postal_codes_to_iso_3166(fields["code_postal"])
|
area = french_postal_codes_to_quarter(fields["code_postal"])
|
||||||
if area is None:
|
if area is None:
|
||||||
LOGGER.info(
|
LOGGER.info(
|
||||||
"No matching area found for postal code %s, skipping it.",
|
"No matching area found for postal code %s, skipping it.",
|
||||||
@ -104,72 +123,45 @@ def _preprocess_laposte():
|
|||||||
return postal_codes_data
|
return postal_codes_data
|
||||||
|
|
||||||
|
|
||||||
def _preprocess_ratp():
|
def _preprocess_public_transport():
|
||||||
"""
|
"""
|
||||||
Build SQLAlchemy objects from the RATP data (public transport in Paris,
|
Build SQLAlchemy objects from the Navitia public transport data.
|
||||||
France).
|
|
||||||
|
|
||||||
:return: A list of ``PublicTransport`` objects to be inserted in database.
|
:return: A list of ``PublicTransport`` objects to be inserted in database.
|
||||||
"""
|
"""
|
||||||
data_file = "ratp.json"
|
DATA_FILES = {
|
||||||
LOGGER.info("Building from %s data.", data_file)
|
"FR-IDF": "stops_fr-idf.txt",
|
||||||
|
"FR-NW": "stops_fr-nw.txt",
|
||||||
|
"FR-NE": "stops_fr-ne.txt",
|
||||||
|
"FR-SW": "stops_fr-sw.txt",
|
||||||
|
"FR-SE": "stops_fr-se.txt"
|
||||||
|
}
|
||||||
|
|
||||||
ratp_data_raw = []
|
public_transport_data = []
|
||||||
# Load opendata file
|
# Load opendata file
|
||||||
try:
|
for area, data_file in DATA_FILES.items():
|
||||||
with open(os.path.join(MODULE_DIR, data_file), "r") as fh:
|
LOGGER.info("Building from public transport data %s." % data_file)
|
||||||
ratp_data_raw = json.load(fh)
|
try:
|
||||||
except (IOError, ValueError):
|
with io.open(os.path.join(MODULE_DIR, data_file), "r",
|
||||||
LOGGER.error("Invalid raw RATP opendata file.")
|
encoding='utf-8') as fh:
|
||||||
return []
|
filereader = csv.reader(fh)
|
||||||
|
next(filereader, None) # Skip first row (headers)
|
||||||
|
for row in filereader:
|
||||||
|
public_transport_data.append(PublicTransport(
|
||||||
|
name=row[2],
|
||||||
|
area=area,
|
||||||
|
lat=row[3],
|
||||||
|
lng=row[4]
|
||||||
|
))
|
||||||
|
except (IOError, IndexError):
|
||||||
|
LOGGER.error("Invalid raw opendata file: %s." % data_file)
|
||||||
|
return []
|
||||||
|
|
||||||
# Process it
|
return public_transport_data
|
||||||
ratp_data = []
|
|
||||||
for item in ratp_data_raw:
|
|
||||||
fields = item["fields"]
|
|
||||||
ratp_data.append(PublicTransport(
|
|
||||||
name=fields["stop_name"],
|
|
||||||
area="FR-IDF",
|
|
||||||
lat=fields["coord"][0],
|
|
||||||
lng=fields["coord"][1]
|
|
||||||
))
|
|
||||||
return ratp_data
|
|
||||||
|
|
||||||
|
|
||||||
def _preprocess_tcl():
|
|
||||||
"""
|
|
||||||
Build SQLAlchemy objects from the Tcl data (public transport in Lyon,
|
|
||||||
France).
|
|
||||||
|
|
||||||
:return: A list of ``PublicTransport`` objects to be inserted in database.
|
|
||||||
"""
|
|
||||||
data_file = "tcl.json"
|
|
||||||
LOGGER.info("Building from %s data.", data_file)
|
|
||||||
|
|
||||||
tcl_data_raw = []
|
|
||||||
# Load opendata file
|
|
||||||
try:
|
|
||||||
with open(os.path.join(MODULE_DIR, data_file), "r") as fh:
|
|
||||||
tcl_data_raw = json.load(fh)
|
|
||||||
except (IOError, ValueError):
|
|
||||||
LOGGER.error("Invalid raw Tcl opendata file.")
|
|
||||||
return []
|
|
||||||
|
|
||||||
# Process it
|
|
||||||
tcl_data = []
|
|
||||||
for item in tcl_data_raw["features"]:
|
|
||||||
tcl_data.append(PublicTransport(
|
|
||||||
name=item["properties"]["nom"],
|
|
||||||
area="FR-ARA",
|
|
||||||
lat=item["geometry"]["coordinates"][1],
|
|
||||||
lng=item["geometry"]["coordinates"][0]
|
|
||||||
))
|
|
||||||
return tcl_data
|
|
||||||
|
|
||||||
|
|
||||||
# List of all the available preprocessing functions. Order can be important.
|
# List of all the available preprocessing functions. Order can be important.
|
||||||
PREPROCESSING_FUNCTIONS = [
|
PREPROCESSING_FUNCTIONS = [
|
||||||
_preprocess_laposte,
|
_preprocess_laposte,
|
||||||
_preprocess_ratp,
|
_preprocess_public_transport
|
||||||
_preprocess_tcl
|
|
||||||
]
|
]
|
||||||
|
File diff suppressed because one or more lines are too long
59362
flatisfy/data_files/stops_fr-idf.txt
Normal file
59362
flatisfy/data_files/stops_fr-idf.txt
Normal file
File diff suppressed because it is too large
Load Diff
25211
flatisfy/data_files/stops_fr-ne.txt
Normal file
25211
flatisfy/data_files/stops_fr-ne.txt
Normal file
File diff suppressed because it is too large
Load Diff
15287
flatisfy/data_files/stops_fr-nw.txt
Normal file
15287
flatisfy/data_files/stops_fr-nw.txt
Normal file
File diff suppressed because it is too large
Load Diff
49703
flatisfy/data_files/stops_fr-se.txt
Normal file
49703
flatisfy/data_files/stops_fr-se.txt
Normal file
File diff suppressed because it is too large
Load Diff
18344
flatisfy/data_files/stops_fr-sw.txt
Normal file
18344
flatisfy/data_files/stops_fr-sw.txt
Normal file
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -1,5 +1,6 @@
|
|||||||
appdirs
|
appdirs
|
||||||
arrow
|
arrow
|
||||||
|
backports.csv
|
||||||
bottle
|
bottle
|
||||||
bottle-sqlalchemy
|
bottle-sqlalchemy
|
||||||
canister
|
canister
|
||||||
|
Loading…
Reference in New Issue
Block a user