
Script to use BD ALTI from IGN

Phyks (Lucas Verney) 4 years ago
commit 982b8c7ae9
1 changed file with 211 additions and 92 deletions

+ 211 - 92 scripts/bd_alti.py

@@ -1,20 +1,32 @@
 #!/usr/bin/env python3
-import math
-import os
-import pyproj
-import sys
-
 """
 This script filters the BD ALTI from IGN (ESRI grid) to keep only the relevant
 files for the specified area.
 
-Input is an Arc/Info E00 (ASCII) file from the IGN BD ALTI database.
-Output is an ASCII XYZ gridded file, sorted by increasing X and decreasing Y.
+Input is an ESRI grid (ASCII) file from the IGN BD ALTI database.
+(https://en.wikipedia.org/wiki/Esri_grid#ASCII)
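+
+A typical .asc header (illustrative values) looks like:
+    ncols        1000
+    nrows        1000
+    xllcorner    949987.5
+    yllcorner    6549987.5
+    cellsize     75.0
+    NODATA_value -99999.00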
+
+Output is an Azavea Raster Grid format (ARG) file.
+http://geotrellis.io/documentation/0.9.0/geotrellis/io/arg/
+
+Metadata is written to the standard output and should be appended to the
+config.json file in the ressources folder.
+
+Requires the pyproj module.
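+
+Example invocation (illustrative bounding box around Paris and paths):
+    python3 scripts/bd_alti.py 48.80 2.25 48.92 2.42 ~/data/bdalti/ out.arg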
 """
 
+import json
+import math
+import os
+import pyproj
+import struct
+import sys
+
 
 def has_overlap(a, b):
     """
+    a and b are two intervals (lists of bounds).
+
     Returns
     * False if no overlap between the intervals represented by a and b,
     * True otherwise
@@ -24,100 +36,207 @@ def has_overlap(a, b):
 
 def parse_header(fh):
     """
-    Parses the header of an Arc/Info E00 (ASCII) altitude file.
+    Parses the header of an ESRI (ASCII) altitude file.
     """
     ncols = int(fh.readline().strip().split(" ")[-1])
     nrows = int(fh.readline().strip().split(" ")[-1])
     xllcorner = float(fh.readline().strip().split(" ")[-1])
     yllcorner = float(fh.readline().strip().split(" ")[-1])
     cellsize = float(fh.readline().strip().split(" ")[-1])
+    nodata = float(fh.readline().strip().split(" ")[-1])
 
-    return ncols, nrows, xllcorner, yllcorner, cellsize
+    return ncols, nrows, xllcorner, yllcorner, cellsize, nodata
 
 
-if len(sys.argv) < 6:
-    print("Usage: "+sys.argv[0]+" LAT_MIN LNG_MIN LAT_MAX LNG_MAX DATA_FOLDER")
-    sys.exit()
+def latlng_to_lambert93(lat, lng):
+    """
+    Converts a latitude and longitude to Lambert93 X, Y representation.
+
+    Returns x, y
+    """
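+    # EPSG:4326 is WGS84 latitude/longitude; EPSG:2154 is the French
+    # Lambert93 (RGF93) projection used here for the BD ALTI grids.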
+    wgs84 = pyproj.Proj("+init=EPSG:4326")
+    lambert = pyproj.Proj("+init=EPSG:2154")
+    x, y = pyproj.transform(wgs84, lambert, lng, lat)
+
+    return x, y
 
-lat_min = float(sys.argv[1])
-lng_min = float(sys.argv[2])
-lat_max = float(sys.argv[3])
-lng_max = float(sys.argv[4])
-data_folder = os.path.expanduser(sys.argv[5])
 
-if len(sys.argv) > 6:
+def find_relevant_files(x_min, y_min, x_max, y_max, data_folder):
+    """
+    Finds the files with relevant points from the data folder.
+
+    Returns a list of filenames.
+    """
+    founds = []
+    for f in os.listdir(data_folder):
+        if not(f.endswith(".asc")):
+            continue
+        file = os.path.join(data_folder, f)
+        with open(file, 'r') as fh:
+            ncols, nrows, xllcorner, yllcorner, cellsize, nodata = parse_header(fh)
+
+            if has_overlap([x_min, x_max],
+                           [xllcorner, xllcorner + cellsize * ncols]):
+                if has_overlap([y_min, y_max],
+                               [yllcorner, yllcorner + cellsize * nrows]):
+                    founds.append(file)
+    return founds
+
+
+def pack(fmt, nodata, value):
+    """
+    Given fmt and nodata, encodes a value as bytes.
+    """
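+    # fmt is a struct format string; ">h" (big-endian int16) is what the
+    # caller uses below.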
+    if value is None:
+        value = nodata
+    return struct.pack(fmt, value)
+
+
+def encode(fmt, nodata, values):
+    """
+    Packs the given values together as bytes.
+    """
+    chunks = [pack(fmt, nodata, v) for v in values]
+    return b''.join(chunks)
+
+
+def format_arg_out(points, no_data=-99999):
+    """
+    Formats the list of points in ARG format. no_data is the value associated
+    with missing data.
+
+    Returns out, metadata where out is binary content ready to be written to
+    a file and metadata is a dict containing the ARG metadata.
+
+    We expect cellheight and cellwidth to be equal and constant across the
+    dataset.
+
+    We assume there are no holes in the data.
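+
+    Example (illustrative): four points forming a 2x2 grid with a 75 m step
+    yield 8 bytes of output (four big-endian int16 levels) plus a metadata
+    dict describing the bounding box and cell size.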
+    """
+    # Sort points list by decreasing Y and then by increasing X
+    # (from NW to SE), as it will be output in the ARG file
+    points.sort(key=lambda e: (-e[1], e[0]))
+
+    # Set metadata
+    x_min = points[0][0]
+    y_min = points[-1][1]
+    x_max = points[-1][0]
+    y_max = points[0][1]
+    cellwidth = points[1][0] - points[0][0]  # X difference between the first two points
+    cellheight = cellwidth  # Assume square cells
+    metadata = {
+        "datatype": "int16",
+        "x_min": x_min,
+        "y_min": y_min,
+        "x_max": x_max,
+        "y_max": y_max,
+        "cellwidth": cellwidth,
+        "cellheight": cellheight,
+        "cols": (x_max - x_min) // cellwidth,
+        "rows": (y_max - y_min) // cellheight
+    }
+
+    # Get only the z values, replacing the NO_DATA values by None
+    z_values = [i[2] if i[2] != no_data else None for i in points]
+    valid_z = [i for i in z_values if i is not None]
+    max_z = max(valid_z)
+    min_z = min(valid_z)
+    format = ">h"
+    nodata_out = -(1 << 15)  # -32768
+    min_value = -(1 << 15) + 1  # -32767
+    max_value = (1 << 15) - 1  # 32767
+    # Convert floating z_values to int levels, keeping None for NO_DATA cells
+    z_values = [int((i - min_z) / (max_z - min_z) * (max_value - min_value) +
+                    min_value) if i is not None else None
+                for i in z_values]
+    # Get the output as a binary string
+    out = encode(format, nodata_out, z_values)
+
+    return out, metadata
+
+
+if __name__ == "__main__":
+    if len(sys.argv) < 7:
+        print("Usage: " +
+              sys.argv[0] +
+              " LAT_MIN LNG_MIN LAT_MAX LNG_MAX DATA_FOLDER OUT_FILE")
+        sys.exit()
+
+    # Parse parameters
+    lat_min = float(sys.argv[1])
+    lng_min = float(sys.argv[2])
+    lat_max = float(sys.argv[3])
+    lng_max = float(sys.argv[4])
+    data_folder = os.path.expanduser(sys.argv[5])
     out_file = sys.argv[6]
-else:
-    out_file = "out.xyz"
-
-print(("Looking for data between latitudes and longitudes " +
-       "({0}, {1}) and ({2}, {3})").format(lat_min, lng_min, lat_max, lng_max))
-
-wgs84 = pyproj.Proj("+init=EPSG:4326")
-lambert = pyproj.Proj("+init=EPSG:2154")
-x_min, y_min = pyproj.transform(wgs84, lambert, lng_min, lat_min)
-x_max, y_max = pyproj.transform(wgs84, lambert, lng_max, lat_max)
-
-if x_min > x_max:
-    x_min, x_max = x_max, x_min
-if y_min > y_max:
-    y_min, y_max = y_max, y_min
-
-print(("Looking for data between map coordinates " +
-       "({0}, {1}) and ({2}, {3})").format(x_min, y_min, x_max, y_max))
-
-founds = []
-for f in os.listdir(data_folder):
-    if not(f.endswith(".asc")):
-        continue
-    file = os.path.join(data_folder, f)
-    with open(file, 'r') as fh:
-        ncols, nrows, xllcorner, yllcorner, cellsize = parse_header(fh)
-
-        if has_overlap([x_min, x_max],
-                       [xllcorner, xllcorner + cellsize * ncols]):
-            if has_overlap([y_min, y_max],
-                           [yllcorner, yllcorner + cellsize * nrows]):
-                founds.append(file)
-
-if len(founds) == 0:
-    print("No matching dataset found =(.")
-else:
-    print("Matching datasets:")
-    print("\n".join(founds))
-
-
-# Extract relevant parts from the datasets and output it as xyz values
-out = "X\tY\tZ\n"
-for f in founds:
-    with open(f, 'r') as fh:
-        ncols, nrows, xllcorner, yllcorner, cellsize = parse_header(fh)
-
-        col_min = math.floor((x_min - xllcorner) / cellsize)
-        col_max = math.ceil((x_max - xllcorner) / cellsize)
-        # The (0, 0) point is the lower left one, that is on the last line.
-        row_max = nrows - math.floor((y_min - yllcorner) / cellsize)
-        row_min = nrows - math.ceil((y_max - yllcorner) / cellsize)
-
-        i = 0
-        for line in fh.readlines():
-            if i >= row_min and i <= row_max:
-                row = [float(j) for j in line.strip().split(" ")]
-                for j in range(col_min, col_max):
-                    out += "{0}\t{1}\t{2}\n".format(xllcorner + j * cellsize,
-                                                    (yllcorner +
-                                                     (nrows - i) * cellsize),
-                                                    row[j])
-            i += 1
-
-out = out.strip()
-
-# Write it to out.xyz file
-with open(out_file, 'w') as fh:
-    fh.write(out)
-
-if len(sys.argv) < 7:
-    print("Found data (also exported to "+out_file+"):")
-    print(out)
-else:
-    print("Found data exported to "+out_file+".")
+
+    print(("Looking for data between latitudes and longitudes " +
+           "({0}, {1}) and ({2}, {3})").format(lat_min,
+                                               lng_min,
+                                               lat_max,
+                                               lng_max))
+
+    # Convert latitude / longitude coordinates to Lambert93 projection
+    x_min, y_min = latlng_to_lambert93(lat_min, lng_min)
+    x_max, y_max = latlng_to_lambert93(lat_max, lng_max)
+
+    if x_min > x_max:
+        x_min, x_max = x_max, x_min
+    if y_min > y_max:
+        y_min, y_max = y_max, y_min
+
+    print(("Looking for data between map coordinates " +
+           "({0}, {1}) and ({2}, {3})").format(x_min, y_min, x_max, y_max))
+
+    # Loop over all the available data files and find those with relevant points
+    founds = find_relevant_files(x_min, y_min, x_max, y_max, data_folder)
+
+    if len(founds) == 0:
+        print("\nNo matching dataset found =(.")
+        sys.exit(1)
+    else:
+        print("\nMatching datasets:")
+        print("\n".join(founds))
+
+    # Extract relevant parts from the datasets and append them to the output
+    points = []  # Store relevant points as (X, Y, Z) tuples
+    for f in founds:
+        with open(f, 'r') as fh:
+            # We expect all of the files to have the same cellsize parameter
+            ncols, nrows, xllcorner, yllcorner, cellsize, nodata = parse_header(fh)
+
+            # Fetch bounds in this file
+            col_min = math.floor((x_min - xllcorner) / cellsize)
+            col_max = math.ceil((x_max - xllcorner) / cellsize)
+            # The (0, 0) point is the lower left one, that is on the last line.
+            row_max = nrows - math.floor((y_min - yllcorner) / cellsize)
+            row_min = nrows - math.ceil((y_max - yllcorner) / cellsize)
+
+            i = 0
+            for line in fh.readlines():
+                if i >= row_min and i <= row_max:
+                    row = [float(j) for j in line.strip().split(" ")]
+                    for j in range(col_min, col_max):
+                        points.append((xllcorner + j * cellsize,
+                                       yllcorner + (nrows - i) * cellsize,
+                                       row[j]))
+                i += 1
+
+    # Get ARG formatted data
+    out, metadata = format_arg_out(points)
+
+    # Filter metadata according to our needs for config
+    metadata = {k.upper(): v for k, v in metadata.items() if k != "datatype"}
+    metadata["N_ROWS"] = metadata.pop("ROWS")
+    metadata["N_COLS"] = metadata.pop("COLS")
+
+    # Write raw file to out_file
+    with open(out_file, 'wb') as fh:
+        fh.write(out)
+
+    # Print metadata to stdout, as JSON
+    print("\n\nConfig file:")
+    print(json.dumps(metadata,
+                     sort_keys=True,
+                     indent=4,
+                     separators=(',', ': ')))
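+    # Illustrative output (actual values depend on the selected area):
+    # {
+    #     "CELLHEIGHT": 75.0,
+    #     "CELLWIDTH": 75.0,
+    #     "N_COLS": 120.0,
+    #     "N_ROWS": 80.0,
+    #     "X_MAX": ...,
+    #     "X_MIN": ...,
+    #     "Y_MAX": ...,
+    #     "Y_MIN": ...
+    # }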