1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
|
- #!/usr/bin/env python
- import os
- import click
- from pathlib import Path
- import json
- import numpy as np
- from tqdm import tqdm
- import shapefile as shp
- from zipfile import ZipFile
- from datetime import datetime as dt
- from shapely.geometry import Polygon, Point
- # Quiet warnings about incorrect polygon orientation
- shp.VERBOSE = False
- SMOKE_EXT = '.zip'
- DENSITY_ORDERING = (
- 'None',
- 'NA',
- 'Light',
- 'Medium',
- 'Heavy',
- )
- def read_polygons(shapefile):
- basename = os.path.splitext(os.path.basename(shapefile))[0]
- date = dt.strptime(basename, 'hms_smoke%Y%m%d')
- polygons = []
- with ZipFile(shapefile, 'r') as zfile:
- with zfile.open(basename + '.shp') as sfile:
- with zfile.open(basename + '.dbf') as dfile:
- with zfile.open(basename + '.shx') as xfile:
- reader = shp.Reader(shp=sfile, shx=xfile, dbf=dfile)
- for poly in reader.shapeRecords():
- info = poly.__geo_interface__
- density = info['properties']['Density']
- if density not in DENSITY_ORDERING:
- print(f'Warning: unknown density {density}')
- assert info['geometry']['type'] == 'Polygon'
- coords = info['geometry']['coordinates']
- assert len(coords) == 1
- polygons.append((Polygon(coords[0]), density))
- return date, polygons
- def load_smoke_points(smokepointsfile):
- with open(smokepointsfile, 'r') as f:
- data = json.load(f)
- get = lambda f: np.array([d[f] for d in data])
- names = get('name')
- latitudes = get('latitude')
- longitudes = get('longitude')
- points = [Point(lon, lat) for lon, lat in zip(longitudes, latitudes)]
- return names, latitudes, longitudes, points
- def get_densities(points, polygons):
- densities = []
- for point in points:
- density_idx = 0
- for polygon, density in polygons:
- if polygon.contains(point):
- didx = DENSITY_ORDERING.index(density)
- if didx > density_idx:
- density_idx = didx
- densities.append(density_idx)
- return densities
- @click.command()
- @click.argument('smokedir', type=click.Path(
- path_type=Path, exists=True
- ))
- @click.argument('smokepoints', type=click.Path(
- path_type=Path, exists=True
- ))
- @click.argument('outputfile', type=click.Path(
- path_type=Path
- ))
- def main(smokedir, smokepoints, outputfile):
- names, lats, lons, points = load_smoke_points(smokepoints)
- smoke_files = []
- for d, dirs, files in os.walk(smokedir):
- for f in files:
- if f.endswith(SMOKE_EXT):
- smoke_files.append(os.path.join(d, f))
- dates = []
- densities = []
- for sm in tqdm(sorted(smoke_files), 'Reading polygons'):
- try:
- date, polygons = read_polygons(sm)
- except IndexError as e:
- print(f'Warning: error parsing {sm}')
- continue
- dates.append(date)
- densities.append(get_densities(points, polygons))
- dates = np.array(dates, dtype='datetime64[D]')
- densities = np.array(densities, dtype=int)
- result = {
- 'names': names,
- 'latitudes': lats,
- 'longitudes': lons,
- 'dates': dates,
- 'densities': densities,
- 'density_categories': np.array(DENSITY_ORDERING),
- }
- np.savez_compressed(outputfile, **result)
- if __name__ == '__main__':
- main()
|