1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
|
- #!/usr/bin/env python
- import os
- import click
- from pathlib import Path
- import json
- import numpy as np
- from tqdm import tqdm
- from sklearn.metrics.pairwise import haversine_distances
# Earth radius in kilometres (6371 km, the commonly used mean value).
# haversine_distances() returns great-circle distances on the unit sphere,
# so multiplying by this constant converts them to kilometres.
EARTH_RADIUS_KM = 6371.0
def load_config(configfile):
    """Read a JSON configuration file and return the parsed object.

    Parameters
    ----------
    configfile : str or os.PathLike
        Path of the JSON file to read.

    Returns
    -------
    object
        Whatever ``json.load`` produces for the file contents.  ``main``
        indexes the result with ``'distance_bins_km'``, so a JSON object
        (parsed to a ``dict``) is expected there.

    Raises
    ------
    OSError
        If the file cannot be opened.
    json.JSONDecodeError
        If the contents are not valid JSON.
    """
    # JSON text is UTF-8; pin the encoding instead of relying on the
    # locale default, which differs between platforms.
    with open(configfile, 'r', encoding='utf-8') as f:
        return json.load(f)
def extract_dataset(smokedata, firedata, dist_bins):
    """Build per-site fire-count features and matching smoke-density labels.

    For every site, the fire points are histogrammed by day and by distance
    from the site.  The counts are then accumulated along the distance axis,
    so ``features[i, j, k]`` is the number of fire points on day ``j`` whose
    distance from site ``i`` lies between ``dist_bins[0]`` and
    ``dist_bins[k + 1]`` kilometres.  Smoke densities are re-indexed onto the
    same run of consecutive days; days without a smoke record are filled
    with zeros.

    Parameters
    ----------
    smokedata : mapping of str to numpy.ndarray
        Must provide ``'densities'``, ``'dates'``, ``'latitudes'``,
        ``'longitudes'``, ``'names'`` and ``'density_categories'``.
        ``densities`` is walked along its first axis in step with ``dates``.
        NOTE(review): presumably shaped ``(n_dates, n_sites)`` -- confirm
        against whatever produces the smoke file.
    firedata : mapping of str to numpy.ndarray
        Must provide ``'dates'``, ``'latitudes'`` and ``'longitudes'``, one
        entry per fire point.  Coordinates are in degrees.
    dist_bins : sequence of float
        Distance bin edges in kilometres, passed to ``np.histogram2d``.

    Returns
    -------
    dict
        ``'names'``, ``'latitudes'``, ``'longitudes'`` and
        ``'density_categories'`` are copied from ``smokedata``;
        ``'densities'`` holds the integer labels with the day axis last;
        ``'dates'`` holds the consecutive days (``datetime64[D]``) from the
        first to the last fire date; ``'distance_bins_km'`` is ``dist_bins``;
        ``'features'`` has shape
        ``(n_sites, n_days, len(dist_bins) - 1)``.

    Raises
    ------
    ValueError
        If ``firedata`` contains no fire points, because the day range
        cannot be determined.
    """
    densities = smokedata['densities']
    # Truncate the smoke dates to whole days, exactly as is done for the fire
    # dates below.  The lookup keys (``bin_days``) are datetime64[D]; keys
    # stored with another unit or as strings would otherwise never match and
    # every label would silently fall back to zeros.
    smoke_dates = np.asarray(smokedata['dates']).astype('datetime64[D]')
    smoke_fill = np.zeros(densities.shape[-1], dtype=int)
    smoke_dict = dict(zip(smoke_dates, densities))

    # haversine_distances expects (latitude, longitude) pairs in radians.
    site_lat = smokedata['latitudes']
    site_lng = smokedata['longitudes']
    LLsite = np.deg2rad(np.column_stack([site_lat, site_lng]))

    fire_lat = firedata['latitudes']
    fire_lng = firedata['longitudes']
    LLfire = np.deg2rad(np.column_stack([fire_lat, fire_lng]))

    days = firedata['dates'].astype('datetime64[D]')
    if days.size == 0:
        # np.min/np.max below would fail with an opaque reduction error.
        raise ValueError(
            'firedata contains no fire points; cannot determine day range'
        )

    # Keep the fire coordinates aligned with the chronologically sorted days.
    idx = np.argsort(days)
    days = days[idx]
    LLfire = LLfire[idx]

    # Days since the epoch; one unit-width bin per calendar day, hence the
    # "+ 2" so that the last day gets its own [max, max + 1] bin.
    day_number = days.astype(int)
    d_bins = np.arange(np.min(day_number), np.max(day_number) + 2)
    bin_days = d_bins[:-1].astype('datetime64[D]')

    # One row per day, then transpose so the day axis comes last.
    labels = np.vstack([
        smoke_dict.get(d, smoke_fill)
        for d in bin_days
    ], dtype=int).T

    print('Computing distances...')
    # Rows correspond to sites, columns to fire points.
    D = EARTH_RADIUS_KM * haversine_distances(LLsite, LLfire)

    features = np.stack([
        np.histogram2d(
            day_number, Di,
            bins=[d_bins, dist_bins],
        )[0]
        for Di in tqdm(D, 'Computing Histograms')
    ], axis=0)

    # Take cumulative sum so features are total fire points within a given
    # radius
    features = np.cumsum(features, axis=2)

    return {
        'names': smokedata['names'],
        'latitudes': site_lat,
        'longitudes': site_lng,
        'density_categories': smokedata['density_categories'],
        'densities': labels,
        'dates': bin_days,
        'distance_bins_km': dist_bins,
        'features': features,
    }
@click.command()
@click.argument('smokefile', type=click.Path(
    path_type=Path, exists=True
))
@click.argument('firefile', type=click.Path(
    path_type=Path, exists=True
))
@click.argument('configfile', type=click.Path(
    path_type=Path, exists=True
))
@click.argument('outputfile', type=click.Path(
    path_type=Path
))
def main(smokefile, firefile, configfile, outputfile):
    """Combine smoke and fire data into a single compressed dataset.

    SMOKEFILE and FIREFILE are NumPy archives read with numpy.load,
    CONFIGFILE is a JSON file providing the "distance_bins_km" entry, and
    OUTPUTFILE is where the result is written with numpy.savez_compressed.
    """
    print('Loading data...')
    # np.load on an .npz archive returns an NpzFile that keeps the underlying
    # file open; the with-block closes both archives once the arrays needed
    # for the result have been read.
    with np.load(smokefile) as smokedata, np.load(firefile) as firedata:
        config = load_config(configfile)
        dist_bins = config['distance_bins_km']

        print('Extracting dataset...')
        result = extract_dataset(smokedata, firedata, dist_bins)

    np.savez_compressed(outputfile, **result)


if __name__ == '__main__':
    main()
|