Register
Login
Resources
Docs Blog Datasets Glossary Case Studies Tutorials & Webinars
Product
Data Engine LLMs Platform Enterprise
Pricing Explore
Connect to our Discord channel

extract_timeseries.py 2.7 KB

You have to be logged in to leave a comment. Sign In
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
  1. #!/usr/bin/env python
  2. import os
  3. import click
  4. from pathlib import Path
  5. import json
  6. import numpy as np
  7. from tqdm import tqdm
  8. from sklearn.metrics.pairwise import haversine_distances
  9. EARTH_RADIUS_KM = 6371.
  10. def load_config(configfile):
  11. with open(configfile, 'r') as f:
  12. return json.load(f)
  13. def extract_dataset(smokedata, firedata, dist_bins):
  14. densities = smokedata['densities']
  15. smoke_dates = smokedata['dates']
  16. smoke_fill = np.zeros(densities.shape[-1], dtype=int)
  17. smoke_dict = {
  18. date: d for date, d in zip(smoke_dates, densities)
  19. }
  20. site_lat = smokedata['latitudes']
  21. site_lng = smokedata['longitudes']
  22. LLsite = np.deg2rad(np.column_stack([site_lat, site_lng]))
  23. fire_lat = firedata['latitudes']
  24. fire_lng = firedata['longitudes']
  25. LLfire = np.deg2rad(np.column_stack([fire_lat, fire_lng]))
  26. days = firedata['dates'].astype('datetime64[D]')
  27. idx = np.argsort(days)
  28. days = days[idx]
  29. LLfire = LLfire[idx]
  30. day_number = days.astype(int)
  31. d_bins = np.arange(np.min(day_number), np.max(day_number) + 2)
  32. bin_days = d_bins[:-1].astype('datetime64[D]')
  33. labels = np.vstack([
  34. smoke_dict.get(d, smoke_fill)
  35. for d in bin_days
  36. ], dtype=int).T
  37. print('Computing distances...')
  38. D = EARTH_RADIUS_KM * haversine_distances(LLsite, LLfire)
  39. features = np.stack([
  40. np.histogram2d(
  41. day_number, Di,
  42. bins=[d_bins, dist_bins],
  43. )[0]
  44. for Di in tqdm(D, 'Computing Histograms')
  45. ], axis=0)
  46. # Take cumulative sum so features are total fire points within a given
  47. # radius
  48. features = np.cumsum(features, axis=2)
  49. return {
  50. 'names': smokedata['names'],
  51. 'latitudes': site_lat,
  52. 'longitudes': site_lng,
  53. 'density_categories': smokedata['density_categories'],
  54. 'densities': labels,
  55. 'dates': bin_days,
  56. 'distance_bins_km': dist_bins,
  57. 'features': features,
  58. }
  59. @click.command()
  60. @click.argument('smokefile', type=click.Path(
  61. path_type=Path, exists=True
  62. ))
  63. @click.argument('firefile', type=click.Path(
  64. path_type=Path, exists=True
  65. ))
  66. @click.argument('configfile', type=click.Path(
  67. path_type=Path, exists=True
  68. ))
  69. @click.argument('outputfile', type=click.Path(
  70. path_type=Path
  71. ))
  72. def main(smokefile, firefile, configfile, outputfile):
  73. print('Loading data...')
  74. smokedata = np.load(smokefile)
  75. firedata = np.load(firefile)
  76. config = load_config(configfile)
  77. dist_bins = config['distance_bins_km']
  78. print('Extracting dataset...')
  79. result = extract_dataset(smokedata, firedata, dist_bins)
  80. np.savez_compressed(outputfile, **result)
  81. if __name__ == '__main__':
  82. main()
Tip!

Press p to see the previous file, or n to see the next file

Comments

Loading...