Register
Login
Resources
Docs Blog Datasets Glossary Case Studies Tutorials & Webinars
Product
Data Engine LLMs Platform Enterprise
Pricing Explore
Connect to our Discord channel

geos_fp.py 3.1 KB

You have to be logged in to leave a comment. Sign In
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
  1. """
  2. Utilities for working with GEOS-FP data
  3. Based on GEOS-CF fetching code here by Kevin Marlis:
  4. https://github.jpl.nasa.gov/aqacf/aqacf-geoscf
  5. """
  6. import os
  7. import json
  8. import click
  9. import xarray as xr
  10. from tqdm import tqdm
  11. from datetime import datetime, timedelta
  12. from tempfile import TemporaryDirectory
  13. import boto3
  14. client = boto3.client(service_name='s3')
  15. def daterange(start_date, end_date):
  16. for n in range(int((end_date - start_date).days)):
  17. yield start_date + timedelta(n)
  18. def build_url(date: datetime.date, time: str, dataset: str) -> str:
  19. base_url = 'https://portal.nccs.nasa.gov/datashare/gmao/geos-fp/das'
  20. date_url = f'{base_url}/Y{date.year}/M{str(date.month).zfill(2)}/D{str(date.day).zfill(2)}'
  21. filename = f'GEOS.fp.asm.{dataset}.{date.strftime("%Y%m%d")}_{time}.V01.nc4'
  22. file_url = f'{date_url}/{filename}'
  23. return file_url, filename
  24. def build_outfile(date: datetime.date, dataset: str) -> str:
  25. return f'GEOS.fp.asm.{dataset}.{date.strftime("%Y%m%d")}.V01.SUB.nc4'
  26. def fetch_daily_results(date: datetime.date, times: list[str],
  27. outdir: str, dataset: str, variables: list[str]):
  28. urls = [
  29. build_url(date, time, dataset=dataset)
  30. for time in times
  31. ]
  32. output = os.path.join(
  33. outdir,
  34. build_outfile(date, dataset=dataset)
  35. )
  36. tempfiles = []
  37. with TemporaryDirectory() as tdir:
  38. for url, filename in tqdm(urls, f'Fetching {date.strftime("%Y%m%d")}'):
  39. tempfile = os.path.join(tdir, filename)
  40. with xr.open_dataset(f'{url}#mode=bytes') as d:
  41. d[variables].to_netcdf(tempfile)
  42. tempfiles.append(tempfile)
  43. with xr.open_mfdataset(tempfiles, join='override') as ds:
  44. ds.to_netcdf(output)
  45. response = client.upload_file(output, 'geos-fp-aer', build_outfile(date, dataset=dataset))
  46. print(response)
  47. os.remove(output)
  48. @click.command()
  49. @click.argument('configfile')
  50. @click.argument('outputdir')
  51. @click.option('-i', '--index', default=0, type=int)
  52. @click.option('-j', '--jobs', default=1, type=int)
  53. def main(configfile, outputdir, index, jobs):
  54. fetch(configfile, outputdir, index, jobs)
  55. def fetch(configfile, outputdir, index, jobs):
  56. with open(configfile, 'r') as f:
  57. config = json.load(f)
  58. start = datetime.strptime(config['start'], '%Y-%m-%d')
  59. end = datetime.strptime(config['end'], '%Y-%m-%d')
  60. drange = [
  61. d for i, d in enumerate(daterange(start, end))
  62. if (i % jobs) == index
  63. ]
  64. tstart = config['time_start']
  65. tint = config['time_interval']
  66. times = [f'{t:04d}' for t in range(tstart, 2400, tint)]
  67. dataset = config['dataset']
  68. dates = []
  69. for date in drange:
  70. outfile = os.path.join(
  71. outputdir, build_outfile(date, dataset=dataset)
  72. )
  73. if os.path.exists(outfile): continue
  74. dates.append(date)
  75. for date in tqdm(dates, 'Fetching Data'):
  76. fetch_daily_results(
  77. date, times, outputdir,
  78. dataset=config['dataset'],
  79. variables=config['variables'],
  80. )
  81. if __name__ == '__main__':
  82. main()
Tip!

Press p to see the previous file, or n to see the next file.

Comments

Loading...