1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
|
- """
- Utilities for working with GEOS-FP data
- Based on GEOS-CF fetching code here by Kevin Marlis:
- https://github.jpl.nasa.gov/aqacf/aqacf-geoscf
- """
- import os
- import json
- import click
- import xarray as xr
- from tqdm import tqdm
- from datetime import datetime, timedelta
- from tempfile import TemporaryDirectory
- import boto3
# Module-level S3 client, shared by all uploads in this script.
# NOTE(review): relies on ambient AWS credentials (env vars / instance role) — confirm deploy environment.
client = boto3.client(service_name='s3')
def daterange(start_date, end_date):
    """Yield one datetime per day from start_date (inclusive) to end_date (exclusive)."""
    total_days = int((end_date - start_date).days)
    offset = 0
    while offset < total_days:
        yield start_date + timedelta(offset)
        offset += 1
def build_url(date: datetime, time: str, dataset: str) -> tuple[str, str]:
    """Build the GEOS-FP portal URL and filename for one granule.

    Parameters
    ----------
    date : datetime
        Day of the granule (only year/month/day are used).
    time : str
        Four-digit HHMM string, e.g. '0000'.
    dataset : str
        GEOS-FP collection name, e.g. 'tavg1_2d_aer_Nx'.

    Returns
    -------
    tuple[str, str]
        ``(file_url, filename)`` — the full download URL and the bare
        granule filename.
    """
    base_url = 'https://portal.nccs.nasa.gov/datashare/gmao/geos-fp/das'
    # Portal directory layout is Y<year>/M<month>/D<day>, zero-padded.
    date_url = f'{base_url}/Y{date.year}/M{str(date.month).zfill(2)}/D{str(date.day).zfill(2)}'
    filename = f'GEOS.fp.asm.{dataset}.{date.strftime("%Y%m%d")}_{time}.V01.nc4'
    # BUG FIX: the URL must end with the granule filename; the original line
    # contained a corrupted '(unknown)' placeholder instead of {filename}.
    file_url = f'{date_url}/{filename}'
    return file_url, filename
def build_outfile(date: datetime.date, dataset: str) -> str:
    """Return the filename of the merged daily subset file for date/dataset."""
    stamp = date.strftime('%Y%m%d')
    return 'GEOS.fp.asm.' + dataset + '.' + stamp + '.V01.SUB.nc4'
def fetch_daily_results(date: datetime.date, times: list[str],
                        outdir: str, dataset: str, variables: list[str]):
    """Download, subset, merge, and upload one day of GEOS-FP data.

    Each HHMM entry in ``times`` is fetched from the GEOS-FP portal, subset
    to ``variables``, and written to a temporary file.  The per-time subsets
    are merged into a single daily NetCDF in ``outdir``, uploaded to the
    'geos-fp-aer' S3 bucket, and the local merged file is then removed.
    """
    output = os.path.join(outdir, build_outfile(date, dataset=dataset))
    granules = [build_url(date, t, dataset=dataset) for t in times]
    subsets = []
    with TemporaryDirectory() as scratch:
        for granule_url, granule_name in tqdm(granules, f'Fetching {date.strftime("%Y%m%d")}'):
            subset_path = os.path.join(scratch, granule_name)
            # '#mode=bytes' tells the netCDF backend to stream the remote file over HTTP.
            with xr.open_dataset(f'{granule_url}#mode=bytes') as granule:
                granule[variables].to_netcdf(subset_path)
            subsets.append(subset_path)
        # Merge all per-time subsets into the single daily output file.
        with xr.open_mfdataset(subsets, join='override') as merged:
            merged.to_netcdf(output)
        response = client.upload_file(output, 'geos-fp-aer', build_outfile(date, dataset=dataset))
        print(response)
        # Keep only the S3 copy; drop the local merged file.
        os.remove(output)
@click.command()
@click.argument('configfile')
@click.argument('outputdir')
@click.option('-i', '--index', default=0, type=int)
@click.option('-j', '--jobs', default=1, type=int)
def main(configfile, outputdir, index, jobs):
    """CLI entry point: fetch GEOS-FP data described by CONFIGFILE into OUTPUTDIR.

    --index / --jobs shard the configured date range across parallel
    invocations: invocation ``index`` of ``jobs`` handles every ``jobs``-th day.
    """
    fetch(configfile, outputdir, index, jobs)
def fetch(configfile, outputdir, index, jobs):
    """Fetch every configured GEOS-FP day that is not already present on disk.

    Reads a JSON config with keys 'start', 'end' (YYYY-MM-DD), 'time_start',
    'time_interval' (HHMM integers), 'dataset', and 'variables'.  The date
    range is sharded so worker ``index`` of ``jobs`` processes every
    ``jobs``-th day.
    """
    with open(configfile, 'r') as f:
        config = json.load(f)

    start = datetime.strptime(config['start'], '%Y-%m-%d')
    end = datetime.strptime(config['end'], '%Y-%m-%d')

    # Keep only this worker's share of the full date range.
    drange = []
    for i, day in enumerate(daterange(start, end)):
        if i % jobs == index:
            drange.append(day)

    # HHMM strings from time_start up to (not including) 2400, stepped by time_interval.
    tstart = config['time_start']
    tint = config['time_interval']
    times = [f'{t:04d}' for t in range(tstart, 2400, tint)]

    dataset = config['dataset']

    # Skip days whose merged daily output already exists locally.
    dates = [
        day for day in drange
        if not os.path.exists(os.path.join(outputdir, build_outfile(day, dataset=dataset)))
    ]

    for date in tqdm(dates, 'Fetching Data'):
        fetch_daily_results(
            date, times, outputdir,
            dataset=config['dataset'],
            variables=config['variables'],
        )
# Allow running this module directly as a script.
if __name__ == '__main__':
    main()
|