1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
|
- import argparse
- from functools import partial, reduce
- from pathlib import Path
- from joblib import delayed, Parallel
- import numpy as np
- import pandas as pd
- import rioxarray
- from tqdm import tqdm
- classes = [0, 1, 2]
- WORKERS = 16
- def process_tile(tile, *, year):
- with rioxarray.open_rasterio(tile, chunks=(1, 512, 512)).squeeze(drop=True) as ds:
- # TODO: we only consider deadtrees (not potential classes of them)
- # ds = ds.clip(max=1)
- unique, counts = np.unique(ds.values, return_counts=True)
- row_data = dict(zip([f"cl_{int(x)}" for x in unique], counts))
- for c in classes:
- if f"cl_{c}" not in row_data:
- row_data[f"cl_{c}"] = 0
- row_data["total"] = int(ds.count().compute())
- row_data["tile"] = tile.stem.replace(f"ortho_ms_{year}_EPSG3044_", "")
- return row_data
- def main():
- parser = argparse.ArgumentParser()
- parser.add_argument("datapath", type=Path, nargs="+")
- args = parser.parse_args()
- dfs = []
- years = [2017, 2018, 2019, 2020]
- for year in years:
- inpath = None
- for dpath in args.datapath:
- if f"predicted.{year}" in str(dpath):
- inpath = dpath
- if not inpath:
- raise NotImplementedError
- print(f"Processing year: {year}...")
- tiles = sorted(inpath.glob("*.tif"))
- results = Parallel(n_jobs=WORKERS)(
- delayed(partial(process_tile, year=year))(x) for x in tqdm(tiles)
- )
- df = pd.DataFrame(results)
- df["deadarea_m2"] = (
- (df["cl_1"] + df["cl_2"]) * 0.200022269188281 * 0.200022454940277
- ).round(1)
- dfs.append(df)
- # add suffixes to each df (but remove if again form the join column <tile>)
- dfs = [df.add_suffix(f"_{s}") for df, s in zip(dfs, years)]
- dfs = [df.rename(columns={f"tile_{s}": "tile"}) for df, s in zip(dfs, years)]
- dfall = reduce(lambda x, y: pd.merge(x, y, on=["tile"], how="outer"), dfs)
- # cleanup, drop all but one columns total and move tile, total cols to front
- dfall = dfall.rename(columns={f"total_{years[0]}": "total"})
- dfall = dfall[dfall.columns.drop(list(dfall.filter(regex="total_")))]
- colnames = list(dfall)
- colnames.insert(0, colnames.pop(colnames.index("total")))
- colnames.insert(0, colnames.pop(colnames.index("tile")))
- dfall = dfall.loc[:, colnames].convert_dtypes()
- dfall.to_csv(args.datapath[0].parent / "predicted.stats.csv", index=False)
- # merge
- if __name__ == "__main__":
- main()
|