from netCDF4 import Dataset
from argparse import ArgumentParser
import numpy as np
import sys

#
# Basic iceberg trajectory post-processing Python script.
# This script collates iceberg trajectories from the distributed datasets written
# out by each processing region and rearranges the ragged arrays into contiguous
# streams for each unique iceberg. The output arrays are 2D (ntraj, ntimes) arrays.
# Note that some icebergs may only exist for a subset of the possible times. In these
# cases the missing instances are filled with invalid (NaN) values.
#
# Version 2.0 August 2017. Adapted to process all variables and retain original
# datatypes. (acc@noc.ac.uk)
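#
# As an illustration (made-up numbers): if file 0000 holds iceberg 12 at
# timesteps 10 and 20, and file 0001 holds iceberg 12 at timestep 30 plus
# iceberg 47 at timestep 10, the collated output contains, for each variable,
# a (ntraj=2, ntimes=3) array with NaN wherever an iceberg has no record.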

parser = ArgumentParser(description='produce collated trajectory file \
                                     from distributed output files, e.g. \
                                     \n python ./icb_pp.py \
                                     -t trajectory_icebergs_004248_ \
                                     -n 296 -o trajsout.nc')

parser.add_argument('-t', dest='froot',
                    help='fileroot_of_distributed_data; root name \
                          of distributed trajectory output (usually \
                          completed with XXXX.nc, where XXXX is the \
                          4 digit processor number)')

parser.add_argument('-n', dest='fnum', help='number of distributed files to process',
                    type=int, default=None)

parser.add_argument('-o', dest='fout',
                    help='collated_output_file; file name to receive \
                          the collated trajectory data')

args = parser.parse_args()

# Defaults for -t and -o are applied below rather than in add_argument so
# that the `is None` tests can detect and report when a default is used.
default_used = 0
if args.froot is None:
    pathstart = 'trajectory_icebergs_004248_'
    default_used = 1
else:
    pathstart = args.froot

if args.fnum is None:
    procnum = 0
    default_used = 1
else:
    procnum = args.fnum

if args.fout is None:
    pathout = 'trajsout.nc'
    default_used = 1
else:
    pathout = args.fout

if default_used == 1:
    print('At least one default value will be used; command executing is:')
    print('icb_pp.py -t ', pathstart, ' -n ', procnum, ' -o ', pathout)

if procnum < 1:
    print('Need some files to collate! procnum = ', procnum)
    sys.exit(11)

icu = []
times = []
#
# Loop through all distributed datasets to obtain the complete list
# of iceberg identification numbers and timesteps
#
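# (np.unique returns a sorted array of distinct values, e.g.
# np.unique([7, 3, 7]) gives array([3, 7]), so the collected icu and times
# sets end up sorted and duplicate-free across all files.)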
for n in range(procnum):
    nn = '%4.4d' % n
    fw = Dataset(pathstart + nn + '.nc')
    # keep a list of the variable names from the first dataset
    # (names only, so the list stays valid after this dataset is closed)
    if n == 0:
        varlist = list(fw.variables.keys())
    #
    # skip any files with no icebergs
    if len(fw.dimensions['n']) > 0:
        print(pathstart + nn + '.nc')
        ic = fw.variables['iceberg_number'][:, 0]
        ts = fw.variables['timestep'][:]
        icv = np.unique(ic)
        ts = np.unique(ts)
        print('Min Max ts: ', ts.min(), ts.max())
        print('Number unique icebergs= ', icv.shape[0])
        icu.append(icv)
        times.append(ts)
    fw.close()
#
# Now flatten the lists and reduce to the unique spanning set
#
try:
    icu = np.concatenate(icu)
except ValueError:
    # No icebergs anywhere: create an empty output file with the expected
    # structure, then exit.
    print('No icebergs in the model.')
    fw = Dataset(pathstart + '0000.nc')
    fo = Dataset(pathout, 'w', format='NETCDF4_CLASSIC')
    ntrj = fo.createDimension('ntraj', None)
    icbn = fo.createVariable('iceberg_number', 'i4', ('ntraj',))
    for key in varlist:
        if key != "iceberg_number":
            print('key is ', key)
            oout = fo.createVariable(key, fw.variables[key].dtype, ('ntraj',),
                                     zlib=True, complevel=1)
            oout.long_name = fw.variables[key].getncattr('long_name')
            oout.units = fw.variables[key].getncattr('units')
    fw.close()
    fo.close()
    sys.exit()

icu = np.unique(icu)
times = np.concatenate(times)
times = np.unique(times)
ntraj = icu.shape[0]
print(ntraj, ' unique icebergs found across all datasets')
print('Iceberg ids range from: ', icu.min(), 'to: ', icu.max())
print('times range from: ', times.min(), 'to: ', times.max())
#
# Declare array to receive data from all files
#
nt = times.shape[0]
#
nvars = sum(1 for key in varlist if key != "iceberg_number")
inarr = np.zeros((nvars, ntraj, nt))
#
# initially fill with invalid data
#
inarr.fill(np.nan)
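# (inarr is float64 for every variable so that NaN can mark missing
# (iceberg, time) pairs; values are cast back to each variable's native
# dtype when written to the output file below.)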
#
# Declare some lists to store variable names, types and long_name and units
# attributes; iceberg_number gets special treatment.
# Reopen the first dataset to collect the variable attributes
# (long_name and units only).
#
innam = []
intyp = []
inlngnam = []
inunits = []
nn = '%4.4d' % 0
fw = Dataset(pathstart + nn + '.nc')
for key in varlist:
    if key != "iceberg_number":
        innam.append(key)
        intyp.append(fw.variables[key].dtype)
        inlngnam.append(fw.variables[key].getncattr('long_name'))
        inunits.append(fw.variables[key].getncattr('units'))
fw.close()
#
# loop through distributed datasets again, this time
# checking indices against icu and times lists and
# inserting data into the correct locations in the
# collated sets.
#
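# (Because icu and times are the sorted outputs of np.unique, the per-element
# np.where lookups below could equivalently be vectorised, e.g.
#     inx = np.searchsorted(icu, ic)
#     tsx = np.searchsorted(times, ts)
# The explicit loops are kept to match the original logic.)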
for n in range(procnum):
    nn = '%4.4d' % n
    fw = Dataset(pathstart + nn + '.nc')
    #
    # Note many distributed datafiles will contain no iceberg data
    # so skip quickly over these
    m = len(fw.dimensions['n'])
    if m > 0:
        inx = np.zeros(m, dtype=int)
        tsx = np.zeros(m, dtype=int)
        # print(pathstart + nn + '.nc')
        ic = fw.variables['iceberg_number'][:, 0]
        ts = fw.variables['timestep'][:]
        for k in range(m):
            inxx = np.where(icu == ic[k])
            inx[k] = inxx[0][0]
        for k in range(m):
            inxx = np.where(times == ts[k])
            tsx[k] = inxx[0][0]
        # iv indexes the collated variables; innam already excludes
        # iceberg_number, and a separate name avoids clobbering the
        # file-loop counter n
        for iv, name in enumerate(innam):
            insmall = fw.variables[name][:]
            inarr[iv, inx, tsx] = insmall
    fw.close()
#
# Finally create the output file and write out the collated sets
#
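# (With chunksizes=(1, nt), each compressed chunk holds one complete iceberg
# trajectory, which suits reading a single trajectory at a time.)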
fo = Dataset(pathout, 'w', format='NETCDF4_CLASSIC')
ntrj = fo.createDimension('ntraj', ntraj)
nti = fo.createDimension('ntime', None)
icbn = fo.createVariable('iceberg_number', 'i4', ('ntraj',))
icbn[:] = icu
for iv, name in enumerate(innam):
    oout = fo.createVariable(name, intyp[iv], ('ntraj', 'ntime'),
                             zlib=True, complevel=1, chunksizes=(1, nt))
    oout[:, :] = inarr[iv, :, :]
    oout.long_name = inlngnam[iv]
    oout.units = inunits[iv]
fo.close()