from netCDF4 import Dataset
from argparse import ArgumentParser
import numpy as np
import sys

#
# Basic iceberg trajectory post-processing python script.
# This script collates iceberg trajectories from the distributed datasets written
# out by each processing region and rearranges the ragged arrays into contiguous
# streams for each unique iceberg. The output arrays are 2D (ntraj, ntimes) arrays.
# Note that some icebergs may only exist for a subset of the possible times. In these
# cases the missing instances are filled with invalid (NaN) values.
#
# Version 2.0 August 2017. Adapted to process all variables and retain original
# datatypes. (acc@noc.ac.uk)

parser = ArgumentParser(description='produce collated trajectory file \
from distributed output files, e.g. \
\n python ./icb_pp.py \
-t trajectory_icebergs_004248_ \
-n 296 -o trajsout.nc' )

parser.add_argument('-t', dest='froot',
                    help='fileroot_of_distrbuted_data; root name \
of distributed trajectory output (usually \
completed with XXXX.nc, where XXXX is the \
4 digit processor number)',
                    default='trajectory_icebergs_004248_')

parser.add_argument('-n', dest='fnum', help='number of distributed files to process',
                    type=int, default=None)

parser.add_argument('-o', dest='fout',
                    help='collated_output_file; file name to receive \
the collated trajectory data', default='trajsout.nc')

args = parser.parse_args()

#
# Resolve each option, falling back to a hard-wired default and remembering
# that a default was used so the effective command can be echoed back.
# NOTE(review): -t and -o carry argparse defaults, so their None branches are
# unreachable unless those defaults are removed; kept for safety. Only -n
# (default=None) can actually trigger the fallback.
#
default_used = 0
if args.froot is None:
    pathstart = 'trajectory_icebergs_004248_'
    default_used = 1
else:
    pathstart = args.froot

if args.fnum is None:
    procnum = 0
    default_used = 1
else:
    pathstart = pathstart  # no-op placeholder removed below
    procnum = args.fnum

if args.fout is None:
    pathout = 'trajsout.nc'
    default_used = 1
else:
    pathout = args.fout

if default_used == 1:
    print('At least one default value will be used; command executing is:')
    print('icb_pp.py -t ', pathstart, ' -n ', procnum, ' -o ', pathout)

# Without a file count there is nothing to collate; exit with a distinct code.
if procnum < 1:
    print('Need some files to collate! procnum = ', procnum)
    sys.exit(11)
---|
icu = []
times = []
#
# Loop through all distributed datasets to obtain the complete list
# of iceberg identification numbers and timesteps
#
for n in range(procnum):
    nn = '%4.4d' % n
    fw = Dataset(pathstart+nn+'.nc')
    # keep a list of the variables in the first dataset
    # NOTE(review): varlist references the variables mapping of a dataset that
    # is closed below; only its keys are used later, which survives the close
    # in netCDF4-python — confirm if data access is ever added.
    if n == 0:
        varlist = fw.variables
    #
    # skip any files with no icebergs
    if len(fw.dimensions['n']) > 0:
        # was a Python-2 print statement; made consistent with the rest of
        # the script's print() calls
        print(pathstart+nn+'.nc')
        ic = fw.variables['iceberg_number'][:, 0]
        ts = fw.variables['timestep'][:]
        icv = np.unique(ic)
        ts = np.unique(ts)
        print('Min Max ts: ', ts.min(), ts.max())
        print('Number unique icebergs= ', icv.shape[0])
        icu.append(icv)
        times.append(ts)
    fw.close()
#
# Now flatten the lists and reduce to the unique spanning set
#
icu = np.concatenate(icu)
icu = np.unique(icu)
times = np.concatenate(times)
times = np.unique(times)
ntraj = icu.shape[0]
print(ntraj, ' unique icebergs found across all datasets')
print('Icebergs ids range from: ', icu.min(), 'to: ', icu.max())
print('times range from: ', times.min(), 'to: ', times.max())
---|
#
# Declare array to receive data from all files
#
nt = times.shape[0]
#
# Count the variables to be collated (everything except iceberg_number,
# which becomes a coordinate-like 1D variable in the output).
# dict.iteritems() was Python-2 only; iterate keys directly.
n = 0
for key in varlist:
    if key != "iceberg_number":
        n = n + 1
inarr = np.zeros((n, ntraj, nt))
#
# initially fill with invalid data
#
inarr.fill(np.nan)
#
# Declare some lists to store variable names, types and long_name and units attributes
# iceberg_number gets special treatment
innam = []
intyp = []
inlngnam = []
inunits = []
for key in varlist:
    if key != "iceberg_number":
        innam.append(key)
#
# reopen the first datset to collect variable attributes
# (long_name and units only)
#
nn = '%4.4d' % 0
fw = Dataset(pathstart+nn+'.nc')
for key in varlist:
    if key != "iceberg_number":
        intyp.append(fw.variables[key].dtype)
        inlngnam.append(fw.variables[key].getncattr('long_name'))
        inunits.append(fw.variables[key].getncattr('units'))
fw.close()
---|
#
# loop through distributed datasets again, this time
# checking indices against icu and times lists and
# inserting data into the correct locations in the
# collated sets.
#
for n in range(procnum):
    nn = '%4.4d' % n
    fw = Dataset(pathstart+nn+'.nc')
    #
    # Note many distributed datafiles will contain no iceberg data
    # so skip quickly over these
    m = len(fw.dimensions['n'])
    if m > 0:
        ic = fw.variables['iceberg_number'][:, 0]
        ts = fw.variables['timestep'][:]
        # icu and times are sorted unique arrays (np.unique output) and every
        # id/timestep in this file is guaranteed present, so searchsorted
        # gives the target indices directly. This replaces per-element
        # np.where scans, and avoids assigning a length-1 array into a
        # scalar slot (deprecated/erroring in modern NumPy).
        inx = np.searchsorted(icu, ic)
        tsx = np.searchsorted(times, ts)
        # v counts collated variables; renamed from 'n' so it no longer
        # shadows the file-loop index within the loop body
        v = 0
        for key in varlist:
            if key != "iceberg_number":
                insmall = fw.variables[innam[v]][:]
                inarr[v, inx, tsx] = insmall[:]
                v = v + 1
    fw.close()
---|
#
# Finally create the output file and write out the collated sets
#
fo = Dataset(pathout, 'w', format='NETCDF4_CLASSIC')
ntrj = fo.createDimension('ntraj', ntraj)
nti = fo.createDimension('ntime', None)        # unlimited time dimension
icbn = fo.createVariable('iceberg_number', 'i4', ('ntraj'))
icbn[:] = icu
# Write each collated variable with its original dtype and attributes.
# dict.iteritems() was Python-2 only; iterate keys directly.
n = 0
for key in varlist:
    if key != "iceberg_number":
        oout = fo.createVariable(innam[n], intyp[n], ('ntraj', 'ntime'),
                                 zlib=True, complevel=1, chunksizes=(1, nt))
        oout[:, :] = inarr[n, :, :]
        oout.long_name = inlngnam[n]
        oout.units = inunits[n]
        n = n + 1
fo.close()
---|