from netCDF4 import Dataset
from argparse import ArgumentParser
import numpy as np
import sys

#
# Basic iceberg trajectory post-processing Python script.
# This script collates iceberg trajectories from the distributed datasets written
# out by each processing region and rearranges the ragged arrays into contiguous
# streams for each unique iceberg. The output arrays are 2D (ntraj, ntimes) arrays.
# Note that some icebergs may only exist for a subset of the possible times. In these
# cases the missing instances are filled with invalid (NaN) values.
#
# Version 2.0 August 2017. Adapted to process all variables and retain original
# datatypes. (acc@noc.ac.uk)
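#
# As an illustration (made-up numbers): if file 0000 holds iceberg 12 at
# timesteps 10 and 20, and file 0001 holds iceberg 12 at timestep 30 plus
# iceberg 47 at timestep 10, the collated output contains, for each variable,
# a (ntraj=2, ntimes=3) array with NaN wherever an iceberg has no record.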

parser = ArgumentParser(description='produce collated trajectory file \
                                     from distributed output files, e.g. \
                                     \n python ./icb_pp.py \
                                     -t trajectory_icebergs_004248_ \
                                     -n 296 -o trajsout.nc')

parser.add_argument('-t', dest='froot',
                    help='fileroot_of_distributed_data; root name \
                          of distributed trajectory output (usually \
                          completed with XXXX.nc, where XXXX is the \
                          4 digit processor number)')

parser.add_argument('-n', dest='fnum', help='number of distributed files to process',
                    type=int, default=None)

parser.add_argument('-o', dest='fout',
                    help='collated_output_file; file name to receive \
                          the collated trajectory data')

args = parser.parse_args()

# Defaults for -t and -o are applied below rather than in add_argument so
# that the `is None` tests can detect and report when a default is used.
default_used = 0
if args.froot is None:
    pathstart = 'trajectory_icebergs_004248_'
    default_used = 1
else:
    pathstart = args.froot

if args.fnum is None:
    procnum = 0
    default_used = 1
else:
    procnum = args.fnum

if args.fout is None:
    pathout = 'trajsout.nc'
    default_used = 1
else:
    pathout = args.fout

if default_used == 1:
    print('At least one default value will be used; command executing is:')
    print('icb_pp.py -t ', pathstart, ' -n ', procnum, ' -o ', pathout)

if procnum < 1:
    print('Need some files to collate! procnum = ', procnum)
    sys.exit(11)

icu = []
times = []
#
# Loop through all distributed datasets to obtain the complete list
# of iceberg identification numbers and timesteps
#
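# (np.unique returns a sorted array of distinct values, e.g.
# np.unique([7, 3, 7]) gives array([3, 7]), so the collected icu and times
# sets end up sorted and duplicate-free across all files.)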
for n in range(procnum):
    nn = '%4.4d' % n
    fw = Dataset(pathstart + nn + '.nc')
    # keep a list of the variable names from the first dataset
    # (names only, so the list stays valid after this dataset is closed)
    if n == 0:
        varlist = list(fw.variables.keys())
    #
    # skip any files with no icebergs
    if len(fw.dimensions['n']) > 0:
        print(pathstart + nn + '.nc')
        ic = fw.variables['iceberg_number'][:, 0]
        ts = fw.variables['timestep'][:]
        icv = np.unique(ic)
        ts = np.unique(ts)
        print('Min Max ts: ', ts.min(), ts.max())
        print('Number unique icebergs= ', icv.shape[0])
        icu.append(icv)
        times.append(ts)
    fw.close()
#
# Now flatten the lists and reduce to the unique spanning set
#
try:
    icu = np.concatenate(icu)
except ValueError:
    # No icebergs anywhere: create an empty output file with the expected
    # structure, then exit.
    print('No icebergs in the model.')
    fw = Dataset(pathstart + '0000.nc')
    fo = Dataset(pathout, 'w', format='NETCDF4_CLASSIC')
    ntrj = fo.createDimension('ntraj', None)
    icbn = fo.createVariable('iceberg_number', 'i4', ('ntraj',))
    for key in varlist:
        if key != "iceberg_number":
            print('key is ', key)
            oout = fo.createVariable(key, fw.variables[key].dtype, ('ntraj',),
                                     zlib=True, complevel=1)
            oout.long_name = fw.variables[key].getncattr('long_name')
            oout.units = fw.variables[key].getncattr('units')
    fw.close()
    fo.close()
    sys.exit()

icu = np.unique(icu)
times = np.concatenate(times)
times = np.unique(times)
ntraj = icu.shape[0]
print(ntraj, ' unique icebergs found across all datasets')
print('Iceberg ids range from: ', icu.min(), 'to: ', icu.max())
print('times range from: ', times.min(), 'to: ', times.max())
#
# Declare array to receive data from all files
#
nt = times.shape[0]
#
nvars = sum(1 for key in varlist if key != "iceberg_number")
inarr = np.zeros((nvars, ntraj, nt))
#
# initially fill with invalid data
#
inarr.fill(np.nan)
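# (inarr is float64 for every variable so that NaN can mark missing
# (iceberg, time) pairs; values are cast back to each variable's native
# dtype when written to the output file below.)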
#
# Declare some lists to store variable names, types and long_name and units
# attributes; iceberg_number gets special treatment.
# Reopen the first dataset to collect the variable attributes
# (long_name and units only).
#
innam = []
intyp = []
inlngnam = []
inunits = []
nn = '%4.4d' % 0
fw = Dataset(pathstart + nn + '.nc')
for key in varlist:
    if key != "iceberg_number":
        innam.append(key)
        intyp.append(fw.variables[key].dtype)
        inlngnam.append(fw.variables[key].getncattr('long_name'))
        inunits.append(fw.variables[key].getncattr('units'))
fw.close()
#
# loop through distributed datasets again, this time
# checking indices against icu and times lists and
# inserting data into the correct locations in the
# collated sets.
#
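# (Because icu and times are the sorted outputs of np.unique, the per-element
# np.where lookups below could equivalently be vectorised, e.g.
#     inx = np.searchsorted(icu, ic)
#     tsx = np.searchsorted(times, ts)
# The explicit loops are kept to match the original logic.)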
for n in range(procnum):
    nn = '%4.4d' % n
    fw = Dataset(pathstart + nn + '.nc')
    #
    # Note many distributed datafiles will contain no iceberg data
    # so skip quickly over these
    m = len(fw.dimensions['n'])
    if m > 0:
        inx = np.zeros(m, dtype=int)
        tsx = np.zeros(m, dtype=int)
        # print(pathstart + nn + '.nc')
        ic = fw.variables['iceberg_number'][:, 0]
        ts = fw.variables['timestep'][:]
        for k in range(m):
            inxx = np.where(icu == ic[k])
            inx[k] = inxx[0][0]
        for k in range(m):
            inxx = np.where(times == ts[k])
            tsx[k] = inxx[0][0]
        # iv indexes the collated variables; innam already excludes
        # iceberg_number, and a separate name avoids clobbering the
        # file-loop counter n
        for iv, name in enumerate(innam):
            insmall = fw.variables[name][:]
            inarr[iv, inx, tsx] = insmall
    fw.close()
#
# Finally create the output file and write out the collated sets
#
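# (With chunksizes=(1, nt), each compressed chunk holds one complete iceberg
# trajectory, which suits reading a single trajectory at a time.)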
fo = Dataset(pathout, 'w', format='NETCDF4_CLASSIC')
ntrj = fo.createDimension('ntraj', ntraj)
nti = fo.createDimension('ntime', None)
icbn = fo.createVariable('iceberg_number', 'i4', ('ntraj',))
icbn[:] = icu
for iv, name in enumerate(innam):
    oout = fo.createVariable(name, intyp[iv], ('ntraj', 'ntime'),
                             zlib=True, complevel=1, chunksizes=(1, nt))
    oout[:, :] = inarr[iv, :, :]
    oout.long_name = inlngnam[iv]
    oout.units = inunits[iv]
fo.close()