New URL for NEMO forge!   http://forge.nemo-ocean.eu

Since March 2022 along with NEMO 4.2 release, the code development moved to a self-hosted GitLab.
This present forge is now archived and remained online for history.
diamlr.py in utils/tools_dev_r11751_ENHANCE-05_SimonM-Harmonic_Analysis/DIAMLR – NEMO

source: utils/tools_dev_r11751_ENHANCE-05_SimonM-Harmonic_Analysis/DIAMLR/diamlr.py @ 12018

Last change on this file since 12018 was 12018, checked in by smueller, 4 years ago

Addition of a rudimentary tool for the postprocessing of intermediate model output for multiple-linear-regression analysis (see ticket #2175)

File size: 7.4 KB
Line 
1#! /usr/bin/python
2# ======================================================================
3#                        ***  TOOL diamlr.py  ***
4# Postprocessing of intermediate NEMO model output for
5# multiple-linear-regression analysis (diamlr)
6# ======================================================================
7# History :      !  2019  (S. Mueller)
8# ----------------------------------------------------------------------
9import sys
10# ----------------------------------------------------------------------
11# NEMO/TOOLS 4.0 , NEMO Consortium (2019)
12# $Id$
13# Software governed by the CeCILL license (see ./LICENSE)
14# ----------------------------------------------------------------------
15
16# ----------------------------------------------------------------------
17#                    ***  SUBROUTINE get_args  ***
18# Parse command line arguments
19# ----------------------------------------------------------------------
20def get_args():
21    from argparse import ArgumentParser
22    import re
23
24    # Set up command-line argument parser
25    parser = ArgumentParser(
26        description='Postprocessing of intermediate NEMO model output'+
27                    ' for multiple-linear-regression analysis (diamlr)')
28    parser.add_argument('--file_scalar', help=
29                        'Filename of scalar intermediate NEMO model'+
30                        ' output for multiple-linear-regression'+
31                        ' analysis')
32    parser.add_argument('--file_grid', help=
33                        'Filename of gridded intermediate NEMO model'+
34                        ' output for multiple-linear-regression'+
35                        ' analysis')
36    parser.add_argument('--regressors', nargs='+', required=False,
37                        help='Optional list of regressors to include'+
38                        ' in analysis; if omitted, all available'+
39                        ' regressors will be included')
40    args = parser.parse_args()
41    return args
42
43# ----------------------------------------------------------------------
44#                    ***  SUBROUTINE main  ***
45# Finalisation of multiple-linear-regression analysis
46# ----------------------------------------------------------------------
47def main():
48    from netCDF4 import Dataset as nc
49    import re
50    import numpy as np
51    from os.path import basename, splitext
52    from time import strftime, localtime
53
54    # Get command-line arguments
55    args = get_args()
56
57    # Get filenames/locations of intermdiate diamlr output
58    fn_scalar = args.file_scalar
59    fn_grid   = args.file_grid
60
61    # Open regressor-regressor scalar-product data set
62    f = nc(fn_scalar, 'r')
63
64    # Detect available regressors; reduce list of regressors according
65    # to list of selected regressors (if specified)
66    regs = {}
67    vn_re = re.compile('(diamlr_r[0-9]{3})\.(diamlr_r[0-9]{3})')
68    for vn in f.variables.keys():
69        vn_match = vn_re.match(vn)
70        if (vn_match):
71            reg1 = vn_match.group(1)
72            reg2 = vn_match.group(2)
73            if not args.regressors or reg1 in args.regressors:
74                regs[vn_match.group(1)] = True
75            if not args.regressors or reg2 in args.regressors:
76                regs[vn_match.group(2)] = True
77    regs = regs.keys()
78    regs.sort()
79
80    print('Compile and invert matrix of regressor-regressor scalar'+
81          'products ...')
82
83    # Set up square matrix, XX, of regressor-regressor scalar products
84    xx = np.matrix(np.zeros((len(regs), len(regs))))
85    for i1 in range(len(regs)):
86        vn1 = regs[i1]
87        for i2 in range(regs.index(vn1)+1):
88            vn2 = regs[i2]
89            if f.variables[vn1+'.'+vn2]:
90                xx_sum = np.sum(f.variables[vn1+'.'+vn2][:])
91            else:
92                xx_sum = np.sum(f.variables[vn2+'.'+vn1][:])
93            xx[i1,i2] = xx_sum
94            if i1 != i2:
95                xx[i2,i1] = xx_sum
96
97    # Close regressor-regressor scalar-product data set
98    f.close()
99
100    # Compute inverse matrix, XX^-1; convert matrix to an array to
101    # enable the dot-product computation of XX with a large array below
102    ixx = np.array(xx**-1)
103
104    print('   ... done')
105
106    # Open field-regressor scalar-product data set
107    f = nc(fn_grid, 'r')
108
109    # Detect analysed fields
110    flds = {}
111    vn_re = re.compile('(diamlr_f[0-9]{3})\.(diamlr_r[0-9]{3})')
112    for vn in f.variables.keys():
113        print vn
114        vn_match = vn_re.match(vn)
115        if (vn_match):
116            if vn_match.group(2) in regs:
117                flds[vn_match.group(1)] = True
118    flds = flds.keys()
119    flds.sort()
120
121    # Open and prepare output file, incl. replication of
122    # domain-decomposition metadata (if present) from the
123    # field-regressor data set
124    fn_out = './'+basename(fn_grid)
125    if fn_out.find('diamlr') > 0:
126        fn_out = fn_out.replace('diamlr', 'diamlr_coeffs')
127    else:
128        fn_parts = splitext(basename(fn_grid))
129        fn_out = fn_parts[0]+'_diamlr_coeffs'+fn_parts[1]
130    nc_out = nc(fn_out, 'w', format='NETCDF4', clobber=False)
131    nc_atts = {
132        'name'        : fn_out,
133        'description' : 'Multiple-linear-regression analysis output',
134        'title'       : 'Multiple-linear-regression analysis output',
135        'timeStamp'   : strftime('%Y-%m-%d %H:%M:%S %Z', localtime())}
136    for nc_att in f.ncattrs():
137        if nc_att in ['ibegin', 'jbegin',
138                      'ni', 'nj'] or nc_att.startswith('DOMAIN'):
139           nc_atts[nc_att] = f.getncattr(nc_att)
140    nc_out.setncatts(nc_atts)
141    for nc_dimname in f.dimensions.keys():
142        nc_dim = f.dimensions[nc_dimname]
143        if nc_dim.isunlimited():
144            nc_out.createDimension(nc_dim.name)
145        else:
146            nc_out.createDimension(nc_dim.name, nc_dim.size)
147
148    # Read in fields of scalar products of model diagnostics and
149    # regressors and compute the regression coefficients for the current
150    # field; add resulting fields to output file
151    for fld in flds:
152        print('Completing analysis for field '+fld+'...')
153        xy = np.array([])
154        for reg in regs:
155            if xy.size == 0:
156                xy = np.sum(f.variables[fld+'.'+reg][:],
157                            axis=0)[np.newaxis,:]
158            else:
159                xy = np.r_[xy, np.sum(f.variables[fld+'.'+reg][:],
160                                      axis=0)[np.newaxis,:]]
161        b=np.reshape(np.dot(ixx, np.reshape(xy,(len(xy),xy[0,:].size))),
162                     (xy.shape))
163        print('   ... done')
164
165        for reg in regs:
166            nr = regs.index(reg)
167            nc_gridvar = f.variables[fld+'.'+reg]
168            name = nc_gridvar.name.split('.')
169            nc_var = nc_out.createVariable(name[0]+'-'+name[1],
170                                           nc_gridvar.datatype,
171                                           nc_gridvar.dimensions)
172            for nc_att in nc_gridvar.ncattrs():
173                if nc_att in ['_FillValue', 'missing_value']:
174                    nc_var.setncattr(nc_att, nc_gridvar.getncattr(
175                        nc_att))
176            name = nc_gridvar.getncattr('standard_name').split('.')
177            nc_var.setncattr(
178                'standard_name', name[0]+' regressed on '+name[1])
179            nc_var[0,:] = b[nr,:]
180
181    # Close output file; close field-regressor scalar-product data set
182    nc_out.close()
183    f.close()
184
185# ----------------------------------------------------------------------
186#                     ***  main PROGRAM  ***
187# ----------------------------------------------------------------------
188if __name__ == "__main__":
189
190    main()
Note: See TracBrowser for help on using the repository browser.