New URL for NEMO forge!   http://forge.nemo-ocean.eu

Since March 2022 along with NEMO 4.2 release, the code development moved to a self-hosted GitLab.
This present forge is now archived and remained online for history.
diamlr.py in utils/tools_dev_r11751_ENHANCE-05_SimonM-Harmonic_Analysis/DIAMLR – NEMO

source: utils/tools_dev_r11751_ENHANCE-05_SimonM-Harmonic_Analysis/DIAMLR/diamlr.py @ 12033

Last change on this file since 12033 was 12033, checked in by smueller, 4 years ago

Correction of the inclusion of regression coefficients in the output file and minor adjustments of standard output (ticket #2175)

File size: 7.4 KB
Line 
1#! /usr/bin/python
2# ======================================================================
3#                        ***  TOOL diamlr.py  ***
4# Postprocessing of intermediate NEMO model output for
5# multiple-linear-regression analysis (diamlr)
6# ======================================================================
7# History :      !  2019  (S. Mueller)
8# ----------------------------------------------------------------------
9import sys
10# ----------------------------------------------------------------------
11# NEMO/TOOLS 4.0 , NEMO Consortium (2019)
12# $Id$
13# Software governed by the CeCILL license (see ./LICENSE)
14# ----------------------------------------------------------------------
15
16# ----------------------------------------------------------------------
17#                    ***  SUBROUTINE get_args  ***
18# Parse command line arguments
19# ----------------------------------------------------------------------
20def get_args():
21    from argparse import ArgumentParser
22    import re
23
24    # Set up command-line argument parser
25    parser = ArgumentParser(
26        description='Postprocessing of intermediate NEMO model output'+
27                    ' for multiple-linear-regression analysis (diamlr)')
28    parser.add_argument('--file_scalar', help=
29                        'Filename of scalar intermediate NEMO model'+
30                        ' output for multiple-linear-regression'+
31                        ' analysis')
32    parser.add_argument('--file_grid', help=
33                        'Filename of gridded intermediate NEMO model'+
34                        ' output for multiple-linear-regression'+
35                        ' analysis')
36    parser.add_argument('--regressors', nargs='+', required=False,
37                        help='Optional list of regressors to include'+
38                        ' in analysis; if omitted, all available'+
39                        ' regressors will be included')
40    args = parser.parse_args()
41    return args
42
43# ----------------------------------------------------------------------
44#                    ***  SUBROUTINE main  ***
45# Finalisation of multiple-linear-regression analysis
46# ----------------------------------------------------------------------
47def main():
48    from netCDF4 import Dataset as nc
49    import re
50    import numpy as np
51    from os.path import basename, splitext
52    from time import strftime, localtime
53
54    # Get command-line arguments
55    args = get_args()
56
57    # Get filenames/locations of intermdiate diamlr output
58    fn_scalar = args.file_scalar
59    fn_grid   = args.file_grid
60
61    # Open regressor-regressor scalar-product data set
62    f = nc(fn_scalar, 'r')
63
64    # Detect available regressors; reduce list of regressors according
65    # to list of selected regressors (if specified)
66    regs = {}
67    vn_re = re.compile('(diamlr_r[0-9]{3})\.(diamlr_r[0-9]{3})')
68    for vn in f.variables.keys():
69        vn_match = vn_re.match(vn)
70        if (vn_match):
71            reg1 = vn_match.group(1)
72            reg2 = vn_match.group(2)
73            if not args.regressors or reg1 in args.regressors:
74                regs[vn_match.group(1)] = True
75            if not args.regressors or reg2 in args.regressors:
76                regs[vn_match.group(2)] = True
77    regs = regs.keys()
78    regs.sort()
79
80    print('Compile and invert matrix of regressor-regressor scalar'+
81          'products ...')
82
83    # Set up square matrix, XX, of regressor-regressor scalar products
84    xx = np.matrix(np.zeros((len(regs), len(regs))))
85    for i1 in range(len(regs)):
86        vn1 = regs[i1]
87        for i2 in range(regs.index(vn1)+1):
88            vn2 = regs[i2]
89            if f.variables[vn1+'.'+vn2]:
90                xx_sum = np.sum(f.variables[vn1+'.'+vn2][:])
91            else:
92                xx_sum = np.sum(f.variables[vn2+'.'+vn1][:])
93            xx[i1,i2] = xx_sum
94            if i1 != i2:
95                xx[i2,i1] = xx_sum
96
97    # Close regressor-regressor scalar-product data set
98    f.close()
99
100    # Compute inverse matrix, XX^-1; convert matrix to an array to
101    # enable the dot-product computation of XX with a large array below
102    ixx = np.array(xx**-1)
103
104    print('   ... done')
105
106    # Open field-regressor scalar-product data set
107    f = nc(fn_grid, 'r')
108
109    # Detect analysed fields
110    flds = {}
111    vn_re = re.compile('(diamlr_f[0-9]{3})\.(diamlr_r[0-9]{3})')
112    for vn in f.variables.keys():
113        vn_match = vn_re.match(vn)
114        if (vn_match):
115            if vn_match.group(2) in regs:
116                flds[vn_match.group(1)] = True
117    flds = flds.keys()
118    flds.sort()
119
120    # Open and prepare output file, incl. replication of
121    # domain-decomposition metadata (if present) from the
122    # field-regressor data set
123    fn_out = './'+basename(fn_grid)
124    if fn_out.find('diamlr') > 0:
125        fn_out = fn_out.replace('diamlr', 'diamlr_coeffs')
126    else:
127        fn_parts = splitext(basename(fn_grid))
128        fn_out = fn_parts[0]+'_diamlr_coeffs'+fn_parts[1]
129    nc_out = nc(fn_out, 'w', format='NETCDF4', clobber=False)
130    nc_atts = {
131        'name'        : fn_out,
132        'description' : 'Multiple-linear-regression analysis output',
133        'title'       : 'Multiple-linear-regression analysis output',
134        'timeStamp'   : strftime('%Y-%m-%d %H:%M:%S %Z', localtime())}
135    for nc_att in f.ncattrs():
136        if nc_att in ['ibegin', 'jbegin',
137                      'ni', 'nj'] or nc_att.startswith('DOMAIN'):
138           nc_atts[nc_att] = f.getncattr(nc_att)
139    nc_out.setncatts(nc_atts)
140    for nc_dimname in f.dimensions.keys():
141        nc_dim = f.dimensions[nc_dimname]
142        if nc_dim.isunlimited():
143            nc_out.createDimension(nc_dim.name)
144        else:
145            nc_out.createDimension(nc_dim.name, nc_dim.size)
146
147    # Read in fields of scalar products of model diagnostics and
148    # regressors and compute the regression coefficients for the current
149    # field; add resulting fields to output file
150    for fld in flds:
151        print('Completing analysis for field '+fld+' ...')
152        xy = np.array([])
153        for reg in regs:
154            if xy.size == 0:
155                xy = np.sum(f.variables[fld+'.'+reg][:],
156                            axis=0)[np.newaxis,:]
157            else:
158                xy = np.r_[xy, np.sum(f.variables[fld+'.'+reg][:],
159                                      axis=0)[np.newaxis,:]]
160        b=np.reshape(np.dot(ixx, np.reshape(xy,(len(xy),xy[0,:].size))),
161                     (xy.shape))
162        print('   ... done')
163
164        for reg in regs:
165            nr = regs.index(reg)
166            nc_gridvar = f.variables[fld+'.'+reg]
167            name = nc_gridvar.name.split('.')
168            nc_var = nc_out.createVariable(name[0]+'-'+name[1],
169                                           nc_gridvar.datatype,
170                                           nc_gridvar.dimensions)
171            for nc_att in nc_gridvar.ncattrs():
172                if nc_att in ['_FillValue', 'missing_value']:
173                    nc_var.setncattr(nc_att, nc_gridvar.getncattr(
174                        nc_att))
175            name = nc_gridvar.getncattr('standard_name').split('.')
176            nc_var.setncattr(
177                'standard_name', name[0]+' regressed on '+name[1])
178            nc_var[0,:] = b[nr,:].data
179
180    # Close output file; close field-regressor scalar-product data set
181    nc_out.close()
182    f.close()
183
184# ----------------------------------------------------------------------
185#                     ***  main PROGRAM  ***
186# ----------------------------------------------------------------------
187if __name__ == "__main__":
188
189    main()
Note: See TracBrowser for help on using the repository browser.