41 | | ''...'' |
| 41 | '''Stage 1 details''' |
| 42 | |
| 43 | Stage 1 involves gathering all SETTE input files and systematically checking contents for redundancy and opportunities for compression. Where chunking and compression have already been applied, it is also important to check the current settings for validity. Here is a typical example from the {{{AGRIF_DEMO_v4.x.tar}}} set: |
| 44 | |
| 45 | {{{ |
| 46 | nccnkrpt ORCA_R2_zps_domcfg_agrif.nc |
| 47 | |
| 48 | t = UNLIMITED ; // (1 currently) |
| 49 | x = 180 ; |
| 50 | y = 148 ; |
| 51 | z = 31 ; |
| 52 | double time_counter(t) _ChunkSizes = 512 ; |
| 53 | double glamt(t, y, x) _ChunkSizes = 4, 148, 180 ; |
| 54 | double glamu(t, y, x) _ChunkSizes = 4, 148, 180 ; |
| 55 | double glamv(t, y, x) _ChunkSizes = 4, 148, 180 ; |
| 56 | double glamf(t, y, x) _ChunkSizes = 4, 148, 180 ; |
| 57 | double gphit(t, y, x) _ChunkSizes = 4, 148, 180 ; |
| 58 | double gphiu(t, y, x) _ChunkSizes = 4, 148, 180 ; |
| 59 | double gphiv(t, y, x) _ChunkSizes = 4, 148, 180 ; |
| 60 | double gphif(t, y, x) _ChunkSizes = 4, 148, 180 ; |
| 61 | double e1t(t, y, x) _ChunkSizes = 4, 148, 180 ; |
| 62 | double e1u(t, y, x) _ChunkSizes = 4, 148, 180 ; |
| 63 | double e1v(t, y, x) _ChunkSizes = 4, 148, 180 ; |
| 64 | double e1f(t, y, x) _ChunkSizes = 4, 148, 180 ; |
| 65 | double e2t(t, y, x) _ChunkSizes = 4, 148, 180 ; |
| 66 | double e2u(t, y, x) _ChunkSizes = 4, 148, 180 ; |
| 67 | double e2v(t, y, x) _ChunkSizes = 4, 148, 180 ; |
| 68 | double e2f(t, y, x) _ChunkSizes = 4, 148, 180 ; |
| 69 | double ff_f(t, y, x) _ChunkSizes = 4, 148, 180 ; |
| 70 | double ff_t(t, y, x) _ChunkSizes = 4, 148, 180 ; |
| 71 | double e3t_1d(t, z) _ChunkSizes = 1, 31 ; |
| 72 | double e3w_1d(t, z) _ChunkSizes = 1, 31 ; |
| 73 | double e3t_0(t, z, y, x) _ChunkSizes = 1, 31, 148, 180 ; |
| 74 | double e3u_0(t, z, y, x) _ChunkSizes = 1, 31, 148, 180 ; |
| 75 | double e3v_0(t, z, y, x) _ChunkSizes = 1, 31, 148, 180 ; |
| 76 | double e3f_0(t, z, y, x) _ChunkSizes = 1, 31, 148, 180 ; |
| 77 | double e3w_0(t, z, y, x) _ChunkSizes = 1, 31, 148, 180 ; |
| 78 | double e3uw_0(t, z, y, x) _ChunkSizes = 1, 31, 148, 180 ; |
| 79 | double e3vw_0(t, z, y, x) _ChunkSizes = 1, 31, 148, 180 ; |
| 80 | int bottom_level(t, y, x) _ChunkSizes = 6, 148, 180 ; |
| 81 | int top_level(t, y, x) _ChunkSizes = 6, 148, 180 ; |
| 82 | float bathy_metry(t, y, x) _ChunkSizes = 6, 148, 180 ; |
| 83 | }}} |
| 84 | where {{{nccnkrpt}}} is my bash function defined as: |
| 85 | {{{ |
| 86 | function nccnkrpt { ncks --cdl -m ${1} | grep '=' ; ncdump -s -h ${1} | grep -e ") ;" -e _ChunkSizes | sed -e 's/.*:/\t/' | sed -e ':x /) ;$/ { N; s/;\n//g ; bx }' ; } |
| 87 | }}} |
| 88 | which condenses the verbose {{{ncdump -s -h}}} output into a more digestible form. In this example the dataset has already been chunked and compressed, but the chunk sizes are an odd choice. Having chunk sizes which span the entire dataset will restrict future scalability, since any access to these data will require reading and uncompressing these large chunks irrespective of the size of the calling domain. A chunk size greater than 1 for the t dimension is also wasteful and confusing given that the file only contains a single time-level. |
| 89 | |
| 90 | The 'right' choice for chunk sizes is somewhat arbitrary but, given our exascale ambitions of efficient performance with processor domains of O(10x10) in size, a target chunk size of around 64x64 would seem a reasonable compromise. Chunk sizes which are too small will compromise compressibility and require more chunk meta-data in the file. Sizes which are too large will affect scalability and cause unnecessary delays at start-up. For the ORCA2 domain a chunk size of 60x50 is chosen since this also avoids any underpopulated chunks. For the vertical dimension, I have used a chunk size of 4, but there is probably little gain here since the volume data are always read as a whole and a full-depth chunk is equally appropriate. Breaking the vertical dimension into smaller chunks, however, may help other applications that use the domain configuration file and only wish to select specific levels. |
| 91 | |
| 92 | All the other files in the original AGRIF_DEMO set are in classic NetCDF3 format and therefore unchunked and uncompressed. All these files have been converted to NetCDF4 with suitable chunk-size choices. The actual sizes vary slightly for each of the 3 AGRIF-level sets so as to avoid under-populated chunks. |
| 93 | |
| 94 | || ORCA_R2_zps_domcfg_agrif.nc || 1_ORCA_R2_zps_domcfg_agrif.nc || 2_ORCA_R05_zps_domcfg_agrif.nc || 3_ORCA_R017_zps_domcfg_agrif.nc || |
| 95 | ||t = UNLIMITED ( 1 ) ||t = UNLIMITED ( 1 ) ||t = UNLIMITED ( 1 ) ||t = UNLIMITED ( 1 ) || |
| 96 | ||x = 180 ||x = 48 ||x = 132 ||x = 134 || |
| 97 | ||y = 148 ||y = 50 ||y = 140 ||y = 128 || |
| 98 | ||z = 31 ||z = 31 ||z = 31 ||z = 31 || |
| 99 | |
| 100 | The complete set of {{{ncks}}} commands used to create the {{{AGRIF_DEMO_r4.2_RC_FULL}}} input set (the {{{r4.2_RC_FULL}}} version) from a copy of {{{AGRIF_DEMO_v4.x}}} is: |
| 101 | |
| 102 | {{{ |
| 103 | # AGRIFcmds |
| 104 | #### Mother grid #### |
| 105 | # |
| 106 | ncks --no_abc --cnk_plc='xpl' --cnk_dmn t,1 --cnk_dmn x,60 --cnk_dmn y,50 --cnk_dmn z,4 ORCA_R2_zps_domcfg_agrif.nc new_ORCA_R2_zps_domcfg_agrif.nc |
| 107 | mv new_ORCA_R2_zps_domcfg_agrif.nc ORCA_R2_zps_domcfg_agrif.nc |
| 108 | # |
| 109 | #### Nest level 1 #### |
| 110 | # |
| 111 | for f in 1_chlorophyll.nc 1_geothermal_heating.nc 1_runoff_core_monthly.nc 1_sss_data.nc |
| 112 | do |
| 113 | ncks --no_abc --4 --dfl_lvl 3 --cnk_plc='xpl' --cnk_dmn time_counter,1 --cnk_dmn x,48 --cnk_dmn y,50 $f new_$f |
| 114 | done |
| 115 | # |
| 116 | for f in 1_data_1m_potential_temperature_nomask.nc 1_data_1m_salinity_nomask.nc |
| 117 | do |
| 118 | ncks --no_abc --4 --dfl_lvl 3 --cnk_plc='xpl' --cnk_dmn time_counter,1 --cnk_dmn x,48 --cnk_dmn y,50 --cnk_dmn z,4 $f new_$f |
| 119 | done |
| 120 | # |
| 121 | for f in 1_eddy_viscosity_3D.nc 1_ORCA_R2_zps_domcfg_agrif.nc |
| 122 | do |
| 123 | ncks --no_abc --4 --dfl_lvl 3 --cnk_plc='xpl' --cnk_dmn t,1 --cnk_dmn x,48 --cnk_dmn y,50 --cnk_dmn z,4 $f new_$f |
| 124 | done |
| 125 | # |
| 126 | for f in 1_resto.nc |
| 127 | do |
| 128 | ncks --no_abc --4 --dfl_lvl 3 --cnk_plc='xpl' --cnk_dmn x,48 --cnk_dmn y,50 --cnk_dmn z,4 $f new_$f |
| 129 | done |
| 130 | # |
| 131 | for f in 1_weights_core_orca2_bicubic_noc.nc 1_weights_core_orca2_bilinear_noc.nc |
| 132 | do |
| 133 | ncks --no_abc --4 --dfl_lvl 3 --cnk_plc='xpl' --cnk_dmn lon,48 --cnk_dmn lat,50 $f new_$f |
| 134 | done |
| 135 | # |
| 136 | #### Nest level 2 #### |
| 137 | # |
| 138 | for f in 2_chlorophyll.nc 2_geothermal_heating.nc |
| 139 | do |
| 140 | ncks --no_abc --4 --dfl_lvl 3 --cnk_plc='xpl' --cnk_dmn time_counter,1 --cnk_dmn x,66 --cnk_dmn y,70 $f new_$f |
| 141 | done |
| 142 | # |
| 143 | for f in 2_data_1m_potential_temperature_nomask.nc 2_data_1m_salinity_nomask.nc |
| 144 | do |
| 145 | ncks --no_abc --4 --dfl_lvl 3 --cnk_plc='xpl' --cnk_dmn time_counter,1 --cnk_dmn x,66 --cnk_dmn y,70 --cnk_dmn z,4 $f new_$f |
| 146 | done |
| 147 | # |
| 148 | for f in 2_ORCA_R05_zps_domcfg_agrif.nc |
| 149 | do |
| 150 | ncks --no_abc --4 --dfl_lvl 3 --cnk_plc='xpl' --cnk_dmn t,1 --cnk_dmn x,66 --cnk_dmn y,70 --cnk_dmn z,4 $f new_$f |
| 151 | done |
| 152 | # |
| 153 | for f in 2_weights_core2_nordic1_bicub.nc 2_weights_core2_nordic1_bilin.nc |
| 154 | do |
| 155 | ncks --no_abc --4 --dfl_lvl 3 --cnk_plc='xpl' --cnk_dmn lon,66 --cnk_dmn lat,70 $f new_$f |
| 156 | done |
| 157 | # |
| 158 | #### Nest level 3 #### |
| 159 | # |
| 160 | for f in 3_chlorophyll.nc 3_geothermal_heating.nc |
| 161 | do |
| 162 | ncks --no_abc --4 --dfl_lvl 3 --cnk_plc='xpl' --cnk_dmn time_counter,1 --cnk_dmn x,67 --cnk_dmn y,64 $f new_$f |
| 163 | done |
| 164 | # |
| 165 | for f in 3_data_1m_potential_temperature_nomask.nc 3_data_1m_salinity_nomask.nc |
| 166 | do |
| 167 | ncks --no_abc --4 --dfl_lvl 3 --cnk_plc='xpl' --cnk_dmn time_counter,1 --cnk_dmn x,67 --cnk_dmn y,64 --cnk_dmn z,4 $f new_$f |
| 168 | done |
| 169 | # |
| 170 | for f in 3_ORCA_R017_zps_domcfg_agrif.nc |
| 171 | do |
| 172 | ncks --no_abc --4 --dfl_lvl 3 --cnk_plc='xpl' --cnk_dmn t,1 --cnk_dmn x,67 --cnk_dmn y,64 --cnk_dmn z,4 $f new_$f |
| 173 | done |
| 174 | # |
| 175 | for f in 3_weights_core2_nordic2_bicub.nc 3_weights_core2_nordic2_bilin.nc |
| 176 | do |
| 177 | ncks --no_abc --4 --dfl_lvl 3 --cnk_plc='xpl' --cnk_dmn lon,67 --cnk_dmn lat,64 $f new_$f |
| 178 | done |
| 179 | # |
| 180 | for f in new*; do ff=${f/new_}; mv $f $ff; done |
| 181 | # |
| 182 | }}} |