80 | | Where most of the temporary, full-depth arrays are not necessary because only two vertical levels are required at any one time. In fact even the zea array is unnecessary since the zchl3d array could be repurposed once its value has been used. |
| 116 | Part 2 has obvious inefficiencies because most of the temporary, full-depth arrays are not necessary since only two vertical levels are required at any one time. In fact even the zea array is unnecessary since the zchl3d array could be repurposed once its value has been used. Recalling the chlorophyll value and converting this to a index within the second loop also seems wasteful because this conversion could have been done before the chlorophyll value was stored in part 1 and the index stored instead. |
| 117 | |
| 118 | Part 1 has even greater inefficiencies though since it repeats some computationally expensive calculations in a 3D loop even though much of it is 2D in nature. In fact the complexity of the calculations can be reduced further by operating in log-space. |
| 119 | |
| 120 | After trying a few options and iterating at the preview stage (see earlier investigations below), the final solution choose was: |
| 121 | |
| 122 | {{{!f |
| 123 | CASE( np_RGB , np_RGBc ) !== R-G-B fluxes ==! |
| 124 | ! |
| 125 | ALLOCATE( ze0 (jpi,jpj) , ze1 (jpi,jpj) , & |
| 126 | & ze2 (jpi,jpj) , ze3 (jpi,jpj) , & |
| 127 | & ztmp3d(jpi,jpj,nksr + 1) ) |
| 128 | ! |
| 129 | IF( nqsr == np_RGBc ) THEN !* Variable Chlorophyll |
| 130 | CALL fld_read( kt, 1, sf_chl ) ! Read Chl data and provides it at the current time step |
| 131 | ! |
| 132 | ! Separation in R-G-B depending on the surface Chl |
| 133 | ! perform and store as many of the 2D calculations as possible |
| 134 | ! before the 3D loop (use the temporary 2D arrays to replace the |
| 135 | ! most expensive calculations) |
| 136 | ! |
| 137 | ze0(:,:) = LOG ( MIN( 10. , MAX( 0.03, sf_chl(1)%fnow(:,:,1) ) ) ) ! ze0 = log(zchl) |
| 138 | ze1(:,:) = 0.113328685307 + 0.803 * ze0(:,:) ! ze1 : log(zCze) = log (1.12 * zchl**0.803) |
| 139 | ze2(:,:) = 3.703768066608 + 0.459 * ze0(:,:) ! ze2 : log(zCtot) = log(40.6 * zchl**0.459) |
| 140 | ze3(:,:) = 6.34247346942 - 0.746 * ze2(:,:) ! ze3 : log(zze) = log(568.2 * zCtot**(-0.746)) |
| 141 | WHERE( ze3 > 4.62497281328 ) ze3 = 5.298317366548 - 0.293 * ze2 ! IF( log(zze) > log(102.) ) log(zze) = |
| 142 | ! ! log(200.0 * zCtot**(-0.293)) |
| 143 | ze1(:,:) = EXP(ze1(:,:)) ! ze1 = zCze |
| 144 | ze2(:,:) = 1._wp / ( 0.710 + ze0(:,:) * ( 0.159 + ze0(:,:) * 0.021 ) ) ! ze2 = 1/zdelpsi |
| 145 | ze3(:,:) = 1._wp / EXP(ze3(:,:)) ! ze3 = 1/zze |
| 146 | ! |
| 147 | DO_3D_00_00 ( 1, nksr + 1 ) |
| 148 | ! zchl = ALOG( ze0(ji,jj) ) |
| 149 | zlogc = ze0(ji,jj) |
| 150 | ! |
| 151 | zCb = 0.768 + zlogc * ( 0.087 - zlogc * ( 0.179 + zlogc * 0.025 ) ) |
| 152 | zCmax = 0.299 - zlogc * ( 0.289 - zlogc * 0.579 ) |
| 153 | zpsimax = 0.6 - zlogc * ( 0.640 - zlogc * ( 0.021 + zlogc * 0.115 ) ) |
| 154 | ! zdelpsi = 0.710 + zlogc * ( 0.159 + zlogc * 0.021 ) |
| 155 | ! |
| 156 | zCze = ze1(ji,jj) |
| 157 | zrdpsi = ze2(ji,jj) ! 1/zdelpsi |
| 158 | zpsi = ze3(ji,jj) * gdepw(ji,jj,jk,Kmm) ! gdepw/zze |
| 159 | ! |
| 160 | ! NB. make sure zchl value is such that: zchl = MIN( 10. , MAX( 0.03, zchl ) ) |
| 161 | zchl = MIN( 10. , MAX( 0.03, zCze * ( zCb + zCmax * EXP( -( (zpsi - zpsimax) * zrdpsi )**2 ) ) ) ) |
| 162 | ! Convert chlorophyll value to attenuation coefficient look-up table index |
| 163 | ztmp3d(ji,jj,jk) = 41 + 20.*LOG10(zchl) + 1.e-15 |
| 164 | END_3D |
| 165 | ELSE !* constant chlorophyll |
| 166 | zchl = 0.05 |
| 167 | ! NB. make sure constant value is such that: |
| 168 | zchl = MIN( 10. , MAX( 0.03, zchl ) ) |
| 169 | ! Convert chlorophyll value to attenuation coefficient look-up table index |
| 170 | zlui = 41 + 20.*LOG10(zchl) + 1.e-15 |
| 171 | DO jk = 1, nksr + 1 |
| 172 | ztmp3d(:,:,jk) = zlui |
| 173 | END DO |
| 174 | ENDIF |
| 175 | ! |
| 176 | zcoef = ( 1. - rn_abs ) / 3._wp !* surface equi-partition in R-G-B |
| 177 | DO_2D_00_00 |
| 178 | ze0(ji,jj) = rn_abs * qsr(ji,jj) |
| 179 | ze1(ji,jj) = zcoef * qsr(ji,jj) |
| 180 | ze2(ji,jj) = zcoef * qsr(ji,jj) |
| 181 | ze3(ji,jj) = zcoef * qsr(ji,jj) |
| 182 | ! store the surface SW radiation; re-use the surface ztmp3d array |
| 183 | ! since the surface attenuation coefficient is not used |
| 184 | ztmp3d(ji,jj,1) = qsr(ji,jj) |
| 185 | END_2D |
| 186 | ! |
| 187 | !* interior equi-partition in R-G-B depending on vertical profile of Chl |
| 188 | DO_3D_00_00 ( 2, nksr + 1 ) |
| 189 | ze3t = e3t(ji,jj,jk-1,Kmm) |
| 190 | irgb = NINT( ztmp3d(ji,jj,jk) ) |
| 191 | zc0 = ze0(ji,jj) * EXP( - ze3t * xsi0r ) |
| 192 | zc1 = ze1(ji,jj) * EXP( - ze3t * rkrgb(1,irgb) ) |
| 193 | zc2 = ze2(ji,jj) * EXP( - ze3t * rkrgb(2,irgb) ) |
| 194 | zc3 = ze3(ji,jj) * EXP( - ze3t * rkrgb(3,irgb) ) |
| 195 | ze0(ji,jj) = zc0 |
| 196 | ze1(ji,jj) = zc1 |
| 197 | ze2(ji,jj) = zc2 |
| 198 | ze3(ji,jj) = zc3 |
| 199 | ztmp3d(ji,jj,jk) = ( zc0 + zc1 + zc2 + zc3 ) * wmask(ji,jj,jk) |
| 200 | END_3D |
| 201 | ! |
| 202 | DO_3D_00_00( 1, nksr ) |
| 203 | qsr_hc(ji,jj,jk) = r1_rho0_rcp * ( ztmp3d(ji,jj,jk) - ztmp3d(ji,jj,jk+1) ) |
| 204 | END_3D |
| 205 | ! |
| 206 | DEALLOCATE( ze0 , ze1 , ze2 , ze3 , ztmp3d ) |
| 207 | }}} |
| 208 | |
| 209 | where the difference from the trunk is: |
| 210 | {{{#!diff |
| 211 | --- traqsr.F90 (revision 12954) |
| 212 | +++ traqsr.F90 (working copy) |
| 213 | @@ -109,12 +109,11 @@ |
| 214 | REAL(wp) :: zchl, zcoef, z1_2 ! local scalars |
| 215 | REAL(wp) :: zc0 , zc1 , zc2 , zc3 ! - - |
| 216 | REAL(wp) :: zzc0, zzc1, zzc2, zzc3 ! - - |
| 217 | - REAL(wp) :: zz0 , zz1 ! - - |
| 218 | - REAL(wp) :: zCb, zCmax, zze, zpsi, zpsimax, zdelpsi, zCtot, zCze |
| 219 | - REAL(wp) :: zlogc, zlogc2, zlogc3 |
| 220 | - REAL(wp), ALLOCATABLE, DIMENSION(:,:) :: zekb, zekg, zekr |
| 221 | - REAL(wp), ALLOCATABLE, DIMENSION(:,:,:) :: ze0, ze1, ze2, ze3, zea, ztrdt |
| 222 | - REAL(wp), ALLOCATABLE, DIMENSION(:,:,:) :: zetot, zchl3d |
| 223 | + REAL(wp) :: zz0 , zz1 , ze3t, zlui ! - - |
| 224 | + REAL(wp) :: zCb, zCmax, zpsi, zpsimax, zrdpsi, zCze |
| 225 | + REAL(wp) :: zlogc, zlogze, zlogCtot, zlogCze |
| 226 | + REAL(wp), ALLOCATABLE, DIMENSION(:,:) :: ze0, ze1, ze2, ze3 |
| 227 | + REAL(wp), ALLOCATABLE, DIMENSION(:,:,:) :: ztrdt, zetot, ztmp3d |
| 228 | !!---------------------------------------------------------------------- |
| 229 | ! |
| 230 | IF( ln_timing ) CALL timing_start('tra_qsr') |
| 231 | @@ -159,77 +158,88 @@ |
| 232 | ! |
| 233 | CASE( np_RGB , np_RGBc ) !== R-G-B fluxes ==! |
| 234 | ! |
| 235 | - ALLOCATE( zekb(jpi,jpj) , zekg(jpi,jpj) , zekr (jpi,jpj) , & |
| 236 | - & ze0 (jpi,jpj,jpk) , ze1 (jpi,jpj,jpk) , ze2 (jpi,jpj,jpk) , & |
| 237 | - & ze3 (jpi,jpj,jpk) , zea (jpi,jpj,jpk) , zchl3d(jpi,jpj,jpk) ) |
| 238 | + ALLOCATE( ze0 (jpi,jpj) , ze1 (jpi,jpj) , & |
| 239 | + & ze2 (jpi,jpj) , ze3 (jpi,jpj) , & |
| 240 | + & ztmp3d(jpi,jpj,nksr + 1) ) |
| 241 | ! |
| 242 | IF( nqsr == np_RGBc ) THEN !* Variable Chlorophyll |
| 243 | CALL fld_read( kt, 1, sf_chl ) ! Read Chl data and provides it at the current time step |
| 244 | + ! |
| 245 | + ! Separation in R-G-B depending on the surface Chl |
| 246 | + ! perform and store as many of the 2D calculations as possible |
| 247 | + ! before the 3D loop (use the temporary 2D arrays to replace the |
| 248 | + ! most expensive calculations) |
| 249 | + ! |
| 250 | + ze0(:,:) = LOG ( MIN( 10. , MAX( 0.03, sf_chl(1)%fnow(:,:,1) ) ) ) ! ze0 = log(zchl) |
| 251 | + ze1(:,:) = 0.113328685307 + 0.803 * ze0(:,:) ! ze1 : log(zCze) = log (1.12 * zchl**0.803) |
| 252 | + ze2(:,:) = 3.703768066608 + 0.459 * ze0(:,:) ! ze2 : log(zCtot) = log(40.6 * zchl**0.459) |
| 253 | + ze3(:,:) = 6.34247346942 - 0.746 * ze2(:,:) ! ze3 : log(zze) = log(568.2 * zCtot**(-0.746)) |
| 254 | + WHERE( ze3 > 4.62497281328 ) ze3 = 5.298317366548 - 0.293 * ze2 ! IF( log(zze) > log(102.) ) log(zze) = |
| 255 | + ! ! log(200.0 * zCtot**(-0.293)) |
| 256 | + ze1(:,:) = EXP(ze1(:,:)) ! ze1 = zCze |
| 257 | + ze2(:,:) = 1._wp / ( 0.710 + ze0(:,:) * ( 0.159 + ze0(:,:) * 0.021 ) ) ! ze2 = 1/zdelpsi |
| 258 | + ze3(:,:) = 1._wp / EXP(ze3(:,:)) ! ze3 = 1/zze |
| 259 | + ! |
| 260 | + DO_3D_00_00 ( 1, nksr + 1 ) |
| 261 | + ! zchl = ALOG( ze0(ji,jj) ) |
| 262 | + zlogc = ze0(ji,jj) |
| 263 | + ! |
| 264 | + zCb = 0.768 + zlogc * ( 0.087 - zlogc * ( 0.179 + zlogc * 0.025 ) ) |
| 265 | + zCmax = 0.299 - zlogc * ( 0.289 - zlogc * 0.579 ) |
| 266 | + zpsimax = 0.6 - zlogc * ( 0.640 - zlogc * ( 0.021 + zlogc * 0.115 ) ) |
| 267 | + ! zdelpsi = 0.710 + zlogc * ( 0.159 + zlogc * 0.021 ) |
| 268 | + ! |
| 269 | + zCze = ze1(ji,jj) |
| 270 | + zrdpsi = ze2(ji,jj) ! 1/zdelpsi |
| 271 | + zpsi = ze3(ji,jj) * gdepw(ji,jj,jk,Kmm) ! gdepw/zze |
| 272 | + ! |
| 273 | + ! NB. make sure zchl value is such that: zchl = MIN( 10. , MAX( 0.03, zchl ) ) |
| 274 | + zchl = MIN( 10. , MAX( 0.03, zCze * ( zCb + zCmax * EXP( -( (zpsi - zpsimax) * zrdpsi )**2 ) ) ) ) |
| 275 | + ! Convert chlorophyll value to attenuation coefficient look-up table index |
| 276 | + ztmp3d(ji,jj,jk) = 41 + 20.*LOG10(zchl) + 1.e-15 |
| 277 | + END_3D |
| 278 | + ELSE !* constant chlorophyll |
| 279 | + zchl = 0.05 |
| 280 | + ! NB. make sure constant value is such that: |
| 281 | + zchl = MIN( 10. , MAX( 0.03, zchl ) ) |
| 282 | + ! Convert chlorophyll value to attenuation coefficient look-up table index |
| 283 | + zlui = 41 + 20.*LOG10(zchl) + 1.e-15 |
| 284 | DO jk = 1, nksr + 1 |
| 285 | - DO jj = 2, jpjm1 ! Separation in R-G-B depending of the surface Chl |
| 286 | - DO ji = 2, jpim1 |
| 287 | - zchl = MIN( 10. , MAX( 0.03, sf_chl(1)%fnow(ji,jj,1) ) ) |
| 288 | - zCtot = 40.6 * zchl**0.459 |
| 289 | - zze = 568.2 * zCtot**(-0.746) |
| 290 | - IF( zze > 102. ) zze = 200.0 * zCtot**(-0.293) |
| 291 | - zpsi = gdepw(ji,jj,jk,Kmm) / zze |
| 292 | - ! |
| 293 | - zlogc = LOG( zchl ) |
| 294 | - zlogc2 = zlogc * zlogc |
| 295 | - zlogc3 = zlogc * zlogc * zlogc |
| 296 | - zCb = 0.768 + 0.087 * zlogc - 0.179 * zlogc2 - 0.025 * zlogc3 |
| 297 | - zCmax = 0.299 - 0.289 * zlogc + 0.579 * zlogc2 |
| 298 | - zpsimax = 0.6 - 0.640 * zlogc + 0.021 * zlogc2 + 0.115 * zlogc3 |
| 299 | - zdelpsi = 0.710 + 0.159 * zlogc + 0.021 * zlogc2 |
| 300 | - zCze = 1.12 * (zchl)**0.803 |
| 301 | - ! |
| 302 | - zchl3d(ji,jj,jk) = zCze * ( zCb + zCmax * EXP( -( (zpsi - zpsimax) / zdelpsi )**2 ) ) |
| 303 | - END DO |
| 304 | - ! |
| 305 | - END DO |
| 306 | + ztmp3d(:,:,jk) = zlui |
| 307 | END DO |
| 308 | - ELSE !* constant chrlorophyll |
| 309 | - DO jk = 1, nksr + 1 |
| 310 | - zchl3d(:,:,jk) = 0.05 |
| 311 | - ENDDO |
| 312 | ENDIF |
| 313 | ! |
| 314 | zcoef = ( 1. - rn_abs ) / 3._wp !* surface equi-partition in R-G-B |
| 315 | DO_2D_00_00 |
| 316 | - ze0(ji,jj,1) = rn_abs * qsr(ji,jj) |
| 317 | - ze1(ji,jj,1) = zcoef * qsr(ji,jj) |
| 318 | - ze2(ji,jj,1) = zcoef * qsr(ji,jj) |
| 319 | - ze3(ji,jj,1) = zcoef * qsr(ji,jj) |
| 320 | - zea(ji,jj,1) = qsr(ji,jj) |
| 321 | + ze0(ji,jj) = rn_abs * qsr(ji,jj) |
| 322 | + ze1(ji,jj) = zcoef * qsr(ji,jj) |
| 323 | + ze2(ji,jj) = zcoef * qsr(ji,jj) |
| 324 | + ze3(ji,jj) = zcoef * qsr(ji,jj) |
| 325 | + ! store the surface SW radiation; re-use the surface ztmp3d array |
| 326 | + ! since the surface attenuation coefficient is not used |
| 327 | + ztmp3d(ji,jj,1) = qsr(ji,jj) |
| 328 | END_2D |
| 329 | ! |
| 330 | - DO jk = 2, nksr+1 !* interior equi-partition in R-G-B depending of vertical profile of Chl |
| 331 | - DO_2D_00_00 |
| 332 | - zchl = MIN( 10. , MAX( 0.03, zchl3d(ji,jj,jk) ) ) |
| 333 | - irgb = NINT( 41 + 20.*LOG10(zchl) + 1.e-15 ) |
| 334 | - zekb(ji,jj) = rkrgb(1,irgb) |
| 335 | - zekg(ji,jj) = rkrgb(2,irgb) |
| 336 | - zekr(ji,jj) = rkrgb(3,irgb) |
| 337 | - END_2D |
| 338 | - |
| 339 | - DO_2D_00_00 |
| 340 | - zc0 = ze0(ji,jj,jk-1) * EXP( - e3t(ji,jj,jk-1,Kmm) * xsi0r ) |
| 341 | - zc1 = ze1(ji,jj,jk-1) * EXP( - e3t(ji,jj,jk-1,Kmm) * zekb(ji,jj) ) |
| 342 | - zc2 = ze2(ji,jj,jk-1) * EXP( - e3t(ji,jj,jk-1,Kmm) * zekg(ji,jj) ) |
| 343 | - zc3 = ze3(ji,jj,jk-1) * EXP( - e3t(ji,jj,jk-1,Kmm) * zekr(ji,jj) ) |
| 344 | - ze0(ji,jj,jk) = zc0 |
| 345 | - ze1(ji,jj,jk) = zc1 |
| 346 | - ze2(ji,jj,jk) = zc2 |
| 347 | - ze3(ji,jj,jk) = zc3 |
| 348 | - zea(ji,jj,jk) = ( zc0 + zc1 + zc2 + zc3 ) * wmask(ji,jj,jk) |
| 349 | - END_2D |
| 350 | - END DO |
| 351 | + !* interior equi-partition in R-G-B depending on vertical profile of Chl |
| 352 | + DO_3D_00_00 ( 2, nksr + 1 ) |
| 353 | + ze3t = e3t(ji,jj,jk-1,Kmm) |
| 354 | + irgb = NINT( ztmp3d(ji,jj,jk) ) |
| 355 | + zc0 = ze0(ji,jj) * EXP( - ze3t * xsi0r ) |
| 356 | + zc1 = ze1(ji,jj) * EXP( - ze3t * rkrgb(1,irgb) ) |
| 357 | + zc2 = ze2(ji,jj) * EXP( - ze3t * rkrgb(2,irgb) ) |
| 358 | + zc3 = ze3(ji,jj) * EXP( - ze3t * rkrgb(3,irgb) ) |
| 359 | + ze0(ji,jj) = zc0 |
| 360 | + ze1(ji,jj) = zc1 |
| 361 | + ze2(ji,jj) = zc2 |
| 362 | + ze3(ji,jj) = zc3 |
| 363 | + ztmp3d(ji,jj,jk) = ( zc0 + zc1 + zc2 + zc3 ) * wmask(ji,jj,jk) |
| 364 | + END_3D |
| 365 | ! |
| 366 | DO_3D_00_00( 1, nksr ) |
| 367 | - qsr_hc(ji,jj,jk) = r1_rho0_rcp * ( zea(ji,jj,jk) - zea(ji,jj,jk+1) ) |
| 368 | + qsr_hc(ji,jj,jk) = r1_rho0_rcp * ( ztmp3d(ji,jj,jk) - ztmp3d(ji,jj,jk+1) ) |
| 369 | END_3D |
| 370 | ! |
| 371 | - DEALLOCATE( zekb , zekg , zekr , ze0 , ze1 , ze2 , ze3 , zea , zchl3d ) |
| 372 | + DEALLOCATE( ze0 , ze1 , ze2 , ze3 , ztmp3d ) |
| 373 | ! |
| 374 | CASE( np_2BD ) !== 2-bands fluxes ==! |
| 375 | ! |
| 376 | }}} |
| 377 | |
| 378 | === Earlier Investigations |