| 253 | == Option 2 revisited == |
| 254 | |
| 255 | Following discussions with the previewer, it was decided that low-memory option should be the best approach but the slight deterioration in performance over the original code may be down to the over-zealous replacement of temporary scalars within the second 3D loop. On reflection there are also opportunities to reduce the number of floating point operations and load and store instructions within the first 3D loop. Here is the final set of differences between this improved low-memory solution and the original traqsr.F90: |
| 256 | |
| 257 | {{{#!diff |
| 258 | *** ORG/traqsr.F90 2020-05-13 11:37:57.094258396 +0100 |
| 259 | --- traqsr.F90 2020-05-15 14:48:00.138206859 +0100 |
| 260 | *************** |
| 261 | *** 109,120 **** |
| 262 | REAL(wp) :: zchl, zcoef, z1_2 ! local scalars |
| 263 | REAL(wp) :: zc0 , zc1 , zc2 , zc3 ! - - |
| 264 | REAL(wp) :: zzc0, zzc1, zzc2, zzc3 ! - - |
| 265 | ! REAL(wp) :: zz0 , zz1 ! - - |
| 266 | REAL(wp) :: zCb, zCmax, zze, zpsi, zpsimax, zdelpsi, zCtot, zCze |
| 267 | ! REAL(wp) :: zlogc, zlogc2, zlogc3 |
| 268 | ! REAL(wp), ALLOCATABLE, DIMENSION(:,:) :: zekb, zekg, zekr |
| 269 | ! REAL(wp), ALLOCATABLE, DIMENSION(:,:,:) :: ze0, ze1, ze2, ze3, zea, ztrdt |
| 270 | ! REAL(wp), ALLOCATABLE, DIMENSION(:,:,:) :: zetot, zchl3d |
| 271 | !!---------------------------------------------------------------------- |
| 272 | ! |
| 273 | IF( ln_timing ) CALL timing_start('tra_qsr') |
| 274 | --- 109,119 ---- |
| 275 | REAL(wp) :: zchl, zcoef, z1_2 ! local scalars |
| 276 | REAL(wp) :: zc0 , zc1 , zc2 , zc3 ! - - |
| 277 | REAL(wp) :: zzc0, zzc1, zzc2, zzc3 ! - - |
| 278 | ! REAL(wp) :: zz0 , zz1 , ze3t, zlui ! - - |
| 279 | REAL(wp) :: zCb, zCmax, zze, zpsi, zpsimax, zdelpsi, zCtot, zCze |
| 280 | ! REAL(wp) :: zlogc |
| 281 | ! REAL(wp), ALLOCATABLE, DIMENSION(:,:) :: ze0, ze1, ze2, ze3 |
| 282 | ! REAL(wp), ALLOCATABLE, DIMENSION(:,:,:) :: ztrdt, zetot, ztmp3d |
| 283 | !!---------------------------------------------------------------------- |
| 284 | ! |
| 285 | IF( ln_timing ) CALL timing_start('tra_qsr') |
| 286 | *************** |
| 287 | *** 159,235 **** |
| 288 | ! |
| 289 | CASE( np_RGB , np_RGBc ) !== R-G-B fluxes ==! |
| 290 | ! |
| 291 | ! ALLOCATE( zekb(jpi,jpj) , zekg(jpi,jpj) , zekr (jpi,jpj) , & |
| 292 | ! & ze0 (jpi,jpj,jpk) , ze1 (jpi,jpj,jpk) , ze2 (jpi,jpj,jpk) , & |
| 293 | ! & ze3 (jpi,jpj,jpk) , zea (jpi,jpj,jpk) , zchl3d(jpi,jpj,jpk) ) |
| 294 | ! |
| 295 | IF( nqsr == np_RGBc ) THEN !* Variable Chlorophyll |
| 296 | CALL fld_read( kt, 1, sf_chl ) ! Read Chl data and provides it at the current time step |
| 297 | DO jk = 1, nksr + 1 |
| 298 | ! DO jj = 2, jpjm1 ! Separation in R-G-B depending of the surface Chl |
| 299 | ! DO ji = 2, jpim1 |
| 300 | ! zchl = MIN( 10. , MAX( 0.03, sf_chl(1)%fnow(ji,jj,1) ) ) |
| 301 | ! zCtot = 40.6 * zchl**0.459 |
| 302 | ! zze = 568.2 * zCtot**(-0.746) |
| 303 | ! IF( zze > 102. ) zze = 200.0 * zCtot**(-0.293) |
| 304 | ! zpsi = gdepw(ji,jj,jk,Kmm) / zze |
| 305 | ! ! |
| 306 | ! zlogc = LOG( zchl ) |
| 307 | ! zlogc2 = zlogc * zlogc |
| 308 | ! zlogc3 = zlogc * zlogc * zlogc |
| 309 | ! zCb = 0.768 + 0.087 * zlogc - 0.179 * zlogc2 - 0.025 * zlogc3 |
| 310 | ! zCmax = 0.299 - 0.289 * zlogc + 0.579 * zlogc2 |
| 311 | ! zpsimax = 0.6 - 0.640 * zlogc + 0.021 * zlogc2 + 0.115 * zlogc3 |
| 312 | ! zdelpsi = 0.710 + 0.159 * zlogc + 0.021 * zlogc2 |
| 313 | ! zCze = 1.12 * (zchl)**0.803 |
| 314 | ! ! |
| 315 | ! zchl3d(ji,jj,jk) = zCze * ( zCb + zCmax * EXP( -( (zpsi - zpsimax) / zdelpsi )**2 ) ) |
| 316 | ! END DO |
| 317 | ! ! |
| 318 | ! END DO |
| 319 | END DO |
| 320 | - ELSE !* constant chrlorophyll |
| 321 | - DO jk = 1, nksr + 1 |
| 322 | - zchl3d(:,:,jk) = 0.05 |
| 323 | - ENDDO |
| 324 | ENDIF |
| 325 | ! |
| 326 | zcoef = ( 1. - rn_abs ) / 3._wp !* surface equi-partition in R-G-B |
| 327 | DO_2D_00_00 |
| 328 | ! ze0(ji,jj,1) = rn_abs * qsr(ji,jj) |
| 329 | ! ze1(ji,jj,1) = zcoef * qsr(ji,jj) |
| 330 | ! ze2(ji,jj,1) = zcoef * qsr(ji,jj) |
| 331 | ! ze3(ji,jj,1) = zcoef * qsr(ji,jj) |
| 332 | ! zea(ji,jj,1) = qsr(ji,jj) |
| 333 | END_2D |
| 334 | ! |
| 335 | ! DO jk = 2, nksr+1 !* interior equi-partition in R-G-B depending of vertical profile of Chl |
| 336 | ! DO_2D_00_00 |
| 337 | ! zchl = MIN( 10. , MAX( 0.03, zchl3d(ji,jj,jk) ) ) |
| 338 | ! irgb = NINT( 41 + 20.*LOG10(zchl) + 1.e-15 ) |
| 339 | ! zekb(ji,jj) = rkrgb(1,irgb) |
| 340 | ! zekg(ji,jj) = rkrgb(2,irgb) |
| 341 | ! zekr(ji,jj) = rkrgb(3,irgb) |
| 342 | ! END_2D |
| 343 | ! |
| 344 | ! DO_2D_00_00 |
| 345 | ! zc0 = ze0(ji,jj,jk-1) * EXP( - e3t(ji,jj,jk-1,Kmm) * xsi0r ) |
| 346 | ! zc1 = ze1(ji,jj,jk-1) * EXP( - e3t(ji,jj,jk-1,Kmm) * zekb(ji,jj) ) |
| 347 | ! zc2 = ze2(ji,jj,jk-1) * EXP( - e3t(ji,jj,jk-1,Kmm) * zekg(ji,jj) ) |
| 348 | ! zc3 = ze3(ji,jj,jk-1) * EXP( - e3t(ji,jj,jk-1,Kmm) * zekr(ji,jj) ) |
| 349 | ! ze0(ji,jj,jk) = zc0 |
| 350 | ! ze1(ji,jj,jk) = zc1 |
| 351 | ! ze2(ji,jj,jk) = zc2 |
| 352 | ! ze3(ji,jj,jk) = zc3 |
| 353 | ! zea(ji,jj,jk) = ( zc0 + zc1 + zc2 + zc3 ) * wmask(ji,jj,jk) |
| 354 | ! END_2D |
| 355 | ! END DO |
| 356 | ! |
| 357 | DO_3D_00_00( 1, nksr ) |
| 358 | ! qsr_hc(ji,jj,jk) = r1_rho0_rcp * ( zea(ji,jj,jk) - zea(ji,jj,jk+1) ) |
| 359 | END_3D |
| 360 | ! |
| 361 | ! DEALLOCATE( zekb , zekg , zekr , ze0 , ze1 , ze2 , ze3 , zea , zchl3d ) |
| 362 | ! |
| 363 | CASE( np_2BD ) !== 2-bands fluxes ==! |
| 364 | ! |
| 365 | --- 158,232 ---- |
| 366 | ! |
| 367 | CASE( np_RGB , np_RGBc ) !== R-G-B fluxes ==! |
| 368 | ! |
| 369 | ! ALLOCATE( ze0 (jpi,jpj) , ze1 (jpi,jpj) , & |
| 370 | ! & ze2 (jpi,jpj) , ze3 (jpi,jpj) , & |
| 371 | ! & ztmp3d(jpi,jpj,nksr + 1) ) |
| 372 | ! |
| 373 | IF( nqsr == np_RGBc ) THEN !* Variable Chlorophyll |
| 374 | CALL fld_read( kt, 1, sf_chl ) ! Read Chl data and provides it at the current time step |
| 375 | + ! Separation in R-G-B depending of the surface Chl |
| 376 | + DO_3D_00_00 ( 1, nksr + 1 ) |
| 377 | + zchl = MIN( 10. , MAX( 0.03, sf_chl(1)%fnow(ji,jj,1) ) ) |
| 378 | + zCze = 1.12 * (zchl)**0.803 |
| 379 | + zCtot = 40.6 * zchl**0.459 |
| 380 | + zlogc = LOG( zchl ) |
| 381 | + ! |
| 382 | + zCb = 0.768 + zlogc * ( 0.087 - zlogc * ( 0.179 + zlogc * 0.025 ) ) |
| 383 | + zCmax = 0.299 - zlogc * ( 0.289 - zlogc * 0.579 ) |
| 384 | + zpsimax = 0.6 - zlogc * ( 0.640 - zlogc * ( 0.021 + zlogc * 0.115 ) ) |
| 385 | + zdelpsi = 0.710 + zlogc * ( 0.159 + zlogc * 0.021 ) |
| 386 | + ! |
| 387 | + zze = 568.2 * zCtot**(-0.746) |
| 388 | + IF( zze > 102. ) zze = 200.0 * zCtot**(-0.293) |
| 389 | + zpsi = gdepw(ji,jj,jk,Kmm) / zze |
| 390 | + ! |
| 391 | + ! NB. make sure zchl value is such that: zchl = MIN( 10. , MAX( 0.03, zchl ) ) |
| 392 | + zchl = MIN( 10. , MAX( 0.03, zCze * ( zCb + zCmax * EXP( -( (zpsi - zpsimax) / zdelpsi )**2 ) ) ) ) |
| 393 | + ! Convert chlorophyll value to attenuation coefficient look-up table index |
| 394 | + ztmp3d(ji,jj,jk) = 41 + 20.*LOG10(zchl) + 1.e-15 |
| 395 | + END_3D |
| 396 | + ELSE !* constant chlorophyll |
| 397 | + zchl = 0.05 |
| 398 | + ! NB. make sure constant value is such that: |
| 399 | + zchl = MIN( 10. , MAX( 0.03, zchl ) ) |
| 400 | + ! Convert chlorophyll value to attenuation coefficient look-up table index |
| 401 | + zlui = 41 + 20.*LOG10(zchl) + 1.e-15 |
| 402 | DO jk = 1, nksr + 1 |
| 403 | ! ztmp3d(:,:,jk) = zlui |
| 404 | END DO |
| 405 | ENDIF |
| 406 | ! |
| 407 | zcoef = ( 1. - rn_abs ) / 3._wp !* surface equi-partition in R-G-B |
| 408 | DO_2D_00_00 |
| 409 | ! ze0(ji,jj) = rn_abs * qsr(ji,jj) |
| 410 | ! ze1(ji,jj) = zcoef * qsr(ji,jj) |
| 411 | ! ze2(ji,jj) = zcoef * qsr(ji,jj) |
| 412 | ! ze3(ji,jj) = zcoef * qsr(ji,jj) |
| 413 | ! ! store the surface SW radiation; re-use the surface ztmp3d array |
| 414 | ! ! since the surface attenuation coefficient is not used |
| 415 | ! ztmp3d(ji,jj,1) = qsr(ji,jj) |
| 416 | END_2D |
| 417 | ! |
| 418 | ! !* interior equi-partition in R-G-B depending of vertical profile of Chl |
| 419 | ! DO_3D_00_00 ( 2, nksr + 1 ) |
| 420 | ! ze3t = e3t(ji,jj,jk-1,Kmm) |
| 421 | ! irgb = NINT( ztmp3d(ji,jj,jk) ) |
| 422 | ! zc0 = ze0(ji,jj) * EXP( - ze3t * xsi0r ) |
| 423 | ! zc1 = ze1(ji,jj) * EXP( - ze3t * rkrgb(1,irgb) ) |
| 424 | ! zc2 = ze2(ji,jj) * EXP( - ze3t * rkrgb(2,irgb) ) |
| 425 | ! zc3 = ze3(ji,jj) * EXP( - ze3t * rkrgb(3,irgb) ) |
| 426 | ! ze0(ji,jj) = zc0 |
| 427 | ! ze1(ji,jj) = zc1 |
| 428 | ! ze2(ji,jj) = zc2 |
| 429 | ! ze3(ji,jj) = zc3 |
| 430 | ! ztmp3d(ji,jj,jk) = ( zc0 + zc1 + zc2 + zc3 ) * wmask(ji,jj,jk) |
| 431 | ! END_3D |
| 432 | ! |
| 433 | DO_3D_00_00( 1, nksr ) |
| 434 | ! qsr_hc(ji,jj,jk) = r1_rho0_rcp * ( ztmp3d(ji,jj,jk) - ztmp3d(ji,jj,jk+1) ) |
| 435 | END_3D |
| 436 | ! |
| 437 | ! DEALLOCATE( ze0 , ze1 , ze2 , ze3 , ztmp3d ) |
| 438 | ! |
| 439 | CASE( np_2BD ) !== 2-bands fluxes ==! |
| 440 | ! |
| 441 | }}} |