New URL for NEMO forge!   http://forge.nemo-ocean.eu

Since March 2022 along with NEMO 4.2 release, the code development moved to a self-hosted GitLab.
This present forge is now archived and remained online for history.
Changeset 12340 for NEMO/branches/2019/dev_r11943_MERGE_2019/src/OCE/DYN/sshwzv.F90 – NEMO

Ignore:
Timestamp:
2020-01-27T15:31:53+01:00 (4 years ago)
Author:
acc
Message:

Branch 2019/dev_r11943_MERGE_2019. This commit introduces basic do loop macro
substitution to the 2019 option 1, merge branch. These changes have been SETTE
tested. The only addition is the do_loop_substitute.h90 file in the OCE directory but
the macros defined therein are used throughout the code to replace identifiable, 2D-
and 3D- nested loop opening and closing statements with single-line alternatives. Code
indents are also adjusted accordingly.

The following explanation is taken from comments in the new header file:

This header file contains preprocessor definitions and macros used in the do-loop
substitutions introduced between version 4.0 and 4.2. The primary aim of these macros
is to assist in future applications of tiling to improve performance. This is expected
to be achieved by alternative versions of these macros in selected locations. The
initial introduction of these macros simply replaces all identifiable nested 2D- and
3D-loops with single line statements (and adjusts indenting accordingly). Do loops
are identifiable if they comform to either:

DO jk = ....

DO jj = .... DO jj = ...

DO ji = .... DO ji = ...
. OR .
. .

END DO END DO

END DO END DO

END DO

and white-space variants thereof.

Additionally, only loops with recognised jj and ji loops limits are treated; these are:
Lower limits of 1, 2 or fs_2
Upper limits of jpi, jpim1 or fs_jpim1 (for ji) or jpj, jpjm1 or fs_jpjm1 (for jj)

The macro naming convention takes the form: DO_2D_BT_LR where:

B is the Bottom offset from the PE's inner domain;
T is the Top offset from the PE's inner domain;
L is the Left offset from the PE's inner domain;
R is the Right offset from the PE's inner domain

So, given an inner domain of 2,jpim1 and 2,jpjm1, a typical example would replace:

DO jj = 2, jpj

DO ji = 1, jpim1
.
.

END DO

END DO

with:

DO_2D_01_10
.
.
END_2D

similar conventions apply to the 3D loops macros. jk loop limits are retained
through macro arguments and are not restricted. This includes the possibility of
strides for which an extra set of DO_3DS macros are defined.

In the example definition below the inner PE domain is defined by start indices of
(kIs, kJs) and end indices of (kIe, KJe)

#define DO_2D_00_00 DO jj = kJs, kJe ; DO ji = kIs, kIe
#define END_2D END DO ; END DO

TO DO:


Only conventional nested loops have been identified and replaced by this step. There are constructs such as:

DO jk = 2, jpkm1

z2d(:,:) = z2d(:,:) + e3w(:,:,jk,Kmm) * z3d(:,:,jk) * wmask(:,:,jk)

END DO

which may need to be considered.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • NEMO/branches/2019/dev_r11943_MERGE_2019/src/OCE/DYN/sshwzv.F90

    r12236 r12340  
    5050   !! * Substitutions 
    5151#  include "vectopt_loop_substitute.h90" 
     52#  include "do_loop_substitute.h90" 
    5253   !!---------------------------------------------------------------------- 
    5354   !! NEMO/OCE 4.0 , NEMO Consortium (2018) 
     
    177178            ! horizontal divergence of thickness diffusion transport ( velocity multiplied by e3t) 
    178179            ! - ML - note: computation already done in dom_vvl_sf_nxt. Could be optimized (not critical and clearer this way) 
    179             DO jj = 2, jpjm1 
    180                DO ji = fs_2, fs_jpim1   ! vector opt. 
    181                   zhdiv(ji,jj,jk) = r1_e1e2t(ji,jj) * ( un_td(ji,jj,jk) - un_td(ji-1,jj,jk) + vn_td(ji,jj,jk) - vn_td(ji,jj-1,jk) ) 
    182                END DO 
    183             END DO 
     180            DO_2D_00_00 
     181               zhdiv(ji,jj,jk) = r1_e1e2t(ji,jj) * ( un_td(ji,jj,jk) - un_td(ji-1,jj,jk) + vn_td(ji,jj,jk) - vn_td(ji,jj-1,jk) ) 
     182            END_2D 
    184183         END DO 
    185184         CALL lbc_lnk('sshwzv', zhdiv, 'T', 1.)  ! - ML - Perhaps not necessary: not used for horizontal "connexions" 
     
    311310      ! Calculate Courant numbers 
    312311      IF( ln_vvl_ztilde .OR. ln_vvl_layer ) THEN 
    313          DO jk = 1, jpkm1 
    314             DO jj = 2, jpjm1 
    315                DO ji = 2, fs_jpim1   ! vector opt. 
    316                   z1_e3t = 1._wp / e3t(ji,jj,jk,Kmm) 
    317                   ! 2*rdt and not r2dt (for restartability) 
    318                   Cu_adv(ji,jj,jk) = 2._wp * rdt * ( ( MAX( ww(ji,jj,jk) , 0._wp ) - MIN( ww(ji,jj,jk+1) , 0._wp ) )                       &   
    319                      &                             + ( MAX( e2u(ji  ,jj)*e3u(ji  ,jj,jk,Kmm)*uu(ji  ,jj,jk,Kmm) + un_td(ji  ,jj,jk), 0._wp ) -   & 
    320                      &                                 MIN( e2u(ji-1,jj)*e3u(ji-1,jj,jk,Kmm)*uu(ji-1,jj,jk,Kmm) + un_td(ji-1,jj,jk), 0._wp ) )   & 
    321                      &                               * r1_e1e2t(ji,jj)                                                                     & 
    322                      &                             + ( MAX( e1v(ji,jj  )*e3v(ji,jj  ,jk,Kmm)*vv(ji,jj  ,jk,Kmm) + vn_td(ji,jj  ,jk), 0._wp ) -   & 
    323                      &                                 MIN( e1v(ji,jj-1)*e3v(ji,jj-1,jk,Kmm)*vv(ji,jj-1,jk,Kmm) + vn_td(ji,jj-1,jk), 0._wp ) )   & 
    324                      &                               * r1_e1e2t(ji,jj)                                                                     & 
    325                      &                             ) * z1_e3t 
    326                END DO 
    327             END DO 
    328          END DO 
     312         DO_3D_00_00( 1, jpkm1 ) 
     313            z1_e3t = 1._wp / e3t(ji,jj,jk,Kmm) 
     314            ! 2*rdt and not r2dt (for restartability) 
     315            Cu_adv(ji,jj,jk) = 2._wp * rdt * ( ( MAX( ww(ji,jj,jk) , 0._wp ) - MIN( ww(ji,jj,jk+1) , 0._wp ) )                       &   
     316               &                             + ( MAX( e2u(ji  ,jj)*e3u(ji  ,jj,jk,Kmm)*uu(ji  ,jj,jk,Kmm) + un_td(ji  ,jj,jk), 0._wp ) -   & 
     317               &                                 MIN( e2u(ji-1,jj)*e3u(ji-1,jj,jk,Kmm)*uu(ji-1,jj,jk,Kmm) + un_td(ji-1,jj,jk), 0._wp ) )   & 
     318               &                               * r1_e1e2t(ji,jj)                                                                     & 
     319               &                             + ( MAX( e1v(ji,jj  )*e3v(ji,jj  ,jk,Kmm)*vv(ji,jj  ,jk,Kmm) + vn_td(ji,jj  ,jk), 0._wp ) -   & 
     320               &                                 MIN( e1v(ji,jj-1)*e3v(ji,jj-1,jk,Kmm)*vv(ji,jj-1,jk,Kmm) + vn_td(ji,jj-1,jk), 0._wp ) )   & 
     321               &                               * r1_e1e2t(ji,jj)                                                                     & 
     322               &                             ) * z1_e3t 
     323         END_3D 
    329324      ELSE 
    330          DO jk = 1, jpkm1 
    331             DO jj = 2, jpjm1 
    332                DO ji = 2, fs_jpim1   ! vector opt. 
    333                   z1_e3t = 1._wp / e3t(ji,jj,jk,Kmm) 
    334                   ! 2*rdt and not r2dt (for restartability) 
    335                   Cu_adv(ji,jj,jk) = 2._wp * rdt * ( ( MAX( ww(ji,jj,jk) , 0._wp ) - MIN( ww(ji,jj,jk+1) , 0._wp ) )   &  
    336                      &                             + ( MAX( e2u(ji  ,jj)*e3u(ji  ,jj,jk,Kmm)*uu(ji  ,jj,jk,Kmm), 0._wp ) -   & 
    337                      &                                 MIN( e2u(ji-1,jj)*e3u(ji-1,jj,jk,Kmm)*uu(ji-1,jj,jk,Kmm), 0._wp ) )   & 
    338                      &                               * r1_e1e2t(ji,jj)                                                 & 
    339                      &                             + ( MAX( e1v(ji,jj  )*e3v(ji,jj  ,jk,Kmm)*vv(ji,jj  ,jk,Kmm), 0._wp ) -   & 
    340                      &                                 MIN( e1v(ji,jj-1)*e3v(ji,jj-1,jk,Kmm)*vv(ji,jj-1,jk,Kmm), 0._wp ) )   & 
    341                      &                               * r1_e1e2t(ji,jj)                                                 & 
    342                      &                             ) * z1_e3t 
    343                END DO 
    344             END DO 
    345          END DO 
     325         DO_3D_00_00( 1, jpkm1 ) 
     326            z1_e3t = 1._wp / e3t(ji,jj,jk,Kmm) 
     327            ! 2*rdt and not r2dt (for restartability) 
     328            Cu_adv(ji,jj,jk) = 2._wp * rdt * ( ( MAX( ww(ji,jj,jk) , 0._wp ) - MIN( ww(ji,jj,jk+1) , 0._wp ) )   &  
     329               &                             + ( MAX( e2u(ji  ,jj)*e3u(ji  ,jj,jk,Kmm)*uu(ji  ,jj,jk,Kmm), 0._wp ) -   & 
     330               &                                 MIN( e2u(ji-1,jj)*e3u(ji-1,jj,jk,Kmm)*uu(ji-1,jj,jk,Kmm), 0._wp ) )   & 
     331               &                               * r1_e1e2t(ji,jj)                                                 & 
     332               &                             + ( MAX( e1v(ji,jj  )*e3v(ji,jj  ,jk,Kmm)*vv(ji,jj  ,jk,Kmm), 0._wp ) -   & 
     333               &                                 MIN( e1v(ji,jj-1)*e3v(ji,jj-1,jk,Kmm)*vv(ji,jj-1,jk,Kmm), 0._wp ) )   & 
     334               &                               * r1_e1e2t(ji,jj)                                                 & 
     335               &                             ) * z1_e3t 
     336         END_3D 
    346337      ENDIF 
    347338      CALL lbc_lnk( 'sshwzv', Cu_adv, 'T', 1. ) 
     
    350341      ! 
    351342      IF( MAXVAL( Cu_adv(:,:,:) ) > Cu_min ) THEN       ! Quick check if any breaches anywhere 
    352          DO jk = jpkm1, 2, -1                           ! or scan Courant criterion and partition 
    353             DO jj = 1, jpj                              ! w where necessary 
    354                DO ji = 1, jpi 
    355                   ! 
    356                   zCu = MAX( Cu_adv(ji,jj,jk) , Cu_adv(ji,jj,jk-1) ) 
     343         DO_3DS_11_11( jpkm1, 2, -1 ) 
     344            ! 
     345            zCu = MAX( Cu_adv(ji,jj,jk) , Cu_adv(ji,jj,jk-1) ) 
    357346! alt: 
    358347!                  IF ( ww(ji,jj,jk) > 0._wp ) THEN  
     
    361350!                     zCu =  Cu_adv(ji,jj,jk-1) 
    362351!                  ENDIF  
    363                   ! 
    364                   IF( zCu <= Cu_min ) THEN              !<-- Fully explicit 
    365                      zcff = 0._wp 
    366                   ELSEIF( zCu < Cu_cut ) THEN           !<-- Mixed explicit 
    367                      zcff = ( zCu - Cu_min )**2 
    368                      zcff = zcff / ( Fcu + zcff ) 
    369                   ELSE                                  !<-- Mostly implicit 
    370                      zcff = ( zCu - Cu_max )/ zCu 
    371                   ENDIF 
    372                   zcff = MIN(1._wp, zcff) 
    373                   ! 
    374                   wi(ji,jj,jk) =           zcff   * ww(ji,jj,jk) 
    375                   ww(ji,jj,jk) = ( 1._wp - zcff ) * ww(ji,jj,jk) 
    376                   ! 
    377                   Cu_adv(ji,jj,jk) = zcff               ! Reuse array to output coefficient below and in stp_ctl 
    378                END DO 
    379             END DO 
    380          END DO 
     352            ! 
     353            IF( zCu <= Cu_min ) THEN              !<-- Fully explicit 
     354               zcff = 0._wp 
     355            ELSEIF( zCu < Cu_cut ) THEN           !<-- Mixed explicit 
     356               zcff = ( zCu - Cu_min )**2 
     357               zcff = zcff / ( Fcu + zcff ) 
     358            ELSE                                  !<-- Mostly implicit 
     359               zcff = ( zCu - Cu_max )/ zCu 
     360            ENDIF 
     361            zcff = MIN(1._wp, zcff) 
     362            ! 
     363            wi(ji,jj,jk) =           zcff   * ww(ji,jj,jk) 
     364            ww(ji,jj,jk) = ( 1._wp - zcff ) * ww(ji,jj,jk) 
     365            ! 
     366            Cu_adv(ji,jj,jk) = zcff               ! Reuse array to output coefficient below and in stp_ctl 
     367         END_3D 
    381368         Cu_adv(:,:,1) = 0._wp  
    382369      ELSE 
Note: See TracChangeset for help on using the changeset viewer.