New URL for NEMO forge!   http://forge.nemo-ocean.eu

Since March 2022 along with NEMO 4.2 release, the code development moved to a self-hosted GitLab.
This forge is now archived and remains online for historical reference.
Changeset 12340 for NEMO/branches/2019/dev_r11943_MERGE_2019/src/OCE/TRA/traadv_cen.F90 – NEMO

Ignore:
Timestamp:
2020-01-27T15:31:53+01:00 (4 years ago)
Author:
acc
Message:

Branch 2019/dev_r11943_MERGE_2019. This commit introduces basic do loop macro
substitution to the 2019 option 1, merge branch. These changes have been SETTE
tested. The only addition is the do_loop_substitute.h90 file in the OCE directory but
the macros defined therein are used throughout the code to replace identifiable, 2D-
and 3D- nested loop opening and closing statements with single-line alternatives. Code
indents are also adjusted accordingly.

The following explanation is taken from comments in the new header file:

This header file contains preprocessor definitions and macros used in the do-loop
substitutions introduced between version 4.0 and 4.2. The primary aim of these macros
is to assist in future applications of tiling to improve performance. This is expected
to be achieved by alternative versions of these macros in selected locations. The
initial introduction of these macros simply replaces all identifiable nested 2D- and
3D-loops with single line statements (and adjusts indenting accordingly). Do loops
are identifiable if they conform to either:

   DO jk = ....
      DO jj = ....                        DO jj = ...
         DO ji = ....                        DO ji = ...
            .                  OR               .
            .                                   .
         END DO                              END DO
      END DO                              END DO
   END DO

and white-space variants thereof.

Additionally, only loops with recognised jj and ji loop limits are treated; these are:
Lower limits of 1, 2 or fs_2
Upper limits of jpi, jpim1 or fs_jpim1 (for ji) or jpj, jpjm1 or fs_jpjm1 (for jj)

The macro naming convention takes the form: DO_2D_BT_LR where:

B is the Bottom offset from the PE's inner domain;
T is the Top offset from the PE's inner domain;
L is the Left offset from the PE's inner domain;
R is the Right offset from the PE's inner domain

So, given an inner domain of 2,jpim1 and 2,jpjm1, a typical example would replace:

   DO jj = 2, jpj
      DO ji = 1, jpim1
         .
         .
      END DO
   END DO

with:

DO_2D_01_10
.
.
END_2D

Similar conventions apply to the 3D loop macros. jk loop limits are retained
through macro arguments and are not restricted. This includes the possibility of
strides for which an extra set of DO_3DS macros are defined.

In the example definition below the inner PE domain is defined by start indices of
(kIs, kJs) and end indices of (kIe, kJe):

#define DO_2D_00_00 DO jj = kJs, kJe ; DO ji = kIs, kIe
#define END_2D END DO ; END DO

TO DO:


Only conventional nested loops have been identified and replaced by this step. There are constructs such as:

DO jk = 2, jpkm1

z2d(:,:) = z2d(:,:) + e3w(:,:,jk,Kmm) * z3d(:,:,jk) * wmask(:,:,jk)

END DO

which may need to be considered.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • NEMO/branches/2019/dev_r11943_MERGE_2019/src/OCE/TRA/traadv_cen.F90

    r12193 r12340  
    3737   !! * Substitutions 
    3838#  include "vectopt_loop_substitute.h90" 
     39#  include "do_loop_substitute.h90" 
    3940   !!---------------------------------------------------------------------- 
    4041   !! NEMO/OCE 4.0 , NEMO Consortium (2018) 
     
    103104         ! 
    104105         CASE(  2  )                         !* 2nd order centered 
    105             DO jk = 1, jpkm1 
    106                DO jj = 1, jpjm1 
    107                   DO ji = 1, fs_jpim1   ! vector opt. 
    108                      zwx(ji,jj,jk) = 0.5_wp * pU(ji,jj,jk) * ( pt(ji,jj,jk,jn,Kmm) + pt(ji+1,jj  ,jk,jn,Kmm) ) 
    109                      zwy(ji,jj,jk) = 0.5_wp * pV(ji,jj,jk) * ( pt(ji,jj,jk,jn,Kmm) + pt(ji  ,jj+1,jk,jn,Kmm) ) 
    110                   END DO 
    111                END DO 
    112             END DO 
     106            DO_3D_10_10( 1, jpkm1 ) 
     107               zwx(ji,jj,jk) = 0.5_wp * pU(ji,jj,jk) * ( pt(ji,jj,jk,jn,Kmm) + pt(ji+1,jj  ,jk,jn,Kmm) ) 
     108               zwy(ji,jj,jk) = 0.5_wp * pV(ji,jj,jk) * ( pt(ji,jj,jk,jn,Kmm) + pt(ji  ,jj+1,jk,jn,Kmm) ) 
     109            END_3D 
    113110            ! 
    114111         CASE(  4  )                         !* 4th order centered 
    115112            ztu(:,:,jpk) = 0._wp                   ! Bottom value : flux set to zero 
    116113            ztv(:,:,jpk) = 0._wp 
    117             DO jk = 1, jpkm1                       ! masked gradient 
    118                DO jj = 2, jpjm1 
    119                   DO ji = fs_2, fs_jpim1   ! vector opt. 
    120                      ztu(ji,jj,jk) = ( pt(ji+1,jj  ,jk,jn,Kmm) - pt(ji,jj,jk,jn,Kmm) ) * umask(ji,jj,jk) 
    121                      ztv(ji,jj,jk) = ( pt(ji  ,jj+1,jk,jn,Kmm) - pt(ji,jj,jk,jn,Kmm) ) * vmask(ji,jj,jk) 
    122                   END DO 
    123                END DO 
    124             END DO 
     114            DO_3D_00_00( 1, jpkm1 ) 
     115               ztu(ji,jj,jk) = ( pt(ji+1,jj  ,jk,jn,Kmm) - pt(ji,jj,jk,jn,Kmm) ) * umask(ji,jj,jk) 
     116               ztv(ji,jj,jk) = ( pt(ji  ,jj+1,jk,jn,Kmm) - pt(ji,jj,jk,jn,Kmm) ) * vmask(ji,jj,jk) 
     117            END_3D 
    125118            CALL lbc_lnk_multi( 'traadv_cen', ztu, 'U', -1. , ztv, 'V', -1. )   ! Lateral boundary cond. 
    126119            ! 
    127             DO jk = 1, jpkm1                       ! Horizontal advective fluxes 
    128                DO jj = 2, jpjm1 
    129                   DO ji = 1, fs_jpim1   ! vector opt. 
    130                      zC2t_u = pt(ji,jj,jk,jn,Kmm) + pt(ji+1,jj  ,jk,jn,Kmm)   ! C2 interpolation of T at u- & v-points (x2) 
    131                      zC2t_v = pt(ji,jj,jk,jn,Kmm) + pt(ji  ,jj+1,jk,jn,Kmm) 
    132                      !                                                  ! C4 interpolation of T at u- & v-points (x2) 
    133                      zC4t_u =  zC2t_u + r1_6 * ( ztu(ji-1,jj,jk) - ztu(ji+1,jj,jk) ) 
    134                      zC4t_v =  zC2t_v + r1_6 * ( ztv(ji,jj-1,jk) - ztv(ji,jj+1,jk) ) 
    135                      !                                                  ! C4 fluxes 
    136                      zwx(ji,jj,jk) =  0.5_wp * pU(ji,jj,jk) * zC4t_u 
    137                      zwy(ji,jj,jk) =  0.5_wp * pV(ji,jj,jk) * zC4t_v 
    138                   END DO 
    139                END DO 
    140             END DO          
     120            DO_3D_00_10( 1, jpkm1 ) 
     121               zC2t_u = pt(ji,jj,jk,jn,Kmm) + pt(ji+1,jj  ,jk,jn,Kmm)   ! C2 interpolation of T at u- & v-points (x2) 
     122               zC2t_v = pt(ji,jj,jk,jn,Kmm) + pt(ji  ,jj+1,jk,jn,Kmm) 
     123               !                                                  ! C4 interpolation of T at u- & v-points (x2) 
     124               zC4t_u =  zC2t_u + r1_6 * ( ztu(ji-1,jj,jk) - ztu(ji+1,jj,jk) ) 
     125               zC4t_v =  zC2t_v + r1_6 * ( ztv(ji,jj-1,jk) - ztv(ji,jj+1,jk) ) 
     126               !                                                  ! C4 fluxes 
     127               zwx(ji,jj,jk) =  0.5_wp * pU(ji,jj,jk) * zC4t_u 
     128               zwy(ji,jj,jk) =  0.5_wp * pV(ji,jj,jk) * zC4t_v 
     129            END_3D 
    141130            ! 
    142131         CASE DEFAULT 
     
    147136         ! 
    148137         CASE(  2  )                         !* 2nd order centered 
    149             DO jk = 2, jpk 
    150                DO jj = 2, jpjm1 
    151                   DO ji = fs_2, fs_jpim1   ! vector opt. 
    152                      zwz(ji,jj,jk) = 0.5 * pW(ji,jj,jk) * ( pt(ji,jj,jk,jn,Kmm) + pt(ji,jj,jk-1,jn,Kmm) ) * wmask(ji,jj,jk) 
    153                   END DO 
    154                END DO 
    155             END DO 
     138            DO_3D_00_00( 2, jpk ) 
     139               zwz(ji,jj,jk) = 0.5 * pW(ji,jj,jk) * ( pt(ji,jj,jk,jn,Kmm) + pt(ji,jj,jk-1,jn,Kmm) ) * wmask(ji,jj,jk) 
     140            END_3D 
    156141            ! 
    157142         CASE(  4  )                         !* 4th order compact 
    158143            CALL interp_4th_cpt( pt(:,:,:,jn,Kmm) , ztw )      ! ztw = interpolated value of T at w-point 
    159             DO jk = 2, jpkm1 
    160                DO jj = 2, jpjm1 
    161                   DO ji = fs_2, fs_jpim1 
    162                      zwz(ji,jj,jk) = pW(ji,jj,jk) * ztw(ji,jj,jk) * wmask(ji,jj,jk) 
    163                   END DO 
    164                END DO 
    165             END DO 
     144            DO_3D_00_00( 2, jpkm1 ) 
     145               zwz(ji,jj,jk) = pW(ji,jj,jk) * ztw(ji,jj,jk) * wmask(ji,jj,jk) 
     146            END_3D 
    166147            ! 
    167148         END SELECT 
     
    169150         IF( ln_linssh ) THEN                !* top value   (linear free surf. only as zwz is multiplied by wmask) 
    170151            IF( ln_isfcav ) THEN                  ! ice-shelf cavities (top of the ocean) 
    171                DO jj = 1, jpj 
    172                   DO ji = 1, jpi 
    173                      zwz(ji,jj, mikt(ji,jj) ) = pW(ji,jj,mikt(ji,jj)) * pt(ji,jj,mikt(ji,jj),jn,Kmm)  
    174                   END DO 
    175                END DO    
     152               DO_2D_11_11 
     153                  zwz(ji,jj, mikt(ji,jj) ) = pW(ji,jj,mikt(ji,jj)) * pt(ji,jj,mikt(ji,jj),jn,Kmm)  
     154               END_2D 
    176155            ELSE                                   ! no ice-shelf cavities (only ocean surface) 
    177156               zwz(:,:,1) = pW(:,:,1) * pt(:,:,1,jn,Kmm) 
     
    179158         ENDIF 
    180159         !                
    181          DO jk = 1, jpkm1              !--  Divergence of advective fluxes  --! 
    182             DO jj = 2, jpjm1 
    183                DO ji = fs_2, fs_jpim1   ! vector opt. 
    184                   pt(ji,jj,jk,jn,Krhs) = pt(ji,jj,jk,jn,Krhs)    & 
    185                      &             - (  zwx(ji,jj,jk) - zwx(ji-1,jj  ,jk  )    & 
    186                      &                + zwy(ji,jj,jk) - zwy(ji  ,jj-1,jk  )    & 
    187                      &                + zwz(ji,jj,jk) - zwz(ji  ,jj  ,jk+1)  ) * r1_e1e2t(ji,jj) / e3t(ji,jj,jk,Kmm) 
    188                END DO 
    189             END DO 
    190          END DO 
     160         DO_3D_00_00( 1, jpkm1 ) 
     161            pt(ji,jj,jk,jn,Krhs) = pt(ji,jj,jk,jn,Krhs)    & 
     162               &             - (  zwx(ji,jj,jk) - zwx(ji-1,jj  ,jk  )    & 
     163               &                + zwy(ji,jj,jk) - zwy(ji  ,jj-1,jk  )    & 
     164               &                + zwz(ji,jj,jk) - zwz(ji  ,jj  ,jk+1)  ) * r1_e1e2t(ji,jj) / e3t(ji,jj,jk,Kmm) 
     165         END_3D 
    191166         !                             ! trend diagnostics 
    192167         IF( l_trd ) THEN 
Note: See TracChangeset for help on using the changeset viewer.