MODULE lib_fortran !!====================================================================== !! *** MODULE lib_fortran *** !! Fortran utilities: includes some low levels fortran functionality !!====================================================================== !! History : 3.2 ! 2010-05 Michael Dunphy, Rachid BENSHILA Original code !!---------------------------------------------------------------------- !! OPA 9.0 , LOCEAN-IPSL (2005) !! $Id: $ !! This software is governed by the CeCILL licence see modipsl/doc/NEMO_CeCILL.txt !!---------------------------------------------------------------------- USE par_oce USE par_kind USE lib_mpp ! distributed memory computing USE dom_oce USE in_out_manager IMPLICIT NONE PRIVATE PUBLIC glob_sum #if defined key_nosignedzeo PUBLIC SIGN #endif INTERFACE glob_sum #if defined key_mpp_rep1 MODULE PROCEDURE mpp_sum_indep #elif defined key_mpp_rep2 MODULE PROCEDURE mpp_sum_cmpx #else MODULE PROCEDURE glob_sum_2d, glob_sum_3d,glob_sum_2d_a, glob_sum_3d_a #endif END INTERFACE #if defined key_nosignedzeo INTERFACE SIGN MODULE PROCEDURE SIGN_SCALAR, SIGN_ARRAY_1D, SIGN_ARRAY_2D, SIGN_ARRAY_3D, & SIGN_ARRAY_1D_A, SIGN_ARRAY_2D_A, SIGN_ARRAY_3D_A, & SIGN_ARRAY_1D_B, SIGN_ARRAY_2D_B, SIGN_ARRAY_3D_B END INTERFACE #endif CONTAINS FUNCTION glob_sum_2d( ptab ) !!----------------------------------------------------------------------- !! *** FUNCTION glob_sum_2D *** !! !! ** Purpose : perform a sum on the global domain of a 2D array !!----------------------------------------------------------------------- REAL(wp), DIMENSION(:,:),INTENT(in) :: ptab REAL(wp) :: glob_sum_2d !!----------------------------------------------------------------------- glob_sum_2d = SUM( ptab(:,:)*tmask_i(:,:) ) IF( lk_mpp ) CALL mpp_sum( glob_sum_2d ) END FUNCTION glob_sum_2d FUNCTION glob_sum_3d( ptab ) !!----------------------------------------------------------------------- !! *** FUNCTION glob_sum_3D *** !! !! ** Purpose : perform a sum on the global domain of a 3D array !!----------------------------------------------------------------------- REAL(wp), DIMENSION(:,:,:) :: ptab REAL(wp) :: glob_sum_3d ! INTEGER :: jk !!----------------------------------------------------------------------- GLOB_SUM_3D = 0.e0 DO jk = 1, jpk glob_sum_3d = glob_sum_3d + SUM( ptab(:,:,jk)*tmask_i(:,:) ) END DO IF( lk_mpp ) CALL mpp_sum( glob_sum_3d ) END FUNCTION glob_sum_3d FUNCTION glob_sum_2d_a( ptab1, ptab2 ) !!----------------------------------------------------------------------- !! *** FUNCTION glob_sum_2D _a *** !! !! ** Purpose : perform a sum on the global domain of two 2D array !!----------------------------------------------------------------------- REAL(wp), DIMENSION(:,:) :: ptab1, ptab2 REAL(wp), DIMENSION(2) :: glob_sum_2d_a !!----------------------------------------------------------------------- glob_sum_2d_a(1) = SUM( ptab1(:,:)*tmask_i(:,:) ) glob_sum_2d_a(2) = SUM( ptab2(:,:)*tmask_i(:,:) ) IF( lk_mpp ) CALL mpp_sum( glob_sum_2d_a,2 ) END FUNCTION glob_sum_2d_a FUNCTION glob_sum_3d_a( ptab1, ptab2 ) !!----------------------------------------------------------------------- !! *** FUNCTION glob_sum_3D_a *** !! !! ** Purpose : perform a sum on the global domain of two 3D array !!----------------------------------------------------------------------- REAL(wp), DIMENSION(:,:,:) :: ptab1, ptab2 REAL(wp), DIMENSION(2) :: glob_sum_3d_a ! INTEGER :: jk !!----------------------------------------------------------------------- glob_sum_3d_a(:) = 0.e0 DO jk = 1, jpk glob_sum_3d_a(1) = glob_sum_3d_a(1) + SUM( ptab1(:,:,jk)*tmask_i(:,:) ) glob_sum_3d_a(2) = glob_sum_3d_a(2) + SUM( ptab2(:,:,jk)*tmask_i(:,:) ) END DO IF( lk_mpp ) CALL mpp_sum( glob_sum_3d_a,2 ) END FUNCTION glob_sum_3d_a #if defined key_mpp_rep2 FUNCTION mpp_sum_cmpx( pval ) !!---------------------------------------------------------------------- !! *** FUNCTION mpp_sum_cmpx *** !! !! ** Purpose : perform a sum in calling DDPDD routine !! !!---------------------------------------------------------------------- REAL(wp) :: mpp_sum_cmpx ! REAL(wp), DIMENSION(jpi,jpj), INTENT(IN) :: & & pval COMPLEX(wp):: ctmp REAL(wp) ::ztmp INTEGER :: ji,jj !!----------------------------------------------------------------------- ztmp = 0.e0 ctmp = CMPLX(0.e0,0.e0,wp) DO jj = 1,jpj DO ji =1, jpi ztmp = pval(ji,jj) * tmask_i(ji,jj) CALL DDPDD(CMPLX(ztmp,0.e0,wp),ctmp) END DO END DO IF( lk_mpp ) CALL mpp_sum( ctmp ) ! sum over the global domain mpp_sum_cmpx= REAL(ctmp,wp) END FUNCTION mpp_sum_cmpx SUBROUTINE DDPDD( ydda, yddb ) !!---------------------------------------------------------------------- !! *** ROUTINE DDPDD *** !! !! ** Purpose : Add a scalar element to a sum !! !! !! ** Method : The code uses the compensated summation with doublet !! (sum,error) emulated useing complex numbers. ydda is the !! scalar to add to the summ yddb !! !! ** Action : This does only work for MPI. !! !! References : Using Acurate Arithmetics to Improve Numerical !! Reproducibility and Sability in Parallel Applications !! Yun HE and Chris H. Q. DING, Journal of Supercomputing !! 18, 259-277, 2001 !!---------------------------------------------------------------------- COMPLEX(wp), INTENT(in) :: ydda COMPLEX(wp), INTENT(inout) :: yddb REAL(wp) :: zerr, zt1, zt2 ! local work variables ! Compute ydda + yddb using Knuth's trick. zt1 = real(ydda) + real(yddb) zerr = zt1 - real(ydda) zt2 = ((real(yddb) - zerr) + (real(ydda) - (zt1 - zerr))) & + aimag(ydda) + aimag(yddb) ! The result is t1 + t2, after normalization. yddb = cmplx ( zt1 + zt2, zt2 - ((zt1 + zt2) - zt1),wp ) END SUBROUTINE DDPDD #endif #if defined key_mpp_rep1 FUNCTION mpp_sum_indep( pval ) !!---------------------------------------------------------------------- !! *** ROUTINE mpp_sum_indep *** !! !! ** Purpose : Sum all elements in the pval array in !! an accurate order-independent way. !! !! ** Method : The code iterates the compensated summation until the !! result is guaranteed to be within 4*eps of the true sum. !! It then rounds the result to the nearest floating-point !! number whose last three bits are zero, thereby !! guaranteeing an order-independent result. !! !! ** Action : This does only work for MPI. !! It does not work for SHMEM. !! !! References : M. Fisher (ECMWF): IFS code + personal communication !! The algorithm is based on Ogita et al. (2005) !! SIAM J. Sci. Computing, Vol.26, No.6, pp1955-1988. !! This is based in turn on an algorithm !! by Knuth (1969, seminumerical algorithms). !! !! History : !! ! 07-07 (K. Mogensen) Original code heavily based on IFS. !!---------------------------------------------------------------------- REAL(wp) mpp_sum_indep REAL(wp), DIMENSION(jpi,jpj), INTENT(IN) :: pval ! REAL(wp), DIMENSION(3) :: zbuffl REAL(wp), DIMENSION(:), ALLOCATABLE :: zpsums, zperrs, zpcors, zbuffg, zp REAL(wp) :: zcorr, zerr, zolderr, zbeta, zres INTEGER, DIMENSION(:), allocatable :: irecv, istart INTEGER :: ikn, jj ! initialise to avoid uninitialised variables trapping of some compilers to complain. zres = 0.0_wp ; zerr = 0.0_wp ; zbuffl(:) = 0.0_wp ! Get global number of elements ikn = SIZE(pval) # ifdef key_mpp CALL mpp_sum( ikn ) # endif ! Check that the the algorithm can work IF ( ( REAL( 2 * ikn ) * EPSILON( zres ) ) >= 1.0 ) THEN CALL ctl_stop('mpp_sum_indep:', & & 'size of array is too large to guarantee error bounds') ENDIF ALLOCATE( & & zp(MAX(ikn,1)), & & zbuffg(jpnij*SIZE(zbuffl)), & & zpsums(jpnij), & & zperrs(jpnij), & & zpcors(jpnij) & & ) zolderr = HUGE(zerr) ! Copy the input array. This avoids some tricky indexing, at the ! expense of some inefficency. IF ( ikn > 0 ) THEN zp(:) = RESHAPE(pval, (/ jpi * jpj /) ) ELSE zp(1) = 0.0_wp ENDIF k_loop: DO ! Transform local arrays IF ( ikn > 0 ) THEN CALL comp_sum ( zp, ikn, zcorr, zerr ) ENDIF ! Gather partial sums and error bounds to all processors zbuffl(1) = zp(MAX(ikn,1)) IF ( ikn > 0 ) THEN zbuffl(2) = zerr zbuffl(3) = zcorr ELSE zbuffl(2) = 0.0_wp zbuffl(3) = 0.0_wp ENDIF IF ( jpnij > 1 ) THEN ALLOCATE( & & irecv(jpnij), & & istart(jpnij) & & ) CALL mpp_allgatherv( zbuffl, SIZE(zbuffl), & & zbuffg, jpnij * SIZE(zbuffl), irecv, istart ) DEALLOCATE( & & irecv, & & istart & & ) DO jj = 1, jpnij zpsums(jj) = zbuffg(1+(jj-1)*SIZE(zbuffl)) zperrs(jj) = zbuffg(2+(jj-1)*SIZE(zbuffl)) zpcors(jj) = zbuffg(3+(jj-1)*SIZE(zbuffl)) END DO ELSE zpsums(1) = zbuffl(1) zperrs(1) = zbuffl(2) zpcors(1) = zbuffl(3) ENDIF ! Transform partial sums CALL comp_sum( zpsums, jpnij, zcorr, zerr ) zerr = zerr + SUM(zperrs) zcorr = zcorr + SUM(zpcors) ! Calculate final result zres = zpsums(jpnij) + zcorr ! Calculate error bound. This is corollary 4.7 from Ogita et al. ! (2005) zbeta = zerr *( REAL( 2*ikn, wp ) * EPSILON(zres) ) & & /(1.0_wp - REAL( 2*ikn, wp ) * EPSILON(zres) ) zerr = EPSILON(zres) * ABS(zres) & & +(zbeta + ( 2.0_wp * EPSILON(zres) * EPSILON(zres) * ABS(zres) & & +3.0_wp * TINY(zres) ) ) ! Update the last element of the local array zp(MAX(ikn,1)) = zpsums(nproc+1) ! Exit if the global error is small enough IF ( zerr < 4.0_wp * SPACING(zres) ) EXIT k_loop ! Take appropriate action if ZRES cannot be sufficiently refined. IF (zerr >= zolderr) THEN CALL ctl_stop('Failed to refine sum', & & 'Warning: Possiblity of non-reproducible results') ENDIF zolderr = zerr ENDDO k_loop ! At this stage, we have guaranteed that ZRES less than 4*EPS ! away from the exact sum. There are only four floating point ! numbers in this range. So, if we find the nearest number that ! has its last three bits zero, then we have a reproducible result. mpp_sum_indep = fround(zres) DEALLOCATE( & & zpcors, & & zperrs, & & zpsums, & & zbuffg, & & zp & & ) END FUNCTION mpp_sum_indep SUBROUTINE comp_sum( pval, kn, pcorr, perr ) !!---------------------------------------------------------------------- !! *** ROUTINE comp_sum *** !! !! ** Purpose : To perform compensated (i.e. accurate) summation. !! !! ** Method : These routines transform the elements of the array P, !! such that: !! 1) pval(kn) contains sum(pval) !! 2) pval(1)...pval(kn-1) contain the rounding errors !! that were made in calculating sum(pval). !! 3) The exact sum of the elements of pval is unmodified. !! On return, pcorr contains the sum of the rounding errors, !! perr contains the sum of their absolute values. !! After calling this routine, an accurate sum of the !! elements of pval can be calculated as res=pval(n)+pcorr. !! !! ** Action : !! !! References : M. Fisher (ECMWF) IFS code + personal communications !! !! History : !! ! 07-07 (K. Mogensen) Original code heavily based on IFS !!-------------------------------------------------------------------- INTEGER, INTENT(IN) :: kn ! Number of elements in input array REAL(wp), DIMENSION(kn), INTENT(INOUT) :: pval ! Input array to be sum on input ! pval(kn) = sum (pval) on output ! pval(1)...pval(kn-1) = rounding errors on output REAL(wp) :: pcorr ! Sum of rounding errors REAL(wp) :: perr ! Sum of absolute rounding errors !! * Local declarations REAL(wp) :: zx, zz, zpsum INTEGER :: jj pcorr = 0.0_wp perr = 0.0_wp zpsum = pval(1) DO jj = 2, kn ! It is vital that these 4 lines are not optimized in any way that ! changes the results. zx = pval(jj) + zpsum zz = zx - pval(jj) pval(jj-1) = ( pval(jj) - ( zx - zz ) ) + ( zpsum - zz ) zpsum = zx ! Accumulate the correction and the error pcorr = pcorr + pval(jj-1) perr = perr + ABS( pval(jj-1) ) END DO pval(kn) = zpsum END SUBROUTINE comp_sum FUNCTION fround(pres) !!---------------------------------------------------------------------- !! *** ROUTINE fround *** !! !! ** Purpose : Rounding of floating-point number !! !! ** Method : Returns the value of PRES rounded to the nearest !! floating-point number that has its last three bits zero !! This works on big-endian and little-endian machines. !! !! ** Action : !! !! References : M. Fisher (ECMWF) IFS code + personal communication !! !! History : !! ! 07-07 (K. Mogensen) Original code heavily based on IFS. !!---------------------------------------------------------------------- REAL(wp) fround REAL(wp), INTENT(IN) :: pres ! Value to be rounded ! REAL(wp) :: zz(2), zup, zdown INTEGER :: ii(2), iequiv(8), ints_per_real, i_low_word INTEGER :: jj ii(:) = 1 zz(:) = 1.0_wp ! Warning: If wp = 64 bits (or 32 bits for key_sp) this will not work. #if defined key_sp ints_per_real = 32 / BIT_SIZE(ii) #else ints_per_real = 64 / BIT_SIZE(ii) #endif ! Test whether big-endian or little-endian zup = -1.0_wp iequiv(1:ints_per_real) = TRANSFER(zup,iequiv(1:ints_per_real)) IF ( iequiv(1) == 0 ) THEN i_low_word = 1 ! Little-endian ELSE i_low_word = ints_per_real ! Big-endian ENDIF ! Find the nearest number with all 3 lowest-order bits zeroed iequiv(1:ints_per_real) = transfer(pres,iequiv(1:ints_per_real)) zup = pres zdown = pres IF (IBITS(iequiv(i_low_word),0,3)/=0) THEN DO jj = 1, 4 zup = NEAREST( zup, 1.0_wp ) iequiv(1:ints_per_real) = TRANSFER( zup, iequiv(1:ints_per_real) ) IF ( IBITS( iequiv(i_low_word), 0, 3 ) == 0 ) EXIT zdown = NEAREST( zdown, -1.0 ) iequiv(1:ints_per_real) = TRANSFER( zdown, iequiv(1:ints_per_real)) IF ( IBITS( iequiv(i_low_word),0,3) == 0 ) EXIT END DO IF ( IBITS( iequiv( i_low_word ), 0, 3) /= 0 ) THEN CALL ctl_stop('Fround:','This is not possible') ENDIF ENDIF fround = TRANSFER( iequiv(1:ints_per_real), pres ) END FUNCTION fround #endif #if defined key_nosignedzero FUNCTION SIGN_SCALAR(pa,pb) !!----------------------------------------------------------------------- !! *** FUNCTION SIGN_SCALAR *** !! !! ** Purpose : overwrite f95 behaviour of intrinsinc sign function !!----------------------------------------------------------------------- REAL(wp) :: pa,pb ! input REAL(wp) :: SIGN_SCALAR ! result IF ( pb >= 0.e0) THEN SIGN_SCALAR = ABS(pa) ELSE SIGN_SCALAR =-ABS(pa) ENDIF END FUNCTION SIGN_SCALAR FUNCTION SIGN_ARRAY_1D(pa,pb) !!----------------------------------------------------------------------- !! *** FUNCTION SIGN_ARRAY_1D *** !! !! ** Purpose : overwrite f95 behaviour of intrinsinc sign function !!----------------------------------------------------------------------- REAL(wp) :: pa,pb(:) ! input REAL(wp) :: SIGN_ARRAY_1D(SIZE(pb,1)) ! result WHERE ( pb >= 0.e0 ) SIGN_ARRAY_1D = ABS(pa) ELSEWHERE SIGN_ARRAY_1D =-ABS(pa) END WHERE END FUNCTION SIGN_ARRAY_1D FUNCTION SIGN_ARRAY_2D(pa,pb) !!----------------------------------------------------------------------- !! *** FUNCTION SIGN_ARRAY_2D *** !! !! ** Purpose : overwrite f95 behaviour of intrinsinc sign function !!----------------------------------------------------------------------- REAL(wp) :: pa,pb(:,:) ! input REAL(wp) :: SIGN_ARRAY_2D(SIZE(pb,1),SIZE(pb,2)) ! result WHERE ( pb >= 0.e0 ) SIGN_ARRAY_2D = ABS(pa) ELSEWHERE SIGN_ARRAY_2D =-ABS(pa) END WHERE END FUNCTION SIGN_ARRAY_2D FUNCTION SIGN_ARRAY_3D(pa,pb) !!----------------------------------------------------------------------- !! *** FUNCTION SIGN_ARRAY_3D *** !! !! ** Purpose : overwrite f95 behaviour of intrinsinc sign function !!----------------------------------------------------------------------- REAL(wp) :: pa,pb(:,:,:) ! input REAL(wp) :: SIGN_ARRAY_3D(SIZE(pb,1),SIZE(pb,2),SIZE(pb,3)) ! result WHERE ( pb >= 0.e0 ) SIGN_ARRAY_3D = ABS(pa) ELSEWHERE SIGN_ARRAY_3D =-ABS(pa) END WHERE END FUNCTION SIGN_ARRAY_3D FUNCTION SIGN_ARRAY_1D_A(pa,pb) !!----------------------------------------------------------------------- !! *** FUNCTION SIGN_ARRAY_1D_A *** !! !! ** Purpose : overwrite f95 behaviour of intrinsinc sign function !!----------------------------------------------------------------------- REAL(wp) :: pa(:),pb(:) ! input REAL(wp) :: SIGN_ARRAY_1D_A(SIZE(b,1)) ! result WHERE ( pb >= 0.e0 ) SIGN_ARRAY_1D_A = ABS(pa) ELSEWHERE SIGN_ARRAY_1D_A =-ABS(pa) END WHERE END FUNCTION SIGN_ARRAY_1D_A FUNCTION SIGN_ARRAY_2D_A(pa,pb) !!----------------------------------------------------------------------- !! *** FUNCTION SIGN_ARRAY_2D_A *** !! !! ** Purpose : overwrite f95 behaviour of intrinsinc sign function !!----------------------------------------------------------------------- REAL(wp) :: pa(:,:),pb(:,:) ! input REAL(wp) :: SIGN_ARRAY_2D_A(SIZE(pb,1),SIZE(pb,2)) ! result WHERE ( pb >= 0.e0 ) SIGN_ARRAY_2D_A = ABS(pa) ELSEWHERE SIGN_ARRAY_2D_A =-ABS(pa) END WHERE END FUNCTION SIGN_ARRAY_2D_A FUNCTION SIGN_ARRAY_3D_A(pa,pb) !!----------------------------------------------------------------------- !! *** FUNCTION SIGN_ARRAY_3D_A *** !! !! ** Purpose : overwrite f95 behaviour of intrinsinc sign function !!----------------------------------------------------------------------- REAL(wp) :: pa(:,:,:),pb(:,:,:) ! input REAL(wp) :: SIGN_ARRAY_3D_A(SIZE(pb,1),SIZE(pb,2),SIZE(pb,3)) ! result WHERE ( pb >= 0.e0 ) SIGN_ARRAY_3D_A = ABS(pa) ELSEWHERE SIGN_ARRAY_3D_A =-ABS(pa) END WHERE END FUNCTION SIGN_ARRAY_3D_A FUNCTION SIGN_ARRAY_1D_B(pa,pb) !!----------------------------------------------------------------------- !! *** FUNCTION SIGN_ARRAY_1D_B *** !! !! ** Purpose : overwrite f95 behaviour of intrinsinc sign function !!----------------------------------------------------------------------- REAL(wp) :: pa(:),pb ! input REAL(wp) :: SIGN_ARRAY_1D_B(SIZE(pa,1)) ! result IF ( pb >= 0.e0 ) THEN SIGN_ARRAY_1D_B = ABS(pa) ELSE SIGN_ARRAY_1D_B =-ABS(pa) ENDIF END FUNCTION SIGN_ARRAY_1D_B FUNCTION SIGN_ARRAY_2D_B(pa,pb) !!----------------------------------------------------------------------- !! *** FUNCTION SIGN_ARRAY_2D_B *** !! !! ** Purpose : overwrite f95 behaviour of intrinsinc sign function !!----------------------------------------------------------------------- REAL(wp) :: pa(:,:),pb ! input REAL(wp) :: SIGN_ARRAY_2D_B(SIZE(pa,1),SIZE(pa,2)) ! result IF ( pb >= 0.e0 ) THEN SIGN_ARRAY_2D_B = ABS(pa) ELSE SIGN_ARRAY_2D_B =-ABS(pa) ENDIF END FUNCTION SIGN_ARRAY_2D_B FUNCTION SIGN_ARRAY_3D_B(pa,pb) !!----------------------------------------------------------------------- !! *** FUNCTION SIGN_ARRAY_3D_B *** !! !! ** Purpose : overwrite f95 behaviour of intrinsinc sign function !!----------------------------------------------------------------------- REAL(wp) :: pa(:,:,:),pb ! input REAL(wp) :: SIGN_ARRAY_3D_B(SIZE(pa,1),SIZE(pa,2),SIZE(pa,3)) ! result IF (pb >= 0.e0 ) THEN SIGN_ARRAY_3D_B = ABS(pa) ELSE SIGN_ARRAY_3D_B =-ABS(pa) ENDIF END FUNCTION SIGN_ARRAY_3D_B #endif END MODULE lib_fortran