Context Navigation

← Previous Change
Next Change →

lib_fortran.F90

Timestamp:

2010-10-22T17:56:39+02:00 (14 years ago)

Author:

rblod

Message:

Choose one option for mpp reproducibility, see ticket #743

File:

: 1 edited

branches/nemo_v3_3_beta/NEMOGCM/NEMO/OPA_SRC/lib_fortran.F90 (modified) (3 diffs)

Legend:

: Unmodified
: Added
: Removed

branches/nemo_v3_3_beta/NEMOGCM/NEMO/OPA_SRC/lib_fortran.F90

-                      r2287
+                      r2304
    INTERFACE glob_sum
+#if defined key_mpp_rep1
+      MODULE PROCEDURE mpp_sum_indep
+#elif defined key_mpp_rep2
+#if defined key_mpp_rep
       MODULE PROCEDURE mpp_sum_cmpx
 #else
 …
    END FUNCTION glob_sum_3d_a
 #if defined key_mpp_rep2
+#if defined key_mpp_rep
    FUNCTION mpp_sum_cmpx( pval )
       !!----------------------------------------------------------------------
 …
 #endif
-#if defined key_mpp_rep1
-   FUNCTION mpp_sum_indep( pval )
-      !!----------------------------------------------------------------------
-      !!               ***  ROUTINE mpp_sum_indep ***
-      !!
-      !! ** Purpose : Sum all elements of the pval array in
-      !!              an accurate, order-independent way.
-      !!
-      !! ** Method  : The code iterates the compensated summation (comp_sum)
-      !!              until the error bound is below 4*SPACING of the result.
-      !!              It then rounds the result (fround) to the nearest
-      !!              floating-point number whose last three bits are zero,
-      !!              thereby guaranteeing an order-independent result.
-      !!
-      !! ** Action  : Returns the rounded sum. This only works for MPI;
-      !!              it does not work for SHMEM.
-      !! References : M. Fisher (ECMWF): IFS code + personal communication
-      !!              The algorithm is based on Ogita et al. (2005)
-      !!              SIAM J. Sci. Computing, Vol.26, No.6, pp1955-1988.
-      !!              This is based in turn on an algorithm
-      !!              by Knuth (1969, seminumerical algorithms).
-      !!
-      !! History :
-      !!        !  07-07  (K. Mogensen)  Original code heavily based on IFS.
-      !!----------------------------------------------------------------------
-      REAL(wp) mpp_sum_indep
-      REAL(wp), DIMENSION(jpi,jpj), INTENT(IN) :: pval
+      !
-      REAL(wp), DIMENSION(3) :: zbuffl
-      REAL(wp), DIMENSION(:), ALLOCATABLE :: zpsums, zperrs, zpcors, zbuffg, zp
-      REAL(wp) :: zcorr, zerr,  zolderr, zbeta, zres
-      INTEGER, DIMENSION(:), allocatable :: irecv, istart
-      INTEGER :: ikn, jj
-      ! Initialise these variables so that compilers trapping uninitialised variables do not complain.
-      zres = 0.0_wp ; zerr = 0.0_wp ; zbuffl(:) = 0.0_wp
-      ! Get the number of elements: the local SIZE(pval), passed through mpp_sum when key_mpp is defined
-      ikn = SIZE(pval)
-# ifdef key_mpp
-      CALL mpp_sum( ikn )
-# endif
-      ! Check that the algorithm can work: the error bound below requires 2*ikn*EPSILON < 1
-      IF ( ( REAL( 2 * ikn ) * EPSILON( zres ) ) >= 1.0 ) THEN
-         CALL ctl_stop('mpp_sum_indep:', &
-            &          'size of array is too large to guarantee error bounds')
-      ENDIF
-      ALLOCATE( &
-         & zp(MAX(ikn,1)),                &
-         & zbuffg(jpnij*SIZE(zbuffl)), &
-         & zpsums(jpnij),              &
-         & zperrs(jpnij),              &
-         & zpcors(jpnij)               &
-         & )
-      zolderr = HUGE(zerr)
-      ! Copy the input array into zp. This avoids some tricky indexing, at the expense of some inefficiency.
-      ! NOTE(review): zp has MAX(ikn,1) elements, RESHAPE gives jpi*jpj; confirm they agree after mpp_sum(ikn).
-      IF ( ikn > 0 ) THEN
-         zp(:) = RESHAPE(pval, (/ jpi * jpj /) )
-      ELSE
-         zp(1) = 0.0_wp
-      ENDIF
-      k_loop: DO
-         ! Transform the local array: comp_sum leaves the running sum in zp(ikn) and rounding errors in zp(1:ikn-1)
-         IF ( ikn > 0 ) THEN
-            CALL comp_sum ( zp, ikn, zcorr, zerr )
-         ENDIF
-         ! Gather partial sums and error bounds to all processors; zbuffl = (partial sum, abs. error sum, correction)
-         zbuffl(1) = zp(MAX(ikn,1))
-         IF ( ikn > 0 ) THEN
-            zbuffl(2) = zerr
-            zbuffl(3) = zcorr
-         ELSE
-            zbuffl(2) = 0.0_wp
-            zbuffl(3) = 0.0_wp
-         ENDIF
-         IF ( jpnij > 1 ) THEN
-            ALLOCATE( &
-               & irecv(jpnij), &
-               & istart(jpnij) &
-               & )
-            CALL mpp_allgatherv( zbuffl, SIZE(zbuffl), &
-               & zbuffg, jpnij * SIZE(zbuffl), irecv, istart )
-            DEALLOCATE( &
-               & irecv, &
-               & istart &
-               & )
-            DO jj = 1, jpnij
-               zpsums(jj) = zbuffg(1+(jj-1)*SIZE(zbuffl))
-               zperrs(jj) = zbuffg(2+(jj-1)*SIZE(zbuffl))
-               zpcors(jj) = zbuffg(3+(jj-1)*SIZE(zbuffl))
-            END DO
-         ELSE
-            zpsums(1) = zbuffl(1)
-            zperrs(1) = zbuffl(2)
-            zpcors(1) = zbuffl(3)
-         ENDIF
-         ! Transform the gathered partial sums, then add the per-task error and correction totals
-         CALL comp_sum( zpsums, jpnij, zcorr, zerr )
-         zerr  = zerr  + SUM(zperrs)
-         zcorr = zcorr + SUM(zpcors)
-         ! Calculate final result: sum of the partial sums plus the accumulated correction
-         zres = zpsums(jpnij) + zcorr
-         ! Calculate error bound. This is corollary 4.7 from Ogita et al.
-         ! (2005)
-         zbeta = zerr *(  REAL( 2*ikn, wp ) * EPSILON(zres) ) &
-            &  /(1.0_wp - REAL( 2*ikn, wp ) * EPSILON(zres) )
-         zerr = EPSILON(zres) * ABS(zres) &
-            & +(zbeta + ( 2.0_wp * EPSILON(zres) * EPSILON(zres) * ABS(zres) &
-            &            +3.0_wp * TINY(zres) ) )
-         ! Update the last element of the local array with entry nproc+1 of the transformed partial sums
-         zp(MAX(ikn,1)) = zpsums(nproc+1)
-         ! Exit if the global error is small enough (below 4*SPACING(zres))
-         IF ( zerr < 4.0_wp * SPACING(zres) ) EXIT k_loop
-         ! Report via ctl_stop if the bound did not shrink. NOTE(review): loop continues unless ctl_stop aborts.
-         IF (zerr >= zolderr) THEN
-            CALL ctl_stop('Failed to refine sum', &
-               &          'Warning: Possiblity of non-reproducible results')
-         ENDIF
-         zolderr = zerr
-      ENDDO k_loop
-      ! At this stage, we have guaranteed that ZRES is less than 4*EPS
-      ! away from the exact sum. There are only four floating point
-      ! numbers in this range. So, if we find the nearest number that
-      ! has its last three bits zero, then we have a reproducible result.
-      mpp_sum_indep = fround(zres)
-      DEALLOCATE( &
-         & zpcors, &
-         & zperrs, &
-         & zpsums, &
-         & zbuffg, &
-         & zp      &
-         & )
-   END FUNCTION mpp_sum_indep
-   SUBROUTINE comp_sum( pval, kn, pcorr, perr )
-      !!----------------------------------------------------------------------
-      !!               ***  ROUTINE comp_sum ***
-      !!
-      !! ** Purpose : To perform compensated (i.e. accurate) summation.
-      !!
-      !! ** Method  : This routine transforms the elements of the array pval,
-      !!              such that:
-      !!              1)  pval(kn)         contains sum(pval)
-      !!              2)  pval(1)...pval(kn-1) contain the rounding errors
-      !!                  that were made in calculating sum(pval).
-      !!              3)  The exact sum of the elements of pval is unmodified.
-      !!              On return, pcorr contains the sum of the rounding errors,
-      !!              perr contains the sum of their absolute values.
-      !!              After calling this routine, an accurate sum of the
-      !!              elements of pval can be calculated as res=pval(kn)+pcorr.
-      !!
-      !! ** Action  : pval, pcorr and perr are overwritten. pval(1) is read
-      !!              unconditionally, so kn >= 1 is assumed.
-      !! References : M. Fisher (ECMWF) IFS code + personal communications
-      !!
-      !! History :
-      !!        !  07-07  (K. Mogensen)  Original code heavily based on IFS
-      !!--------------------------------------------------------------------
-      INTEGER, INTENT(IN) :: kn         ! Number of elements in input array
-      REAL(wp), DIMENSION(kn), INTENT(INOUT) :: pval    ! Array to be summed on input
-                                                        ! pval(kn) = sum (pval) on output
-                                                        ! pval(1)...pval(kn-1) = rounding errors on output
-      REAL(wp) :: pcorr   ! Sum of rounding errors (set on output; no INTENT is declared)
-      REAL(wp) :: perr       ! Sum of absolute rounding errors (set on output; no INTENT is declared)
-      !! * Local declarations
-      REAL(wp) :: zx, zz, zpsum
-      INTEGER :: jj
-      pcorr = 0.0_wp
-      perr  = 0.0_wp
-      zpsum = pval(1)
-      DO jj = 2, kn
-         ! It is vital that these 4 lines are not optimized in any way that
-         ! changes the results: they store the rounding error of pval(jj) + zpsum in pval(jj-1).
-         zx         = pval(jj) + zpsum
-         zz         = zx - pval(jj)
-         pval(jj-1) = ( pval(jj) - ( zx - zz ) ) + ( zpsum - zz )
-         zpsum      = zx
-         ! Accumulate the correction (signed errors) and the error (absolute errors)
-         pcorr      = pcorr + pval(jj-1)
-         perr       = perr + ABS( pval(jj-1) )
-      END DO
-      pval(kn) = zpsum
-   END SUBROUTINE comp_sum
-   FUNCTION fround(pres)
-      !!----------------------------------------------------------------------
-      !!               ***  ROUTINE fround ***
-      !!
-      !! ** Purpose : Rounding of floating-point number
-      !!
-      !! ** Method  : Returns the value of PRES rounded to the nearest
-      !!              floating-point number that has its last three bits zero
-      !!              This works on big-endian and little-endian machines.
-      !!              PRES is returned unchanged if those bits are already zero.
-      !! ** Action  : Calls ctl_stop if no such number is found within 4 steps
-      !!              up and down from PRES.
-      !! References : M. Fisher (ECMWF) IFS code + personal communication
-      !!
-      !! History :
-      !!        !  07-07  (K. Mogensen)  Original code heavily based on IFS.
-      !!----------------------------------------------------------------------
-      REAL(wp) fround
-      REAL(wp), INTENT(IN) :: pres      ! Value to be rounded
+      !
-      REAL(wp) :: zz(2), zup, zdown
-      INTEGER  :: ii(2), iequiv(8), ints_per_real, i_low_word
-      INTEGER  :: jj
-      ii(:) = 1
-      zz(:) = 1.0_wp
-      ! Warning: this assumes wp is 64 bits (32 bits under key_sp); it will not work otherwise.
-#if defined key_sp
-      ints_per_real = 32 / BIT_SIZE(ii)
-#else
-      ints_per_real = 64 / BIT_SIZE(ii)
-#endif
-      ! Test whether big-endian or little-endian, using the integer image of -1.0: a zero first word is the low word
-      zup = -1.0_wp
-      iequiv(1:ints_per_real) = TRANSFER(zup,iequiv(1:ints_per_real))
-      IF ( iequiv(1) == 0 ) THEN
-         i_low_word = 1                ! Little-endian
-      ELSE
-         i_low_word = ints_per_real    ! Big-endian
-      ENDIF
-      ! Find the nearest number with all 3 lowest-order bits zeroed, stepping alternately up (zup) and down (zdown)
-      iequiv(1:ints_per_real) = transfer(pres,iequiv(1:ints_per_real))
-      zup    = pres
-      zdown  = pres
-      IF (IBITS(iequiv(i_low_word),0,3)/=0) THEN
-         DO jj = 1, 4
-            zup = NEAREST( zup, 1.0_wp )
-            iequiv(1:ints_per_real) = TRANSFER( zup, iequiv(1:ints_per_real) )
-            IF ( IBITS( iequiv(i_low_word), 0, 3 ) == 0 ) EXIT
-            zdown = NEAREST( zdown, -1.0 )
-            iequiv(1:ints_per_real) = TRANSFER( zdown, iequiv(1:ints_per_real))
-            IF ( IBITS( iequiv(i_low_word),0,3) == 0 ) EXIT
-         END DO
-         IF ( IBITS( iequiv( i_low_word ), 0, 3) /= 0 ) THEN
-            CALL ctl_stop('Fround:','This is not possible')
-         ENDIF
-      ENDIF
-      fround = TRANSFER( iequiv(1:ints_per_real), pres )
-   END FUNCTION fround
-#endif
 #if defined key_nosignedzero
    FUNCTION SIGN_SCALAR(pa,pb)

Note: See TracChangeset for help on using the changeset viewer.

New URL for NEMO forge! http://forge.nemo-ocean.eu

Context Navigation

Changeset 2304 for branches/nemo_v3_3_beta/NEMOGCM/NEMO/OPA_SRC/lib_fortran.F90

Legend:

branches/nemo_v3_3_beta/NEMOGCM/NEMO/OPA_SRC/lib_fortran.F90

Download in other formats: