Context Navigation

← Previous Changeset
Next Changeset →

Changeset 2304

Timestamp:

2010-10-22T17:56:39+02:00 (14 years ago)

Author:

rblod

Message:

Choose one option for mpp reproducibility, see ticket #743

Location:

branches/nemo_v3_3_beta/NEMOGCM/NEMO/OPA_SRC

Files:

: 3 edited

DOM/dom_oce.F90 (modified) (1 diff)
lib_fortran.F90 (modified) (3 diffs)
lib_mpp.F90 (modified) (9 diffs)

Legend:

: Unmodified
: Added
: Removed

branches/nemo_v3_3_beta/NEMOGCM/NEMO/OPA_SRC/DOM/dom_oce.F90

r2287	r2304
214	214	!! mpp reproducibility
215	215	!!----------------------------------------------------------------------
216		#if defined key_mpp_rep~~1 \|\| defined key_mpp_re2~~
	216	#if defined key_mpp_rep
217	217	LOGICAL, PUBLIC, PARAMETER :: lk_mpp_rep = .TRUE. !: mpp reproducibility flag
218	218	#else

branches/nemo_v3_3_beta/NEMOGCM/NEMO/OPA_SRC/lib_fortran.F90

-                      r2287
+                      r2304
    INTERFACE glob_sum
+#if defined key_mpp_rep1
+      MODULE PROCEDURE mpp_sum_indep
+#elif defined key_mpp_rep2
+#if defined key_mpp_rep
       MODULE PROCEDURE mpp_sum_cmpx
 #else
 …
    END FUNCTION glob_sum_3d_a
 #if defined key_mpp_rep2
+#if defined key_mpp_rep
    FUNCTION mpp_sum_cmpx( pval )
       !!----------------------------------------------------------------------
 …
 #endif
-#if defined key_mpp_rep1
-   FUNCTION mpp_sum_indep( pval )
-      !!----------------------------------------------------------------------
-      !!               ***  ROUTINE mpp_sum_indep ***
-      !!
-      !! ** Purpose : Sum all elements in the pval array in
-      !!              an accurate order-independent way.
-      !!
-      !! ** Method  : The code iterates the compensated summation until the
-      !!              result is guaranteed to be within 4*eps of the true sum.
-      !!              It then rounds the result to the nearest floating-point
-      !!              number whose last three bits are zero, thereby
-      !!              guaranteeing an order-independent result.
-      !!
-      !!              Two element counts are kept apart:
-      !!                ikl = number of elements held locally; it sizes the
-      !!                      work array zp and drives the local comp_sum
-      !!                ikn = global number of elements (ikl summed over
-      !!                      processors when key_mpp is defined); it is used
-      !!                      only in the error-bound formulae
-      !!
-      !! ** Action  : This does only work for MPI.
-      !!              It does not work for SHMEM.
-      !!
-      !! References : M. Fisher (ECMWF): IFS code + personal communication
-      !!              The algorithm is based on Ogita et al. (2005)
-      !!              SIAM J. Sci. Computing, Vol.26, No.6, pp1955-1988.
-      !!              This is based in turn on an algorithm
-      !!              by Knuth (1969, seminumerical algorithms).
-      !!
-      !! History :
-      !!        !  07-07  (K. Mogensen)  Original code heavily based on IFS.
-      !!----------------------------------------------------------------------
-      REAL(wp) :: mpp_sum_indep                            ! rounded, order-independent sum
-      REAL(wp), DIMENSION(jpi,jpj), INTENT(IN) :: pval     ! local array to be summed
-      !
-      REAL(wp), DIMENSION(3) :: zbuffl                     ! local (sum, error, correction) triplet
-      REAL(wp), DIMENSION(:), ALLOCATABLE :: zpsums, zperrs, zpcors, zbuffg, zp
-      REAL(wp) :: zcorr, zerr,  zolderr, zbeta, zres
-      INTEGER, DIMENSION(:), ALLOCATABLE :: irecv, istart
-      INTEGER :: ikl, ikn, jj
-      ! initialise to avoid uninitialised variables trapping of some compilers to complain.
-      zres = 0.0_wp ; zerr = 0.0_wp ; zcorr = 0.0_wp ; zbuffl(:) = 0.0_wp
-      ! Local number of elements: this is what zp must hold
-      ikl = SIZE(pval)
-      ! Global number of elements: only enters the error bounds
-      ikn = ikl
-# ifdef key_mpp
-      CALL mpp_sum( ikn )
-# endif
-      ! Check that the algorithm can work
-      IF ( ( REAL( 2 * ikn, wp ) * EPSILON( zres ) ) >= 1.0_wp ) THEN
-         CALL ctl_stop('mpp_sum_indep:', &
-            &          'size of array is too large to guarantee error bounds')
-      ENDIF
-      ! Work arrays; irecv/istart are only used by the gather but are
-      ! allocated once here rather than on every pass of the loop.
-      ALLOCATE( &
-         & zp(MAX(ikl,1)),             &
-         & zbuffg(jpnij*SIZE(zbuffl)), &
-         & zpsums(jpnij),              &
-         & zperrs(jpnij),              &
-         & zpcors(jpnij),              &
-         & irecv(jpnij),               &
-         & istart(jpnij)               &
-         & )
-      zolderr = HUGE(zerr)
-      ! Copy the input array. This avoids some tricky indexing, at the
-      ! expense of some inefficiency.
-      IF ( ikl > 0 ) THEN
-         zp(:) = RESHAPE(pval, (/ ikl /) )
-      ELSE
-         zp(1) = 0.0_wp
-      ENDIF
-      k_loop: DO
-         ! Transform local arrays
-         IF ( ikl > 0 ) THEN
-            CALL comp_sum ( zp, ikl, zcorr, zerr )
-         ENDIF
-         ! Gather partial sums and error bounds to all processors
-         zbuffl(1) = zp(MAX(ikl,1))
-         IF ( ikl > 0 ) THEN
-            zbuffl(2) = zerr
-            zbuffl(3) = zcorr
-         ELSE
-            zbuffl(2) = 0.0_wp
-            zbuffl(3) = 0.0_wp
-         ENDIF
-         IF ( jpnij > 1 ) THEN
-            CALL mpp_allgatherv( zbuffl, SIZE(zbuffl), &
-               & zbuffg, jpnij * SIZE(zbuffl), irecv, istart )
-            ! Unpack the interleaved (sum, error, correction) triplets
-            DO jj = 1, jpnij
-               zpsums(jj) = zbuffg(1+(jj-1)*SIZE(zbuffl))
-               zperrs(jj) = zbuffg(2+(jj-1)*SIZE(zbuffl))
-               zpcors(jj) = zbuffg(3+(jj-1)*SIZE(zbuffl))
-            END DO
-         ELSE
-            zpsums(1) = zbuffl(1)
-            zperrs(1) = zbuffl(2)
-            zpcors(1) = zbuffl(3)
-         ENDIF
-         ! Transform partial sums
-         CALL comp_sum( zpsums, jpnij, zcorr, zerr )
-         zerr  = zerr  + SUM(zperrs)
-         zcorr = zcorr + SUM(zpcors)
-         ! Calculate final result
-         zres = zpsums(jpnij) + zcorr
-         ! Calculate error bound. This is corollary 4.7 from Ogita et al.
-         ! (2005)
-         zbeta = zerr *(  REAL( 2*ikn, wp ) * EPSILON(zres) ) &
-            &  /(1.0_wp - REAL( 2*ikn, wp ) * EPSILON(zres) )
-         zerr = EPSILON(zres) * ABS(zres) &
-            & +(zbeta + ( 2.0_wp * EPSILON(zres) * EPSILON(zres) * ABS(zres) &
-            &            +3.0_wp * TINY(zres) ) )
-         ! Update the last element of the local array
-         zp(MAX(ikl,1)) = zpsums(nproc+1)
-         ! Exit if the global error is small enough
-         IF ( zerr < 4.0_wp * SPACING(zres) ) EXIT k_loop
-         ! Take appropriate action if ZRES cannot be sufficiently refined.
-         ! NOTE(review): if ctl_stop returns to the caller instead of aborting,
-         ! nothing below leaves the loop -- confirm ctl_stop semantics.
-         IF (zerr >= zolderr) THEN
-            CALL ctl_stop('Failed to refine sum', &
-               &          'Warning: Possiblity of non-reproducible results')
-         ENDIF
-         zolderr = zerr
-      ENDDO k_loop
-      ! At this stage, we have guaranteed that ZRES less than 4*EPS
-      ! away from the exact sum. There are only four floating point
-      ! numbers in this range. So, if we find the nearest number that
-      ! has its last three bits zero, then we have a reproducible result.
-      mpp_sum_indep = fround(zres)
-      DEALLOCATE( &
-         & istart, &
-         & irecv,  &
-         & zpcors, &
-         & zperrs, &
-         & zpsums, &
-         & zbuffg, &
-         & zp      &
-         & )
-   END FUNCTION mpp_sum_indep
-   SUBROUTINE comp_sum( pval, kn, pcorr, perr )
-      !!----------------------------------------------------------------------
-      !!               ***  ROUTINE comp_sum ***
-      !!
-      !! ** Purpose : To perform compensated (i.e. accurate) summation.
-      !!
-      !! ** Method  : This routine transforms the elements of the array pval,
-      !!              such that:
-      !!              1)  pval(kn)         contains sum(pval)
-      !!              2)  pval(1)...pval(kn-1) contain the rounding errors
-      !!                  that were made in calculating sum(pval).
-      !!              3)  The exact sum of the elements of pval is unmodified.
-      !!              On return, pcorr contains the sum of the rounding errors,
-      !!              perr contains the sum of their absolute values.
-      !!              After calling this routine, an accurate sum of the
-      !!              elements of pval can be calculated as res=pval(kn)+pcorr.
-      !!
-      !! ** Action  : pval is overwritten in place; pcorr and perr are set.
-      !!              For kn < 1 there is nothing to sum: pcorr and perr are
-      !!              returned as zero and pval is not touched.
-      !!
-      !! References : M. Fisher (ECMWF) IFS code + personal communications
-      !!
-      !! History :
-      !!        !  07-07  (K. Mogensen)  Original code heavily based on IFS
-      !!--------------------------------------------------------------------
-      INTEGER, INTENT(IN) :: kn         ! Number of elements in input array
-      REAL(wp), DIMENSION(kn), INTENT(INOUT) :: pval    ! Input array to be sum on input
-                                                        ! pval(kn) = sum (pval) on output
-                                                        ! pval(1)...pval(kn-1) = rounding errors on output
-      REAL(wp), INTENT(OUT) :: pcorr    ! Sum of rounding errors
-      REAL(wp), INTENT(OUT) :: perr     ! Sum of absolute rounding errors
-      !! * Local declarations
-      REAL(wp) :: zx, zz, zpsum
-      INTEGER :: jj
-      pcorr = 0.0_wp
-      perr  = 0.0_wp
-      ! Guard: pval(1) and pval(kn) do not exist for an empty array
-      IF ( kn < 1 ) RETURN
-      zpsum = pval(1)
-      DO jj = 2, kn
-         ! It is vital that these 4 lines are not optimized in any way that
-         ! changes the results.
-         zx         = pval(jj) + zpsum
-         zz         = zx - pval(jj)
-         pval(jj-1) = ( pval(jj) - ( zx - zz ) ) + ( zpsum - zz )
-         zpsum      = zx
-         ! Accumulate the correction and the error
-         pcorr      = pcorr + pval(jj-1)
-         perr       = perr + ABS( pval(jj-1) )
-      END DO
-      pval(kn) = zpsum
-   END SUBROUTINE comp_sum
-   FUNCTION fround(pres)
-      !!----------------------------------------------------------------------
-      !!               ***  ROUTINE fround ***
-      !!
-      !! ** Purpose : Rounding of floating-point number
-      !!
-      !! ** Method  : Returns the value of pres rounded to the nearest
-      !!              floating-point number that has its last three bits zero.
-      !!              The bit pattern is inspected by TRANSFER-ing the real
-      !!              into default integers; the integer word holding the
-      !!              lowest-order bits is located with a run-time probe, so
-      !!              this works on big-endian and little-endian machines.
-      !!              Neighbours are searched alternately upwards and
-      !!              downwards, at most 4 steps each way.
-      !!
-      !! ** Action  : Calls ctl_stop if no suitable neighbour is found.
-      !!
-      !! References : M. Fisher (ECMWF) IFS code + personal communication
-      !!
-      !! History :
-      !!        !  07-07  (K. Mogensen)  Original code heavily based on IFS.
-      !!----------------------------------------------------------------------
-      REAL(wp) :: fround
-      REAL(wp), INTENT(IN) :: pres      ! Value to be rounded
-      !
-      REAL(wp) :: zup, zdown            ! candidates above / below pres
-      INTEGER  :: iequiv(8)             ! integer image of a real's bit pattern
-      INTEGER  :: ints_per_real         ! default integers needed to hold one real
-      INTEGER  :: i_low_word            ! index in iequiv of the lowest-order bits
-      INTEGER  :: jj
-      ! Warning: the sizes below are hard-wired. If wp is NOT 64 bits
-      ! (or NOT 32 bits with key_sp) this will not work.
-#if defined key_sp
-      ints_per_real = 32 / BIT_SIZE(iequiv)
-#else
-      ints_per_real = 64 / BIT_SIZE(iequiv)
-#endif
-      ! Test whether big-endian or little-endian
-      zup = -1.0_wp
-      iequiv(1:ints_per_real) = TRANSFER(zup,iequiv(1:ints_per_real))
-      IF ( iequiv(1) == 0 ) THEN
-         i_low_word = 1                ! Little-endian
-      ELSE
-         i_low_word = ints_per_real    ! Big-endian
-      ENDIF
-      ! Find the nearest number with all 3 lowest-order bits zeroed
-      iequiv(1:ints_per_real) = TRANSFER(pres,iequiv(1:ints_per_real))
-      zup    = pres
-      zdown  = pres
-      IF ( IBITS( iequiv(i_low_word), 0, 3 ) /= 0 ) THEN
-         DO jj = 1, 4
-            ! Step one representable number up, then one down; the first
-            ! candidate whose three low bits vanish is left in iequiv.
-            zup = NEAREST( zup, 1.0_wp )
-            iequiv(1:ints_per_real) = TRANSFER( zup, iequiv(1:ints_per_real) )
-            IF ( IBITS( iequiv(i_low_word), 0, 3 ) == 0 ) EXIT
-            zdown = NEAREST( zdown, -1.0_wp )
-            iequiv(1:ints_per_real) = TRANSFER( zdown, iequiv(1:ints_per_real) )
-            IF ( IBITS( iequiv(i_low_word), 0, 3 ) == 0 ) EXIT
-         END DO
-         IF ( IBITS( iequiv(i_low_word), 0, 3 ) /= 0 ) THEN
-            CALL ctl_stop('Fround:','This is not possible')
-         ENDIF
-      ENDIF
-      ! iequiv now holds the bit pattern of the selected value
-      fround = TRANSFER( iequiv(1:ints_per_real), pres )
-   END FUNCTION fround
-#endif
 #if defined key_nosignedzero
    FUNCTION SIGN_SCALAR(pa,pb)

branches/nemo_v3_3_beta/NEMOGCM/NEMO/OPA_SRC/lib_mpp.F90

-                      r2287
+                      r2304
    PUBLIC   mppsize, mpprank
-# if defined key_mpp_rep1
-   PUBLIC mpp_allgatherv
-# endif
    !! * Interfaces
    !! define generic interface for these routine as they are called sometimes
 …
    END INTERFACE
    INTERFACE mpp_sum
 # if defined key_mpp_rep2
+# if defined key_mpp_rep
       MODULE PROCEDURE mppsum_a_int, mppsum_int, mppsum_a_real, mppsum_real, &
                        mppsum_realdd, mppsum_a_realdd
 …
    END INTERFACE
-# if defined key_mpp_rep1
-   INTERFACE mpp_allgatherv
-      MODULE PROCEDURE mpp_allgatherv_real, mpp_allgatherv_int
-   END INTERFACE
-# endif
    !! ========================= !!
    !!  MPI  variable definition !!
 …
       mynode = mpprank
+      !
 #if defined key_mpp_rep2
+#if defined key_mpp_rep
       CALL MPI_OP_CREATE(DDPDD_MPI, .TRUE., MPI_SUMDD, ierr)
 #endif
 …
    END SUBROUTINE mppsum_real
 # if defined key_mpp_rep2
+# if defined key_mpp_rep
    SUBROUTINE mppsum_realdd( ytab, kcom )
       !!----------------------------------------------------------------------
 …
    END SUBROUTINE mpi_init_opa
+#if defined key_mpp_rep1
+   SUBROUTINE mpp_allgatherv_real( pvalsin, knoin, pvalsout, ksizeout, &
+      &                            knoout, kstartout )
+      !!----------------------------------------------------------------------
+      !!               ***  ROUTINE mpp_allgatherv_real ***
+      !!
+      !! ** Purpose : Gather a real array from every processor onto every
+      !!              processor.
+      !!
+      !! ** Method  : 1) mpi_allgather of the per-processor element counts
+      !!              2) displacements built as the running sum of the counts
+      !!              3) mpi_allgatherv of the data itself
+      !!
+      !! ** Action  : This does only work for MPI.
+      !!              It does not work for SHMEM.
+      !!
+      !! References : http://www.mpi-forum.org
+      !!
+      !! History :
+      !!        !  08-08  (K. Mogensen)  Original code
+      !!----------------------------------------------------------------------
+      !! * Arguments
+      INTEGER,                       INTENT(IN ) :: knoin      ! number of local values sent
+      INTEGER,                       INTENT(IN ) :: ksizeout   ! size of the receive buffer
+      REAL(wp), DIMENSION(knoin),    INTENT(IN ) :: pvalsin    ! local values
+      REAL(wp), DIMENSION(ksizeout), INTENT(OUT) :: pvalsout   ! gathered values
+      INTEGER,  DIMENSION(jpnij),    INTENT(OUT) :: kstartout  ! zero-based start of each contribution
+      INTEGER,  DIMENSION(jpnij),    INTENT(OUT) :: knoout     ! count contributed by each processor
+      !! * Local declarations
+      INTEGER :: ierr      ! MPI return code (not examined)
+      INTEGER :: jn        ! processor loop index
+      INTEGER :: ioffset   ! running displacement
+      !
+      ! 1) every processor learns how many values each one contributes
+      CALL mpi_allgather( knoin,  1, mpi_integer, &
+         &                knoout, 1, mpi_integer, &
+         &                mpi_comm_opa, ierr )
+      !
+      ! 2) displacement of each contribution in the receive buffer
+      ioffset = 0
+      DO jn = 1, jpnij
+         kstartout(jn) = ioffset
+         ioffset       = ioffset + knoout(jn)
+      END DO
+      !
+      ! 3) gather the data
+      CALL mpi_allgatherv( pvalsin,  knoin,  MPI_DOUBLE_PRECISION,            &
+         &                 pvalsout, knoout, kstartout, MPI_DOUBLE_PRECISION, &
+         &                 mpi_comm_opa, ierr )
+      !
+   END SUBROUTINE mpp_allgatherv_real
+   SUBROUTINE mpp_allgatherv_int( kvalsin, knoin, kvalsout, ksizeout, &
+      &                               knoout, kstartout )
+      !!----------------------------------------------------------------------
+      !!               ***  ROUTINE mpp_allgatherv_int ***
+      !!
+      !! ** Purpose : Gather an integer array from every processor onto every
+      !!              processor.
+      !!
+      !! ** Method  : 1) mpi_allgather of the per-processor element counts
+      !!              2) displacements built as the running sum of the counts
+      !!              3) mpi_allgatherv of the data itself
+      !!
+      !! ** Action  : This does only work for MPI.
+      !!              It does not work for SHMEM.
+      !!
+      !! References : http://www.mpi-forum.org
+      !!
+      !! History :
+      !!        !  06-07  (K. Mogensen)  Original code
+      !!----------------------------------------------------------------------
+      !! * Arguments
+      INTEGER,                      INTENT(IN ) :: knoin      ! number of local values sent
+      INTEGER,                      INTENT(IN ) :: ksizeout   ! size of the receive buffer
+      INTEGER, DIMENSION(knoin),    INTENT(IN ) :: kvalsin    ! local values
+      INTEGER, DIMENSION(ksizeout), INTENT(OUT) :: kvalsout   ! gathered values
+      INTEGER, DIMENSION(jpnij),    INTENT(OUT) :: kstartout  ! zero-based start of each contribution
+      INTEGER, DIMENSION(jpnij),    INTENT(OUT) :: knoout     ! count contributed by each processor
+      !! * Local declarations
+      INTEGER :: ierr      ! MPI return code (not examined)
+      INTEGER :: jn        ! processor loop index
+      INTEGER :: ioffset   ! running displacement
+      !
+      ! 1) every processor learns how many values each one contributes
+      CALL mpi_allgather( knoin,  1, mpi_integer, &
+         &                knoout, 1, mpi_integer, &
+         &                mpi_comm_opa, ierr )
+      !
+      ! 2) displacement of each contribution in the receive buffer
+      ioffset = 0
+      DO jn = 1, jpnij
+         kstartout(jn) = ioffset
+         ioffset       = ioffset + knoout(jn)
+      END DO
+      !
+      ! 3) gather the data
+      CALL mpi_allgatherv( kvalsin,  knoin,  mpi_integer,            &
+         &                 kvalsout, knoout, kstartout, mpi_integer, &
+         &                 mpi_comm_opa, ierr )
+      !
+   END SUBROUTINE mpp_allgatherv_int
+#endif
+#if defined key_mpp_rep2
+#if defined key_mpp_rep
    SUBROUTINE DDPDD_MPI (ydda, yddb, ilen, itype)
       !!---------------------------------------------------------------------
 …
    !!   Default case:            Dummy module        share memory computing
    !!----------------------------------------------------------------------
-# if defined key_mpp_rep1
-   USE par_kind
-   USE par_oce
-   PUBLIC mpp_allgatherv
-# endif
    INTERFACE mpp_sum
 …
    END INTERFACE
-# if defined key_mpp_rep1
-   INTERFACE mpp_allgatherv
-      MODULE PROCEDURE mpp_allgatherv_real, mpp_allgatherv_int
-   END INTERFACE
-# endif
    LOGICAL, PUBLIC, PARAMETER ::   lk_mpp = .FALSE.      !: mpp flag
    INTEGER :: ncomm_ice
 …
       WRITE(*,*) 'mpp_comm_free: You should not have seen this print! error?', kcom
    END SUBROUTINE mpp_comm_free
-# if defined key_mpp_rep1
-   SUBROUTINE mpp_allgatherv_real( pvalsin, knoin, pvalsout, ksizeout, &
-      &                            knoout, kstartout )
-      !! Stand-in for the gather when the dummy (non-MPI) module is compiled:
-      !! the local values are copied to the front of the output buffer and
-      !! only the first count/start entry is filled in.
-      INTEGER,                       INTENT(IN ) :: knoin      ! number of local values
-      INTEGER,                       INTENT(IN ) :: ksizeout   ! size of the output buffer
-      REAL(wp), DIMENSION(knoin),    INTENT(IN ) :: pvalsin    ! local values
-      REAL(wp), DIMENSION(ksizeout), INTENT(OUT) :: pvalsout   ! "gathered" values
-      INTEGER,  DIMENSION(jpnij),    INTENT(OUT) :: kstartout  ! start offsets (entry 1 only is set)
-      INTEGER,  DIMENSION(jpnij),    INTENT(OUT) :: knoout     ! counts (entry 1 only is set)
-      !
-      knoout   (1) = knoin
-      kstartout(1) = 0
-      pvalsout(1:knoin) = pvalsin(1:knoin)
-      !
-   END SUBROUTINE mpp_allgatherv_real
-   SUBROUTINE mpp_allgatherv_int( kvalsin, knoin, kvalsout, ksizeout, &
-      &                               knoout, kstartout )
-      !! Stand-in for the gather when the dummy (non-MPI) module is compiled:
-      !! the local values are copied to the front of the output buffer and
-      !! only the first count/start entry is filled in.
-      INTEGER,                      INTENT(IN ) :: knoin      ! number of local values
-      INTEGER,                      INTENT(IN ) :: ksizeout   ! size of the output buffer
-      INTEGER, DIMENSION(knoin),    INTENT(IN ) :: kvalsin    ! local values
-      INTEGER, DIMENSION(ksizeout), INTENT(OUT) :: kvalsout   ! "gathered" values
-      INTEGER, DIMENSION(jpnij),    INTENT(OUT) :: kstartout  ! start offsets (entry 1 only is set)
-      INTEGER, DIMENSION(jpnij),    INTENT(OUT) :: knoout     ! counts (entry 1 only is set)
-      !
-      knoout   (1) = knoin
-      kstartout(1) = 0
-      kvalsout(1:knoin) = kvalsin(1:knoin)
-      !
-   END SUBROUTINE mpp_allgatherv_int
-# endif
 #endif
    !!----------------------------------------------------------------------

Note: See TracChangeset for help on using the changeset viewer.

New URL for NEMO forge! http://forge.nemo-ocean.eu

Context Navigation

Changeset 2304

Legend:

branches/nemo_v3_3_beta/NEMOGCM/NEMO/OPA_SRC/DOM/dom_oce.F90

branches/nemo_v3_3_beta/NEMOGCM/NEMO/OPA_SRC/lib_fortran.F90

branches/nemo_v3_3_beta/NEMOGCM/NEMO/OPA_SRC/lib_mpp.F90

Download in other formats: