- Timestamp:
- 2010-10-22T17:56:39+02:00 (14 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
branches/nemo_v3_3_beta/NEMOGCM/NEMO/OPA_SRC/lib_fortran.F90
r2287 r2304 25 25 26 26 INTERFACE glob_sum 27 #if defined key_mpp_rep1 28 MODULE PROCEDURE mpp_sum_indep 29 #elif defined key_mpp_rep2 27 #if defined key_mpp_rep 30 28 MODULE PROCEDURE mpp_sum_cmpx 31 29 #else … … 116 114 END FUNCTION glob_sum_3d_a 117 115 118 #if defined key_mpp_rep 2116 #if defined key_mpp_rep 119 117 FUNCTION mpp_sum_cmpx( pval ) 120 118 !!---------------------------------------------------------------------- … … 182 180 #endif 183 181 184 #if defined key_mpp_rep1185 FUNCTION mpp_sum_indep( pval )186 !!----------------------------------------------------------------------187 !! *** ROUTINE mpp_sum_indep ***188 !!189 !! ** Purpose : Sum all elements in the pval array in190 !! an accurate order-independent way.191 !!192 !! ** Method : The code iterates the compensated summation until the193 !! result is guaranteed to be within 4*eps of the true sum.194 !! It then rounds the result to the nearest floating-point195 !! number whose last three bits are zero, thereby196 !! guaranteeing an order-independent result.197 !!198 !! ** Action : This does only work for MPI.199 !! It does not work for SHMEM. !!200 !! References : M. Fisher (ECMWF): IFS code + personal communication201 !! The algorithm is based on Ogita et al. (2005)202 !! SIAM J. Sci. Computing, Vol.26, No.6, pp1955-1988.203 !! This is based in turn on an algorithm204 !! by Knuth (1969, seminumerical algorithms).205 !!206 !! History :207 !! ! 07-07 (K. Mogensen) Original code heavily based on IFS.208 !!----------------------------------------------------------------------209 REAL(wp) mpp_sum_indep210 REAL(wp), DIMENSION(jpi,jpj), INTENT(IN) :: pval211 !212 REAL(wp), DIMENSION(3) :: zbuffl213 REAL(wp), DIMENSION(:), ALLOCATABLE :: zpsums, zperrs, zpcors, zbuffg, zp214 REAL(wp) :: zcorr, zerr, zolderr, zbeta, zres215 INTEGER, DIMENSION(:), allocatable :: irecv, istart216 INTEGER :: ikn, jj217 218 ! initialise to avoid uninitialised variables trapping of some compilers to complain.219 zres = 0.0_wp ; zerr = 0.0_wp ; zbuffl(:) = 0.0_wp220 ! Get global number of elements221 ikn = SIZE(pval)222 # ifdef key_mpp223 CALL mpp_sum( ikn )224 # endif225 ! Check that the the algorithm can work226 227 IF ( ( REAL( 2 * ikn ) * EPSILON( zres ) ) >= 1.0 ) THEN228 CALL ctl_stop('mpp_sum_indep:', &229 & 'size of array is too large to guarantee error bounds')230 ENDIF231 232 ALLOCATE( &233 & zp(MAX(ikn,1)), &234 & zbuffg(jpnij*SIZE(zbuffl)), &235 & zpsums(jpnij), &236 & zperrs(jpnij), &237 & zpcors(jpnij) &238 & )239 240 zolderr = HUGE(zerr)241 242 ! Copy the input array. This avoids some tricky indexing, at the243 ! expense of some inefficency.244 245 IF ( ikn > 0 ) THEN246 zp(:) = RESHAPE(pval, (/ jpi * jpj /) )247 ELSE248 zp(1) = 0.0_wp249 ENDIF250 251 k_loop: DO252 253 ! Transform local arrays254 255 IF ( ikn > 0 ) THEN256 CALL comp_sum ( zp, ikn, zcorr, zerr )257 ENDIF258 259 ! Gather partial sums and error bounds to all processors260 261 zbuffl(1) = zp(MAX(ikn,1))262 263 IF ( ikn > 0 ) THEN264 zbuffl(2) = zerr265 zbuffl(3) = zcorr266 ELSE267 zbuffl(2) = 0.0_wp268 zbuffl(3) = 0.0_wp269 ENDIF270 271 IF ( jpnij > 1 ) THEN272 ALLOCATE( &273 & irecv(jpnij), &274 & istart(jpnij) &275 & )276 CALL mpp_allgatherv( zbuffl, SIZE(zbuffl), &277 & zbuffg, jpnij * SIZE(zbuffl), irecv, istart )278 DEALLOCATE( &279 & irecv, &280 & istart &281 & )282 283 DO jj = 1, jpnij284 zpsums(jj) = zbuffg(1+(jj-1)*SIZE(zbuffl))285 zperrs(jj) = zbuffg(2+(jj-1)*SIZE(zbuffl))286 zpcors(jj) = zbuffg(3+(jj-1)*SIZE(zbuffl))287 END DO288 289 ELSE290 zpsums(1) = zbuffl(1)291 zperrs(1) = zbuffl(2)292 zpcors(1) = zbuffl(3)293 ENDIF294 295 ! Transform partial sums296 CALL comp_sum( zpsums, jpnij, zcorr, zerr )297 zerr = zerr + SUM(zperrs)298 zcorr = zcorr + SUM(zpcors)299 300 ! Calculate final result301 zres = zpsums(jpnij) + zcorr302 303 ! Calculate error bound. This is corollary 4.7 from Ogita et al.304 ! (2005)305 zbeta = zerr *( REAL( 2*ikn, wp ) * EPSILON(zres) ) &306 & /(1.0_wp - REAL( 2*ikn, wp ) * EPSILON(zres) )307 308 zerr = EPSILON(zres) * ABS(zres) &309 & +(zbeta + ( 2.0_wp * EPSILON(zres) * EPSILON(zres) * ABS(zres) &310 & +3.0_wp * TINY(zres) ) )311 312 ! Update the last element of the local array313 zp(MAX(ikn,1)) = zpsums(nproc+1)314 315 ! Exit if the global error is small enough316 IF ( zerr < 4.0_wp * SPACING(zres) ) EXIT k_loop317 318 ! Take appropriate action if ZRES cannot be sufficiently refined.319 IF (zerr >= zolderr) THEN320 CALL ctl_stop('Failed to refine sum', &321 & 'Warning: Possiblity of non-reproducible results')322 ENDIF323 324 zolderr = zerr325 326 ENDDO k_loop327 328 ! At this stage, we have guaranteed that ZRES less than 4*EPS329 ! away from the exact sum. There are only four floating point330 ! numbers in this range. So, if we find the nearest number that331 ! has its last three bits zero, then we have a reproducible result.332 333 mpp_sum_indep = fround(zres)334 335 DEALLOCATE( &336 & zpcors, &337 & zperrs, &338 & zpsums, &339 & zbuffg, &340 & zp &341 & )342 343 END FUNCTION mpp_sum_indep344 345 SUBROUTINE comp_sum( pval, kn, pcorr, perr )346 !!----------------------------------------------------------------------347 !! *** ROUTINE comp_sum ***348 !!349 !! ** Purpose : To perform compensated (i.e. accurate) summation.350 !!351 !! ** Method : These routines transform the elements of the array P,352 !! such that:353 !! 1) pval(kn) contains sum(pval)354 !! 2) pval(1)...pval(kn-1) contain the rounding errors355 !! that were made in calculating sum(pval).356 !! 3) The exact sum of the elements of pval is unmodified.357 !! On return, pcorr contains the sum of the rounding errors,358 !! perr contains the sum of their absolute values.359 !! After calling this routine, an accurate sum of the360 !! elements of pval can be calculated as res=pval(n)+pcorr.361 !!362 !! ** Action :363 !!364 !! References : M. Fisher (ECMWF) IFS code + personal communications365 !!366 !! History :367 !! ! 07-07 (K. Mogensen) Original code heavily based on IFS368 !!--------------------------------------------------------------------369 INTEGER, INTENT(IN) :: kn ! Number of elements in input array370 REAL(wp), DIMENSION(kn), INTENT(INOUT) :: pval ! Input array to be sum on input371 ! pval(kn) = sum (pval) on output372 ! pval(1)...pval(kn-1) = rounding errors on output373 REAL(wp) :: pcorr ! Sum of rounding errors374 REAL(wp) :: perr ! Sum of absolute rounding errors375 !! * Local declarations376 REAL(wp) :: zx, zz, zpsum377 INTEGER :: jj378 379 pcorr = 0.0_wp380 perr = 0.0_wp381 382 zpsum = pval(1)383 384 DO jj = 2, kn385 386 ! It is vital that these 4 lines are not optimized in any way that387 ! changes the results.388 zx = pval(jj) + zpsum389 zz = zx - pval(jj)390 pval(jj-1) = ( pval(jj) - ( zx - zz ) ) + ( zpsum - zz )391 zpsum = zx392 393 ! Accumulate the correction and the error394 pcorr = pcorr + pval(jj-1)395 perr = perr + ABS( pval(jj-1) )396 397 END DO398 399 pval(kn) = zpsum400 401 END SUBROUTINE comp_sum402 403 FUNCTION fround(pres)404 !!----------------------------------------------------------------------405 !! *** ROUTINE fround ***406 !!407 !! ** Purpose : Rounding of floating-point number408 !!409 !! ** Method : Returns the value of PRES rounded to the nearest410 !! floating-point number that has its last three bits zero411 !! This works on big-endian and little-endian machines.412 !!413 !! ** Action :414 !!415 !! References : M. Fisher (ECMWF) IFS code + personal communication416 !!417 !! History :418 !! ! 07-07 (K. Mogensen) Original code heavily based on IFS.419 !!----------------------------------------------------------------------420 REAL(wp) fround421 REAL(wp), INTENT(IN) :: pres ! Value to be rounded422 !423 REAL(wp) :: zz(2), zup, zdown424 INTEGER :: ii(2), iequiv(8), ints_per_real, i_low_word425 INTEGER :: jj426 427 ii(:) = 1428 zz(:) = 1.0_wp429 430 ! Warning: If wp = 64 bits (or 32 bits for key_sp) this will not work.431 432 #if defined key_sp433 ints_per_real = 32 / BIT_SIZE(ii)434 #else435 ints_per_real = 64 / BIT_SIZE(ii)436 #endif437 438 ! Test whether big-endian or little-endian439 440 zup = -1.0_wp441 iequiv(1:ints_per_real) = TRANSFER(zup,iequiv(1:ints_per_real))442 443 IF ( iequiv(1) == 0 ) THEN444 i_low_word = 1 ! Little-endian445 ELSE446 i_low_word = ints_per_real ! Big-endian447 ENDIF448 449 ! Find the nearest number with all 3 lowest-order bits zeroed450 451 iequiv(1:ints_per_real) = transfer(pres,iequiv(1:ints_per_real))452 zup = pres453 zdown = pres454 455 IF (IBITS(iequiv(i_low_word),0,3)/=0) THEN456 457 DO jj = 1, 4458 459 zup = NEAREST( zup, 1.0_wp )460 iequiv(1:ints_per_real) = TRANSFER( zup, iequiv(1:ints_per_real) )461 IF ( IBITS( iequiv(i_low_word), 0, 3 ) == 0 ) EXIT462 463 zdown = NEAREST( zdown, -1.0 )464 iequiv(1:ints_per_real) = TRANSFER( zdown, iequiv(1:ints_per_real))465 IF ( IBITS( iequiv(i_low_word),0,3) == 0 ) EXIT466 467 END DO468 469 IF ( IBITS( iequiv( i_low_word ), 0, 3) /= 0 ) THEN470 CALL ctl_stop('Fround:','This is not possible')471 ENDIF472 473 ENDIF474 475 fround = TRANSFER( iequiv(1:ints_per_real), pres )476 477 END FUNCTION fround478 #endif479 480 481 182 #if defined key_nosignedzero 482 183 FUNCTION SIGN_SCALAR(pa,pb)
Note: See TracChangeset
for help on using the changeset viewer.