Changeset 2304
- Timestamp:
- 2010-10-22T17:56:39+02:00 (13 years ago)
- Location:
- branches/nemo_v3_3_beta/NEMOGCM/NEMO/OPA_SRC
- Files:
-
- 3 edited
Legend:
- Unmodified
- Added
- Removed
-
branches/nemo_v3_3_beta/NEMOGCM/NEMO/OPA_SRC/DOM/dom_oce.F90
r2287 r2304 214 214 !! mpp reproducibility 215 215 !!---------------------------------------------------------------------- 216 #if defined key_mpp_rep 1 || defined key_mpp_re2216 #if defined key_mpp_rep 217 217 LOGICAL, PUBLIC, PARAMETER :: lk_mpp_rep = .TRUE. !: agrif flag 218 218 #else -
branches/nemo_v3_3_beta/NEMOGCM/NEMO/OPA_SRC/lib_fortran.F90
r2287 r2304 25 25 26 26 INTERFACE glob_sum 27 #if defined key_mpp_rep1 28 MODULE PROCEDURE mpp_sum_indep 29 #elif defined key_mpp_rep2 27 #if defined key_mpp_rep 30 28 MODULE PROCEDURE mpp_sum_cmpx 31 29 #else … … 116 114 END FUNCTION glob_sum_3d_a 117 115 118 #if defined key_mpp_rep 2116 #if defined key_mpp_rep 119 117 FUNCTION mpp_sum_cmpx( pval ) 120 118 !!---------------------------------------------------------------------- … … 182 180 #endif 183 181 184 #if defined key_mpp_rep1185 FUNCTION mpp_sum_indep( pval )186 !!----------------------------------------------------------------------187 !! *** ROUTINE mpp_sum_indep ***188 !!189 !! ** Purpose : Sum all elements in the pval array in190 !! an accurate order-independent way.191 !!192 !! ** Method : The code iterates the compensated summation until the193 !! result is guaranteed to be within 4*eps of the true sum.194 !! It then rounds the result to the nearest floating-point195 !! number whose last three bits are zero, thereby196 !! guaranteeing an order-independent result.197 !!198 !! ** Action : This does only work for MPI.199 !! It does not work for SHMEM. !!200 !! References : M. Fisher (ECMWF): IFS code + personal communication201 !! The algorithm is based on Ogita et al. (2005)202 !! SIAM J. Sci. Computing, Vol.26, No.6, pp1955-1988.203 !! This is based in turn on an algorithm204 !! by Knuth (1969, seminumerical algorithms).205 !!206 !! History :207 !! ! 07-07 (K. Mogensen) Original code heavily based on IFS.208 !!----------------------------------------------------------------------209 REAL(wp) mpp_sum_indep210 REAL(wp), DIMENSION(jpi,jpj), INTENT(IN) :: pval211 !212 REAL(wp), DIMENSION(3) :: zbuffl213 REAL(wp), DIMENSION(:), ALLOCATABLE :: zpsums, zperrs, zpcors, zbuffg, zp214 REAL(wp) :: zcorr, zerr, zolderr, zbeta, zres215 INTEGER, DIMENSION(:), allocatable :: irecv, istart216 INTEGER :: ikn, jj217 218 ! initialise to avoid uninitialised variables trapping of some compilers to complain.219 zres = 0.0_wp ; zerr = 0.0_wp ; zbuffl(:) = 0.0_wp220 ! Get global number of elements221 ikn = SIZE(pval)222 # ifdef key_mpp223 CALL mpp_sum( ikn )224 # endif225 ! Check that the the algorithm can work226 227 IF ( ( REAL( 2 * ikn ) * EPSILON( zres ) ) >= 1.0 ) THEN228 CALL ctl_stop('mpp_sum_indep:', &229 & 'size of array is too large to guarantee error bounds')230 ENDIF231 232 ALLOCATE( &233 & zp(MAX(ikn,1)), &234 & zbuffg(jpnij*SIZE(zbuffl)), &235 & zpsums(jpnij), &236 & zperrs(jpnij), &237 & zpcors(jpnij) &238 & )239 240 zolderr = HUGE(zerr)241 242 ! Copy the input array. This avoids some tricky indexing, at the243 ! expense of some inefficency.244 245 IF ( ikn > 0 ) THEN246 zp(:) = RESHAPE(pval, (/ jpi * jpj /) )247 ELSE248 zp(1) = 0.0_wp249 ENDIF250 251 k_loop: DO252 253 ! Transform local arrays254 255 IF ( ikn > 0 ) THEN256 CALL comp_sum ( zp, ikn, zcorr, zerr )257 ENDIF258 259 ! Gather partial sums and error bounds to all processors260 261 zbuffl(1) = zp(MAX(ikn,1))262 263 IF ( ikn > 0 ) THEN264 zbuffl(2) = zerr265 zbuffl(3) = zcorr266 ELSE267 zbuffl(2) = 0.0_wp268 zbuffl(3) = 0.0_wp269 ENDIF270 271 IF ( jpnij > 1 ) THEN272 ALLOCATE( &273 & irecv(jpnij), &274 & istart(jpnij) &275 & )276 CALL mpp_allgatherv( zbuffl, SIZE(zbuffl), &277 & zbuffg, jpnij * SIZE(zbuffl), irecv, istart )278 DEALLOCATE( &279 & irecv, &280 & istart &281 & )282 283 DO jj = 1, jpnij284 zpsums(jj) = zbuffg(1+(jj-1)*SIZE(zbuffl))285 zperrs(jj) = zbuffg(2+(jj-1)*SIZE(zbuffl))286 zpcors(jj) = zbuffg(3+(jj-1)*SIZE(zbuffl))287 END DO288 289 ELSE290 zpsums(1) = zbuffl(1)291 zperrs(1) = zbuffl(2)292 zpcors(1) = zbuffl(3)293 ENDIF294 295 ! Transform partial sums296 CALL comp_sum( zpsums, jpnij, zcorr, zerr )297 zerr = zerr + SUM(zperrs)298 zcorr = zcorr + SUM(zpcors)299 300 ! Calculate final result301 zres = zpsums(jpnij) + zcorr302 303 ! Calculate error bound. This is corollary 4.7 from Ogita et al.304 ! (2005)305 zbeta = zerr *( REAL( 2*ikn, wp ) * EPSILON(zres) ) &306 & /(1.0_wp - REAL( 2*ikn, wp ) * EPSILON(zres) )307 308 zerr = EPSILON(zres) * ABS(zres) &309 & +(zbeta + ( 2.0_wp * EPSILON(zres) * EPSILON(zres) * ABS(zres) &310 & +3.0_wp * TINY(zres) ) )311 312 ! Update the last element of the local array313 zp(MAX(ikn,1)) = zpsums(nproc+1)314 315 ! Exit if the global error is small enough316 IF ( zerr < 4.0_wp * SPACING(zres) ) EXIT k_loop317 318 ! Take appropriate action if ZRES cannot be sufficiently refined.319 IF (zerr >= zolderr) THEN320 CALL ctl_stop('Failed to refine sum', &321 & 'Warning: Possiblity of non-reproducible results')322 ENDIF323 324 zolderr = zerr325 326 ENDDO k_loop327 328 ! At this stage, we have guaranteed that ZRES less than 4*EPS329 ! away from the exact sum. There are only four floating point330 ! numbers in this range. So, if we find the nearest number that331 ! has its last three bits zero, then we have a reproducible result.332 333 mpp_sum_indep = fround(zres)334 335 DEALLOCATE( &336 & zpcors, &337 & zperrs, &338 & zpsums, &339 & zbuffg, &340 & zp &341 & )342 343 END FUNCTION mpp_sum_indep344 345 SUBROUTINE comp_sum( pval, kn, pcorr, perr )346 !!----------------------------------------------------------------------347 !! *** ROUTINE comp_sum ***348 !!349 !! ** Purpose : To perform compensated (i.e. accurate) summation.350 !!351 !! ** Method : These routines transform the elements of the array P,352 !! such that:353 !! 1) pval(kn) contains sum(pval)354 !! 2) pval(1)...pval(kn-1) contain the rounding errors355 !! that were made in calculating sum(pval).356 !! 3) The exact sum of the elements of pval is unmodified.357 !! On return, pcorr contains the sum of the rounding errors,358 !! perr contains the sum of their absolute values.359 !! After calling this routine, an accurate sum of the360 !! elements of pval can be calculated as res=pval(n)+pcorr.361 !!362 !! ** Action :363 !!364 !! References : M. Fisher (ECMWF) IFS code + personal communications365 !!366 !! History :367 !! ! 07-07 (K. Mogensen) Original code heavily based on IFS368 !!--------------------------------------------------------------------369 INTEGER, INTENT(IN) :: kn ! Number of elements in input array370 REAL(wp), DIMENSION(kn), INTENT(INOUT) :: pval ! Input array to be sum on input371 ! pval(kn) = sum (pval) on output372 ! pval(1)...pval(kn-1) = rounding errors on output373 REAL(wp) :: pcorr ! Sum of rounding errors374 REAL(wp) :: perr ! Sum of absolute rounding errors375 !! * Local declarations376 REAL(wp) :: zx, zz, zpsum377 INTEGER :: jj378 379 pcorr = 0.0_wp380 perr = 0.0_wp381 382 zpsum = pval(1)383 384 DO jj = 2, kn385 386 ! It is vital that these 4 lines are not optimized in any way that387 ! changes the results.388 zx = pval(jj) + zpsum389 zz = zx - pval(jj)390 pval(jj-1) = ( pval(jj) - ( zx - zz ) ) + ( zpsum - zz )391 zpsum = zx392 393 ! Accumulate the correction and the error394 pcorr = pcorr + pval(jj-1)395 perr = perr + ABS( pval(jj-1) )396 397 END DO398 399 pval(kn) = zpsum400 401 END SUBROUTINE comp_sum402 403 FUNCTION fround(pres)404 !!----------------------------------------------------------------------405 !! *** ROUTINE fround ***406 !!407 !! ** Purpose : Rounding of floating-point number408 !!409 !! ** Method : Returns the value of PRES rounded to the nearest410 !! floating-point number that has its last three bits zero411 !! This works on big-endian and little-endian machines.412 !!413 !! ** Action :414 !!415 !! References : M. Fisher (ECMWF) IFS code + personal communication416 !!417 !! History :418 !! ! 07-07 (K. Mogensen) Original code heavily based on IFS.419 !!----------------------------------------------------------------------420 REAL(wp) fround421 REAL(wp), INTENT(IN) :: pres ! Value to be rounded422 !423 REAL(wp) :: zz(2), zup, zdown424 INTEGER :: ii(2), iequiv(8), ints_per_real, i_low_word425 INTEGER :: jj426 427 ii(:) = 1428 zz(:) = 1.0_wp429 430 ! Warning: If wp = 64 bits (or 32 bits for key_sp) this will not work.431 432 #if defined key_sp433 ints_per_real = 32 / BIT_SIZE(ii)434 #else435 ints_per_real = 64 / BIT_SIZE(ii)436 #endif437 438 ! Test whether big-endian or little-endian439 440 zup = -1.0_wp441 iequiv(1:ints_per_real) = TRANSFER(zup,iequiv(1:ints_per_real))442 443 IF ( iequiv(1) == 0 ) THEN444 i_low_word = 1 ! Little-endian445 ELSE446 i_low_word = ints_per_real ! Big-endian447 ENDIF448 449 ! Find the nearest number with all 3 lowest-order bits zeroed450 451 iequiv(1:ints_per_real) = transfer(pres,iequiv(1:ints_per_real))452 zup = pres453 zdown = pres454 455 IF (IBITS(iequiv(i_low_word),0,3)/=0) THEN456 457 DO jj = 1, 4458 459 zup = NEAREST( zup, 1.0_wp )460 iequiv(1:ints_per_real) = TRANSFER( zup, iequiv(1:ints_per_real) )461 IF ( IBITS( iequiv(i_low_word), 0, 3 ) == 0 ) EXIT462 463 zdown = NEAREST( zdown, -1.0 )464 iequiv(1:ints_per_real) = TRANSFER( zdown, iequiv(1:ints_per_real))465 IF ( IBITS( iequiv(i_low_word),0,3) == 0 ) EXIT466 467 END DO468 469 IF ( IBITS( iequiv( i_low_word ), 0, 3) /= 0 ) THEN470 CALL ctl_stop('Fround:','This is not possible')471 ENDIF472 473 ENDIF474 475 fround = TRANSFER( iequiv(1:ints_per_real), pres )476 477 END FUNCTION fround478 #endif479 480 481 182 #if defined key_nosignedzero 482 183 FUNCTION SIGN_SCALAR(pa,pb) -
branches/nemo_v3_3_beta/NEMOGCM/NEMO/OPA_SRC/lib_mpp.F90
r2287 r2304 73 73 PUBLIC mppsize, mpprank 74 74 75 # if defined key_mpp_rep176 PUBLIC mpp_allgatherv77 # endif78 79 75 !! * Interfaces 80 76 !! define generic interface for these routine as they are called sometimes … … 88 84 END INTERFACE 89 85 INTERFACE mpp_sum 90 # if defined key_mpp_rep 286 # if defined key_mpp_rep 91 87 MODULE PROCEDURE mppsum_a_int, mppsum_int, mppsum_a_real, mppsum_real, & 92 88 mppsum_realdd, mppsum_a_realdd … … 105 101 END INTERFACE 106 102 107 # if defined key_mpp_rep1108 INTERFACE mpp_allgatherv109 MODULE PROCEDURE mpp_allgatherv_real, mpp_allgatherv_int110 END INTERFACE111 # endif112 113 103 !! ========================= !! 114 104 !! MPI variable definition !! … … 289 279 mynode = mpprank 290 280 ! 291 #if defined key_mpp_rep 2281 #if defined key_mpp_rep 292 282 CALL MPI_OP_CREATE(DDPDD_MPI, .TRUE., MPI_SUMDD, ierr) 293 283 #endif … … 1430 1420 END SUBROUTINE mppsum_real 1431 1421 1432 # if defined key_mpp_rep 21422 # if defined key_mpp_rep 1433 1423 SUBROUTINE mppsum_realdd( ytab, kcom ) 1434 1424 !!---------------------------------------------------------------------- … … 2352 2342 END SUBROUTINE mpi_init_opa 2353 2343 2354 #if defined key_mpp_rep1 2355 SUBROUTINE mpp_allgatherv_real( pvalsin, knoin, pvalsout, ksizeout, & 2356 & knoout, kstartout ) 2357 !!---------------------------------------------------------------------- 2358 !! *** ROUTINE mpp_allgatherv_real *** 2359 !! 2360 !! ** Purpose : Gather a real array on all processors 2361 !! 2362 !! ** Method : MPI all gatherv 2363 !! 2364 !! ** Action : This does only work for MPI. 2365 !! It does not work for SHMEM. 2366 !! 2367 !! References : http://www.mpi-forum.org 2368 !! 2369 !! History : 2370 !! ! 08-08 (K. Mogensen) Original code 2371 !!---------------------------------------------------------------------- 2372 2373 !! * Arguments 2374 INTEGER, INTENT(IN) :: & 2375 & knoin, & 2376 & ksizeout 2377 REAL(wp), DIMENSION(knoin), INTENT(IN) :: & 2378 & pvalsin 2379 REAL(wp), DIMENSION(ksizeout), INTENT(OUT) :: & 2380 & pvalsout 2381 INTEGER, DIMENSION(jpnij), INTENT(OUT) :: & 2382 & kstartout, & 2383 & knoout 2384 2385 !! * Local declarations 2386 INTEGER :: & 2387 & ierr 2388 INTEGER :: & 2389 & ji 2390 !----------------------------------------------------------------------- 2391 ! Call the MPI library to get number of data per processor 2392 !----------------------------------------------------------------------- 2393 CALL mpi_allgather( knoin, 1, mpi_integer, & 2394 & knoout, 1, mpi_integer, & 2395 & mpi_comm_opa, ierr ) 2396 !----------------------------------------------------------------------- 2397 ! Compute starts of each processors contribution 2398 !----------------------------------------------------------------------- 2399 kstartout(1) = 0 2400 DO ji = 2, jpnij 2401 kstartout(ji) = kstartout(ji-1) + knoout(ji-1) 2402 ENDDO 2403 !----------------------------------------------------------------------- 2404 ! Call the MPI library to do the gathering of the data 2405 !----------------------------------------------------------------------- 2406 CALL mpi_allgatherv( pvalsin, knoin, MPI_DOUBLE_PRECISION, & 2407 & pvalsout, knoout, kstartout, MPI_DOUBLE_PRECISION, & 2408 & mpi_comm_opa, ierr ) 2409 2410 END SUBROUTINE mpp_allgatherv_real 2411 2412 SUBROUTINE mpp_allgatherv_int( kvalsin, knoin, kvalsout, ksizeout, & 2413 & knoout, kstartout ) 2414 !!---------------------------------------------------------------------- 2415 !! *** ROUTINE mpp_allgatherv *** 2416 !! 2417 !! ** Purpose : Gather an integer array on all processors 2418 !! 2419 !! ** Method : MPI all gatherv 2420 !! 2421 !! ** Action : This does only work for MPI. 2422 !! It does not work for SHMEM. 2423 !! 2424 !! References : http://www.mpi-forum.org 2425 !! 2426 !! History : 2427 !! ! 06-07 (K. Mogensen) Original code 2428 !!---------------------------------------------------------------------- 2429 2430 !! * Arguments 2431 INTEGER, INTENT(IN) :: & 2432 & knoin, & 2433 & ksizeout 2434 INTEGER, DIMENSION(knoin), INTENT(IN) :: & 2435 & kvalsin 2436 INTEGER, DIMENSION(ksizeout), INTENT(OUT) :: & 2437 & kvalsout 2438 INTEGER, DIMENSION(jpnij), INTENT(OUT) :: & 2439 & kstartout, & 2440 & knoout 2441 2442 !! * Local declarations 2443 INTEGER :: & 2444 & ierr 2445 INTEGER :: & 2446 & ji 2447 !----------------------------------------------------------------------- 2448 ! Call the MPI library to get number of data per processor 2449 !----------------------------------------------------------------------- 2450 CALL mpi_allgather( knoin, 1, mpi_integer, & 2451 & knoout, 1, mpi_integer, & 2452 & mpi_comm_opa, ierr ) 2453 !----------------------------------------------------------------------- 2454 ! Compute starts of each processors contribution 2455 !----------------------------------------------------------------------- 2456 kstartout(1) = 0 2457 DO ji = 2, jpnij 2458 kstartout(ji) = kstartout(ji-1) + knoout(ji-1) 2459 ENDDO 2460 !----------------------------------------------------------------------- 2461 ! Call the MPI library to do the gathering of the data 2462 !----------------------------------------------------------------------- 2463 CALL mpi_allgatherv( kvalsin, knoin, mpi_integer, & 2464 & kvalsout, knoout, kstartout, mpi_integer, & 2465 & mpi_comm_opa, ierr ) 2466 2467 END SUBROUTINE mpp_allgatherv_int 2468 #endif 2469 2470 #if defined key_mpp_rep2 2344 #if defined key_mpp_rep 2471 2345 SUBROUTINE DDPDD_MPI (ydda, yddb, ilen, itype) 2472 2346 !!--------------------------------------------------------------------- … … 2503 2377 !! Default case: Dummy module share memory computing 2504 2378 !!---------------------------------------------------------------------- 2505 # if defined key_mpp_rep12506 USE par_kind2507 USE par_oce2508 2509 PUBLIC mpp_allgatherv2510 # endif2511 2379 2512 2380 INTERFACE mpp_sum … … 2530 2398 END INTERFACE 2531 2399 2532 # if defined key_mpp_rep12533 INTERFACE mpp_allgatherv2534 MODULE PROCEDURE mpp_allgatherv_real, mpp_allgatherv_int2535 END INTERFACE2536 # endif2537 2538 2539 2400 LOGICAL, PUBLIC, PARAMETER :: lk_mpp = .FALSE. !: mpp flag 2540 2401 INTEGER :: ncomm_ice … … 2719 2580 WRITE(*,*) 'mpp_comm_free: You should not have seen this print! error?', kcom 2720 2581 END SUBROUTINE mpp_comm_free 2721 2722 # if defined key_mpp_rep12723 SUBROUTINE mpp_allgatherv_real( pvalsin, knoin, pvalsout, ksizeout, &2724 & knoout, kstartout )2725 INTEGER, INTENT(IN) :: &2726 & knoin, &2727 & ksizeout2728 REAL(wp), DIMENSION(knoin), INTENT(IN) :: &2729 & pvalsin2730 REAL(wp), DIMENSION(ksizeout), INTENT(OUT) :: &2731 & pvalsout2732 INTEGER, DIMENSION(jpnij), INTENT(OUT) :: &2733 & kstartout, &2734 & knoout2735 pvalsout(1:knoin) = pvalsin(1:knoin)2736 kstartout(1) = 02737 knoout(1) = knoin2738 END SUBROUTINE mpp_allgatherv_real2739 2740 SUBROUTINE mpp_allgatherv_int( kvalsin, knoin, kvalsout, ksizeout, &2741 & knoout, kstartout )2742 INTEGER, INTENT(IN) :: &2743 & knoin, &2744 & ksizeout2745 INTEGER, DIMENSION(knoin), INTENT(IN) :: &2746 & kvalsin2747 INTEGER, DIMENSION(ksizeout), INTENT(OUT) :: &2748 & kvalsout2749 INTEGER, DIMENSION(jpnij), INTENT(OUT) :: &2750 & kstartout, &2751 & knoout2752 2753 kvalsout(1:knoin) = kvalsin(1:knoin)2754 kstartout(1) = 02755 knoout(1) = knoin2756 END SUBROUTINE mpp_allgatherv_int2757 # endif2758 2759 2582 #endif 2760 2583 !!----------------------------------------------------------------------
Note: See TracChangeset
for help on using the changeset viewer.