Context Navigation

← Previous Revision
Latest Revision
Next Revision →
Normal
Revision Log

mppsum.F90 @ 3032

Last change on this file since 3032 was 2578, checked in by rblod, 13 years ago
first import of NEMOTAM 3.2.2
File size: 11.6 KB

Rev	Line
[2578]	1	MODULE mppsum
	2	!!======================================================================
	3	!! * MODULE mpp_sum *
	4	!! NEMO: Summation of arrays across processors
	5	!!======================================================================
	6
	7	!!----------------------------------------------------------------------
	8	!! mppsum : Order independent MPP reproducible sum
	9	!!----------------------------------------------------------------------
	10	!! * Modules used
	11	USE par_kind, ONLY : & ! Precision variables
	12	& wp
	13	USE dom_oce, ONLY : & ! Ocean space and time domain variables
	14	& nproc
	15	USE par_oce, ONLY : & ! Ocean parameters
	16	& jpnij
	17	USE lib_mpp
	18	USE mppallgatherv
	19	USE in_out_manager
	20
	21	IMPLICIT NONE
	22
	23	!! * Routine accessibility
	24	PRIVATE
	25
	26	PUBLIC &
	27	& mpp_sum_indep, & ! Order independent MPP reproducible sum
	28	& comp_sum, & ! Perform compensated (i.e. accurate) summation.
	29	& fround ! Rounding of floating-point number
	30
	31	CONTAINS
	32
	33	FUNCTION mpp_sum_indep( pval, kn )
	34	!!----------------------------------------------------------------------
	35	!! * ROUTINE mpp_sum_indep *
	36	!!
	37	!! ** Purpose : Sum all elements in the pval array in
	38	!! an accurate order-independent way.
	39	!!
	40	!! ** Method : The code iterates the compensated summation until the
	41	!! result is guaranteed to be within 4*eps of the true sum.
	42	!! It then rounds the result to the nearest floating-point
	43	!! number whose last three bits are zero, thereby
	44	!! guaranteeing an order-independent result.
	45	!!
	46	!! ** Action : This does only work for MPI.
	47	!! It does not work for SHMEM.
	48	!!
	49	!! References : M. Fisher (ECMWF): IFS code + personal communication
	50	!! The algorithm is based on Ogita et al. (2005)
	51	!! SIAM J. Sci. Computing, Vol.26, No.6, pp1955-1988.
	52	!! This is based in turn on an algorithm
	53	!! by Knuth (1969, seminumerical algorithms).
	54	!!
	55	!! History :
	56	!! ! 07-07 (K. Mogensen) Original code heavily based on IFS.
	57	!!----------------------------------------------------------------------
	58	!! * Function return
	59	REAL(wp) mpp_sum_indep
	60	!! * Arguments
	61	INTEGER, INTENT(IN) :: &
	62	& kn
	63	REAL(wp), DIMENSION(kn), INTENT(IN) :: &
	64	& pval
	65	!! * Local declarations
	66	REAL(wp), DIMENSION(3) ::&
	67	& zbuffl
	68	REAL(wp), DIMENSION(:), ALLOCATABLE :: &
	69	& zpsums, &
	70	& zperrs, &
	71	& zpcors, &
	72	& zbuffg, &
	73	& zp
	74	REAL(wp) :: &
	75	& zcorr, &
	76	& zerr, &
	77	& zolderr, &
	78	& zbeta, &
	79	& zres
	80	INTEGER, DIMENSION(:), allocatable :: &
	81	& irecv, &
	82	& istart
	83	INTEGER :: &
	84	& ing
	85	INTEGER :: &
	86	& jj
	87
	88
	89	! Get global number of elements
	90	ing = kn
	91	#ifdef key_mpp
	92	CALL mpp_sum( ing )
	93	#endif
	94	! Check that the the algorithm can work
	95
	96	IF ( ( REAL( 2 * ing ) * EPSILON( zres ) ) >= 1.0 ) THEN
	97
	98	CALL ctl_stop('mpp_sum_indep:', &
	99	& 'kn is too large to guarantee error bounds')
	100
	101	ENDIF
	102
	103	ALLOCATE( &
	104	& zp(MAX(kn,1)), &
	105	& zbuffg(jpnij*SIZE(zbuffl)), &
	106	& zpsums(jpnij), &
	107	& zperrs(jpnij), &
	108	& zpcors(jpnij) &
	109	& )
	110
	111	zolderr = HUGE(zerr)
	112
	113	! Copy the input array. This avoids some tricky indexing, at the
	114	! expense of some inefficency.
	115
	116	IF ( kn > 0 ) THEN
	117
	118	zp(:) = pval(:)
	119
	120	ELSE
	121
	122	zp(1) = 0.0_wp
	123
	124	ENDIF
	125
	126	k_loop: DO
	127
	128	! Transform local arrays
	129
	130	IF ( kn > 0 ) THEN
	131
	132	CALL comp_sum ( zp, kn, zcorr, zerr )
	133
	134	ENDIF
	135
	136	! Gather partial sums and error bounds to all processors
	137
	138	zbuffl(1) = zp(MAX(kn,1))
	139
	140	IF ( kn > 0 ) THEN
	141
	142	zbuffl(2) = zerr
	143	zbuffl(3) = zcorr
	144
	145	ELSE
	146
	147	zbuffl(2) = 0.0_wp
	148	zbuffl(3) = 0.0_wp
	149
	150	ENDIF
	151
	152	IF ( jpnij > 1 ) THEN
	153
	154	ALLOCATE( &
	155	& irecv(jpnij), &
	156	& istart(jpnij) &
	157	& )
	158
	159	CALL mpp_allgatherv( zbuffl, SIZE(zbuffl), &
	160	& zbuffg, jpnij * SIZE(zbuffl), irecv, istart )
	161
	162	DEALLOCATE( &
	163	& irecv, &
	164	& istart &
	165	& )
	166
	167	DO jj = 1, jpnij
	168
	169	zpsums(jj) = zbuffg(1+(jj-1)*SIZE(zbuffl))
	170	zperrs(jj) = zbuffg(2+(jj-1)*SIZE(zbuffl))
	171	zpcors(jj) = zbuffg(3+(jj-1)*SIZE(zbuffl))
	172
	173	END DO
	174
	175	ELSE
	176
	177	zpsums(1) = zbuffl(1)
	178	zperrs(1) = zbuffl(2)
	179	zpcors(1) = zbuffl(3)
	180
	181	ENDIF
	182
	183	! Transform partial sums
	184
	185	CALL comp_sum( zpsums, jpnij, zcorr, zerr )
	186	zerr = zerr + SUM(zperrs)
	187	zcorr = zcorr + SUM(zpcors)
	188
	189	! Calculate final result
	190
	191	zres = zpsums(jpnij) + zcorr
	192
	193	! Calculate error bound. This is corollary 4.7 from Ogita et al.
	194	! (2005)
	195
	196	zbeta = zerr ( REAL( 2ing, wp ) * EPSILON(zres) ) &
	197	& /(1.0_wp - REAL( 2ing, wp ) EPSILON(zres) )
	198
	199	zerr = EPSILON(zres) * ABS(zres) &
	200	& +(zbeta + ( 2.0_wp * EPSILON(zres) * EPSILON(zres) * ABS(zres) &
	201	& +3.0_wp * TINY(zres) ) )
	202
	203	! Update the last element of the local array
	204
	205	zp(MAX(kn,1)) = zpsums(nproc+1)
	206
	207	! Exit if the global error is small enough
	208
	209	IF ( zerr < 4.0_wp * SPACING(zres) ) EXIT k_loop
	210
	211	! Take appropriate action if ZRES cannot be sufficiently refined.
	212
	213	IF (zerr >= zolderr) THEN
	214
	215	CALL ctl_stop('Failed to refine sum', &
	216	& 'Warning: Possiblity of non-reproducible results')
	217
	218	ENDIF
	219
	220	zolderr = zerr
	221
	222	ENDDO k_loop
	223
	224	! At this stage, we have guaranteed that ZRES less than 4*EPS
	225	! away from the exact sum. There are only four floating point
	226	! numbers in this range. So, if we find the nearest number that
	227	! has its last three bits zero, then we have a reproducible result.
	228
	229	mpp_sum_indep = fround(zres)
	230
	231	DEALLOCATE( &
	232	& zpcors, &
	233	& zperrs, &
	234	& zpsums, &
	235	& zbuffg, &
	236	& zp &
	237	& )
	238
	239	END FUNCTION mpp_sum_indep
	240
	241	SUBROUTINE comp_sum( pval, kn, pcorr, perr )
	242	!!----------------------------------------------------------------------
	243	!! * ROUTINE comp_sum *
	244	!!
	245	!! ** Purpose : To perform compensated (i.e. accurate) summation.
	246	!!
	247	!! ** Method : These routines transform the elements of the array P,
	248	!! such that:
	249	!! 1) pval(kn) contains sum(pval)
	250	!! 2) pval(1)...pval(kn-1) contain the rounding errors
	251	!! that were made in calculating sum(pval).
	252	!! 3) The exact sum of the elements of pval is unmodified.
	253	!! On return, pcorr contains the sum of the rounding errors,
	254	!! perr contains the sum of their absolute values.
	255	!! After calling this routine, an accurate sum of the
	256	!! elements of pval can be calculated as res=pval(n)+pcorr.
	257	!!
	258	!! ** Action :
	259	!!
	260	!! References : M. Fisher (ECMWF) IFS code + personal communications
	261	!!
	262	!! History :
	263	!! ! 07-07 (K. Mogensen) Original code heavily based on IFS
	264	!!----------------------------------------------------------------------
	265	!! * Arguments
	266	INTEGER, INTENT(IN) :: &
	267	& kn ! Number of elements in input array
	268	REAL(wp), DIMENSION(kn), INTENT(INOUT) :: &
	269	& pval ! Input array to be sum on input
	270	! pval(kn) = sum (pval) on output
	271	! pval(1)...pval(kn-1) = rounding errors on output
	272	REAL(wp) :: &
	273	& pcorr, & ! Sum of rounding errors
	274	& perr ! Sum of absolute rounding errors
	275	!! * Local declarations
	276	REAL(wp) :: &
	277	& zx, &
	278	& zz, &
	279	& zpsum
	280	integer :: &
	281	& jj
	282
	283	pcorr = 0.0_wp
	284	perr = 0.0_wp
	285
	286	zpsum = pval(1)
	287
	288	DO jj = 2, kn
	289
	290	! It is vital that these 4 lines are not optimized in any way that
	291	! changes the results.
	292
	293	zx = pval(jj) + zpsum
	294	zz = zx - pval(jj)
	295	pval(jj-1) = ( pval(jj) - ( zx - zz ) ) + ( zpsum - zz )
	296	zpsum = zx
	297
	298	! Accumulate the correction and the error
	299
	300	pcorr = pcorr + pval(jj-1)
	301	perr = perr + ABS( pval(jj-1) )
	302
	303	END DO
	304
	305	pval(kn) = zpsum
	306
	307	END SUBROUTINE comp_sum
	308
	309	FUNCTION fround(pres)
	310	!!----------------------------------------------------------------------
	311	!! * ROUTINE fround *
	312	!!
	313	!! ** Purpose : Rounding of floating-point number
	314	!!
	315	!! ** Method : Returns the value of PRES rounded to the nearest
	316	!! floating-point number that has its last three bits zero
	317	!! This works on big-endian and little-endian machines.
	318	!!
	319	!! ** Action :
	320	!!
	321	!! References : M. Fisher (ECMWF) IFS code + personal communication
	322	!!
	323	!! History :
	324	!! ! 07-07 (K. Mogensen) Original code heavily based on IFS.
	325	!!----------------------------------------------------------------------
	326	!! * Function return
	327	REAL(wp) fround
	328	!! * Arguments
	329	REAL(wp), INTENT(IN) :: &
	330	& pres ! Value to be rounded
	331	!! * Local declarations
	332	REAL(wp) :: &
	333	& zz(2), &
	334	& zup, &
	335	& zdown
	336	INTEGER :: &
	337	& ii(2), &
	338	& iequiv(8), &
	339	& ints_per_real, &
	340	& i_low_word
	341	INTEGER :: &
	342	& jj
	343
	344	ii(:) = 1
	345	zz(:) = 1.0_wp
	346
	347	! Warning: If wp = 64 bits (or 32 bits for key_sp) this will not work.
	348
	349	#if defined key_sp
	350	ints_per_real = 32 / BIT_SIZE(ii)
	351	#else
	352	ints_per_real = 64 / BIT_SIZE(ii)
	353	#endif
	354
	355	! Test whether big-endian or little-endian
	356
	357	zup = -1.0_wp
	358	iequiv(1:ints_per_real) = TRANSFER(zup,iequiv(1:ints_per_real))
	359
	360	IF ( iequiv(1) == 0 ) THEN
	361	i_low_word = 1 ! Little-endian
	362	ELSE
	363	i_low_word = ints_per_real ! Big-endian
	364	ENDIF
	365
	366	! Find the nearest number with all 3 lowest-order bits zeroed
	367
	368	iequiv(1:ints_per_real) = transfer(pres,iequiv(1:ints_per_real))
	369	zup = pres
	370	zdown = pres
	371
	372	IF (IBITS(iequiv(i_low_word),0,3)/=0) THEN
	373
	374	DO jj = 1, 4
	375
	376	zup = NEAREST( zup, 1.0_wp )
	377	iequiv(1:ints_per_real) = TRANSFER( zup, iequiv(1:ints_per_real) )
	378
	379	IF ( IBITS( iequiv(i_low_word), 0, 3 ) == 0 ) EXIT
	380
	381	zdown = NEAREST( zdown, -1.0 )
	382
	383	iequiv(1:ints_per_real) = TRANSFER( zdown, iequiv(1:ints_per_real))
	384
	385	IF ( IBITS( iequiv(i_low_word),0,3) == 0 ) EXIT
	386
	387	END DO
	388
	389	IF ( IBITS( iequiv( i_low_word ), 0, 3) /= 0 ) THEN
	390
	391	CALL ctl_stop('Fround:','This is not possible')
	392
	393	ENDIF
	394
	395	ENDIF
	396
	397	fround = TRANSFER( iequiv(1:ints_per_real), pres )
	398
	399	END FUNCTION fround
	400
	401	END MODULE mppsum

Note: See TracBrowser for help on using the repository browser.

New URL for NEMO forge! http://forge.nemo-ocean.eu

Context Navigation

source: branches/TAM_V3_2_2/NEMO/OPA_SRC/mppsum.F90 @ 3032

Download in other formats: