Context Navigation

mppsum.F90 @ 2587

Last change on this file since 2587 was 2587, checked in by vidard, 13 years ago
refer to ticket #798
Property svn:executable set to ``*
File size: 11.8 KB

Line
1	MODULE mppsum
2	!!======================================================================
3	!! * MODULE mpp_sum *
4	!! NEMO: Summation of arrays across processors
5	!!======================================================================
6
7	!!----------------------------------------------------------------------
8	!! mppsum : Order independent MPP reproducible sum
9	!!----------------------------------------------------------------------
10	!! * Modules used
11	USE par_kind, ONLY : & ! Precision variables
12	& wp
13	USE dom_oce, ONLY : & ! Ocean space and time domain variables
14	& nproc
15	USE par_oce, ONLY : & ! Ocean parameters
16	& jpnij
17	USE lib_mpp
18	USE mppallgatherv
19	USE in_out_manager
20
21	IMPLICIT NONE
22
23	!! * Routine accessibility
24	PRIVATE
25
26	PUBLIC &
27	& mpp_sum_indep, & ! Order independent MPP reproducible sum
28	& comp_sum, & ! Perform compensated (i.e. accurate) summation.
29	& fround ! Rounding of floating-point number
30
31	CONTAINS
32
33	FUNCTION mpp_sum_indep( pval, kn )
34	!!----------------------------------------------------------------------
35	!! * ROUTINE mpp_sum_indep *
36	!!
37	!! ** Purpose : Sum all elements in the pval array in
38	!! an accurate order-independent way.
39	!!
40	!! ** Method : The code iterates the compensated summation until the
41	!! result is guaranteed to be within 4*eps of the true sum.
42	!! It then rounds the result to the nearest floating-point
43	!! number whose last three bits are zero, thereby
44	!! guaranteeing an order-independent result.
45	!!
46	!! ** Action : This does only work for MPI.
47	!! It does not work for SHMEM.
48	!!
49	!! References : M. Fisher (ECMWF): IFS code + personal communication
50	!! The algorithm is based on Ogita et al. (2005)
51	!! SIAM J. Sci. Computing, Vol.26, No.6, pp1955-1988.
52	!! This is based in turn on an algorithm
53	!! by Knuth (1969, seminumerical algorithms).
54	!!
55	!! History :
56	!! ! 07-07 (K. Mogensen) Original code heavily based on IFS.
57	!!----------------------------------------------------------------------
58	!! * Function return
59	REAL(wp) mpp_sum_indep
60	!! * Arguments
61	INTEGER, INTENT(IN) :: &
62	& kn
63	REAL(wp), DIMENSION(kn), INTENT(IN) :: &
64	& pval
65	!! * Local declarations
66	REAL(wp), DIMENSION(3) ::&
67	& zbuffl
68	REAL(wp), DIMENSION(:), ALLOCATABLE :: &
69	& zpsums, &
70	& zperrs, &
71	& zpcors, &
72	& zbuffg, &
73	& zp
74	REAL(wp) :: &
75	& zcorr, &
76	& zerr, &
77	& zolderr, &
78	& zbeta, &
79	& zres
80	INTEGER, DIMENSION(:), allocatable :: &
81	& irecv, &
82	& istart
83	INTEGER :: &
84	& ing
85	INTEGER :: &
86	& jj
87
88	! initialise to avoid uninitialised variables trapping of some compilers to complain.
89	zres = 0.0_wp ; zerr = 0.0_wp ; zbuffl(:) = 0.0_wp
90	! Get global number of elements
91	ing = kn
92	#ifdef key_mpp
93	CALL mpp_sum( ing )
94	#endif
95	! Check that the the algorithm can work
96
97	IF ( ( REAL( 2 * ing ) * EPSILON( zres ) ) >= 1.0 ) THEN
98
99	CALL ctl_stop('mpp_sum_indep:', &
100	& 'kn is too large to guarantee error bounds')
101
102	ENDIF
103
104	ALLOCATE( &
105	& zp(MAX(kn,1)), &
106	& zbuffg(jpnij*SIZE(zbuffl)), &
107	& zpsums(jpnij), &
108	& zperrs(jpnij), &
109	& zpcors(jpnij) &
110	& )
111
112	zolderr = HUGE(zerr)
113
114	! Copy the input array. This avoids some tricky indexing, at the
115	! expense of some inefficency.
116
117	IF ( kn > 0 ) THEN
118
119	zp(:) = pval(:)
120
121	ELSE
122
123	zp(1) = 0.0_wp
124
125	ENDIF
126
127	k_loop: DO
128
129	! Transform local arrays
130
131	IF ( kn > 0 ) THEN
132
133	CALL comp_sum ( zp, kn, zcorr, zerr )
134
135	ENDIF
136
137	! Gather partial sums and error bounds to all processors
138
139	zbuffl(1) = zp(MAX(kn,1))
140
141	IF ( kn > 0 ) THEN
142
143	zbuffl(2) = zerr
144	zbuffl(3) = zcorr
145
146	ELSE
147
148	zbuffl(2) = 0.0_wp
149	zbuffl(3) = 0.0_wp
150
151	ENDIF
152
153	IF ( jpnij > 1 ) THEN
154
155	ALLOCATE( &
156	& irecv(jpnij), &
157	& istart(jpnij) &
158	& )
159
160	CALL mpp_allgatherv( zbuffl, SIZE(zbuffl), &
161	& zbuffg, jpnij * SIZE(zbuffl), irecv, istart )
162
163	DEALLOCATE( &
164	& irecv, &
165	& istart &
166	& )
167
168	DO jj = 1, jpnij
169
170	zpsums(jj) = zbuffg(1+(jj-1)*SIZE(zbuffl))
171	zperrs(jj) = zbuffg(2+(jj-1)*SIZE(zbuffl))
172	zpcors(jj) = zbuffg(3+(jj-1)*SIZE(zbuffl))
173
174	END DO
175
176	ELSE
177
178	zpsums(1) = zbuffl(1)
179	zperrs(1) = zbuffl(2)
180	zpcors(1) = zbuffl(3)
181
182	ENDIF
183
184	! Transform partial sums
185
186	CALL comp_sum( zpsums, jpnij, zcorr, zerr )
187	zerr = zerr + SUM(zperrs)
188	zcorr = zcorr + SUM(zpcors)
189
190	! Calculate final result
191
192	zres = zpsums(jpnij) + zcorr
193
194	! Calculate error bound. This is corollary 4.7 from Ogita et al.
195	! (2005)
196
197	zbeta = zerr ( REAL( 2ing, wp ) * EPSILON(zres) ) &
198	& /(1.0_wp - REAL( 2ing, wp ) EPSILON(zres) )
199
200	zerr = EPSILON(zres) * ABS(zres) &
201	& +(zbeta + ( 2.0_wp * EPSILON(zres) * EPSILON(zres) * ABS(zres) &
202	& +3.0_wp * TINY(zres) ) )
203
204	! Update the last element of the local array
205
206	zp(MAX(kn,1)) = zpsums(nproc+1)
207
208	! Exit if the global error is small enough
209
210	IF ( zerr < 4.0_wp * SPACING(zres) ) EXIT k_loop
211
212	! Take appropriate action if ZRES cannot be sufficiently refined.
213
214	IF (zerr >= zolderr) THEN
215
216	CALL ctl_stop('Failed to refine sum', &
217	& 'Warning: Possiblity of non-reproducible results')
218
219	ENDIF
220
221	zolderr = zerr
222
223	ENDDO k_loop
224
225	! At this stage, we have guaranteed that ZRES less than 4*EPS
226	! away from the exact sum. There are only four floating point
227	! numbers in this range. So, if we find the nearest number that
228	! has its last three bits zero, then we have a reproducible result.
229
230	mpp_sum_indep = fround(zres)
231
232	DEALLOCATE( &
233	& zpcors, &
234	& zperrs, &
235	& zpsums, &
236	& zbuffg, &
237	& zp &
238	& )
239
240	END FUNCTION mpp_sum_indep
241
242	SUBROUTINE comp_sum( pval, kn, pcorr, perr )
243	!!----------------------------------------------------------------------
244	!! * ROUTINE comp_sum *
245	!!
246	!! ** Purpose : To perform compensated (i.e. accurate) summation.
247	!!
248	!! ** Method : These routines transform the elements of the array P,
249	!! such that:
250	!! 1) pval(kn) contains sum(pval)
251	!! 2) pval(1)...pval(kn-1) contain the rounding errors
252	!! that were made in calculating sum(pval).
253	!! 3) The exact sum of the elements of pval is unmodified.
254	!! On return, pcorr contains the sum of the rounding errors,
255	!! perr contains the sum of their absolute values.
256	!! After calling this routine, an accurate sum of the
257	!! elements of pval can be calculated as res=pval(n)+pcorr.
258	!!
259	!! ** Action :
260	!!
261	!! References : M. Fisher (ECMWF) IFS code + personal communications
262	!!
263	!! History :
264	!! ! 07-07 (K. Mogensen) Original code heavily based on IFS
265	!!----------------------------------------------------------------------
266	!! * Arguments
267	INTEGER, INTENT(IN) :: &
268	& kn ! Number of elements in input array
269	REAL(wp), DIMENSION(kn), INTENT(INOUT) :: &
270	& pval ! Input array to be sum on input
271	! pval(kn) = sum (pval) on output
272	! pval(1)...pval(kn-1) = rounding errors on output
273	REAL(wp) :: &
274	& pcorr, & ! Sum of rounding errors
275	& perr ! Sum of absolute rounding errors
276	!! * Local declarations
277	REAL(wp) :: &
278	& zx, &
279	& zz, &
280	& zpsum
281	integer :: &
282	& jj
283
284	pcorr = 0.0_wp
285	perr = 0.0_wp
286
287	zpsum = pval(1)
288
289	DO jj = 2, kn
290
291	! It is vital that these 4 lines are not optimized in any way that
292	! changes the results.
293
294	zx = pval(jj) + zpsum
295	zz = zx - pval(jj)
296	pval(jj-1) = ( pval(jj) - ( zx - zz ) ) + ( zpsum - zz )
297	zpsum = zx
298
299	! Accumulate the correction and the error
300
301	pcorr = pcorr + pval(jj-1)
302	perr = perr + ABS( pval(jj-1) )
303
304	END DO
305
306	pval(kn) = zpsum
307
308	END SUBROUTINE comp_sum
309
310	FUNCTION fround(pres)
311	!!----------------------------------------------------------------------
312	!! * ROUTINE fround *
313	!!
314	!! ** Purpose : Rounding of floating-point number
315	!!
316	!! ** Method : Returns the value of PRES rounded to the nearest
317	!! floating-point number that has its last three bits zero
318	!! This works on big-endian and little-endian machines.
319	!!
320	!! ** Action :
321	!!
322	!! References : M. Fisher (ECMWF) IFS code + personal communication
323	!!
324	!! History :
325	!! ! 07-07 (K. Mogensen) Original code heavily based on IFS.
326	!!----------------------------------------------------------------------
327	!! * Function return
328	REAL(wp) fround
329	!! * Arguments
330	REAL(wp), INTENT(IN) :: &
331	& pres ! Value to be rounded
332	!! * Local declarations
333	REAL(wp) :: &
334	& zz(2), &
335	& zup, &
336	& zdown
337	INTEGER :: &
338	& ii(2), &
339	& iequiv(8), &
340	& ints_per_real, &
341	& i_low_word
342	INTEGER :: &
343	& jj
344
345	ii(:) = 1
346	zz(:) = 1.0_wp
347
348	! Warning: If wp = 64 bits (or 32 bits for key_sp) this will not work.
349
350	#if defined key_sp
351	ints_per_real = 32 / BIT_SIZE(ii)
352	#else
353	ints_per_real = 64 / BIT_SIZE(ii)
354	#endif
355
356	! Test whether big-endian or little-endian
357
358	zup = -1.0_wp
359	iequiv(1:ints_per_real) = TRANSFER(zup,iequiv(1:ints_per_real))
360
361	IF ( iequiv(1) == 0 ) THEN
362	i_low_word = 1 ! Little-endian
363	ELSE
364	i_low_word = ints_per_real ! Big-endian
365	ENDIF
366
367	! Find the nearest number with all 3 lowest-order bits zeroed
368
369	iequiv(1:ints_per_real) = transfer(pres,iequiv(1:ints_per_real))
370	zup = pres
371	zdown = pres
372
373	IF (IBITS(iequiv(i_low_word),0,3)/=0) THEN
374
375	DO jj = 1, 4
376
377	zup = NEAREST( zup, 1.0_wp )
378	iequiv(1:ints_per_real) = TRANSFER( zup, iequiv(1:ints_per_real) )
379
380	IF ( IBITS( iequiv(i_low_word), 0, 3 ) == 0 ) EXIT
381
382	zdown = NEAREST( zdown, -1.0 )
383
384	iequiv(1:ints_per_real) = TRANSFER( zdown, iequiv(1:ints_per_real))
385
386	IF ( IBITS( iequiv(i_low_word),0,3) == 0 ) EXIT
387
388	END DO
389
390	IF ( IBITS( iequiv( i_low_word ), 0, 3) /= 0 ) THEN
391
392	CALL ctl_stop('Fround:','This is not possible')
393
394	ENDIF
395
396	ENDIF
397
398	fround = TRANSFER( iequiv(1:ints_per_real), pres )
399
400	END FUNCTION fround
401
402	END MODULE mppsum

Note: See TracBrowser for help on using the repository browser.

New URL for NEMO forge! http://forge.nemo-ocean.eu

Context Navigation

source: branches/TAM_V3_0/NEMO/OPA_SRC/mppsum.F90 @ 2587

Download in other formats: