Context Navigation

← Previous Revision
Latest Revision
Next Revision →
Blame
Revision Log

mppsumtam.F90 @ 1885

Last change on this file since 1885 was 1885, checked in by rblod, 14 years ago
add TAM sources
File size: 10.9 KB

Line
1	MODULE mppsumtam
2	!!======================================================================
3	!! * MODULE mppsumtam *
4	!! NEMOTAM: Summation of arrays across processors
5	!!======================================================================
6
7	!!----------------------------------------------------------------------
8	!! mpp_sum_inter : Interface for depending on nmppsum
9	!! mpp_sum_simple : Simple sum
10	!! loc_sum_inter : Interface for depending on nmppsum (local version)
11	!! loc_sum_simple : Simple sum (local version)
12	!! loc_sum_indep : Order independent MPP reproducible sum
13	!!----------------------------------------------------------------------
14	!! * Modules used
15	USE par_kind, ONLY : & ! Precision variables
16	& wp
17	USE dom_oce, ONLY : & ! Ocean space and time domain variables
18	& nproc
19	USE par_oce, ONLY : & ! Ocean parameters
20	& jpnij
21	#if defined key_mpp_mpi
22	USE lib_mpp, ONLY : & ! MPP library
23	& mpi_comm_opa
24	#endif
25	USE lib_mpp
26	USE mpp_tam
27	USE mppsum
28	USE in_out_manager
29
30	IMPLICIT NONE
31
32	!! * Routine accessibility
33	PRIVATE
34
35	PUBLIC &
36	& mpp_sum_inter, & ! Interface for depending on nmppsum
37	& mpp_sum_simple, & ! Simple sum
38	& loc_sum_inter, & ! Interface for depending on nmppsum
39	& loc_sum_simple, & ! Simple sum
40	& loc_sum_indep, & ! Order independent local reproducible sum
41	& nmppsum ! Summation control
42
43	!! * Module variables
44	INTEGER :: nmppsum = 2 ! Choice of MPP sum for
45	! (1 = simple)
46	! (2 = order independent)
47
48	CONTAINS
49
50	FUNCTION mpp_sum_inter( pval, kn )
51	!!----------------------------------------------------------------------
52	!! * ROUTINE mpp_sum_inter *
53	!!
54	!! ** Purpose : Summation of arrays across processors
55	!!
56	!! ** Method : Call either mpp_sum_simple or mpp_sum_indep depending on
57	!! nmppsum
58	!!
59	!! ** Action : This does only work for MPI.
60	!! It does not work for SHMEM.
61	!!
62	!! References : http://www.mpi-forum.org
63	!!
64	!! History :
65	!! ! 07-07 (K. Mogensen) Original code
66	!!----------------------------------------------------------------------
67	!! * Function return
68	REAL(wp) mpp_sum_inter
69	!! * Arguments
70	INTEGER, INTENT(IN) :: &
71	& kn
72	REAL(wp), DIMENSION(kn), INTENT(IN) :: &
73	& pval
74	!! * Local declarations
75
76	IF ( nmppsum == 1 ) THEN
77
78	mpp_sum_inter = mpp_sum_simple( pval, kn )
79
80	ELSEIF ( nmppsum == 2 ) THEN
81
82	mpp_sum_inter = mpp_sum_indep( pval, kn )
83
84	ELSE
85
86	CALL ctl_stop( ' mpp_sum: Invalid nmppsum ' )
87
88	ENDIF
89
90	END FUNCTION mpp_sum_inter
91
92	FUNCTION mpp_sum_simple( pval, kn )
93	!!----------------------------------------------------------------------
94	!! * ROUTINE mpp_sum_simple *
95	!!
96	!! ** Purpose : Summation of arrays across processors
97	!!
98	!! ** Method : This routine is the naive implementation using
99	!! mpi_allreduce for the sum of local data
100	!!
101	!! ** Action : This does only work for MPI.
102	!! It does not work for SHMEM.
103	!!
104	!! References : http://www.mpi-forum.org
105	!!
106	!! History :
107	!! ! 07-07 (K. Mogensen) Original code
108	!!----------------------------------------------------------------------
109	!! * Function return
110	REAL(wp) mpp_sum_simple
111	!! * Arguments
112	INTEGER, INTENT(IN) :: &
113	& kn
114	REAL(wp), DIMENSION(kn), INTENT(IN) :: &
115	& pval
116	!! * Local declarations
117	REAL(wp) :: &
118	& zres
119	INTEGER :: &
120	& ierr
121	INTEGER :: &
122	& ji
123
124	! Compute local sum
125	zres = SUM( pval(1:kn) )
126	! Global sum
127
128	CALL mpp_sum( zres )
129
130	mpp_sum_simple = zres
131
132	END FUNCTION mpp_sum_simple
133
134	FUNCTION loc_sum_inter( pval, kn )
135	!!----------------------------------------------------------------------
136	!! * ROUTINE loc_sum_inter *
137	!!
138	!! ** Purpose : Summation of arrays across processors
139	!!
140	!! ** Method : Call either loc_sum_simple or loc_sum_indep depending on
141	!! nmppsum
142	!!
143	!! ** Action :
144	!!
145	!! References :
146	!!
147	!! History :
148	!! ! 07-09 (K. Mogensen) Original code
149	!!----------------------------------------------------------------------
150	!! * Function return
151	REAL(wp) loc_sum_inter
152	!! * Arguments
153	INTEGER, INTENT(IN) :: &
154	& kn
155	REAL(wp), DIMENSION(kn), INTENT(IN) :: &
156	& pval
157	!! * Local declarations
158
159	IF ( nmppsum == 1 ) THEN
160
161	loc_sum_inter = loc_sum_simple( pval, kn )
162
163	ELSEIF ( nmppsum == 2 ) THEN
164
165	loc_sum_inter = loc_sum_indep( pval, kn )
166
167	ELSE
168
169	CALL ctl_stop( ' loc_sum: Invalid nmppsum = ' )
170
171	ENDIF
172
173	END FUNCTION loc_sum_inter
174
175	FUNCTION loc_sum_simple( pval, kn )
176	!!----------------------------------------------------------------------
177	!! * ROUTINE loc_sum_simple *
178	!!
179	!! ** Purpose : Summation of arrays across processors
180	!!
181	!! ** Method : This routine is the naive implementation for the sum
182	!! of local data
183	!!
184	!! ** Action :
185	!!
186	!! References :
187	!!
188	!! History :
189	!! ! 07-09 (K. Mogensen) Original code
190	!!----------------------------------------------------------------------
191	!! * Function return
192	REAL(wp) loc_sum_simple
193	!! * Arguments
194	INTEGER, INTENT(IN) :: &
195	& kn
196	REAL(wp), DIMENSION(kn), INTENT(IN) :: &
197	& pval
198	!! * Local declarations
199	REAL(wp) :: &
200	& zloc
201	INTEGER :: &
202	& ierr
203	INTEGER :: &
204	& ji
205
206	! Compute local sum
207
208	zloc = SUM( pval(1:kn) )
209
210	loc_sum_simple = zloc
211
212	END FUNCTION loc_sum_simple
213
214	FUNCTION loc_sum_indep( pval, kn )
215	!!----------------------------------------------------------------------
216	!! * ROUTINE loc_sum_indep *
217	!!
218	!! ** Purpose : Sum all elements in the pval array in
219	!! an accurate order-independent way.
220	!!
221	!! ** Method : The code iterates the compensated summation until the
222	!! result is guaranteed to be within 4*eps of the true sum.
223	!! It then rounds the result to the nearest floating-point
224	!! number whose last three bits are zero, thereby
225	!! guaranteeing an order-independent result.
226	!!
227	!! This version is used for local arrays. It is identical
228	!! to the MPP version for clarity.
229	!!
230	!! ** Action :
231	!!
232	!! References : M. Fisher (ECMWF): IFS code + personal communication
233	!! The algorithm is based on Ogita et al. (2005)
234	!! SIAM J. Sci. Computing, Vol.26, No.6, pp1955-1988.
235	!! This is based in turn on an algorithm
236	!! by Knuth (1969, seminumerical algorithms).
237	!!
238	!! History :
239	!! ! 07-09 (K. Mogensen) Original code heavily based on IFS.
240	!!----------------------------------------------------------------------
241	!! * Function return
242	REAL(wp) loc_sum_indep
243	!! * Arguments
244	INTEGER, INTENT(IN) :: &
245	& kn
246	REAL(wp), DIMENSION(kn), INTENT(IN) :: &
247	& pval
248	!! * Local declarations
249	REAL(wp), DIMENSION(3) ::&
250	& zbuffl
251	REAL(wp), DIMENSION(:), ALLOCATABLE :: &
252	& zpsums, &
253	& zperrs, &
254	& zpcors, &
255	& zbuffg, &
256	& zp
257	REAL(wp) :: &
258	& zcorr, &
259	& zerr, &
260	& zolderr, &
261	& zbeta, &
262	& zres
263	INTEGER :: &
264	& jj
265
266	! Check that the the algorithm can work
267
268	IF ( ( REAL( 2 * kn ) * EPSILON( zres ) ) >= 1.0 ) THEN
269
270	CALL ctl_stop( 'kn is too large to guarantee error bounds' )
271
272	ENDIF
273
274	ALLOCATE( &
275	& zp(MAX(kn,1)), &
276	& zbuffg(1*SIZE(zbuffl)), &
277	& zpsums(1), &
278	& zperrs(1), &
279	& zpcors(1) &
280	& )
281
282	zolderr = HUGE(zerr)
283
284	! Copy the input array. This avoids some tricky indexing, at the
285	! expense of some inefficency.
286
287	IF ( kn > 0 ) THEN
288
289	zp(:) = pval(:)
290
291	ELSE
292
293	zp(1) = 0.0_wp
294
295	ENDIF
296
297	k_loop: DO
298
299	! Transform local arrays
300
301	IF ( kn > 0 ) THEN
302
303	CALL comp_sum ( zp, kn, zcorr, zerr )
304
305	ENDIF
306
307	! Gather partial sums and error bounds to all processors
308
309	zbuffl(1) = zp(MAX(kn,1))
310
311	IF ( kn > 0 ) THEN
312
313	zbuffl(2) = zerr
314	zbuffl(3) = zcorr
315
316	ELSE
317
318	zbuffl(2) = 0.0_wp
319	zbuffl(3) = 0.0_wp
320
321	ENDIF
322
323	zpsums(1) = zbuffl(1)
324	zperrs(1) = zbuffl(2)
325	zpcors(1) = zbuffl(3)
326
327	! Transform partial sums
328
329	CALL comp_sum( zpsums, 1, zcorr, zerr )
330	zerr = zerr + SUM(zperrs)
331	zcorr = zcorr + SUM(zpcors)
332
333	! Calculate final result
334
335	zres = zpsums(1) + zcorr
336
337	! Calculate error bound. This is corollary 4.7 from Ogita et al.
338	! (2005)
339
340	zbeta = zerr ( REAL( 2kn, wp ) * EPSILON(zres) ) &
341	& /(1.0_wp - REAL( 2kn, wp ) EPSILON(zres) )
342
343	zerr = EPSILON(zres) * ABS(zres) &
344	& +(zbeta + ( 2.0_wp * EPSILON(zres) * EPSILON(zres) * ABS(zres) &
345	& +3.0_wp * TINY(zres) ) )
346
347	! Update the last element of the local array
348
349	zp(MAX(kn,1)) = zpsums(nproc+1)
350
351	! Exit if the global error is small enough
352
353	IF ( zerr < 4.0_wp * SPACING(zres) ) EXIT k_loop
354
355	! Take appropriate action if ZRES cannot be sufficiently refined.
356
357	IF (zerr >= zolderr) THEN
358
359	CALL ctl_stop( 'Failed to refine sum', &
360	& 'Warning: Possiblity of non-reproducible results' )
361
362	ENDIF
363
364	zolderr = zerr
365
366	ENDDO k_loop
367
368	! At this stage, we have guaranteed that ZRES less than 4*EPS
369	! away from the exact sum. There are only four floating point
370	! numbers in this range. So, if we find the nearest number that
371	! has its last three bits zero, then we have a reproducible result.
372
373	loc_sum_indep = fround(zres)
374
375	DEALLOCATE( &
376	& zpcors, &
377	& zperrs, &
378	& zpsums, &
379	& zbuffg, &
380	& zp &
381	& )
382
383	END FUNCTION loc_sum_indep
384
385
386	END MODULE mppsumtam

Note: See TracBrowser for help on using the repository browser.

New URL for NEMO forge! http://forge.nemo-ocean.eu

Context Navigation

source: branches/TAM_V3_0/NEMOTAM/OPATAM_SRC/mppsumtam.F90 @ 1885

Download in other formats: