1 | !----------------------------------------------------------------------- |
---|
2 | ! Communication for message-passing codes |
---|
3 | ! |
---|
4 | ! AUTHOR: V. Balaji (vbalaji@noaa.gov) |
---|
5 | ! Princeton University/GFDL |
---|
6 | ! |
---|
7 | ! MODIFICATIONS: Reiner Vogelsang (reiner@sgi.com) |
---|
8 | ! |
---|
9 | ! This program is free software; The author agrees that you can |
---|
10 | ! redistribute and/or modify this version of the program under the |
---|
11 | ! terms of the Lesser GNU General Public License as published |
---|
12 | ! by the Free Software Foundation. |
---|
13 | ! |
---|
14 | ! This program is distributed in the hope that it will be useful, |
---|
15 | ! but WITHOUT ANY WARRANTY; without even the implied warranty of |
---|
16 | ! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
---|
17 | ! Lesser GNU General Public License for more details |
---|
18 | ! (http://www.gnu.org/copyleft/lesser.html). |
---|
19 | !----------------------------------------------------------------------- |
---|
20 | |
---|
21 | !these are used to determine hardware/OS/compiler |
---|
22 | !#include <os.h> |
---|
23 | |
---|
24 | !onlysgi_mipspro one of SMA or MPI can be used |
---|
25 | !(though mixing calls is allowed, this module will not) |
---|
26 | #ifdef use_libSMA |
---|
27 | #undef use_libMPI |
---|
28 | #endif |
---|
29 | ! |
---|
30 | #if defined(_CRAYT3E) || defined(_CRAYT3D) || defined(sgi_mipspro) |
---|
31 | #define SGICRAY_MPP |
---|
32 | #endif |
---|
33 | ! |
---|
34 | !shmalloc is used on MPP SGI/Cray systems for shmem |
---|
35 | #if defined(use_libSMA) && defined(SGICRAY_MPP) |
---|
36 | #define use_shmalloc |
---|
37 | #endif |
---|
38 | |
---|
39 | module mpp_mod |
---|
40 | use mod_kinds_model |
---|
41 | #include <os.h> |
---|
42 | !string BWA is used to tag lines that are bug workarounds and will disappear |
---|
43 | !when offending compiler bug is fixed |
---|
44 | !a generalized communication package for use with shmem and MPI |
---|
45 | !will add: co_array_fortran, MPI2 |
---|
46 | !Balaji (vb@gfdl.gov) 11 May 1998 |
---|
47 | #ifdef sgi_mipspro |
---|
48 | #ifdef use_libSMA |
---|
49 | use shmem_interface |
---|
50 | #endif |
---|
51 | !#ifdef use_libMPI |
---|
52 | ! use mpi |
---|
53 | !#endif |
---|
54 | #endif |
---|
55 | implicit none |
---|
56 | private |
---|
57 | character(len=128), private :: version= & |
---|
58 | '$Id: mpp_mod.F90,v 1.1.1.1 2005/03/23 16:01:11 adm Exp $' |
---|
59 | character(len=128), private :: tagname= & |
---|
60 | '$Name: ipslcm5a $' |
---|
61 | |
---|
62 | !various lengths (see shpalloc) are estimated in "words" which are 32bit on SGI, 64bit on Cray |
---|
63 | !these are also the expected sizeof of args to MPI/shmem libraries |
---|
64 | #ifdef _CRAY |
---|
65 | integer(LONG_KIND), private :: word(1) |
---|
66 | #endif |
---|
67 | #ifdef sgi_mipspro |
---|
68 | integer(INT_KIND), private :: word(1) |
---|
69 | #endif |
---|
70 | |
---|
71 | #ifdef SGICRAY |
---|
72 | !see intro_io(3F): to see why these values are used rather than 5,6,0 |
---|
73 | integer, private :: in_unit=100, out_unit=101, err_unit=102 |
---|
74 | #else |
---|
75 | integer, private :: in_unit=5, out_unit=6, err_unit=0 |
---|
76 | #endif |
---|
77 | integer :: log_unit, etc_unit |
---|
78 | logical, private :: module_is_initialized=.FALSE. |
---|
79 | integer, private :: pe=0, node=0, npes=1, root_pe=0 |
---|
80 | integer, private :: error |
---|
81 | integer, parameter, private :: MAXPES=2048 !used for dimensioning stuff that might be indexed by pe |
---|
82 | character(len=32) :: configfile='logfile.out' |
---|
83 | character(len=32) :: etcfile='._mpp.nonrootpe.stdout' |
---|
84 | logical,save::logfile_defined=.false.,opened |
---|
85 | integer::io_num |
---|
86 | |
---|
87 | !initialization flags |
---|
88 | integer, parameter, public :: MPP_VERBOSE=1, MPP_DEBUG=2 |
---|
89 | logical, private :: verbose=.FALSE., debug=.FALSE. |
---|
90 | |
---|
91 | !flags to transmit routines |
---|
92 | integer, parameter, public :: ALL_PES=-1, ANY_PE=-2, NULL_PE=-3 |
---|
93 | |
---|
94 | !errortype flags |
---|
95 | integer, parameter, public :: NOTE=0, WARNING=1, FATAL=2 |
---|
96 | logical, private :: warnings_are_fatal = .FALSE. |
---|
97 | integer, private :: error_state=0 |
---|
98 | |
---|
99 | integer(LONG_KIND), parameter, private :: MPP_WAIT=-1, MPP_READY=-2 |
---|
100 | #ifdef use_libSMA |
---|
101 | #include <mpp/shmem.fh> |
---|
102 | integer :: sync(SHMEM_REDUCE_SYNC_SIZE+SHMEM_BCAST_SYNC_SIZE+SHMEM_BARRIER_SYNC_SIZE) |
---|
103 | !status and remote_data_loc are used to synchronize communication is MPP_TRANSMIT |
---|
104 | #ifdef use_shmalloc |
---|
105 | integer(LONG_KIND), private, dimension(0:MAXPES) :: status, remote_data_loc |
---|
106 | #else |
---|
107 | integer(LONG_KIND), private, allocatable, dimension(:) :: status, remote_data_loc |
---|
108 | #endif |
---|
109 | integer, private :: mpp_from_pe !used to announce from where data is coming from |
---|
110 | #ifdef use_shmalloc |
---|
111 | !we call shpalloc in mpp_init() to ensure all these are remotely accessible |
---|
112 | !on PVP where shpalloc doesn't exist, module variables are automatically |
---|
113 | !guaranteed to be remotely accessible |
---|
114 | pointer( ptr_sync, sync ) |
---|
115 | pointer( ptr_status, status ) |
---|
116 | pointer( ptr_from, mpp_from_pe ) |
---|
117 | pointer( ptr_remote, remote_data_loc ) |
---|
118 | #endif |
---|
119 | #endif /* use_libSMA */ |
---|
120 | #ifdef use_libMPI |
---|
121 | !#ifndef sgi_mipspro |
---|
122 | !sgi_mipspro gets this from 'use mpi' |
---|
123 | #include <mpif.h> |
---|
124 | !#endif |
---|
125 | !tag is never used, but must be initialized to non-negative integer |
---|
126 | integer, private :: tag=1, stat(MPI_STATUS_SIZE) |
---|
127 | ! integer, private, allocatable :: request(:) |
---|
128 | integer, public, allocatable :: request(:) |
---|
129 | #ifdef _CRAYT3E |
---|
130 | !BWA: mpif.h on t3e currently does not contain MPI_INTEGER8 datatype |
---|
131 | !(O2k and t90 do) |
---|
132 | !(t3e: fixed on 3.3 I believe) |
---|
133 | integer, parameter :: MPI_INTEGER8=MPI_INTEGER |
---|
134 | #endif |
---|
135 | #endif /* use_libMPI */ |
---|
136 | |
---|
137 | !mpp_stack is used by SHMEM collective ops |
---|
138 | !must be SHPALLOC'd on SGICRAY_MPP, but is allocatable on PVP |
---|
139 | #ifdef use_shmalloc |
---|
140 | real(DOUBLE_KIND), private :: mpp_stack(1) |
---|
141 | pointer( ptr_stack, mpp_stack ) |
---|
142 | #else |
---|
143 | real(DOUBLE_KIND), private, allocatable :: mpp_stack(:) |
---|
144 | #endif |
---|
145 | integer, private :: mpp_stack_size=0, mpp_stack_hwm=0 |
---|
146 | |
---|
147 | !peset hold communicators as SHMEM-compatible triads (start, log2(stride), num) |
---|
148 | type, private :: communicator |
---|
149 | character(len=32) :: name |
---|
150 | integer, pointer :: list(:) |
---|
151 | integer :: count |
---|
152 | #ifdef use_libSMA |
---|
153 | integer :: start, log2stride |
---|
154 | #elif use_libMPI |
---|
155 | integer :: id, group !MPI communicator and group id for this PE set |
---|
156 | #endif |
---|
157 | end type |
---|
158 | integer, parameter :: PESET_MAX=32 !should be .LE. max num of MPI communicators |
---|
159 | type(communicator) :: peset(0:PESET_MAX) !0 is a dummy used to hold single-PE "self" communicator |
---|
160 | integer :: peset_num=0, current_peset_num=0 |
---|
161 | integer :: world_peset_num !the world communicator |
---|
162 | |
---|
163 | !performance profiling |
---|
164 | ! This profiles every type of MPI/SHMEM call within |
---|
165 | ! a specified region of high-level code |
---|
166 | ! Initialize or retrieve a clock with |
---|
167 | ! id = mpp_clock_id( 'Region identifier name' ) |
---|
168 | ! Then set caliper points around the region using: |
---|
169 | ! call mpp_clock_begin(id) |
---|
170 | ! ... |
---|
171 | ! call mpp_clock_end(id) |
---|
172 | ! mpp_exit will print out the results. |
---|
173 | #ifdef __sgi |
---|
174 | #define SYSTEM_CLOCK system_clock_sgi |
---|
175 | #endif |
---|
176 | |
---|
177 | #ifdef use_libMPI |
---|
178 | #define SYSTEM_CLOCK system_clock_mpi |
---|
179 | #endif |
---|
180 | |
---|
181 | #if defined(__sgi) || defined(use_libMPI) |
---|
182 | integer(LONG_KIND), private :: tick, ticks_per_sec, max_ticks, start_tick, end_tick, tick0=0 |
---|
183 | #else |
---|
184 | integer, private :: tick, ticks_per_sec, max_ticks, start_tick, end_tick, tick0=0 |
---|
185 | #endif |
---|
186 | real, private :: tick_rate |
---|
187 | integer, private, parameter :: MAX_CLOCKS=100, MAX_EVENT_TYPES=5, MAX_EVENTS=40000 |
---|
188 | !event types |
---|
189 | integer, private, parameter :: EVENT_ALLREDUCE=1, EVENT_BROADCAST=2, EVENT_RECV=3, EVENT_SEND=4, EVENT_WAIT=5 |
---|
190 | integer, private :: clock_num=0, current_clock=0 |
---|
191 | integer, private :: clock0 !measures total runtime from mpp_init to mpp_exit |
---|
192 | integer, private :: clock_grain=HUGE(1) |
---|
193 | !the event contains information for each type of event (e.g SHMEM_PUT) |
---|
194 | type, private :: event |
---|
195 | character(len=16) :: name |
---|
196 | integer(LONG_KIND) :: ticks(MAX_EVENTS), bytes(MAX_EVENTS) |
---|
197 | integer :: calls |
---|
198 | end type event |
---|
199 | !a clock contains an array of event profiles for a region |
---|
200 | integer, parameter, public :: MPP_CLOCK_SYNC=1, MPP_CLOCK_DETAILED=2 |
---|
201 | type, private :: clock |
---|
202 | character(len=32) :: name |
---|
203 | #if defined(__sgi) || defined(use_libMPI) |
---|
204 | integer(LONG_KIND) :: tick |
---|
205 | #else |
---|
206 | integer :: tick |
---|
207 | #endif |
---|
208 | integer(LONG_KIND) :: total_ticks |
---|
209 | integer :: peset_num |
---|
210 | logical :: sync_on_begin, detailed |
---|
211 | type(event), pointer :: events(:) !if needed, allocate to MAX_EVENT_TYPES |
---|
212 | end type |
---|
213 | type(clock) :: clocks(MAX_CLOCKS) |
---|
214 | |
---|
215 | integer,parameter :: MAX_BINS=20 |
---|
216 | TYPE :: Clock_Data_Summary |
---|
217 | character(len=16) :: name |
---|
218 | real(DOUBLE_KIND) :: msg_size_sums(MAX_BINS) |
---|
219 | real(DOUBLE_KIND) :: msg_time_sums(MAX_BINS) |
---|
220 | real(DOUBLE_KIND) :: total_data |
---|
221 | real(DOUBLE_KIND) :: total_time |
---|
222 | integer(LONG_KIND) :: msg_size_cnts(MAX_BINS) |
---|
223 | integer(LONG_KIND) :: total_cnts |
---|
224 | END TYPE Clock_Data_Summary |
---|
225 | |
---|
226 | TYPE :: Summary_Struct |
---|
227 | character(len=16) :: name |
---|
228 | type (Clock_Data_Summary) :: event(MAX_EVENT_TYPES) |
---|
229 | END TYPE Summary_Struct |
---|
230 | type(Summary_Struct) :: clock_summary(MAX_CLOCKS) |
---|
231 | |
---|
232 | !public interfaces |
---|
233 | interface mpp_max |
---|
234 | module procedure mpp_max_real8 |
---|
235 | #ifndef no_8byte_integers |
---|
236 | module procedure mpp_max_int8 |
---|
237 | #endif |
---|
238 | #ifndef no_4byte_reals |
---|
239 | module procedure mpp_max_real4 |
---|
240 | #endif |
---|
241 | module procedure mpp_max_int4 |
---|
242 | end interface |
---|
243 | interface mpp_min |
---|
244 | module procedure mpp_min_real8 |
---|
245 | #ifndef no_8byte_integers |
---|
246 | module procedure mpp_min_int8 |
---|
247 | #endif |
---|
248 | #ifndef no_4byte_reals |
---|
249 | module procedure mpp_min_real4 |
---|
250 | #endif |
---|
251 | module procedure mpp_min_int4 |
---|
252 | end interface |
---|
253 | interface mpp_sum |
---|
254 | #ifndef no_8byte_integers |
---|
255 | module procedure mpp_sum_int8 |
---|
256 | module procedure mpp_sum_int8_scalar |
---|
257 | module procedure mpp_sum_int8_2d |
---|
258 | module procedure mpp_sum_int8_3d |
---|
259 | module procedure mpp_sum_int8_4d |
---|
260 | module procedure mpp_sum_int8_5d |
---|
261 | #endif |
---|
262 | module procedure mpp_sum_real8 |
---|
263 | module procedure mpp_sum_real8_scalar |
---|
264 | module procedure mpp_sum_real8_2d |
---|
265 | module procedure mpp_sum_real8_3d |
---|
266 | module procedure mpp_sum_real8_4d |
---|
267 | module procedure mpp_sum_real8_5d |
---|
268 | module procedure mpp_sum_cmplx8 |
---|
269 | module procedure mpp_sum_cmplx8_scalar |
---|
270 | module procedure mpp_sum_cmplx8_2d |
---|
271 | module procedure mpp_sum_cmplx8_3d |
---|
272 | module procedure mpp_sum_cmplx8_4d |
---|
273 | module procedure mpp_sum_cmplx8_5d |
---|
274 | module procedure mpp_sum_int4 |
---|
275 | module procedure mpp_sum_int4_scalar |
---|
276 | module procedure mpp_sum_int4_2d |
---|
277 | module procedure mpp_sum_int4_3d |
---|
278 | module procedure mpp_sum_int4_4d |
---|
279 | module procedure mpp_sum_int4_5d |
---|
280 | #ifndef no_4byte_reals |
---|
281 | module procedure mpp_sum_real4 |
---|
282 | module procedure mpp_sum_real4_scalar |
---|
283 | module procedure mpp_sum_real4_2d |
---|
284 | module procedure mpp_sum_real4_3d |
---|
285 | module procedure mpp_sum_real4_4d |
---|
286 | module procedure mpp_sum_real4_5d |
---|
287 | #endif |
---|
288 | #ifndef no_4byte_cmplx |
---|
289 | module procedure mpp_sum_cmplx4 |
---|
290 | module procedure mpp_sum_cmplx4_scalar |
---|
291 | module procedure mpp_sum_cmplx4_2d |
---|
292 | module procedure mpp_sum_cmplx4_3d |
---|
293 | module procedure mpp_sum_cmplx4_4d |
---|
294 | module procedure mpp_sum_cmplx4_5d |
---|
295 | #endif |
---|
296 | end interface |
---|
297 | interface mpp_transmit |
---|
298 | module procedure mpp_transmit_real8 |
---|
299 | module procedure mpp_transmit_real8_scalar |
---|
300 | module procedure mpp_transmit_real8_2d |
---|
301 | module procedure mpp_transmit_real8_3d |
---|
302 | module procedure mpp_transmit_real8_4d |
---|
303 | module procedure mpp_transmit_real8_5d |
---|
304 | module procedure mpp_transmit_cmplx8 |
---|
305 | module procedure mpp_transmit_cmplx8_scalar |
---|
306 | module procedure mpp_transmit_cmplx8_2d |
---|
307 | module procedure mpp_transmit_cmplx8_3d |
---|
308 | module procedure mpp_transmit_cmplx8_4d |
---|
309 | module procedure mpp_transmit_cmplx8_5d |
---|
310 | #ifndef no_8byte_integers |
---|
311 | module procedure mpp_transmit_int8 |
---|
312 | module procedure mpp_transmit_int8_scalar |
---|
313 | module procedure mpp_transmit_int8_2d |
---|
314 | module procedure mpp_transmit_int8_3d |
---|
315 | module procedure mpp_transmit_int8_4d |
---|
316 | module procedure mpp_transmit_int8_5d |
---|
317 | module procedure mpp_transmit_logical8 |
---|
318 | module procedure mpp_transmit_logical8_scalar |
---|
319 | module procedure mpp_transmit_logical8_2d |
---|
320 | module procedure mpp_transmit_logical8_3d |
---|
321 | module procedure mpp_transmit_logical8_4d |
---|
322 | module procedure mpp_transmit_logical8_5d |
---|
323 | #endif |
---|
324 | #ifndef no_4byte_reals |
---|
325 | module procedure mpp_transmit_real4 |
---|
326 | module procedure mpp_transmit_real4_scalar |
---|
327 | module procedure mpp_transmit_real4_2d |
---|
328 | module procedure mpp_transmit_real4_3d |
---|
329 | module procedure mpp_transmit_real4_4d |
---|
330 | module procedure mpp_transmit_real4_5d |
---|
331 | #endif |
---|
332 | #ifndef no_4byte_cmplx |
---|
333 | module procedure mpp_transmit_cmplx4 |
---|
334 | module procedure mpp_transmit_cmplx4_scalar |
---|
335 | module procedure mpp_transmit_cmplx4_2d |
---|
336 | module procedure mpp_transmit_cmplx4_3d |
---|
337 | module procedure mpp_transmit_cmplx4_4d |
---|
338 | module procedure mpp_transmit_cmplx4_5d |
---|
339 | #endif |
---|
340 | module procedure mpp_transmit_int4 |
---|
341 | module procedure mpp_transmit_int4_scalar |
---|
342 | module procedure mpp_transmit_int4_2d |
---|
343 | module procedure mpp_transmit_int4_3d |
---|
344 | module procedure mpp_transmit_int4_4d |
---|
345 | module procedure mpp_transmit_int4_5d |
---|
346 | module procedure mpp_transmit_logical4 |
---|
347 | module procedure mpp_transmit_logical4_scalar |
---|
348 | module procedure mpp_transmit_logical4_2d |
---|
349 | module procedure mpp_transmit_logical4_3d |
---|
350 | module procedure mpp_transmit_logical4_4d |
---|
351 | module procedure mpp_transmit_logical4_5d |
---|
352 | end interface |
---|
353 | interface mpp_recv |
---|
354 | module procedure mpp_recv_real8 |
---|
355 | module procedure mpp_recv_real8_scalar |
---|
356 | module procedure mpp_recv_real8_2d |
---|
357 | module procedure mpp_recv_real8_3d |
---|
358 | module procedure mpp_recv_real8_4d |
---|
359 | module procedure mpp_recv_real8_5d |
---|
360 | module procedure mpp_recv_cmplx8 |
---|
361 | module procedure mpp_recv_cmplx8_scalar |
---|
362 | module procedure mpp_recv_cmplx8_2d |
---|
363 | module procedure mpp_recv_cmplx8_3d |
---|
364 | module procedure mpp_recv_cmplx8_4d |
---|
365 | module procedure mpp_recv_cmplx8_5d |
---|
366 | #ifndef no_8byte_integers |
---|
367 | module procedure mpp_recv_int8 |
---|
368 | module procedure mpp_recv_int8_scalar |
---|
369 | module procedure mpp_recv_int8_2d |
---|
370 | module procedure mpp_recv_int8_3d |
---|
371 | module procedure mpp_recv_int8_4d |
---|
372 | module procedure mpp_recv_int8_5d |
---|
373 | module procedure mpp_recv_logical8 |
---|
374 | module procedure mpp_recv_logical8_scalar |
---|
375 | module procedure mpp_recv_logical8_2d |
---|
376 | module procedure mpp_recv_logical8_3d |
---|
377 | module procedure mpp_recv_logical8_4d |
---|
378 | module procedure mpp_recv_logical8_5d |
---|
379 | #endif |
---|
380 | #ifndef no_4byte_reals |
---|
381 | module procedure mpp_recv_real4 |
---|
382 | module procedure mpp_recv_real4_scalar |
---|
383 | module procedure mpp_recv_real4_2d |
---|
384 | module procedure mpp_recv_real4_3d |
---|
385 | module procedure mpp_recv_real4_4d |
---|
386 | module procedure mpp_recv_real4_5d |
---|
387 | #endif |
---|
388 | #ifndef no_4byte_cmplx |
---|
389 | module procedure mpp_recv_cmplx4 |
---|
390 | module procedure mpp_recv_cmplx4_scalar |
---|
391 | module procedure mpp_recv_cmplx4_2d |
---|
392 | module procedure mpp_recv_cmplx4_3d |
---|
393 | module procedure mpp_recv_cmplx4_4d |
---|
394 | module procedure mpp_recv_cmplx4_5d |
---|
395 | #endif |
---|
396 | module procedure mpp_recv_int4 |
---|
397 | module procedure mpp_recv_int4_scalar |
---|
398 | module procedure mpp_recv_int4_2d |
---|
399 | module procedure mpp_recv_int4_3d |
---|
400 | module procedure mpp_recv_int4_4d |
---|
401 | module procedure mpp_recv_int4_5d |
---|
402 | module procedure mpp_recv_logical4 |
---|
403 | module procedure mpp_recv_logical4_scalar |
---|
404 | module procedure mpp_recv_logical4_2d |
---|
405 | module procedure mpp_recv_logical4_3d |
---|
406 | module procedure mpp_recv_logical4_4d |
---|
407 | module procedure mpp_recv_logical4_5d |
---|
408 | end interface |
---|
409 | interface mpp_send |
---|
410 | module procedure mpp_send_real8 |
---|
411 | module procedure mpp_send_real8_scalar |
---|
412 | module procedure mpp_send_real8_2d |
---|
413 | module procedure mpp_send_real8_3d |
---|
414 | module procedure mpp_send_real8_4d |
---|
415 | module procedure mpp_send_real8_5d |
---|
416 | module procedure mpp_send_cmplx8 |
---|
417 | module procedure mpp_send_cmplx8_scalar |
---|
418 | module procedure mpp_send_cmplx8_2d |
---|
419 | module procedure mpp_send_cmplx8_3d |
---|
420 | module procedure mpp_send_cmplx8_4d |
---|
421 | module procedure mpp_send_cmplx8_5d |
---|
422 | #ifndef no_8byte_integers |
---|
423 | module procedure mpp_send_int8 |
---|
424 | module procedure mpp_send_int8_scalar |
---|
425 | module procedure mpp_send_int8_2d |
---|
426 | module procedure mpp_send_int8_3d |
---|
427 | module procedure mpp_send_int8_4d |
---|
428 | module procedure mpp_send_int8_5d |
---|
429 | module procedure mpp_send_logical8 |
---|
430 | module procedure mpp_send_logical8_scalar |
---|
431 | module procedure mpp_send_logical8_2d |
---|
432 | module procedure mpp_send_logical8_3d |
---|
433 | module procedure mpp_send_logical8_4d |
---|
434 | module procedure mpp_send_logical8_5d |
---|
435 | #endif |
---|
436 | #ifndef no_4byte_reals |
---|
437 | module procedure mpp_send_real4 |
---|
438 | module procedure mpp_send_real4_scalar |
---|
439 | module procedure mpp_send_real4_2d |
---|
440 | module procedure mpp_send_real4_3d |
---|
441 | module procedure mpp_send_real4_4d |
---|
442 | module procedure mpp_send_real4_5d |
---|
443 | #endif |
---|
444 | #ifndef no_4byte_cmplx |
---|
445 | module procedure mpp_send_cmplx4 |
---|
446 | module procedure mpp_send_cmplx4_scalar |
---|
447 | module procedure mpp_send_cmplx4_2d |
---|
448 | module procedure mpp_send_cmplx4_3d |
---|
449 | module procedure mpp_send_cmplx4_4d |
---|
450 | module procedure mpp_send_cmplx4_5d |
---|
451 | #endif |
---|
452 | module procedure mpp_send_int4 |
---|
453 | module procedure mpp_send_int4_scalar |
---|
454 | module procedure mpp_send_int4_2d |
---|
455 | module procedure mpp_send_int4_3d |
---|
456 | module procedure mpp_send_int4_4d |
---|
457 | module procedure mpp_send_int4_5d |
---|
458 | module procedure mpp_send_logical4 |
---|
459 | module procedure mpp_send_logical4_scalar |
---|
460 | module procedure mpp_send_logical4_2d |
---|
461 | module procedure mpp_send_logical4_3d |
---|
462 | module procedure mpp_send_logical4_4d |
---|
463 | module procedure mpp_send_logical4_5d |
---|
464 | end interface |
---|
465 | |
---|
466 | interface mpp_broadcast |
---|
467 | module procedure mpp_broadcast_real8 |
---|
468 | module procedure mpp_broadcast_real8_scalar |
---|
469 | module procedure mpp_broadcast_real8_2d |
---|
470 | module procedure mpp_broadcast_real8_3d |
---|
471 | module procedure mpp_broadcast_real8_4d |
---|
472 | module procedure mpp_broadcast_real8_5d |
---|
473 | module procedure mpp_broadcast_cmplx8 |
---|
474 | module procedure mpp_broadcast_cmplx8_scalar |
---|
475 | module procedure mpp_broadcast_cmplx8_2d |
---|
476 | module procedure mpp_broadcast_cmplx8_3d |
---|
477 | module procedure mpp_broadcast_cmplx8_4d |
---|
478 | module procedure mpp_broadcast_cmplx8_5d |
---|
479 | #ifndef no_8byte_integers |
---|
480 | module procedure mpp_broadcast_int8 |
---|
481 | module procedure mpp_broadcast_int8_scalar |
---|
482 | module procedure mpp_broadcast_int8_2d |
---|
483 | module procedure mpp_broadcast_int8_3d |
---|
484 | module procedure mpp_broadcast_int8_4d |
---|
485 | module procedure mpp_broadcast_int8_5d |
---|
486 | module procedure mpp_broadcast_logical8 |
---|
487 | module procedure mpp_broadcast_logical8_scalar |
---|
488 | module procedure mpp_broadcast_logical8_2d |
---|
489 | module procedure mpp_broadcast_logical8_3d |
---|
490 | module procedure mpp_broadcast_logical8_4d |
---|
491 | module procedure mpp_broadcast_logical8_5d |
---|
492 | #endif |
---|
493 | #ifndef no_4byte_reals |
---|
494 | module procedure mpp_broadcast_real4 |
---|
495 | module procedure mpp_broadcast_real4_scalar |
---|
496 | module procedure mpp_broadcast_real4_2d |
---|
497 | module procedure mpp_broadcast_real4_3d |
---|
498 | module procedure mpp_broadcast_real4_4d |
---|
499 | module procedure mpp_broadcast_real4_5d |
---|
500 | #endif |
---|
501 | #ifndef no_4byte_cmplx |
---|
502 | module procedure mpp_broadcast_cmplx4 |
---|
503 | module procedure mpp_broadcast_cmplx4_scalar |
---|
504 | module procedure mpp_broadcast_cmplx4_2d |
---|
505 | module procedure mpp_broadcast_cmplx4_3d |
---|
506 | module procedure mpp_broadcast_cmplx4_4d |
---|
507 | module procedure mpp_broadcast_cmplx4_5d |
---|
508 | #endif |
---|
509 | module procedure mpp_broadcast_int4 |
---|
510 | module procedure mpp_broadcast_int4_scalar |
---|
511 | module procedure mpp_broadcast_int4_2d |
---|
512 | module procedure mpp_broadcast_int4_3d |
---|
513 | module procedure mpp_broadcast_int4_4d |
---|
514 | module procedure mpp_broadcast_int4_5d |
---|
515 | module procedure mpp_broadcast_logical4 |
---|
516 | module procedure mpp_broadcast_logical4_scalar |
---|
517 | module procedure mpp_broadcast_logical4_2d |
---|
518 | module procedure mpp_broadcast_logical4_3d |
---|
519 | module procedure mpp_broadcast_logical4_4d |
---|
520 | module procedure mpp_broadcast_logical4_5d |
---|
521 | end interface |
---|
522 | |
---|
523 | interface mpp_chksum |
---|
524 | #ifndef no_8byte_integers |
---|
525 | module procedure mpp_chksum_i8_1d |
---|
526 | module procedure mpp_chksum_i8_2d |
---|
527 | module procedure mpp_chksum_i8_3d |
---|
528 | module procedure mpp_chksum_i8_4d |
---|
529 | #endif |
---|
530 | module procedure mpp_chksum_i4_1d |
---|
531 | module procedure mpp_chksum_i4_2d |
---|
532 | module procedure mpp_chksum_i4_3d |
---|
533 | module procedure mpp_chksum_i4_4d |
---|
534 | module procedure mpp_chksum_r8_0d |
---|
535 | module procedure mpp_chksum_r8_1d |
---|
536 | module procedure mpp_chksum_r8_2d |
---|
537 | module procedure mpp_chksum_r8_3d |
---|
538 | module procedure mpp_chksum_r8_4d |
---|
539 | module procedure mpp_chksum_r8_5d |
---|
540 | module procedure mpp_chksum_c8_0d |
---|
541 | module procedure mpp_chksum_c8_1d |
---|
542 | module procedure mpp_chksum_c8_2d |
---|
543 | module procedure mpp_chksum_c8_3d |
---|
544 | module procedure mpp_chksum_c8_4d |
---|
545 | module procedure mpp_chksum_c8_5d |
---|
546 | #ifndef no_4byte_reals |
---|
547 | module procedure mpp_chksum_r4_0d |
---|
548 | module procedure mpp_chksum_r4_1d |
---|
549 | module procedure mpp_chksum_r4_2d |
---|
550 | module procedure mpp_chksum_r4_3d |
---|
551 | module procedure mpp_chksum_r4_4d |
---|
552 | module procedure mpp_chksum_r4_5d |
---|
553 | #endif |
---|
554 | #ifndef no_4byte_cmplx |
---|
555 | module procedure mpp_chksum_c4_0d |
---|
556 | module procedure mpp_chksum_c4_1d |
---|
557 | module procedure mpp_chksum_c4_2d |
---|
558 | module procedure mpp_chksum_c4_3d |
---|
559 | module procedure mpp_chksum_c4_4d |
---|
560 | module procedure mpp_chksum_c4_5d |
---|
561 | #endif |
---|
562 | end interface |
---|
563 | |
---|
564 | interface mpp_error |
---|
565 | module procedure mpp_error_basic |
---|
566 | module procedure mpp_error_mesg |
---|
567 | module procedure mpp_error_noargs |
---|
568 | end interface |
---|
569 | |
---|
570 | #ifdef use_libSMA |
---|
571 | !currently SMA contains no generic shmem_wait for different integer kinds: |
---|
572 | !I have inserted one here |
---|
573 | interface shmem_integer_wait |
---|
574 | module procedure shmem_int4_wait_local |
---|
575 | module procedure shmem_int8_wait_local |
---|
576 | end interface |
---|
577 | #endif |
---|
578 | public :: mpp_chksum, mpp_max, mpp_min, mpp_sum |
---|
579 | public :: mpp_exit, mpp_init |
---|
580 | public :: mpp_pe, mpp_node, mpp_npes, mpp_root_pe, mpp_set_root_pe, mpp_set_stack_size |
---|
581 | public :: mpp_clock_begin, mpp_clock_end, mpp_clock_id, mpp_clock_set_grain |
---|
582 | public :: mpp_error, mpp_error_state, mpp_set_warn_level |
---|
583 | public :: mpp_sync, mpp_sync_self |
---|
584 | public :: mpp_transmit, mpp_send, mpp_recv, mpp_broadcast |
---|
585 | public :: stdin, stdout, stderr, stdlog |
---|
586 | public :: mpp_declare_pelist, mpp_get_current_pelist, mpp_set_current_pelist |
---|
587 | #ifdef use_shmalloc |
---|
588 | public :: mpp_malloc |
---|
589 | #endif |
---|
590 | |
---|
591 | contains |
---|
592 | |
---|
593 | !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! |
---|
594 | ! ! |
---|
595 | ! ROUTINES TO INITIALIZE/FINALIZE MPP MODULE: mpp_init, mpp_exit ! |
---|
596 | ! ! |
---|
597 | !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! |
---|
598 | |
---|
599 | subroutine mpp_init( flags,mpp_comm ,logfile) |
---|
600 | integer, optional, intent(in) :: flags,mpp_comm |
---|
601 | character(len=*), optional, intent(in) :: logfile |
---|
602 | ! subroutine mpp_init( flags, in, out, err, log ) |
---|
603 | ! integer, optional, intent(in) :: flags, in, out, err, log |
---|
604 | integer :: my_pe, num_pes, len |
---|
605 | integer :: i |
---|
606 | integer :: comm_intern |
---|
607 | logical :: opened |
---|
608 | #ifdef _CRAYT3E |
---|
609 | intrinsic my_pe |
---|
610 | #endif |
---|
611 | |
---|
612 | if( module_is_initialized )return |
---|
613 | |
---|
614 | #ifdef use_libSMA |
---|
615 | call START_PES(0) !the argument 0 means extract from environment variable NPES on PVP/SGI, from mpprun -n on t3e |
---|
616 | pe = my_pe() |
---|
617 | node = pe !on an SMP this should return node ID rather than PE number. |
---|
618 | npes = num_pes() |
---|
619 | #elif use_libMPI |
---|
620 | call MPI_INITIALIZED( opened, error ) !in case called from another MPI package |
---|
621 | if(PRESENT(mpp_comm)) then |
---|
622 | comm_intern=mpp_comm |
---|
623 | else |
---|
624 | comm_intern=MPI_COMM_WORLD |
---|
625 | endif |
---|
626 | ! |
---|
627 | if(present(logfile)) then |
---|
628 | etcfile=trim(logfile) |
---|
629 | configfile=trim(logfile) |
---|
630 | logfile_defined=.true. |
---|
631 | endif |
---|
632 | ! |
---|
633 | if( .NOT.opened )then |
---|
634 | call MPI_INIT(error) |
---|
635 | comm_intern=MPI_COMM_WORLD |
---|
636 | endif |
---|
637 | call MPI_COMM_RANK( comm_intern, pe, error ) |
---|
638 | call MPI_COMM_SIZE( comm_intern, npes, error ) |
---|
639 | allocate( request(0:npes-1) ) |
---|
640 | request(:) = MPI_REQUEST_NULL |
---|
641 | #endif |
---|
642 | module_is_initialized = .TRUE. |
---|
643 | |
---|
644 | !PEsets: make defaults illegal |
---|
645 | peset(:)%count = -1 |
---|
646 | #ifdef use_libSMA |
---|
647 | peset(:)%start = -1 |
---|
648 | peset(:)%log2stride = -1 |
---|
649 | #elif use_libMPI |
---|
650 | peset(:)%id = -1 |
---|
651 | peset(:)%group = -1 |
---|
652 | #endif |
---|
653 | !0=single-PE, initialized so that count returns 1 |
---|
654 | peset(0)%count = 1 |
---|
655 | allocate( peset(0)%list(1) ) |
---|
656 | peset(0)%list = pe |
---|
657 | #ifdef use_libMPI |
---|
658 | current_peset_num = 0 |
---|
659 | peset(0)%id = comm_intern |
---|
660 | call MPI_COMM_GROUP( peset(0)%id, peset(0)%group, error ) |
---|
661 | #endif |
---|
662 | world_peset_num = get_peset( (/(i,i=0,npes-1)/) ) |
---|
663 | current_peset_num = world_peset_num !initialize current PEset to world |
---|
664 | |
---|
665 | !initialize clocks |
---|
666 | call SYSTEM_CLOCK( count=tick0, count_rate=ticks_per_sec, count_max=max_ticks ) |
---|
667 | tick_rate = 1./ticks_per_sec |
---|
668 | clock0 = mpp_clock_id( 'Total runtime', flags=MPP_CLOCK_SYNC ) |
---|
669 | |
---|
670 | if( PRESENT(flags) )then |
---|
671 | debug = flags.EQ.MPP_DEBUG |
---|
672 | verbose = flags.EQ.MPP_VERBOSE .OR. debug |
---|
673 | end if |
---|
674 | |
---|
675 | #ifdef use_libSMA |
---|
676 | #ifdef use_shmalloc |
---|
677 | !we use shpalloc to ensure all these are remotely accessible |
---|
678 | len=0; ptr_sync = LOC(pe) !null initialization |
---|
679 | call mpp_malloc( ptr_sync, size(TRANSFER(sync,word)), len ) |
---|
680 | len=0; ptr_status = LOC(pe) !null initialization |
---|
681 | call mpp_malloc( ptr_status, npes*size(TRANSFER(status(0),word)), len ) |
---|
682 | len=0; ptr_remote = LOC(pe) !null initialization |
---|
683 | call mpp_malloc( ptr_remote, npes*size(TRANSFER(remote_data_loc(0),word)), len ) |
---|
684 | len=0; ptr_from = LOC(pe) !null initialization |
---|
685 | call mpp_malloc( ptr_from, size(TRANSFER(mpp_from_pe,word)), len ) |
---|
686 | #else |
---|
687 | allocate( status(0:npes-1) ) |
---|
688 | allocate( remote_data_loc(0:npes-1) ) |
---|
689 | #endif |
---|
690 | sync(:) = SHMEM_SYNC_VALUE |
---|
691 | status(0:npes-1) = MPP_READY |
---|
692 | remote_data_loc(0:npes-1) = MPP_WAIT |
---|
693 | call mpp_set_stack_size(32768) !default initial value |
---|
694 | #endif |
---|
695 | |
---|
696 | !logunit: log messages are written to configfile.out by default |
---|
697 | etc_unit=get_unit() |
---|
698 | ! write( etcfile,'(a,i4.4)' )trim(etcfile)//'.', pe |
---|
699 | !rv |
---|
700 | !rv Status 'REPLACE' eads to an unpredictable behaviour on SX-6 |
---|
701 | !rv if( pe.EQ.root_pe )open( unit=etc_unit, file=trim(etcfile), status='REPLACE' ) |
---|
702 | !rv |
---|
703 | if( pe.EQ.root_pe )then |
---|
704 | if(logfile_defined) then |
---|
705 | inquire( file=trim(etcfile), opened=opened,number=io_num) |
---|
706 | if(opened) then |
---|
707 | etc_unit=io_num |
---|
708 | else |
---|
709 | open( unit=etc_unit, file=trim(etcfile), status='UNKNOWN' ) |
---|
710 | close(etc_unit) |
---|
711 | endif |
---|
712 | else |
---|
713 | open( unit=etc_unit, file=trim(etcfile), status='UNKNOWN' ) |
---|
714 | close(etc_unit) |
---|
715 | endif |
---|
716 | endif |
---|
717 | |
---|
718 | call mpp_sync() |
---|
719 | !rv |
---|
720 | if(present(logfile)) then |
---|
721 | inquire( file=trim(etcfile), opened=opened,number=io_num) |
---|
722 | if(opened) then |
---|
723 | if( pe.NE.root_pe ) then |
---|
724 | etc_unit=io_num |
---|
725 | endif |
---|
726 | else |
---|
727 | if( pe.NE.root_pe ) then |
---|
728 | open( unit=etc_unit, file=trim(etcfile), status='UNKNOWN' ) |
---|
729 | close(etc_unit) |
---|
730 | endif |
---|
731 | endif |
---|
732 | else |
---|
733 | if( pe.NE.root_pe ) then |
---|
734 | open( unit=etc_unit, file=trim(etcfile), status='OLD' ) |
---|
735 | close(etc_unit) |
---|
736 | endif |
---|
737 | endif |
---|
738 | ! write(0,*)'etc_unit=',etc_unit |
---|
739 | !rv |
---|
740 | |
---|
741 | !if optional argument logunit=stdout, write messages to stdout instead. |
---|
742 | !if specifying non-defaults, you must specify units not yet in use. |
---|
743 | ! if( PRESENT(in) )then |
---|
744 | ! inquire( unit=in, opened=opened ) |
---|
745 | ! if( opened )call mpp_error( FATAL, 'MPP_INIT: unable to open stdin.' ) |
---|
746 | ! in_unit=in |
---|
747 | ! end if |
---|
748 | ! if( PRESENT(out) )then |
---|
749 | ! inquire( unit=out, opened=opened ) |
---|
750 | ! if( opened )call mpp_error( FATAL, 'MPP_INIT: unable to open stdout.' ) |
---|
751 | ! out_unit=out |
---|
752 | ! end if |
---|
753 | ! if( PRESENT(err) )then |
---|
754 | ! inquire( unit=err, opened=opened ) |
---|
755 | ! if( opened )call mpp_error( FATAL, 'MPP_INIT: unable to open stderr.' ) |
---|
756 | ! err_unit=err |
---|
757 | ! end if |
---|
758 | ! log_unit=get_unit() |
---|
759 | ! if( PRESENT(log) )then |
---|
760 | ! inquire( unit=log, opened=opened ) |
---|
761 | ! if( opened .AND. log.NE.out_unit )call mpp_error( FATAL, 'MPP_INIT: unable to open stdlog.' ) |
---|
762 | ! log_unit=log |
---|
763 | ! end if |
---|
764 | !!log_unit can be written to only from root_pe, all others write to stdout |
---|
765 | ! if( log_unit.NE.out_unit )then |
---|
766 | ! inquire( unit=log_unit, opened=opened ) |
---|
767 | ! if( opened )call mpp_error( FATAL, 'MPP_INIT: specified unit for stdlog already in use.' ) |
---|
768 | ! if( pe.EQ.root_pe )open( unit=log_unit, file=trim(configfile), status='REPLACE' ) |
---|
769 | ! call mpp_sync() |
---|
770 | ! if( pe.NE.root_pe )open( unit=log_unit, file=trim(configfile), status='OLD' ) |
---|
771 | ! end if |
---|
772 | if( pe.EQ.root_pe )then |
---|
773 | log_unit = get_unit() |
---|
774 | !rv Status 'REPLACE' leads to an unpredictable behaviour on the SX-6. |
---|
775 | !rv open( unit=log_unit, file=trim(configfile), status='REPLACE' ) |
---|
776 | if(logfile_defined) then |
---|
777 | inquire( file=trim(configfile), opened=opened,number=io_num) |
---|
778 | if(opened)then |
---|
779 | log_unit=io_num |
---|
780 | else |
---|
781 | open( unit=log_unit, file=trim(configfile), status='UNKNOWN' ) |
---|
782 | close(log_unit) |
---|
783 | endif |
---|
784 | else |
---|
785 | open( unit=log_unit, file=trim(configfile), status='UNKNOWN' ) |
---|
786 | close(log_unit) |
---|
787 | endif |
---|
788 | end if |
---|
789 | !messages |
---|
790 | if( verbose )call mpp_error( NOTE, 'MPP_INIT: initializing MPP module...' ) |
---|
791 | if( pe.EQ.root_pe )then |
---|
792 | write( stdlog(),'(/a)' )'MPP module '//trim(version)//trim(tagname) |
---|
793 | write( stdlog(),'(a,i4)' )'MPP started with NPES=', npes |
---|
794 | #ifdef use_libSMA |
---|
795 | write( stdlog(),'(a)' )'Using SMA (shmem) library for message passing...' |
---|
796 | #endif |
---|
797 | #ifdef use_libMPI |
---|
798 | write( stdlog(),'(a)' )'Using MPI library for message passing...' |
---|
799 | #endif |
---|
800 | write( stdlog(), '(a,es12.4,a,i20,a)' ) & |
---|
801 | 'Realtime clock resolution=', tick_rate, ' sec (', ticks_per_sec, ' ticks/sec)' |
---|
802 | write( stdlog(), '(a,es12.4,a,i20,a)' ) & |
---|
803 | 'Clock rolls over after ', max_ticks*tick_rate, ' sec (', max_ticks, ' ticks)' |
---|
804 | end if |
---|
805 | call mpp_clock_begin(clock0) |
---|
806 | |
---|
807 | return |
---|
808 | end subroutine mpp_init |
---|
809 | |
---|
810 | function stdin() |
---|
811 | integer :: stdin |
---|
812 | stdin = in_unit |
---|
813 | return |
---|
814 | end function stdin |
---|
815 | |
---|
816 | function stdout() |
---|
817 | integer :: stdout |
---|
818 | integer::tmp_unit |
---|
819 | logical::opened |
---|
820 | stdout = out_unit |
---|
821 | if( pe.NE.root_pe )stdout = etc_unit |
---|
822 | if(logfile_defined)then |
---|
823 | |
---|
824 | if( pe.EQ.root_pe )then |
---|
825 | inquire( file=trim(etcfile), opened=opened ,number=tmp_unit) |
---|
826 | if( opened )then |
---|
827 | |
---|
828 | out_unit=tmp_unit |
---|
829 | stdout=out_unit |
---|
830 | ! write(0,*) 'stdout:',tmp_unit,etcfile |
---|
831 | call FLUSH(out_unit) |
---|
832 | |
---|
833 | else |
---|
834 | tmp_unit=get_unit() |
---|
835 | ! write(0,*) 'stdout(not):',tmp_unit,etcfile |
---|
836 | !ac |
---|
837 | open( unit=tmp_unit, status='UNKNOWN', file=trim(etcfile), err=10 ) |
---|
838 | !ac |
---|
839 | out_unit=tmp_unit |
---|
840 | end if |
---|
841 | |
---|
842 | stdout = out_unit |
---|
843 | |
---|
844 | endif |
---|
845 | |
---|
846 | endif |
---|
847 | return |
---|
848 | 10 logfile_defined=.false. |
---|
849 | call mpp_error( FATAL, 'STDOUT: unable to open '//trim(etcfile)) |
---|
850 | end function stdout |
---|
851 | |
---|
852 | function stderr() |
---|
853 | integer :: stderr |
---|
854 | integer :: tmp_unit |
---|
855 | logical :: opened |
---|
856 | |
---|
857 | stderr = err_unit |
---|
858 | |
---|
859 | if(logfile_defined)then |
---|
860 | |
---|
861 | if( pe.EQ.root_pe )then |
---|
862 | inquire( file=trim(configfile), opened=opened ,number=tmp_unit) |
---|
863 | if( opened )then |
---|
864 | err_unit=tmp_unit |
---|
865 | stderr=err_unit |
---|
866 | call FLUSH(err_unit) |
---|
867 | ! write(0,*) 'stderr:',tmp_unit |
---|
868 | else |
---|
869 | ! write(0,*) 'stderr(not):',tmp_unit |
---|
870 | tmp_unit=get_unit() |
---|
871 | !ac |
---|
872 | open( unit=tmp_unit, status='UNKNOWN', file=trim(configfile), err=10 ) |
---|
873 | !ac |
---|
874 | err_unit=tmp_unit |
---|
875 | end if |
---|
876 | |
---|
877 | stderr = err_unit |
---|
878 | |
---|
879 | endif |
---|
880 | |
---|
881 | endif |
---|
882 | return |
---|
883 | |
---|
884 | 10 logfile_defined=.false. |
---|
885 | call mpp_error( FATAL, 'STDERR: unable to open '//trim(configfile)) |
---|
886 | |
---|
887 | end function stderr |
---|
888 | |
---|
889 | function stdlog() |
---|
890 | integer :: stdlog |
---|
891 | logical :: opened |
---|
892 | if( pe.EQ.root_pe )then |
---|
893 | inquire( file=trim(configfile), opened=opened ) |
---|
894 | if( opened )then |
---|
895 | call FLUSH(log_unit) |
---|
896 | else |
---|
897 | log_unit=get_unit() |
---|
898 | !ac |
---|
899 | open( unit=log_unit, status='UNKNOWN', file=trim(configfile), err=10 ) |
---|
900 | !ac |
---|
901 | end if |
---|
902 | stdlog = log_unit |
---|
903 | else |
---|
904 | stdlog = etc_unit |
---|
905 | end if |
---|
906 | return |
---|
907 | 10 call mpp_error( FATAL, 'STDLOG: unable to open '//trim(configfile)//'.' ) |
---|
908 | end function stdlog |
---|
909 | |
---|
910 | subroutine mpp_exit() |
---|
911 | !to be called at the end of a run |
---|
912 | integer :: i, j, k, n, nmax |
---|
913 | real :: t, tmin, tmax, tavg, tstd |
---|
914 | real :: m, mmin, mmax, mavg, mstd |
---|
915 | real :: t_total |
---|
916 | |
---|
917 | if( .NOT.module_is_initialized )return |
---|
918 | call mpp_clock_end(clock0) |
---|
919 | t_total = clocks(clock0)%total_ticks*tick_rate |
---|
920 | if( clock_num.GT.0 )then |
---|
921 | if( ANY(clocks(1:clock_num)%detailed) )then |
---|
922 | call sum_clock_data; call dump_clock_summary |
---|
923 | end if |
---|
924 | if( pe.EQ.root_pe )then |
---|
925 | write( stdout(),'(/a,i4,a)' ) 'Tabulating mpp_clock statistics across ', npes, ' PEs...' |
---|
926 | if( ANY(clocks(1:clock_num)%detailed) ) & |
---|
927 | write( stdout(),'(a)' )' ... see mpp_clock.out.#### for details on individual PEs.' |
---|
928 | write( stdout(),'(/32x,a)' ) ' tmin tmax tavg tstd tfrac' |
---|
929 | end if |
---|
930 | do i = 1,clock_num |
---|
931 | call mpp_set_current_pelist() !implied global barrier |
---|
932 | current_peset_num = clocks(i)%peset_num |
---|
933 | if( .NOT.ANY(peset(current_peset_num)%list(:).EQ.pe) )cycle |
---|
934 | !times between mpp_clock ticks |
---|
935 | t = clocks(i)%total_ticks*tick_rate |
---|
936 | tmin = t; call mpp_min(tmin) |
---|
937 | tmax = t; call mpp_max(tmax) |
---|
938 | tavg = t; call mpp_sum(tavg); tavg = tavg/mpp_npes() |
---|
939 | tstd = (t-tavg)**2; call mpp_sum(tstd); tstd = sqrt( tstd/mpp_npes() ) |
---|
940 | if( pe.EQ.root_pe )write( stdout(),'(a32,4f14.6,f7.3)' ) & |
---|
941 | clocks(i)%name, tmin, tmax, tavg, tstd, tavg/t_total |
---|
942 | end do |
---|
943 | if( ANY(clocks(1:clock_num)%detailed) .AND. pe.EQ.root_pe )write( stdout(),'(/32x,a)' ) & |
---|
944 | ' tmin tmax tavg tstd mmin mmax mavg mstd mavg/tavg' |
---|
945 | do i = 1,clock_num |
---|
946 | !messages: bytelengths and times |
---|
947 | if( .NOT.clocks(i)%detailed )cycle |
---|
948 | do j = 1,MAX_EVENT_TYPES |
---|
949 | n = clocks(i)%events(j)%calls; nmax = n |
---|
950 | call mpp_max(nmax) |
---|
951 | if( nmax.NE.0 )then |
---|
952 | !don't divide by n because n might be 0 |
---|
953 | m = 0 |
---|
954 | if( n.GT.0 )m = sum(clocks(i)%events(j)%bytes(1:n)) |
---|
955 | mmin = m; call mpp_min(mmin) |
---|
956 | mmax = m; call mpp_max(mmax) |
---|
957 | mavg = m; call mpp_sum(mavg); mavg = mavg/mpp_npes() |
---|
958 | mstd = (m-mavg)**2; call mpp_sum(mstd); mstd = sqrt( mstd/mpp_npes() ) |
---|
959 | t = 0 |
---|
960 | if( n.GT.0 )t = sum(clocks(i)%events(j)%ticks(1:n))*tick_rate |
---|
961 | tmin = t; call mpp_min(tmin) |
---|
962 | tmax = t; call mpp_max(tmax) |
---|
963 | tavg = t; call mpp_sum(tavg); tavg = tavg/mpp_npes() |
---|
964 | tstd = (t-tavg)**2; call mpp_sum(tstd); tstd = sqrt( tstd/mpp_npes() ) |
---|
965 | if( pe.EQ.root_pe )write( stdout(),'(a32,4f11.3,5es11.3)' ) & |
---|
966 | trim(clocks(i)%name)//' '//trim(clocks(i)%events(j)%name), & |
---|
967 | tmin, tmax, tavg, tstd, mmin, mmax, mavg, mstd, mavg/tavg |
---|
968 | end if |
---|
969 | end do |
---|
970 | end do |
---|
971 | end if |
---|
972 | call mpp_set_current_pelist() |
---|
973 | call mpp_sync() |
---|
974 | call mpp_max(mpp_stack_hwm) |
---|
975 | if( pe.EQ.root_pe )write( stdout(),* )'MPP_STACK high water mark=', mpp_stack_hwm |
---|
976 | #ifdef use_libMPI |
---|
977 | !reiner Let's do the MPI_finalize outside the mpp environment |
---|
978 | !reiner call MPI_FINALIZE(error) |
---|
979 | #endif |
---|
980 | |
---|
981 | return |
---|
982 | end subroutine mpp_exit |
---|
983 | |
---|
984 | function mpp_pe() |
---|
985 | integer :: mpp_pe |
---|
986 | |
---|
987 | if( .NOT.module_is_initialized )call mpp_error( FATAL, 'MPP_PE: You must first call mpp_init.' ) |
---|
988 | mpp_pe = pe |
---|
989 | return |
---|
990 | end function mpp_pe |
---|
991 | |
---|
992 | function mpp_node() |
---|
993 | integer :: mpp_node |
---|
994 | |
---|
995 | if( .NOT.module_is_initialized )call mpp_error( FATAL, 'MPP_NODE: You must first call mpp_init.' ) |
---|
996 | mpp_node = node |
---|
997 | return |
---|
998 | end function mpp_node |
---|
999 | |
---|
1000 | function mpp_npes() |
---|
1001 | integer :: mpp_npes |
---|
1002 | |
---|
1003 | if( .NOT.module_is_initialized )call mpp_error( FATAL, 'MPP_NPES: You must first call mpp_init.' ) |
---|
1004 | ! mpp_npes = npes |
---|
1005 | mpp_npes = size(peset(current_peset_num)%list) |
---|
1006 | return |
---|
1007 | end function mpp_npes |
---|
1008 | |
---|
1009 | function mpp_root_pe() |
---|
1010 | integer :: mpp_root_pe |
---|
1011 | |
---|
1012 | if( .NOT.module_is_initialized )call mpp_error( FATAL, 'MPP_ROOT_PE: You must first call mpp_init.' ) |
---|
1013 | mpp_root_pe = root_pe |
---|
1014 | return |
---|
1015 | end function mpp_root_pe |
---|
1016 | |
---|
1017 | subroutine mpp_set_root_pe(num) |
---|
1018 | integer, intent(in) :: num |
---|
1019 | logical :: opened |
---|
1020 | |
---|
1021 | if( .NOT.module_is_initialized )call mpp_error( FATAL, 'MPP_SET_ROOT_PE: You must first call mpp_init.' ) |
---|
1022 | if( .NOT.(ANY(num.EQ.peset(current_peset_num)%list)) ) & |
---|
1023 | call mpp_error( FATAL, 'MPP_SET_ROOT_PE: you cannot set a root PE outside the current pelist.' ) |
---|
1024 | !actions to take if root_pe has changed: |
---|
1025 | ! open log_unit on new root_pe, close it on old root_pe and point its log_unit to stdout. |
---|
1026 | ! if( num.NE.root_pe )then !root_pe has changed |
---|
1027 | ! if( pe.EQ.num )then |
---|
1028 | !!on the new root_pe |
---|
1029 | ! if( log_unit.NE.out_unit )then |
---|
1030 | ! inquire( unit=log_unit, opened=opened ) |
---|
1031 | ! if( .NOT.opened )open( unit=log_unit, status='OLD', file=trim(configfile), position='APPEND' ) |
---|
1032 | ! end if |
---|
1033 | ! else if( pe.EQ.root_pe )then |
---|
1034 | !!on the old root_pe |
---|
1035 | ! if( log_unit.NE.out_unit )then |
---|
1036 | ! inquire( unit=log_unit, opened=opened ) |
---|
1037 | ! if( opened )close(log_unit) |
---|
1038 | ! log_unit = out_unit |
---|
1039 | ! end if |
---|
1040 | ! end if |
---|
1041 | ! end if |
---|
1042 | root_pe = num |
---|
1043 | return |
---|
1044 | end subroutine mpp_set_root_pe |
---|
1045 | |
---|
1046 | subroutine mpp_declare_pelist( pelist, name ) |
---|
1047 | !this call is written specifically to accommodate a brain-dead MPI restriction |
---|
1048 | !that requires a parent communicator to create a child communicator: |
---|
1049 | !in other words: a pelist cannot go off and declare a communicator, but every PE |
---|
1050 | !in the parent, including those not in pelist(:), must get together for the |
---|
1051 | !MPI_COMM_CREATE call. The parent is typically peset(0)%id, though it could also |
---|
1052 | !be a subset that includes all PEs in pelist. |
---|
1053 | !This restriction does not apply to SMA but to have uniform code, |
---|
1054 | !you may as well call it. It must be placed in a context where all PEs call it. |
---|
1055 | !Subsequent calls that use the pelist should be called PEs in the pelist only. |
---|
1056 | integer, intent(in) :: pelist(:) |
---|
1057 | character(len=*), optional :: name |
---|
1058 | integer :: i |
---|
1059 | |
---|
1060 | if( .NOT.module_is_initialized )call mpp_error( FATAL, 'MPP_DECLARE_PELIST: You must first call mpp_init.' ) |
---|
1061 | i = get_peset(pelist) |
---|
1062 | write( peset(i)%name,'(a,i2.2)' ) 'PElist', i !default name |
---|
1063 | if( PRESENT(name) )peset(i)%name = name |
---|
1064 | return |
---|
1065 | end subroutine mpp_declare_pelist |
---|
1066 | |
---|
1067 | subroutine mpp_set_current_pelist( pelist ) |
---|
1068 | !Once we branch off into a PE subset, we want subsequent "global" calls to |
---|
1069 | !sync only across this subset. This is declared as the current pelist (peset(current_peset_num)%list) |
---|
1070 | !when current_peset all pelist ops with no pelist should apply the current pelist. |
---|
1071 | !also, we set the start PE in this pelist to be the root_pe. |
---|
1072 | !unlike mpp_declare_pelist, this is called by the PEs in the pelist only |
---|
1073 | !so if the PEset has not been previously declared, this will hang in MPI. |
---|
1074 | !if pelist is omitted, we reset pelist to the world pelist. |
---|
1075 | integer, intent(in), optional :: pelist(:) |
---|
1076 | integer :: i |
---|
1077 | |
---|
1078 | if( .NOT.module_is_initialized )call mpp_error( FATAL, 'MPP_SET_CURRENT_PELIST: You must first call mpp_init.' ) |
---|
1079 | if( PRESENT(pelist) )then |
---|
1080 | if( .NOT.ANY(pe.EQ.pelist) )call mpp_error( FATAL, 'MPP_SET_CURRENT_PELIST: pe must be in pelist.' ) |
---|
1081 | current_peset_num = get_peset(pelist) |
---|
1082 | else |
---|
1083 | current_peset_num = world_peset_num |
---|
1084 | end if |
---|
1085 | call mpp_set_root_pe( MINVAL(peset(current_peset_num)%list) ) |
---|
1086 | call mpp_sync() !this is called to make sure everyone in the current pelist is here. |
---|
1087 | ! npes = mpp_npes() |
---|
1088 | return |
---|
1089 | end subroutine mpp_set_current_pelist |
---|
1090 | |
---|
1091 | subroutine mpp_get_current_pelist( pelist, name ) |
---|
1092 | !this is created for use by mpp_define_domains within a pelist |
---|
1093 | !will be published but not publicized |
---|
1094 | integer, intent(out) :: pelist(:) |
---|
1095 | character(len=*), intent(out), optional :: name |
---|
1096 | |
---|
1097 | if( size(pelist).NE.size(peset(current_peset_num)%list) ) & |
---|
1098 | call mpp_error( FATAL, 'MPP_GET_CURRENT_PELIST: size(pelist) is wrong.' ) |
---|
1099 | pelist(:) = peset(current_peset_num)%list(:) |
---|
1100 | if( PRESENT(name) )name = peset(current_peset_num)%name |
---|
1101 | |
---|
1102 | return |
---|
1103 | end subroutine mpp_get_current_pelist |
---|
1104 | |
---|
1105 | function get_peset(pelist) |
---|
1106 | integer :: get_peset |
---|
1107 | !makes a PE set out of a PE list |
---|
1108 | !a PE list is an ordered list of PEs |
---|
1109 | !a PE set is a triad (start,log2stride,size) for SHMEM, an a communicator for MPI |
---|
1110 | !if stride is non-uniform or not a power of 2, will return error (not required for MPI but enforced for uniformity) |
---|
1111 | integer, intent(in), optional :: pelist(:) |
---|
1112 | integer :: group |
---|
1113 | integer :: i, n, stride |
---|
1114 | integer, allocatable :: sorted(:) |
---|
1115 | |
---|
1116 | if( .NOT.PRESENT(pelist) )then !set it to current_peset_num |
---|
1117 | get_peset = current_peset_num; return |
---|
1118 | end if |
---|
1119 | if( size(pelist).EQ.1 .AND. npes.GT.1 )then !collective ops on single PEs should return |
---|
1120 | get_peset = 0; return |
---|
1121 | end if |
---|
1122 | !make a sorted list |
---|
1123 | n = 1 |
---|
1124 | if( ascend_sort(pelist).NE.1 )call mpp_error( FATAL, 'GET_PESET: sort error.' ) !result is the array sorted(:) |
---|
1125 | if( debug )write( stderr(),* )'pelist=', pelist, ' sorted=', sorted |
---|
1126 | !find if this array matches any existing peset |
---|
1127 | do i = 1,peset_num |
---|
1128 | if( debug )write( stderr(),'(a,3i4)' )'pe, i, peset_num=', pe, i, peset_num |
---|
1129 | if( size(sorted).EQ.size(peset(i)%list) )then |
---|
1130 | if( ALL(sorted.EQ.peset(i)%list) )then |
---|
1131 | deallocate(sorted) |
---|
1132 | get_peset = i; return |
---|
1133 | end if |
---|
1134 | end if |
---|
1135 | end do |
---|
1136 | !not found, so create new peset |
---|
1137 | peset_num = peset_num + 1 |
---|
1138 | if( peset_num.GE.PESET_MAX )call mpp_error( FATAL, 'GET_PESET: number of PE sets exceeds PESET_MAX.' ) |
---|
1139 | i = peset_num !shorthand |
---|
1140 | !create list |
---|
1141 | allocate( peset(i)%list(size(sorted)) ) |
---|
1142 | peset(i)%list(:) = sorted(:) |
---|
1143 | peset(i)%count = size(sorted) |
---|
1144 | #ifdef use_libSMA |
---|
1145 | peset(i)%start = sorted(1) |
---|
1146 | if( size(sorted).GT.1 )then |
---|
1147 | stride = sorted(2)-sorted(1) |
---|
1148 | if( ANY(sorted(2:n)-sorted(1:n-1).NE.stride) ) & |
---|
1149 | call mpp_error( WARNING, 'GET_PESET: pelist must have constant stride.' ) |
---|
1150 | peset(i)%log2stride = nint( log(real(stride))/log(2.) ) |
---|
1151 | if( 2**peset(i)%log2stride.NE.stride )call mpp_error( WARNING, 'GET_PESET: pelist must have power-of-2 stride.' ) |
---|
1152 | else |
---|
1153 | peset(i)%log2stride = 0 |
---|
1154 | end if |
---|
1155 | #elif use_libMPI |
---|
1156 | call MPI_GROUP_INCL( peset(current_peset_num)%group, size(sorted), sorted, peset(i)%group, error ) |
---|
1157 | call MPI_COMM_CREATE( peset(current_peset_num)%id, peset(i)%group, peset(i)%id, error ) |
---|
1158 | #endif |
---|
1159 | deallocate(sorted) |
---|
1160 | get_peset = i |
---|
1161 | |
---|
1162 | return |
---|
1163 | |
---|
1164 | contains |
---|
1165 | |
---|
1166 | recursive function ascend_sort(a) result(a_sort) |
---|
1167 | integer :: a_sort |
---|
1168 | integer, intent(in) :: a(:) |
---|
1169 | integer :: b, i |
---|
1170 | if( size(a).EQ.1 .OR. ALL(a.EQ.a(1)) )then |
---|
1171 | allocate( sorted(n) ) |
---|
1172 | sorted(n) = a(1) |
---|
1173 | a_sort = n |
---|
1174 | return |
---|
1175 | end if |
---|
1176 | b = minval(a) |
---|
1177 | n = n + 1 |
---|
1178 | i = ascend_sort( pack(a,mask=a.NE.b) ) |
---|
1179 | a_sort = i - 1 |
---|
1180 | sorted(i-1) = b |
---|
1181 | return |
---|
1182 | end function ascend_sort |
---|
1183 | |
---|
1184 | end function get_peset |
---|
1185 | |
---|
1186 | !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! |
---|
1187 | ! ! |
---|
1188 | ! PERFORMANCE PROFILING CALLS ! |
---|
1189 | ! ! |
---|
1190 | !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! |
---|
1191 | subroutine mpp_clock_set_grain( grain ) |
---|
1192 | integer, intent(in) :: grain |
---|
1193 | !set the granularity of times: only clocks whose grain is lower than |
---|
1194 | !clock_grain are triggered, finer-grained clocks are dormant. |
---|
1195 | !clock_grain is initialized to HUGE(1), so all clocks are triggered if |
---|
1196 | !this is never called. |
---|
1197 | if( .NOT.module_is_initialized )call mpp_error( FATAL, 'MPP_CLOCK_SET_GRAIN: You must first call mpp_init.' ) |
---|
1198 | |
---|
1199 | clock_grain = grain |
---|
1200 | return |
---|
1201 | end subroutine mpp_clock_set_grain |
---|
1202 | |
---|
1203 | function mpp_clock_id( name, flags, grain ) |
---|
1204 | !return an ID for a new or existing clock |
---|
1205 | integer :: mpp_clock_id |
---|
1206 | character(len=*), intent(in) :: name |
---|
1207 | integer, intent(in), optional :: flags, grain |
---|
1208 | integer :: i |
---|
1209 | |
---|
1210 | if( .NOT.module_is_initialized )call mpp_error( FATAL, 'MPP_CLOCK_ID: You must first call mpp_init.' ) |
---|
1211 | !if grain is present, the clock is only triggered if it |
---|
1212 | !is low ("coarse") enough: compared to clock_grain |
---|
1213 | !finer-grained clocks are dormant. |
---|
1214 | !if grain is absent, clock is triggered. |
---|
1215 | if( PRESENT(grain) )then |
---|
1216 | if( grain.GT.clock_grain )then |
---|
1217 | mpp_clock_id = 0 |
---|
1218 | return |
---|
1219 | end if |
---|
1220 | end if |
---|
1221 | mpp_clock_id = 1 |
---|
1222 | if( clock_num.EQ.0 )then !first |
---|
1223 | ! allocate( clocks(MAX_CLOCKS) ) |
---|
1224 | clock_num = mpp_clock_id |
---|
1225 | call clock_init(mpp_clock_id,name,flags) |
---|
1226 | else |
---|
1227 | FIND_CLOCK: do while( trim(name).NE.trim(clocks(mpp_clock_id)%name) ) |
---|
1228 | mpp_clock_id = mpp_clock_id + 1 |
---|
1229 | if( mpp_clock_id.GT.clock_num )then |
---|
1230 | if( mpp_clock_id.GT.MAX_CLOCKS )then |
---|
1231 | call mpp_error( WARNING, 'MPP_CLOCK_ID: too many clock requests, this one is ignored.' ) |
---|
1232 | else !new clock: initialize |
---|
1233 | clock_num = mpp_clock_id |
---|
1234 | call clock_init(mpp_clock_id,name,flags) |
---|
1235 | exit FIND_CLOCK |
---|
1236 | end if |
---|
1237 | end if |
---|
1238 | end do FIND_CLOCK |
---|
1239 | endif |
---|
1240 | return |
---|
1241 | end function mpp_clock_id |
---|
1242 | |
---|
1243 | subroutine mpp_clock_begin(id) |
---|
1244 | integer, intent(in) :: id |
---|
1245 | |
---|
1246 | if( .NOT.module_is_initialized )call mpp_error( FATAL, 'MPP_CLOCK_BEGIN: You must first call mpp_init.' ) |
---|
1247 | if( id.EQ.0 )return |
---|
1248 | if( id.LT.0 .OR. id.GT.clock_num )call mpp_error( FATAL, 'MPP_CLOCK_BEGIN: invalid id.' ) |
---|
1249 | |
---|
1250 | if( clocks(id)%peset_num.EQ.0 )clocks(id)%peset_num = current_peset_num |
---|
1251 | if( clocks(id)%peset_num.NE.current_peset_num ) & |
---|
1252 | call mpp_error( FATAL, 'MPP_CLOCK_BEGIN: cannot change pelist context of a clock.' ) |
---|
1253 | if( clocks(id)%sync_on_begin )then |
---|
1254 | !do an untimed sync at the beginning of the clock |
---|
1255 | !this puts all PEs in the current pelist on par, so that measurements begin together |
---|
1256 | !ending time will be different, thus measuring load imbalance for this clock. |
---|
1257 | current_clock = 0; call mpp_sync() |
---|
1258 | end if |
---|
1259 | current_clock = id |
---|
1260 | call SYSTEM_CLOCK( clocks(id)%tick ) |
---|
1261 | return |
---|
1262 | end subroutine mpp_clock_begin |
---|
1263 | |
---|
1264 | subroutine mpp_clock_end(id) |
---|
1265 | !the id argument is not used for anything at present |
---|
1266 | integer, intent(in), optional :: id |
---|
1267 | integer(LONG_KIND) :: delta |
---|
1268 | |
---|
1269 | if( .NOT.module_is_initialized )call mpp_error( FATAL, 'MPP_CLOCK_END: You must first call mpp_init.' ) |
---|
1270 | if( id.EQ.0 )return |
---|
1271 | if( id.LT.0 .OR. id.GT.clock_num )call mpp_error( FATAL, 'MPP_CLOCK_BEGIN: invalid id.' ) |
---|
1272 | call SYSTEM_CLOCK(end_tick) |
---|
1273 | if( clocks(id)%peset_num.NE.current_peset_num ) & |
---|
1274 | call mpp_error( FATAL, 'MPP_CLOCK_END: cannot change pelist context of a clock.' ) |
---|
1275 | delta = end_tick - clocks(id)%tick |
---|
1276 | if( delta.LT.0 )then |
---|
1277 | write( stderr(),* )'pe, id, start_tick, end_tick, delta, max_ticks=', pe, id, clocks(id)%tick, end_tick, delta, max_ticks |
---|
1278 | delta = delta + max_ticks + 1 |
---|
1279 | call mpp_error( WARNING, 'MPP_CLOCK_END: Clock rollover, assumed single roll.' ) |
---|
1280 | end if |
---|
1281 | clocks(id)%total_ticks = clocks(id)%total_ticks + delta |
---|
1282 | current_clock = 0 |
---|
1283 | return |
---|
1284 | end subroutine mpp_clock_end |
---|
1285 | |
---|
1286 | subroutine increment_current_clock( event_id, bytes ) |
---|
1287 | integer, intent(in) :: event_id |
---|
1288 | integer, intent(in), optional :: bytes |
---|
1289 | integer :: n |
---|
1290 | integer(LONG_KIND) :: delta |
---|
1291 | |
---|
1292 | if( current_clock.EQ.0 )return |
---|
1293 | if( current_clock.LT.0 .OR. current_clock.GT.clock_num )call mpp_error( FATAL, 'MPP_CLOCK_BEGIN: invalid current_clock.' ) |
---|
1294 | if( .NOT.clocks(current_clock)%detailed )return |
---|
1295 | call SYSTEM_CLOCK(end_tick) |
---|
1296 | n = clocks(current_clock)%events(event_id)%calls + 1 |
---|
1297 | |
---|
1298 | if( n.EQ.MAX_EVENTS )call mpp_error( WARNING, & |
---|
1299 | 'MPP_CLOCK: events exceed MAX_EVENTS, ignore detailed profiling data for clock '//trim(clocks(current_clock)%name) ) |
---|
1300 | if( n.GT.MAX_EVENTS )return |
---|
1301 | |
---|
1302 | clocks(current_clock)%events(event_id)%calls = n |
---|
1303 | delta = end_tick - start_tick |
---|
1304 | if( delta.LT.0 )then |
---|
1305 | delta = delta + max_ticks + 1 |
---|
1306 | call mpp_error( WARNING, 'MPP_CLOCK_END: Clock rollover, assumed single roll.' ) |
---|
1307 | end if |
---|
1308 | clocks(current_clock)%events(event_id)%ticks(n) = delta |
---|
1309 | if( PRESENT(bytes) )clocks(current_clock)%events(event_id)%bytes(n) = bytes |
---|
1310 | return |
---|
1311 | end subroutine increment_current_clock |
---|
1312 | |
---|
1313 | subroutine dump_clock_summary() |
---|
1314 | implicit none |
---|
1315 | |
---|
1316 | real :: total_time,total_time_all,total_data |
---|
1317 | real :: msg_size,eff_BW,s |
---|
1318 | integer :: SD_UNIT |
---|
1319 | integer :: total_calls |
---|
1320 | integer :: i,j,k,ct |
---|
1321 | integer :: msg_cnt |
---|
1322 | character(len=2) :: u |
---|
1323 | character(len=18) :: filename |
---|
1324 | character(len=20),dimension(MAX_BINS),save :: bin |
---|
1325 | |
---|
1326 | data bin( 1) /' 0 - 8 B: '/ |
---|
1327 | data bin( 2) /' 8 - 16 B: '/ |
---|
1328 | data bin( 3) /' 16 - 32 B: '/ |
---|
1329 | data bin( 4) /' 32 - 64 B: '/ |
---|
1330 | data bin( 5) /' 64 - 128 B: '/ |
---|
1331 | data bin( 6) /'128 - 256 B: '/ |
---|
1332 | data bin( 7) /'256 - 512 B: '/ |
---|
1333 | data bin( 8) /'512 - 1024 B: '/ |
---|
1334 | data bin( 9) /' 1.0 - 2.1 KB: '/ |
---|
1335 | data bin(10) /' 2.1 - 4.1 KB: '/ |
---|
1336 | data bin(11) /' 4.1 - 8.2 KB: '/ |
---|
1337 | data bin(12) /' 8.2 - 16.4 KB: '/ |
---|
1338 | data bin(13) /' 16.4 - 32.8 KB: '/ |
---|
1339 | data bin(14) /' 32.8 - 65.5 KB: '/ |
---|
1340 | data bin(15) /' 65.5 - 131.1 KB: '/ |
---|
1341 | data bin(16) /'131.1 - 262.1 KB: '/ |
---|
1342 | data bin(17) /'262.1 - 524.3 KB: '/ |
---|
1343 | data bin(18) /'524.3 - 1048.6 KB: '/ |
---|
1344 | data bin(19) /' 1.0 - 2.1 MB: '/ |
---|
1345 | data bin(20) /' >2.1 MB: '/ |
---|
1346 | |
---|
1347 | if( .NOT.ANY(clocks(1:clock_num)%detailed) )return |
---|
1348 | write( filename,'(a,i4.4)' )'mpp_clock.out.', pe |
---|
1349 | |
---|
1350 | SD_UNIT = get_unit() |
---|
1351 | open(SD_UNIT,file=trim(filename),form='formatted') |
---|
1352 | |
---|
1353 | COMM_TYPE: do ct = 1,clock_num |
---|
1354 | |
---|
1355 | if( .NOT.clocks(ct)%detailed )cycle |
---|
1356 | write(SD_UNIT,*) & |
---|
1357 | clock_summary(ct)%name(1:15),' Communication Data for PE ',pe |
---|
1358 | |
---|
1359 | write(SD_UNIT,*) ' ' |
---|
1360 | write(SD_UNIT,*) ' ' |
---|
1361 | |
---|
1362 | total_time_all = 0.0 |
---|
1363 | EVENT_TYPE: do k = 1,MAX_EVENT_TYPES-1 |
---|
1364 | |
---|
1365 | if(clock_summary(ct)%event(k)%total_time == 0.0)cycle |
---|
1366 | |
---|
1367 | total_time = clock_summary(ct)%event(k)%total_time |
---|
1368 | total_time_all = total_time_all + total_time |
---|
1369 | total_data = clock_summary(ct)%event(k)%total_data |
---|
1370 | total_calls = clock_summary(ct)%event(k)%total_cnts |
---|
1371 | |
---|
1372 | write(SD_UNIT,1000) clock_summary(ct)%event(k)%name(1:9) // ':' |
---|
1373 | |
---|
1374 | write(SD_UNIT,1001) 'Total Data: ',total_data*1.0e-6, & |
---|
1375 | 'MB; Total Time: ', total_time, & |
---|
1376 | 'secs; Total Calls: ',total_calls |
---|
1377 | |
---|
1378 | write(SD_UNIT,*) ' ' |
---|
1379 | write(SD_UNIT,1002) ' Bin Counts Avg Size Eff B/W' |
---|
1380 | write(SD_UNIT,*) ' ' |
---|
1381 | |
---|
1382 | BIN_LOOP: do j=1,MAX_BINS |
---|
1383 | |
---|
1384 | if(clock_summary(ct)%event(k)%msg_size_cnts(j)==0)cycle |
---|
1385 | |
---|
1386 | if(j<=8)then |
---|
1387 | s = 1.0 |
---|
1388 | u = ' B' |
---|
1389 | elseif(j<=18)then |
---|
1390 | s = 1.0e-3 |
---|
1391 | u = 'KB' |
---|
1392 | else |
---|
1393 | s = 1.0e-6 |
---|
1394 | u = 'MB' |
---|
1395 | endif |
---|
1396 | |
---|
1397 | msg_cnt = clock_summary(ct)%event(k)%msg_size_cnts(j) |
---|
1398 | msg_size = & |
---|
1399 | s*(clock_summary(ct)%event(k)%msg_size_sums(j)/real(msg_cnt)) |
---|
1400 | eff_BW = (1.0e-6)*( clock_summary(ct)%event(k)%msg_size_sums(j) / & |
---|
1401 | clock_summary(ct)%event(k)%msg_time_sums(j) ) |
---|
1402 | |
---|
1403 | write(SD_UNIT,1003) bin(j),msg_cnt,msg_size,u,eff_BW |
---|
1404 | |
---|
1405 | end do BIN_LOOP |
---|
1406 | |
---|
1407 | write(SD_UNIT,*) ' ' |
---|
1408 | write(SD_UNIT,*) ' ' |
---|
1409 | end do EVENT_TYPE |
---|
1410 | |
---|
1411 | ! "Data-less" WAIT |
---|
1412 | |
---|
1413 | if(clock_summary(ct)%event(MAX_EVENT_TYPES)%total_time>0.0)then |
---|
1414 | |
---|
1415 | total_time = clock_summary(ct)%event(MAX_EVENT_TYPES)%total_time |
---|
1416 | total_time_all = total_time_all + total_time |
---|
1417 | total_calls = clock_summary(ct)%event(MAX_EVENT_TYPES)%total_cnts |
---|
1418 | |
---|
1419 | write(SD_UNIT,1000) clock_summary(ct)%event(MAX_EVENT_TYPES)%name(1:9) // ':' |
---|
1420 | |
---|
1421 | write(SD_UNIT,1004) 'Total Calls: ',total_calls,'; Total Time: ', & |
---|
1422 | total_time,'secs' |
---|
1423 | |
---|
1424 | endif |
---|
1425 | |
---|
1426 | write(SD_UNIT,*) ' ' |
---|
1427 | write(SD_UNIT,1005) 'Total communication time spent for ' // & |
---|
1428 | clock_summary(ct)%name(1:9) // ': ',total_time_all,'secs' |
---|
1429 | write(SD_UNIT,*) ' ' |
---|
1430 | write(SD_UNIT,*) ' ' |
---|
1431 | write(SD_UNIT,*) ' ' |
---|
1432 | |
---|
1433 | end do COMM_TYPE |
---|
1434 | |
---|
1435 | close(SD_UNIT) |
---|
1436 | |
---|
1437 | 1000 format(a) |
---|
1438 | 1001 format(a,f8.2,a,f8.2,a,i6) |
---|
1439 | 1002 format(a) |
---|
1440 | 1003 format(a,i6,' ',' ',f6.1,a,' ',f7.3,'MB/sec') |
---|
1441 | 1004 format(a,i8,a,f9.2,a) |
---|
1442 | 1005 format(a,f9.2,a) |
---|
1443 | return |
---|
1444 | end subroutine dump_clock_summary |
---|
1445 | |
---|
1446 | integer function get_unit() |
---|
1447 | implicit none |
---|
1448 | |
---|
1449 | integer,save :: i |
---|
1450 | logical :: l_open |
---|
1451 | |
---|
1452 | i = 10 |
---|
1453 | do i=10,99 |
---|
1454 | inquire(unit=i,opened=l_open) |
---|
1455 | if(.not.l_open)exit |
---|
1456 | end do |
---|
1457 | |
---|
1458 | if(i==100)then |
---|
1459 | call mpp_error(FATAL,'Unable to get I/O unit') |
---|
1460 | else |
---|
1461 | get_unit = i |
---|
1462 | endif |
---|
1463 | |
---|
1464 | return |
---|
1465 | end function get_unit |
---|
1466 | |
---|
1467 | subroutine sum_clock_data() |
---|
1468 | implicit none |
---|
1469 | |
---|
1470 | integer :: i,j,k,ct,event_size,event_cnt |
---|
1471 | real :: msg_time |
---|
1472 | |
---|
1473 | CLOCK_TYPE: do ct=1,clock_num |
---|
1474 | if( .NOT.clocks(ct)%detailed )cycle |
---|
1475 | EVENT_TYPE: do j=1,MAX_EVENT_TYPES-1 |
---|
1476 | event_cnt = clocks(ct)%events(j)%calls |
---|
1477 | EVENT_SUMMARY: do i=1,event_cnt |
---|
1478 | |
---|
1479 | clock_summary(ct)%event(j)%total_cnts = & |
---|
1480 | clock_summary(ct)%event(j)%total_cnts + 1 |
---|
1481 | |
---|
1482 | event_size = clocks(ct)%events(j)%bytes(i) |
---|
1483 | |
---|
1484 | k = find_bin(event_size) |
---|
1485 | |
---|
1486 | clock_summary(ct)%event(j)%msg_size_cnts(k) = & |
---|
1487 | clock_summary(ct)%event(j)%msg_size_cnts(k) + 1 |
---|
1488 | |
---|
1489 | clock_summary(ct)%event(j)%msg_size_sums(k) = & |
---|
1490 | clock_summary(ct)%event(j)%msg_size_sums(k) & |
---|
1491 | + clocks(ct)%events(j)%bytes(i) |
---|
1492 | |
---|
1493 | clock_summary(ct)%event(j)%total_data = & |
---|
1494 | clock_summary(ct)%event(j)%total_data & |
---|
1495 | + clocks(ct)%events(j)%bytes(i) |
---|
1496 | |
---|
1497 | msg_time = clocks(ct)%events(j)%ticks(i) |
---|
1498 | msg_time = tick_rate * real( clocks(ct)%events(j)%ticks(i) ) |
---|
1499 | |
---|
1500 | clock_summary(ct)%event(j)%msg_time_sums(k) = & |
---|
1501 | clock_summary(ct)%event(j)%msg_time_sums(k) + msg_time |
---|
1502 | |
---|
1503 | clock_summary(ct)%event(j)%total_time = & |
---|
1504 | clock_summary(ct)%event(j)%total_time + msg_time |
---|
1505 | |
---|
1506 | end do EVENT_SUMMARY |
---|
1507 | end do EVENT_TYPE |
---|
1508 | |
---|
1509 | j = MAX_EVENT_TYPES ! WAITs |
---|
1510 | ! "msg_size_cnts" doesn't really mean anything for WAIT |
---|
1511 | ! but position will be used to store number of counts for now. |
---|
1512 | |
---|
1513 | event_cnt = clocks(ct)%events(j)%calls |
---|
1514 | clock_summary(ct)%event(j)%msg_size_cnts(1) = event_cnt |
---|
1515 | clock_summary(ct)%event(j)%total_cnts = event_cnt |
---|
1516 | |
---|
1517 | msg_time = tick_rate * real( sum ( clocks(ct)%events(j)%ticks(1:event_cnt) ) ) |
---|
1518 | clock_summary(ct)%event(j)%msg_time_sums(1) = & |
---|
1519 | clock_summary(ct)%event(j)%msg_time_sums(1) + msg_time |
---|
1520 | |
---|
1521 | clock_summary(ct)%event(j)%total_time = clock_summary(ct)%event(j)%msg_time_sums(1) |
---|
1522 | |
---|
1523 | end do CLOCK_TYPE |
---|
1524 | |
---|
1525 | return |
---|
1526 | contains |
---|
1527 | integer function find_bin(event_size) |
---|
1528 | implicit none |
---|
1529 | |
---|
1530 | integer,intent(in) :: event_size |
---|
1531 | integer :: k,msg_size |
---|
1532 | |
---|
1533 | msg_size = 8 |
---|
1534 | k = 1 |
---|
1535 | do while(event_size>msg_size .and. k<MAX_BINS) |
---|
1536 | k = k+1 |
---|
1537 | msg_size = msg_size*2 |
---|
1538 | end do |
---|
1539 | find_bin = k |
---|
1540 | return |
---|
1541 | end function find_bin |
---|
1542 | |
---|
1543 | end subroutine sum_clock_data |
---|
1544 | |
---|
1545 | subroutine clock_init(id,name,flags) |
---|
1546 | integer, intent(in) :: id |
---|
1547 | character(len=*), intent(in) :: name |
---|
1548 | integer, intent(in), optional :: flags |
---|
1549 | integer :: i |
---|
1550 | |
---|
1551 | clocks(id)%name = name |
---|
1552 | clocks(id)%tick = 0 |
---|
1553 | clocks(id)%total_ticks = 0 |
---|
1554 | clocks(id)%sync_on_begin = .FALSE. |
---|
1555 | clocks(id)%detailed = .FALSE. |
---|
1556 | clocks(id)%peset_num = 0 |
---|
1557 | if( PRESENT(flags) )then |
---|
1558 | if( BTEST(flags,0) )clocks(id)%sync_on_begin = .TRUE. |
---|
1559 | if( BTEST(flags,1) )clocks(id)%detailed = .TRUE. |
---|
1560 | end if |
---|
1561 | if( clocks(id)%detailed )then |
---|
1562 | allocate( clocks(id)%events(MAX_EVENT_TYPES) ) |
---|
1563 | clocks(id)%events(EVENT_ALLREDUCE)%name = 'ALLREDUCE' |
---|
1564 | clocks(id)%events(EVENT_BROADCAST)%name = 'BROADCAST' |
---|
1565 | clocks(id)%events(EVENT_RECV)%name = 'RECV' |
---|
1566 | clocks(id)%events(EVENT_SEND)%name = 'SEND' |
---|
1567 | clocks(id)%events(EVENT_WAIT)%name = 'WAIT' |
---|
1568 | do i=1,MAX_EVENT_TYPES |
---|
1569 | clocks(id)%events(i)%ticks(:) = 0 |
---|
1570 | clocks(id)%events(i)%bytes(:) = 0 |
---|
1571 | clocks(id)%events(i)%calls = 0 |
---|
1572 | end do |
---|
1573 | clock_summary(id)%name = name |
---|
1574 | clock_summary(id)%event(EVENT_ALLREDUCE)%name = 'ALLREDUCE' |
---|
1575 | clock_summary(id)%event(EVENT_BROADCAST)%name = 'BROADCAST' |
---|
1576 | clock_summary(id)%event(EVENT_RECV)%name = 'RECV' |
---|
1577 | clock_summary(id)%event(EVENT_SEND)%name = 'SEND' |
---|
1578 | clock_summary(id)%event(EVENT_WAIT)%name = 'WAIT' |
---|
1579 | do i=1,MAX_EVENT_TYPES |
---|
1580 | clock_summary(id)%event(i)%msg_size_sums(:) = 0.0 |
---|
1581 | clock_summary(id)%event(i)%msg_time_sums(:) = 0.0 |
---|
1582 | clock_summary(id)%event(i)%total_data = 0.0 |
---|
1583 | clock_summary(id)%event(i)%total_time = 0.0 |
---|
1584 | clock_summary(id)%event(i)%msg_size_cnts(:) = 0 |
---|
1585 | clock_summary(id)%event(i)%total_cnts = 0 |
---|
1586 | end do |
---|
1587 | end if |
---|
1588 | return |
---|
1589 | end subroutine clock_init |
---|
1590 | |
---|
1591 | #ifdef __sgi |
---|
1592 | subroutine system_clock_sgi( count, count_rate, count_max ) |
---|
1593 | !mimics F90 SYSTEM_CLOCK intrinsic |
---|
1594 | integer(LONG_KIND), intent(out), optional :: count, count_rate, count_max |
---|
1595 | integer(LONG_KIND) :: sgi_tick, sgi_ticks_per_sec, sgi_max_tick |
---|
1596 | !sgi_max_tick currently returns 64 |
---|
1597 | !count must return a number between 0 and count_max |
---|
1598 | integer(LONG_KIND), save :: maxtick=0 |
---|
1599 | if( maxtick.EQ.0 )then |
---|
1600 | maxtick = sgi_max_tick() !actually reports #bits in maxtick |
---|
1601 | if( maxtick.LT.BIT_SIZE(maxtick) )then |
---|
1602 | maxtick = 2**maxtick |
---|
1603 | else |
---|
1604 | maxtick = huge(maxtick) |
---|
1605 | end if |
---|
1606 | end if |
---|
1607 | if( PRESENT(count) )then |
---|
1608 | count = modulo( sgi_tick()-tick0, maxtick ) |
---|
1609 | ! count = sgi_tick() |
---|
1610 | end if |
---|
1611 | if( PRESENT(count_rate) )then |
---|
1612 | count_rate = sgi_ticks_per_sec() |
---|
1613 | end if |
---|
1614 | if( PRESENT(count_max) )then |
---|
1615 | count_max = maxtick-1 |
---|
1616 | end if |
---|
1617 | return |
---|
1618 | end subroutine system_clock_sgi |
---|
1619 | #endif |
---|
1620 | |
---|
1621 | #ifdef use_libMPI |
---|
1622 | subroutine system_clock_mpi( count, count_rate, count_max ) |
---|
1623 | !mimics F90 SYSTEM_CLOCK intrinsic |
---|
1624 | integer(LONG_KIND), intent(out), optional :: count, count_rate, count_max |
---|
1625 | !count must return a number between 0 and count_max |
---|
1626 | integer(LONG_KIND), parameter :: maxtick=HUGE(count_max) |
---|
1627 | logical, save :: first_call = .true. |
---|
1628 | real(DOUBLE_KIND), save :: count0 ! use to prevent integer overflow |
---|
1629 | if ( first_call ) count0 = MPI_WTime(); first_call = .false. |
---|
1630 | if( PRESENT(count) )then |
---|
1631 | count = (MPI_WTime()-count0)/MPI_WTick() |
---|
1632 | end if |
---|
1633 | if( PRESENT(count_rate) )then |
---|
1634 | count_rate = MPI_Wtick()**(-1) |
---|
1635 | end if |
---|
1636 | if( PRESENT(count_max) )then |
---|
1637 | count_max = maxtick-1 |
---|
1638 | end if |
---|
1639 | return |
---|
1640 | end subroutine system_clock_mpi |
---|
1641 | #endif |
---|
1642 | |
---|
1643 | !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! |
---|
1644 | ! ! |
---|
1645 | ! BASIC MESSAGE PASSING ROUTINE: mpp_transmit ! |
---|
1646 | ! ! |
---|
1647 | !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! |
---|
1648 | |
---|
1649 | #define MPP_TRANSMIT_ mpp_transmit_real8 |
---|
1650 | #define MPP_TRANSMIT_SCALAR_ mpp_transmit_real8_scalar |
---|
1651 | #define MPP_TRANSMIT_2D_ mpp_transmit_real8_2d |
---|
1652 | #define MPP_TRANSMIT_3D_ mpp_transmit_real8_3d |
---|
1653 | #define MPP_TRANSMIT_4D_ mpp_transmit_real8_4d |
---|
1654 | #define MPP_TRANSMIT_5D_ mpp_transmit_real8_5d |
---|
1655 | #define MPP_RECV_ mpp_recv_real8 |
---|
1656 | #define MPP_RECV_SCALAR_ mpp_recv_real8_scalar |
---|
1657 | #define MPP_RECV_2D_ mpp_recv_real8_2d |
---|
1658 | #define MPP_RECV_3D_ mpp_recv_real8_3d |
---|
1659 | #define MPP_RECV_4D_ mpp_recv_real8_4d |
---|
1660 | #define MPP_RECV_5D_ mpp_recv_real8_5d |
---|
1661 | #define MPP_SEND_ mpp_send_real8 |
---|
1662 | #define MPP_SEND_SCALAR_ mpp_send_real8_scalar |
---|
1663 | #define MPP_SEND_2D_ mpp_send_real8_2d |
---|
1664 | #define MPP_SEND_3D_ mpp_send_real8_3d |
---|
1665 | #define MPP_SEND_4D_ mpp_send_real8_4d |
---|
1666 | #define MPP_SEND_5D_ mpp_send_real8_5d |
---|
1667 | #define MPP_BROADCAST_ mpp_broadcast_real8 |
---|
1668 | #define MPP_BROADCAST_SCALAR_ mpp_broadcast_real8_scalar |
---|
1669 | #define MPP_BROADCAST_2D_ mpp_broadcast_real8_2d |
---|
1670 | #define MPP_BROADCAST_3D_ mpp_broadcast_real8_3d |
---|
1671 | #define MPP_BROADCAST_4D_ mpp_broadcast_real8_4d |
---|
1672 | #define MPP_BROADCAST_5D_ mpp_broadcast_real8_5d |
---|
1673 | #define MPP_TYPE_ real(DOUBLE_KIND) |
---|
1674 | #define MPP_TYPE_BYTELEN_ 8 |
---|
1675 | #ifdef use_LAM_MPI |
---|
1676 | #define MPI_TYPE_ MPI_DOUBLE_PRECISION |
---|
1677 | #else |
---|
1678 | #define MPI_TYPE_ MPI_REAL8 |
---|
1679 | #endif |
---|
1680 | #define SHMEM_BROADCAST_ SHMEM_BROADCAST8 |
---|
1681 | #define SHMEM_GET_ SHMEM_GET8 |
---|
1682 | #include <mpp_transmit.h> |
---|
1683 | |
---|
1684 | #ifndef no_4byte_reals |
---|
1685 | #define MPP_TRANSMIT_ mpp_transmit_real4 |
---|
1686 | #define MPP_TRANSMIT_SCALAR_ mpp_transmit_real4_scalar |
---|
1687 | #define MPP_TRANSMIT_2D_ mpp_transmit_real4_2d |
---|
1688 | #define MPP_TRANSMIT_3D_ mpp_transmit_real4_3d |
---|
1689 | #define MPP_TRANSMIT_4D_ mpp_transmit_real4_4d |
---|
1690 | #define MPP_TRANSMIT_5D_ mpp_transmit_real4_5d |
---|
1691 | #define MPP_RECV_ mpp_recv_real4 |
---|
1692 | #define MPP_RECV_SCALAR_ mpp_recv_real4_scalar |
---|
1693 | #define MPP_RECV_2D_ mpp_recv_real4_2d |
---|
1694 | #define MPP_RECV_3D_ mpp_recv_real4_3d |
---|
1695 | #define MPP_RECV_4D_ mpp_recv_real4_4d |
---|
1696 | #define MPP_RECV_5D_ mpp_recv_real4_5d |
---|
1697 | #define MPP_SEND_ mpp_send_real4 |
---|
1698 | #define MPP_SEND_SCALAR_ mpp_send_real4_scalar |
---|
1699 | #define MPP_SEND_2D_ mpp_send_real4_2d |
---|
1700 | #define MPP_SEND_3D_ mpp_send_real4_3d |
---|
1701 | #define MPP_SEND_4D_ mpp_send_real4_4d |
---|
1702 | #define MPP_SEND_5D_ mpp_send_real4_5d |
---|
1703 | #define MPP_BROADCAST_ mpp_broadcast_real4 |
---|
1704 | #define MPP_BROADCAST_SCALAR_ mpp_broadcast_real4_scalar |
---|
1705 | #define MPP_BROADCAST_2D_ mpp_broadcast_real4_2d |
---|
1706 | #define MPP_BROADCAST_3D_ mpp_broadcast_real4_3d |
---|
1707 | #define MPP_BROADCAST_4D_ mpp_broadcast_real4_4d |
---|
1708 | #define MPP_BROADCAST_5D_ mpp_broadcast_real4_5d |
---|
1709 | #define MPP_TYPE_ real(FLOAT_KIND) |
---|
1710 | #define MPP_TYPE_BYTELEN_ 4 |
---|
1711 | #ifdef use_LAM_MPI |
---|
1712 | #define MPI_TYPE_ MPI_REAL |
---|
1713 | #else |
---|
1714 | #define MPI_TYPE_ MPI_REAL4 |
---|
1715 | #endif |
---|
1716 | #define SHMEM_BROADCAST_ SHMEM_BROADCAST4 |
---|
1717 | #define SHMEM_GET_ SHMEM_GET4 |
---|
1718 | #include <mpp_transmit.h> |
---|
1719 | #endif |
---|
1720 | |
---|
1721 | #define MPP_TRANSMIT_ mpp_transmit_cmplx8 |
---|
1722 | #define MPP_TRANSMIT_SCALAR_ mpp_transmit_cmplx8_scalar |
---|
1723 | #define MPP_TRANSMIT_2D_ mpp_transmit_cmplx8_2d |
---|
1724 | #define MPP_TRANSMIT_3D_ mpp_transmit_cmplx8_3d |
---|
1725 | #define MPP_TRANSMIT_4D_ mpp_transmit_cmplx8_4d |
---|
1726 | #define MPP_TRANSMIT_5D_ mpp_transmit_cmplx8_5d |
---|
1727 | #define MPP_RECV_ mpp_recv_cmplx8 |
---|
1728 | #define MPP_RECV_SCALAR_ mpp_recv_cmplx8_scalar |
---|
1729 | #define MPP_RECV_2D_ mpp_recv_cmplx8_2d |
---|
1730 | #define MPP_RECV_3D_ mpp_recv_cmplx8_3d |
---|
1731 | #define MPP_RECV_4D_ mpp_recv_cmplx8_4d |
---|
1732 | #define MPP_RECV_5D_ mpp_recv_cmplx8_5d |
---|
1733 | #define MPP_SEND_ mpp_send_cmplx8 |
---|
1734 | #define MPP_SEND_SCALAR_ mpp_send_cmplx8_scalar |
---|
1735 | #define MPP_SEND_2D_ mpp_send_cmplx8_2d |
---|
1736 | #define MPP_SEND_3D_ mpp_send_cmplx8_3d |
---|
1737 | #define MPP_SEND_4D_ mpp_send_cmplx8_4d |
---|
1738 | #define MPP_SEND_5D_ mpp_send_cmplx8_5d |
---|
1739 | #define MPP_BROADCAST_ mpp_broadcast_cmplx8 |
---|
1740 | #define MPP_BROADCAST_SCALAR_ mpp_broadcast_cmplx8_scalar |
---|
1741 | #define MPP_BROADCAST_2D_ mpp_broadcast_cmplx8_2d |
---|
1742 | #define MPP_BROADCAST_3D_ mpp_broadcast_cmplx8_3d |
---|
1743 | #define MPP_BROADCAST_4D_ mpp_broadcast_cmplx8_4d |
---|
1744 | #define MPP_BROADCAST_5D_ mpp_broadcast_cmplx8_5d |
---|
1745 | #define MPP_TYPE_ complex(DOUBLE_KIND) |
---|
1746 | #define MPP_TYPE_BYTELEN_ 16 |
---|
1747 | #define MPI_TYPE_ MPI_DOUBLE_COMPLEX |
---|
1748 | #define SHMEM_BROADCAST_ SHMEM_BROADCAST8 |
---|
1749 | #define SHMEM_GET_ SHMEM_GET128 |
---|
1750 | #include <mpp_transmit.h> |
---|
1751 | |
---|
1752 | #ifndef no_4byte_cmplx |
---|
1753 | #define MPP_TRANSMIT_ mpp_transmit_cmplx4 |
---|
1754 | #define MPP_TRANSMIT_SCALAR_ mpp_transmit_cmplx4_scalar |
---|
1755 | #define MPP_TRANSMIT_2D_ mpp_transmit_cmplx4_2d |
---|
1756 | #define MPP_TRANSMIT_3D_ mpp_transmit_cmplx4_3d |
---|
1757 | #define MPP_TRANSMIT_4D_ mpp_transmit_cmplx4_4d |
---|
1758 | #define MPP_TRANSMIT_5D_ mpp_transmit_cmplx4_5d |
---|
1759 | #define MPP_RECV_ mpp_recv_cmplx4 |
---|
1760 | #define MPP_RECV_SCALAR_ mpp_recv_cmplx4_scalar |
---|
1761 | #define MPP_RECV_2D_ mpp_recv_cmplx4_2d |
---|
1762 | #define MPP_RECV_3D_ mpp_recv_cmplx4_3d |
---|
1763 | #define MPP_RECV_4D_ mpp_recv_cmplx4_4d |
---|
1764 | #define MPP_RECV_5D_ mpp_recv_cmplx4_5d |
---|
1765 | #define MPP_SEND_ mpp_send_cmplx4 |
---|
1766 | #define MPP_SEND_SCALAR_ mpp_send_cmplx4_scalar |
---|
1767 | #define MPP_SEND_2D_ mpp_send_cmplx4_2d |
---|
1768 | #define MPP_SEND_3D_ mpp_send_cmplx4_3d |
---|
1769 | #define MPP_SEND_4D_ mpp_send_cmplx4_4d |
---|
1770 | #define MPP_SEND_5D_ mpp_send_cmplx4_5d |
---|
1771 | #define MPP_BROADCAST_ mpp_broadcast_cmplx4 |
---|
1772 | #define MPP_BROADCAST_SCALAR_ mpp_broadcast_cmplx4_scalar |
---|
1773 | #define MPP_BROADCAST_2D_ mpp_broadcast_cmplx4_2d |
---|
1774 | #define MPP_BROADCAST_3D_ mpp_broadcast_cmplx4_3d |
---|
1775 | #define MPP_BROADCAST_4D_ mpp_broadcast_cmplx4_4d |
---|
1776 | #define MPP_BROADCAST_5D_ mpp_broadcast_cmplx4_5d |
---|
1777 | #define MPP_TYPE_ complex(FLOAT_KIND) |
---|
1778 | #define MPP_TYPE_BYTELEN_ 8 |
---|
1779 | #define MPI_TYPE_ MPI_COMPLEX |
---|
1780 | #define SHMEM_BROADCAST_ SHMEM_BROADCAST4 |
---|
1781 | #define SHMEM_GET_ SHMEM_GET64 |
---|
1782 | #include <mpp_transmit.h> |
---|
1783 | #endif |
---|
1784 | |
---|
1785 | #ifndef no_8byte_integers |
---|
1786 | #define MPP_TRANSMIT_ mpp_transmit_int8 |
---|
1787 | #define MPP_TRANSMIT_SCALAR_ mpp_transmit_int8_scalar |
---|
1788 | #define MPP_TRANSMIT_2D_ mpp_transmit_int8_2d |
---|
1789 | #define MPP_TRANSMIT_3D_ mpp_transmit_int8_3d |
---|
1790 | #define MPP_TRANSMIT_4D_ mpp_transmit_int8_4d |
---|
1791 | #define MPP_TRANSMIT_5D_ mpp_transmit_int8_5d |
---|
1792 | #define MPP_RECV_ mpp_recv_int8 |
---|
1793 | #define MPP_RECV_SCALAR_ mpp_recv_int8_scalar |
---|
1794 | #define MPP_RECV_2D_ mpp_recv_int8_2d |
---|
1795 | #define MPP_RECV_3D_ mpp_recv_int8_3d |
---|
1796 | #define MPP_RECV_4D_ mpp_recv_int8_4d |
---|
1797 | #define MPP_RECV_5D_ mpp_recv_int8_5d |
---|
1798 | #define MPP_SEND_ mpp_send_int8 |
---|
1799 | #define MPP_SEND_SCALAR_ mpp_send_int8_scalar |
---|
1800 | #define MPP_SEND_2D_ mpp_send_int8_2d |
---|
1801 | #define MPP_SEND_3D_ mpp_send_int8_3d |
---|
1802 | #define MPP_SEND_4D_ mpp_send_int8_4d |
---|
1803 | #define MPP_SEND_5D_ mpp_send_int8_5d |
---|
1804 | #define MPP_BROADCAST_ mpp_broadcast_int8 |
---|
1805 | #define MPP_BROADCAST_SCALAR_ mpp_broadcast_int8_scalar |
---|
1806 | #define MPP_BROADCAST_2D_ mpp_broadcast_int8_2d |
---|
1807 | #define MPP_BROADCAST_3D_ mpp_broadcast_int8_3d |
---|
1808 | #define MPP_BROADCAST_4D_ mpp_broadcast_int8_4d |
---|
1809 | #define MPP_BROADCAST_5D_ mpp_broadcast_int8_5d |
---|
1810 | #define MPP_TYPE_ integer(LONG_KIND) |
---|
1811 | #define MPP_TYPE_BYTELEN_ 8 |
---|
1812 | #ifdef use_LAM_MPI |
---|
1813 | #define MPI_TYPE_ MPI_INTEGER |
---|
1814 | #else |
---|
1815 | #define MPI_TYPE_ MPI_INTEGER8 |
---|
1816 | #endif |
---|
1817 | #define SHMEM_BROADCAST_ SHMEM_BROADCAST8 |
---|
1818 | #define SHMEM_GET_ SHMEM_GET8 |
---|
1819 | #include <mpp_transmit.h> |
---|
1820 | #endif |
---|
1821 | |
---|
1822 | #define MPP_TRANSMIT_ mpp_transmit_int4 |
---|
1823 | #define MPP_TRANSMIT_SCALAR_ mpp_transmit_int4_scalar |
---|
1824 | #define MPP_TRANSMIT_2D_ mpp_transmit_int4_2d |
---|
1825 | #define MPP_TRANSMIT_3D_ mpp_transmit_int4_3d |
---|
1826 | #define MPP_TRANSMIT_4D_ mpp_transmit_int4_4d |
---|
1827 | #define MPP_TRANSMIT_5D_ mpp_transmit_int4_5d |
---|
1828 | #define MPP_RECV_ mpp_recv_int4 |
---|
1829 | #define MPP_RECV_SCALAR_ mpp_recv_int4_scalar |
---|
1830 | #define MPP_RECV_2D_ mpp_recv_int4_2d |
---|
1831 | #define MPP_RECV_3D_ mpp_recv_int4_3d |
---|
1832 | #define MPP_RECV_4D_ mpp_recv_int4_4d |
---|
1833 | #define MPP_RECV_5D_ mpp_recv_int4_5d |
---|
1834 | #define MPP_SEND_ mpp_send_int4 |
---|
1835 | #define MPP_SEND_SCALAR_ mpp_send_int4_scalar |
---|
1836 | #define MPP_SEND_2D_ mpp_send_int4_2d |
---|
1837 | #define MPP_SEND_3D_ mpp_send_int4_3d |
---|
1838 | #define MPP_SEND_4D_ mpp_send_int4_4d |
---|
1839 | #define MPP_SEND_5D_ mpp_send_int4_5d |
---|
1840 | #define MPP_BROADCAST_ mpp_broadcast_int4 |
---|
1841 | #define MPP_BROADCAST_SCALAR_ mpp_broadcast_int4_scalar |
---|
1842 | #define MPP_BROADCAST_2D_ mpp_broadcast_int4_2d |
---|
1843 | #define MPP_BROADCAST_3D_ mpp_broadcast_int4_3d |
---|
1844 | #define MPP_BROADCAST_4D_ mpp_broadcast_int4_4d |
---|
1845 | #define MPP_BROADCAST_5D_ mpp_broadcast_int4_5d |
---|
1846 | #define MPP_TYPE_ integer(INT_KIND) |
---|
1847 | #define MPP_TYPE_BYTELEN_ 4 |
---|
1848 | #ifdef use_LAM_MPI |
---|
1849 | #define MPI_TYPE_ MPI_INTEGER |
---|
1850 | #else |
---|
1851 | #define MPI_TYPE_ MPI_INTEGER4 |
---|
1852 | #endif |
---|
1853 | #define SHMEM_BROADCAST_ SHMEM_BROADCAST4 |
---|
1854 | #define SHMEM_GET_ SHMEM_GET4 |
---|
1855 | #include <mpp_transmit.h> |
---|
1856 | |
---|
1857 | #ifndef no_8byte_integers |
---|
1858 | #define MPP_TRANSMIT_ mpp_transmit_logical8 |
---|
1859 | #define MPP_TRANSMIT_SCALAR_ mpp_transmit_logical8_scalar |
---|
1860 | #define MPP_TRANSMIT_2D_ mpp_transmit_logical8_2d |
---|
1861 | #define MPP_TRANSMIT_3D_ mpp_transmit_logical8_3d |
---|
1862 | #define MPP_TRANSMIT_4D_ mpp_transmit_logical8_4d |
---|
1863 | #define MPP_TRANSMIT_5D_ mpp_transmit_logical8_5d |
---|
1864 | #define MPP_RECV_ mpp_recv_logical8 |
---|
1865 | #define MPP_RECV_SCALAR_ mpp_recv_logical8_scalar |
---|
1866 | #define MPP_RECV_2D_ mpp_recv_logical8_2d |
---|
1867 | #define MPP_RECV_3D_ mpp_recv_logical8_3d |
---|
1868 | #define MPP_RECV_4D_ mpp_recv_logical8_4d |
---|
1869 | #define MPP_RECV_5D_ mpp_recv_logical8_5d |
---|
1870 | #define MPP_SEND_ mpp_send_logical8 |
---|
1871 | #define MPP_SEND_SCALAR_ mpp_send_logical8_scalar |
---|
1872 | #define MPP_SEND_2D_ mpp_send_logical8_2d |
---|
1873 | #define MPP_SEND_3D_ mpp_send_logical8_3d |
---|
1874 | #define MPP_SEND_4D_ mpp_send_logical8_4d |
---|
1875 | #define MPP_SEND_5D_ mpp_send_logical8_5d |
---|
1876 | #define MPP_BROADCAST_ mpp_broadcast_logical8 |
---|
1877 | #define MPP_BROADCAST_SCALAR_ mpp_broadcast_logical8_scalar |
---|
1878 | #define MPP_BROADCAST_2D_ mpp_broadcast_logical8_2d |
---|
1879 | #define MPP_BROADCAST_3D_ mpp_broadcast_logical8_3d |
---|
1880 | #define MPP_BROADCAST_4D_ mpp_broadcast_logical8_4d |
---|
1881 | #define MPP_BROADCAST_5D_ mpp_broadcast_logical8_5d |
---|
1882 | #define MPP_TYPE_ logical(LONG_KIND) |
---|
1883 | #define MPP_TYPE_BYTELEN_ 8 |
---|
1884 | #ifdef use_LAM_MPI |
---|
1885 | #define MPI_TYPE_ MPI_INTEGER |
---|
1886 | #else |
---|
1887 | #define MPI_TYPE_ MPI_INTEGER8 |
---|
1888 | #endif |
---|
1889 | #define SHMEM_BROADCAST_ SHMEM_BROADCAST8 |
---|
1890 | #define SHMEM_GET_ SHMEM_GET8 |
---|
1891 | #include <mpp_transmit.h> |
---|
1892 | #endif |
---|
1893 | |
---|
1894 | #define MPP_TRANSMIT_ mpp_transmit_logical4 |
---|
1895 | #define MPP_TRANSMIT_SCALAR_ mpp_transmit_logical4_scalar |
---|
1896 | #define MPP_TRANSMIT_2D_ mpp_transmit_logical4_2d |
---|
1897 | #define MPP_TRANSMIT_3D_ mpp_transmit_logical4_3d |
---|
1898 | #define MPP_TRANSMIT_4D_ mpp_transmit_logical4_4d |
---|
1899 | #define MPP_TRANSMIT_5D_ mpp_transmit_logical4_5d |
---|
1900 | #define MPP_RECV_ mpp_recv_logical4 |
---|
1901 | #define MPP_RECV_SCALAR_ mpp_recv_logical4_scalar |
---|
1902 | #define MPP_RECV_2D_ mpp_recv_logical4_2d |
---|
1903 | #define MPP_RECV_3D_ mpp_recv_logical4_3d |
---|
1904 | #define MPP_RECV_4D_ mpp_recv_logical4_4d |
---|
1905 | #define MPP_RECV_5D_ mpp_recv_logical4_5d |
---|
1906 | #define MPP_SEND_ mpp_send_logical4 |
---|
1907 | #define MPP_SEND_SCALAR_ mpp_send_logical4_scalar |
---|
1908 | #define MPP_SEND_2D_ mpp_send_logical4_2d |
---|
1909 | #define MPP_SEND_3D_ mpp_send_logical4_3d |
---|
1910 | #define MPP_SEND_4D_ mpp_send_logical4_4d |
---|
1911 | #define MPP_SEND_5D_ mpp_send_logical4_5d |
---|
1912 | #define MPP_BROADCAST_ mpp_broadcast_logical4 |
---|
1913 | #define MPP_BROADCAST_SCALAR_ mpp_broadcast_logical4_scalar |
---|
1914 | #define MPP_BROADCAST_2D_ mpp_broadcast_logical4_2d |
---|
1915 | #define MPP_BROADCAST_3D_ mpp_broadcast_logical4_3d |
---|
1916 | #define MPP_BROADCAST_4D_ mpp_broadcast_logical4_4d |
---|
1917 | #define MPP_BROADCAST_5D_ mpp_broadcast_logical4_5d |
---|
1918 | #define MPP_TYPE_ logical(INT_KIND) |
---|
1919 | #define MPP_TYPE_BYTELEN_ 4 |
---|
1920 | #ifdef use_LAM_MPI |
---|
1921 | #define MPI_TYPE_ MPI_INTEGER |
---|
1922 | #else |
---|
1923 | #define MPI_TYPE_ MPI_INTEGER4 |
---|
1924 | #endif |
---|
1925 | #define SHMEM_BROADCAST_ SHMEM_BROADCAST4 |
---|
1926 | #define SHMEM_GET_ SHMEM_GET4 |
---|
1927 | #include <mpp_transmit.h> |
---|
1928 | |
---|
1929 | !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! |
---|
1930 | ! ! |
---|
1931 | ! GLOBAL REDUCTION ROUTINES: mpp_max, mpp_sum, mpp_min ! |
---|
1932 | ! ! |
---|
1933 | !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! |
---|
1934 | |
---|
1935 | #define MPP_REDUCE_ mpp_max_real8 |
---|
1936 | #define MPP_TYPE_ real(DOUBLE_KIND) |
---|
1937 | #define SHMEM_REDUCE_ SHMEM_REAL8_MAX_TO_ALL |
---|
1938 | #ifdef use_LAM_MPI |
---|
1939 | #define MPI_TYPE_ MPI_DOUBLE_PRECISION |
---|
1940 | #else |
---|
1941 | #define MPI_TYPE_ MPI_REAL8 |
---|
1942 | #endif |
---|
1943 | #define MPI_REDUCE_ MPI_MAX |
---|
1944 | #include <mpp_reduce.h> |
---|
1945 | |
---|
1946 | #define MPP_REDUCE_ mpp_max_real4 |
---|
1947 | #define MPP_TYPE_ real(FLOAT_KIND) |
---|
1948 | #define SHMEM_REDUCE_ SHMEM_REAL4_MAX_TO_ALL |
---|
1949 | #ifdef use_LAM_MPI |
---|
1950 | #define MPI_TYPE_ MPI_REAL |
---|
1951 | #else |
---|
1952 | #define MPI_TYPE_ MPI_REAL4 |
---|
1953 | #endif |
---|
1954 | #define MPI_REDUCE_ MPI_MAX |
---|
1955 | #include <mpp_reduce.h> |
---|
1956 | |
---|
1957 | #ifndef no_8byte_integers |
---|
1958 | #define MPP_REDUCE_ mpp_max_int8 |
---|
1959 | #define MPP_TYPE_ integer(LONG_KIND) |
---|
1960 | #define SHMEM_REDUCE_ SHMEM_INT8_MAX_TO_ALL |
---|
1961 | #ifdef use_LAM_MPI |
---|
1962 | #define MPI_TYPE_ MPI_INTEGER |
---|
1963 | #else |
---|
1964 | #define MPI_TYPE_ MPI_INTEGER8 |
---|
1965 | #endif |
---|
1966 | #define MPI_REDUCE_ MPI_MAX |
---|
1967 | #include <mpp_reduce.h> |
---|
1968 | #endif |
---|
1969 | |
---|
1970 | #define MPP_REDUCE_ mpp_max_int4 |
---|
1971 | #define MPP_TYPE_ integer(INT_KIND) |
---|
1972 | #define SHMEM_REDUCE_ SHMEM_INT4_MAX_TO_ALL |
---|
1973 | #ifdef use_LAM_MPI |
---|
1974 | #define MPI_TYPE_ MPI_INTEGER |
---|
1975 | #else |
---|
1976 | #define MPI_TYPE_ MPI_INTEGER4 |
---|
1977 | #endif |
---|
1978 | #define MPI_REDUCE_ MPI_MAX |
---|
1979 | #include <mpp_reduce.h> |
---|
1980 | |
---|
1981 | #define MPP_REDUCE_ mpp_min_real8 |
---|
1982 | #define MPP_TYPE_ real(DOUBLE_KIND) |
---|
1983 | #define SHMEM_REDUCE_ SHMEM_REAL8_MIN_TO_ALL |
---|
1984 | #ifdef use_LAM_MPI |
---|
1985 | #define MPI_TYPE_ MPI_DOUBLE_PRECISION |
---|
1986 | #else |
---|
1987 | #define MPI_TYPE_ MPI_REAL8 |
---|
1988 | #endif |
---|
1989 | #define MPI_REDUCE_ MPI_MIN |
---|
1990 | #include <mpp_reduce.h> |
---|
1991 | |
---|
1992 | #ifndef no_4byte_reals |
---|
1993 | #define MPP_REDUCE_ mpp_min_real4 |
---|
1994 | #define MPP_TYPE_ real(FLOAT_KIND) |
---|
1995 | #define SHMEM_REDUCE_ SHMEM_REAL4_MIN_TO_ALL |
---|
1996 | #ifdef use_LAM_MPI |
---|
1997 | #define MPI_TYPE_ MPI_REAL |
---|
1998 | #else |
---|
1999 | #define MPI_TYPE_ MPI_REAL4 |
---|
2000 | #endif |
---|
2001 | #define MPI_REDUCE_ MPI_MIN |
---|
2002 | #include <mpp_reduce.h> |
---|
2003 | #endif |
---|
2004 | |
---|
2005 | #ifndef no_8byte_integers |
---|
2006 | #define MPP_REDUCE_ mpp_min_int8 |
---|
2007 | #define MPP_TYPE_ integer(LONG_KIND) |
---|
2008 | #define SHMEM_REDUCE_ SHMEM_INT8_MIN_TO_ALL |
---|
2009 | #ifdef use_LAM_MPI |
---|
2010 | #define MPI_TYPE_ MPI_INTEGER |
---|
2011 | #else |
---|
2012 | #define MPI_TYPE_ MPI_INTEGER8 |
---|
2013 | #endif |
---|
2014 | #define MPI_REDUCE_ MPI_MIN |
---|
2015 | #include <mpp_reduce.h> |
---|
2016 | #endif |
---|
2017 | |
---|
2018 | #define MPP_REDUCE_ mpp_min_int4 |
---|
2019 | #define MPP_TYPE_ integer(INT_KIND) |
---|
2020 | #define SHMEM_REDUCE_ SHMEM_INT4_MIN_TO_ALL |
---|
2021 | #ifdef use_LAM_MPI |
---|
2022 | #define MPI_TYPE_ MPI_INTEGER |
---|
2023 | #else |
---|
2024 | #define MPI_TYPE_ MPI_INTEGER4 |
---|
2025 | #endif |
---|
2026 | #define MPI_REDUCE_ MPI_MIN |
---|
2027 | #include <mpp_reduce.h> |
---|
2028 | |
---|
2029 | #define MPP_SUM_ mpp_sum_real8 |
---|
2030 | #define MPP_SUM_SCALAR_ mpp_sum_real8_scalar |
---|
2031 | #define MPP_SUM_2D_ mpp_sum_real8_2d |
---|
2032 | #define MPP_SUM_3D_ mpp_sum_real8_3d |
---|
2033 | #define MPP_SUM_4D_ mpp_sum_real8_4d |
---|
2034 | #define MPP_SUM_5D_ mpp_sum_real8_5d |
---|
2035 | #define MPP_TYPE_ real(DOUBLE_KIND) |
---|
2036 | #define SHMEM_SUM_ SHMEM_REAL8_SUM_TO_ALL |
---|
2037 | #ifdef use_LAM_MPI |
---|
2038 | #define MPI_TYPE_ MPI_DOUBLE_PRECISION |
---|
2039 | #else |
---|
2040 | #define MPI_TYPE_ MPI_REAL8 |
---|
2041 | #endif |
---|
2042 | #define MPP_TYPE_BYTELEN_ 8 |
---|
2043 | #include <mpp_sum.h> |
---|
2044 | |
---|
2045 | #ifndef no_4byte_reals |
---|
2046 | #define MPP_SUM_ mpp_sum_real4 |
---|
2047 | #define MPP_SUM_SCALAR_ mpp_sum_real4_scalar |
---|
2048 | #define MPP_SUM_2D_ mpp_sum_real4_2d |
---|
2049 | #define MPP_SUM_3D_ mpp_sum_real4_3d |
---|
2050 | #define MPP_SUM_4D_ mpp_sum_real4_4d |
---|
2051 | #define MPP_SUM_5D_ mpp_sum_real4_5d |
---|
2052 | #define MPP_TYPE_ real(FLOAT_KIND) |
---|
2053 | #define SHMEM_SUM_ SHMEM_REAL4_SUM_TO_ALL |
---|
2054 | #ifdef use_LAM_MPI |
---|
2055 | #define MPI_TYPE_ MPI_REAL |
---|
2056 | #else |
---|
2057 | #define MPI_TYPE_ MPI_REAL4 |
---|
2058 | #endif |
---|
2059 | #define MPP_TYPE_BYTELEN_ 4 |
---|
2060 | #include <mpp_sum.h> |
---|
2061 | #endif |
---|
2062 | |
---|
2063 | #define MPP_SUM_ mpp_sum_cmplx8 |
---|
2064 | #define MPP_SUM_SCALAR_ mpp_sum_cmplx8_scalar |
---|
2065 | #define MPP_SUM_2D_ mpp_sum_cmplx8_2d |
---|
2066 | #define MPP_SUM_3D_ mpp_sum_cmplx8_3d |
---|
2067 | #define MPP_SUM_4D_ mpp_sum_cmplx8_4d |
---|
2068 | #define MPP_SUM_5D_ mpp_sum_cmplx8_5d |
---|
2069 | #define MPP_TYPE_ complex(DOUBLE_KIND) |
---|
2070 | #define SHMEM_SUM_ SHMEM_COMP8_SUM_TO_ALL |
---|
2071 | #define MPI_TYPE_ MPI_DOUBLE_COMPLEX |
---|
2072 | #define MPP_TYPE_BYTELEN_ 16 |
---|
2073 | #include <mpp_sum.h> |
---|
2074 | |
---|
2075 | #ifndef no_4byte_cmplx |
---|
2076 | #define MPP_SUM_ mpp_sum_cmplx4 |
---|
2077 | #define MPP_SUM_SCALAR_ mpp_sum_cmplx4_scalar |
---|
2078 | #define MPP_SUM_2D_ mpp_sum_cmplx4_2d |
---|
2079 | #define MPP_SUM_3D_ mpp_sum_cmplx4_3d |
---|
2080 | #define MPP_SUM_4D_ mpp_sum_cmplx4_4d |
---|
2081 | #define MPP_SUM_5D_ mpp_sum_cmplx4_5d |
---|
2082 | #define MPP_TYPE_ complex(FLOAT_KIND) |
---|
2083 | #define SHMEM_SUM_ SHMEM_COMP4_SUM_TO_ALL |
---|
2084 | #define MPI_TYPE_ MPI_COMPLEX |
---|
2085 | #define MPP_TYPE_BYTELEN_ 8 |
---|
2086 | #include <mpp_sum.h> |
---|
2087 | #endif |
---|
2088 | |
---|
2089 | #ifndef no_8byte_integers |
---|
2090 | #define MPP_SUM_ mpp_sum_int8 |
---|
2091 | #define MPP_SUM_SCALAR_ mpp_sum_int8_scalar |
---|
2092 | #define MPP_SUM_2D_ mpp_sum_int8_2d |
---|
2093 | #define MPP_SUM_3D_ mpp_sum_int8_3d |
---|
2094 | #define MPP_SUM_4D_ mpp_sum_int8_4d |
---|
2095 | #define MPP_SUM_5D_ mpp_sum_int8_5d |
---|
2096 | #define MPP_TYPE_ integer(LONG_KIND) |
---|
2097 | #define SHMEM_SUM_ SHMEM_INT8_SUM_TO_ALL |
---|
2098 | #ifdef use_LAM_MPI |
---|
2099 | #define MPI_TYPE_ MPI_INTEGER |
---|
2100 | #else |
---|
2101 | #define MPI_TYPE_ MPI_INTEGER8 |
---|
2102 | #endif |
---|
2103 | #define MPP_TYPE_BYTELEN_ 8 |
---|
2104 | #include <mpp_sum.h> |
---|
2105 | #endif |
---|
2106 | |
---|
2107 | #define MPP_SUM_ mpp_sum_int4 |
---|
2108 | #define MPP_SUM_SCALAR_ mpp_sum_int4_scalar |
---|
2109 | #define MPP_SUM_2D_ mpp_sum_int4_2d |
---|
2110 | #define MPP_SUM_3D_ mpp_sum_int4_3d |
---|
2111 | #define MPP_SUM_4D_ mpp_sum_int4_4d |
---|
2112 | #define MPP_SUM_5D_ mpp_sum_int4_5d |
---|
2113 | #define MPP_TYPE_ integer(INT_KIND) |
---|
2114 | #define SHMEM_SUM_ SHMEM_INT4_SUM_TO_ALL |
---|
2115 | #ifdef use_LAM_MPI |
---|
2116 | #define MPI_TYPE_ MPI_INTEGER |
---|
2117 | #else |
---|
2118 | #define MPI_TYPE_ MPI_INTEGER4 |
---|
2119 | #endif |
---|
2120 | #define MPP_TYPE_BYTELEN_ 4 |
---|
2121 | #include <mpp_sum.h> |
---|
2122 | |
---|
2123 | !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! |
---|
2124 | ! ! |
---|
2125 | ! SYNCHRONIZATION ROUTINES: mpp_sync, mpp_sync_self ! |
---|
2126 | ! ! |
---|
2127 | !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! |
---|
2128 | |
---|
2129 | subroutine mpp_sync( pelist ) |
---|
2130 | !synchronize PEs in list |
---|
2131 | integer, intent(in), optional :: pelist(:) |
---|
2132 | integer :: n |
---|
2133 | |
---|
2134 | call mpp_sync_self(pelist) |
---|
2135 | |
---|
2136 | n = get_peset(pelist); if( peset(n)%count.EQ.1 )return |
---|
2137 | |
---|
2138 | if( current_clock.NE.0 )call SYSTEM_CLOCK(start_tick) |
---|
2139 | #ifdef use_libSMA |
---|
2140 | if( n.EQ.world_peset_num )then |
---|
2141 | call SHMEM_BARRIER_ALL() !special call is faster |
---|
2142 | else |
---|
2143 | call SHMEM_BARRIER( peset(n)%start, peset(n)%log2stride, peset(n)%count, sync ) |
---|
2144 | end if |
---|
2145 | #endif |
---|
2146 | #ifdef use_libMPI |
---|
2147 | call MPI_BARRIER( peset(n)%id, error ) |
---|
2148 | #endif |
---|
2149 | if( current_clock.NE.0 )call increment_current_clock(EVENT_WAIT) |
---|
2150 | |
---|
2151 | return |
---|
2152 | end subroutine mpp_sync |
---|
2153 | |
---|
2154 | subroutine mpp_sync_self( pelist ) |
---|
2155 | !this is to check if current PE's outstanding puts are complete |
---|
2156 | !but we can't use shmem_fence because we are actually waiting for |
---|
2157 | !a remote PE to complete its get |
---|
2158 | integer, intent(in), optional :: pelist(:) |
---|
2159 | integer :: i, m, n, stride |
---|
2160 | |
---|
2161 | n = get_peset(pelist); if( peset(n)%count.EQ.1 )return |
---|
2162 | |
---|
2163 | if( current_clock.NE.0 )call SYSTEM_CLOCK(start_tick) |
---|
2164 | #ifdef use_libSMA |
---|
2165 | #ifdef _CRAYT90 |
---|
2166 | call SHMEM_UDCFLUSH !invalidate data cache |
---|
2167 | #endif |
---|
2168 | #endif |
---|
2169 | do m = 1,peset(n)%count |
---|
2170 | i = peset(n)%list(m) |
---|
2171 | #ifdef use_libSMA |
---|
2172 | call SHMEM_INT8_WAIT( status(i), MPP_WAIT ) !wait for status.NE.MPP_WAIT |
---|
2173 | #endif |
---|
2174 | #ifdef use_libMPI |
---|
2175 | if( request(i).NE.MPI_REQUEST_NULL )call MPI_WAIT( request(i), stat, error ) |
---|
2176 | #endif |
---|
2177 | end do |
---|
2178 | if( current_clock.NE.0 )call increment_current_clock(EVENT_WAIT) |
---|
2179 | return |
---|
2180 | end subroutine mpp_sync_self |
---|
2181 | |
---|
2182 | #ifdef use_libSMA |
---|
2183 | !these local versions are written for grouping into shmem_integer_wait |
---|
2184 | subroutine shmem_int4_wait_local( ivar, cmp_value ) |
---|
2185 | !dir$ INLINEALWAYS shmem_int4_wait_local |
---|
2186 | integer(INT_KIND), intent(in) :: cmp_value |
---|
2187 | integer(INT_KIND), intent(inout) :: ivar |
---|
2188 | call SHMEM_INT4_WAIT( ivar, cmp_value ) |
---|
2189 | return |
---|
2190 | end subroutine shmem_int4_wait_local |
---|
2191 | subroutine shmem_int8_wait_local( ivar, cmp_value ) |
---|
2192 | !dir$ INLINEALWAYS shmem_int8_wait_local |
---|
2193 | integer(LONG_KIND), intent(in) :: cmp_value |
---|
2194 | integer(LONG_KIND), intent(inout) :: ivar |
---|
2195 | call SHMEM_INT8_WAIT( ivar, cmp_value ) |
---|
2196 | return |
---|
2197 | end subroutine shmem_int8_wait_local |
---|
2198 | #endif |
---|
2199 | |
---|
2200 | !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! |
---|
2201 | ! ! |
---|
2202 | ! MISCELLANEOUS UTILITIES: mpp_error, mpp_chksum, mpp_malloc ! |
---|
2203 | ! ! |
---|
2204 | !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! |
---|
2205 | |
---|
2206 | subroutine mpp_error_basic( errortype, errormsg ) |
---|
2207 | !a very basic error handler |
---|
2208 | !uses ABORT and FLUSH calls, may need to use cpp to rename |
---|
2209 | integer, intent(in) :: errortype |
---|
2210 | character(len=*), intent(in), optional :: errormsg |
---|
2211 | character(len=128) :: text |
---|
2212 | logical :: opened |
---|
2213 | |
---|
2214 | if( .NOT.module_is_initialized )call ABORT() |
---|
2215 | |
---|
2216 | select case( errortype ) |
---|
2217 | case(NOTE) |
---|
2218 | text = 'NOTE' !just FYI |
---|
2219 | case(WARNING) |
---|
2220 | text = 'WARNING' !probable error |
---|
2221 | case(FATAL) |
---|
2222 | text = 'FATAL' !fatal error |
---|
2223 | case default |
---|
2224 | text = 'WARNING: non-existent errortype (must be NOTE|WARNING|FATAL)' |
---|
2225 | end select |
---|
2226 | |
---|
2227 | if( npes.GT.1 )write( text,'(a,i5)' )trim(text)//' from PE', pe !this is the mpp part |
---|
2228 | if( PRESENT(errormsg) )text = trim(text)//': '//trim(errormsg) |
---|
2229 | |
---|
2230 | select case( errortype ) |
---|
2231 | case(NOTE) |
---|
2232 | write( stdout(),'(a)' )trim(text) |
---|
2233 | case default |
---|
2234 | write( stderr(),'(/a/)' )trim(text) |
---|
2235 | if( errortype.EQ.FATAL .OR. warnings_are_fatal )then |
---|
2236 | call FLUSH(stdout()) |
---|
2237 | #ifdef sgi_mipspro |
---|
2238 | ! call TRACE_BACK_STACK_AND_PRINT() |
---|
2239 | #endif |
---|
2240 | #ifdef use_libMPI |
---|
2241 | #ifndef sgi_mipspro |
---|
2242 | !the call to MPI_ABORT is not trapped by TotalView on sgi |
---|
2243 | call MPI_ABORT( peset(0)%id, 1, error ) |
---|
2244 | #endif |
---|
2245 | #endif |
---|
2246 | call ABORT() !automatically calls traceback on Cray systems |
---|
2247 | end if |
---|
2248 | end select |
---|
2249 | |
---|
2250 | error_state = errortype |
---|
2251 | return |
---|
2252 | end subroutine mpp_error_basic |
---|
2253 | !overloads to mpp_error_basic |
---|
2254 | subroutine mpp_error_mesg( routine, errormsg, errortype ) |
---|
2255 | !support for error_mesg routine in FMS |
---|
2256 | character(len=*), intent(in) :: routine, errormsg |
---|
2257 | integer, intent(in) :: errortype |
---|
2258 | call mpp_error( errortype, trim(routine)//': '//trim(errormsg) ) |
---|
2259 | return |
---|
2260 | end subroutine mpp_error_mesg |
---|
2261 | subroutine mpp_error_noargs() |
---|
2262 | call mpp_error(FATAL) |
---|
2263 | end subroutine mpp_error_noargs |
---|
2264 | |
---|
2265 | subroutine mpp_set_warn_level(flag) |
---|
2266 | integer, intent(in) :: flag |
---|
2267 | |
---|
2268 | if( flag.EQ.WARNING )then |
---|
2269 | warnings_are_fatal = .FALSE. |
---|
2270 | else if( flag.EQ.FATAL )then |
---|
2271 | warnings_are_fatal = .TRUE. |
---|
2272 | else |
---|
2273 | call mpp_error( FATAL, 'MPP_SET_WARN_LEVEL: warning flag must be set to WARNING or FATAL.' ) |
---|
2274 | end if |
---|
2275 | return |
---|
2276 | end subroutine mpp_set_warn_level |
---|
2277 | |
---|
2278 | function mpp_error_state() |
---|
2279 | integer :: mpp_error_state |
---|
2280 | mpp_error_state = error_state |
---|
2281 | return |
---|
2282 | end function mpp_error_state |
---|
2283 | |
---|
2284 | #ifdef use_shmalloc |
---|
2285 | subroutine mpp_malloc( ptr, newlen, len ) |
---|
2286 | !routine to perform symmetric allocation: |
---|
2287 | !this is required on the t3e/O2k for variables that will be non-local arguments |
---|
2288 | !to a shmem call (see man intro_shmem(3F)). |
---|
2289 | !newlen is the required allocation length for the pointer ptr |
---|
2290 | ! len is the current allocation (0 if unallocated) |
---|
2291 | integer, intent(in) :: newlen |
---|
2292 | integer, intent(inout) :: len |
---|
2293 | real :: dummy |
---|
2294 | integer :: words_per_long |
---|
2295 | integer(LONG_KIND) :: long |
---|
2296 | !argument ptr is a cray pointer, points to a dummy argument in this routine |
---|
2297 | pointer( ptr, dummy ) |
---|
2298 | ! integer(LONG_KIND) :: error_8 |
---|
2299 | |
---|
2300 | if( .NOT.module_is_initialized )call mpp_error( FATAL, 'MPP_MALLOC: You must first call mpp_init.' ) |
---|
2301 | !use existing allocation if it is enough |
---|
2302 | if( newlen.LE.len )return |
---|
2303 | |
---|
2304 | call SHMEM_BARRIER_ALL() |
---|
2305 | !if the pointer is already allocated, deallocate |
---|
2306 | ! if( len.NE.0 )call SHPDEALLC( ptr, error_8, -1 ) !BWA: error_8 instead of error, see PV 682618 (fixed in mpt.1.3.0.1) |
---|
2307 | if( len.NE.0 )call SHPDEALLC( ptr, error, -1 ) |
---|
2308 | !allocate new length: assume that the array is KIND=8 |
---|
2309 | words_per_long = size(transfer(long,word)) |
---|
2310 | call SHPALLOC( ptr, newlen*words_per_long, error, -1 ) |
---|
2311 | len = newlen |
---|
2312 | call SHMEM_BARRIER_ALL() |
---|
2313 | |
---|
2314 | if( debug )then |
---|
2315 | call SYSTEM_CLOCK(tick) |
---|
2316 | write( stdout(),'(a,i18,a,i5,a,2i8,i16)' )'T=', tick, ' PE=', pe, ' MPP_MALLOC: len, newlen, ptr=', len, newlen, ptr |
---|
2317 | end if |
---|
2318 | return |
---|
2319 | end subroutine mpp_malloc |
---|
2320 | #endif use_shmalloc |
---|
2321 | |
---|
2322 | subroutine mpp_set_stack_size(n) |
---|
2323 | !set the mpp_stack variable to be at least n LONG words long |
---|
2324 | integer, intent(in) :: n |
---|
2325 | character(len=8) :: text |
---|
2326 | #ifdef use_shmalloc |
---|
2327 | call mpp_malloc( ptr_stack, n, mpp_stack_size ) |
---|
2328 | #else |
---|
2329 | if( n.GT.mpp_stack_size .AND. allocated(mpp_stack) )deallocate(mpp_stack) |
---|
2330 | if( .NOT.allocated(mpp_stack) )then |
---|
2331 | allocate( mpp_stack(n) ) |
---|
2332 | mpp_stack_size = n |
---|
2333 | end if |
---|
2334 | #endif |
---|
2335 | write( text,'(i8)' )n |
---|
2336 | if( pe.EQ.root_pe )call mpp_error( NOTE, 'MPP_SET_STACK_SIZE: stack size set to '//text//'.' ) |
---|
2337 | |
---|
2338 | return |
---|
2339 | end subroutine mpp_set_stack_size |
---|
2340 | |
---|
2341 | #ifndef no_8byte_integers |
---|
2342 | #define MPP_CHKSUM_INT_ mpp_chksum_i8_1d |
---|
2343 | #define MPP_TYPE_ integer(LONG_KIND) |
---|
2344 | #define MPP_RANK_ (:) |
---|
2345 | #include <mpp_chksum_int.h> |
---|
2346 | |
---|
2347 | #define MPP_CHKSUM_INT_ mpp_chksum_i8_2d |
---|
2348 | #define MPP_TYPE_ integer(LONG_KIND) |
---|
2349 | #define MPP_RANK_ (:,:) |
---|
2350 | #include <mpp_chksum_int.h> |
---|
2351 | |
---|
2352 | #define MPP_CHKSUM_INT_ mpp_chksum_i8_3d |
---|
2353 | #define MPP_TYPE_ integer(LONG_KIND) |
---|
2354 | #define MPP_RANK_ (:,:,:) |
---|
2355 | #include <mpp_chksum_int.h> |
---|
2356 | |
---|
2357 | #define MPP_CHKSUM_INT_ mpp_chksum_i8_4d |
---|
2358 | #define MPP_TYPE_ integer(LONG_KIND) |
---|
2359 | #define MPP_RANK_ (:,:,:,:) |
---|
2360 | #include <mpp_chksum_int.h> |
---|
2361 | |
---|
2362 | #define MPP_CHKSUM_INT_ mpp_chksum_i8_5d |
---|
2363 | #define MPP_TYPE_ integer(LONG_KIND) |
---|
2364 | #define MPP_RANK_ (:,:,:,:,:) |
---|
2365 | #include <mpp_chksum_int.h> |
---|
2366 | #endif |
---|
2367 | |
---|
2368 | #define MPP_CHKSUM_INT_ mpp_chksum_i4_1d |
---|
2369 | #define MPP_TYPE_ integer(INT_KIND) |
---|
2370 | #define MPP_RANK_ (:) |
---|
2371 | #include <mpp_chksum_int.h> |
---|
2372 | |
---|
2373 | #define MPP_CHKSUM_INT_ mpp_chksum_i4_2d |
---|
2374 | #define MPP_TYPE_ integer(INT_KIND) |
---|
2375 | #define MPP_RANK_ (:,:) |
---|
2376 | #include <mpp_chksum_int.h> |
---|
2377 | |
---|
2378 | #define MPP_CHKSUM_INT_ mpp_chksum_i4_3d |
---|
2379 | #define MPP_TYPE_ integer(INT_KIND) |
---|
2380 | #define MPP_RANK_ (:,:,:) |
---|
2381 | #include <mpp_chksum_int.h> |
---|
2382 | |
---|
2383 | #define MPP_CHKSUM_INT_ mpp_chksum_i4_4d |
---|
2384 | #define MPP_TYPE_ integer(INT_KIND) |
---|
2385 | #define MPP_RANK_ (:,:,:,:) |
---|
2386 | #include <mpp_chksum_int.h> |
---|
2387 | |
---|
2388 | #define MPP_CHKSUM_INT_ mpp_chksum_i4_5d |
---|
2389 | #define MPP_TYPE_ integer(INT_KIND) |
---|
2390 | #define MPP_RANK_ (:,:,:,:,:) |
---|
2391 | #include <mpp_chksum_int.h> |
---|
2392 | |
---|
2393 | #define MPP_CHKSUM_ mpp_chksum_r8_0d |
---|
2394 | #define MPP_TYPE_ real(DOUBLE_KIND) |
---|
2395 | #define MPP_RANK_ ! |
---|
2396 | #include <mpp_chksum.h> |
---|
2397 | |
---|
2398 | #define MPP_CHKSUM_ mpp_chksum_r8_1d |
---|
2399 | #define MPP_TYPE_ real(DOUBLE_KIND) |
---|
2400 | #define MPP_RANK_ (:) |
---|
2401 | #include <mpp_chksum.h> |
---|
2402 | |
---|
2403 | #define MPP_CHKSUM_ mpp_chksum_r8_2d |
---|
2404 | #define MPP_TYPE_ real(DOUBLE_KIND) |
---|
2405 | #define MPP_RANK_ (:,:) |
---|
2406 | #include <mpp_chksum.h> |
---|
2407 | |
---|
2408 | #define MPP_CHKSUM_ mpp_chksum_r8_3d |
---|
2409 | #define MPP_TYPE_ real(DOUBLE_KIND) |
---|
2410 | #define MPP_RANK_ (:,:,:) |
---|
2411 | #include <mpp_chksum.h> |
---|
2412 | |
---|
2413 | #define MPP_CHKSUM_ mpp_chksum_r8_4d |
---|
2414 | #define MPP_TYPE_ real(DOUBLE_KIND) |
---|
2415 | #define MPP_RANK_ (:,:,:,:) |
---|
2416 | #include <mpp_chksum.h> |
---|
2417 | |
---|
2418 | #define MPP_CHKSUM_ mpp_chksum_r8_5d |
---|
2419 | #define MPP_TYPE_ real(DOUBLE_KIND) |
---|
2420 | #define MPP_RANK_ (:,:,:,:,:) |
---|
2421 | #include <mpp_chksum.h> |
---|
2422 | |
---|
2423 | #define MPP_CHKSUM_ mpp_chksum_c8_0d |
---|
2424 | #define MPP_TYPE_ complex(DOUBLE_KIND) |
---|
2425 | #define MPP_RANK_ ! |
---|
2426 | #include <mpp_chksum.h> |
---|
2427 | |
---|
2428 | #define MPP_CHKSUM_ mpp_chksum_c8_1d |
---|
2429 | #define MPP_TYPE_ complex(DOUBLE_KIND) |
---|
2430 | #define MPP_RANK_ (:) |
---|
2431 | #include <mpp_chksum.h> |
---|
2432 | |
---|
2433 | #define MPP_CHKSUM_ mpp_chksum_c8_2d |
---|
2434 | #define MPP_TYPE_ complex(DOUBLE_KIND) |
---|
2435 | #define MPP_RANK_ (:,:) |
---|
2436 | #include <mpp_chksum.h> |
---|
2437 | |
---|
2438 | #define MPP_CHKSUM_ mpp_chksum_c8_3d |
---|
2439 | #define MPP_TYPE_ complex(DOUBLE_KIND) |
---|
2440 | #define MPP_RANK_ (:,:,:) |
---|
2441 | #include <mpp_chksum.h> |
---|
2442 | |
---|
2443 | #define MPP_CHKSUM_ mpp_chksum_c8_4d |
---|
2444 | #define MPP_TYPE_ complex(DOUBLE_KIND) |
---|
2445 | #define MPP_RANK_ (:,:,:,:) |
---|
2446 | #include <mpp_chksum.h> |
---|
2447 | |
---|
2448 | #define MPP_CHKSUM_ mpp_chksum_c8_5d |
---|
2449 | #define MPP_TYPE_ complex(DOUBLE_KIND) |
---|
2450 | #define MPP_RANK_ (:,:,:,:,:) |
---|
2451 | #include <mpp_chksum.h> |
---|
2452 | |
---|
2453 | #ifndef no_4byte_reals |
---|
2454 | !CAUTION: the r4 versions of these may produce |
---|
2455 | !unpredictable results: I'm not sure what the result |
---|
2456 | !of the TRANSFER() to integer(8) is from an odd number of real(4)s? |
---|
2457 | !However the complex(4) will work, since it is guaranteed even. |
---|
2458 | #define MPP_CHKSUM_ mpp_chksum_r4_0d |
---|
2459 | #define MPP_TYPE_ real(FLOAT_KIND) |
---|
2460 | #define MPP_RANK_ ! |
---|
2461 | #include <mpp_chksum.h> |
---|
2462 | |
---|
2463 | #define MPP_CHKSUM_ mpp_chksum_r4_1d |
---|
2464 | #define MPP_TYPE_ real(FLOAT_KIND) |
---|
2465 | #define MPP_RANK_ (:) |
---|
2466 | #include <mpp_chksum.h> |
---|
2467 | |
---|
2468 | #define MPP_CHKSUM_ mpp_chksum_r4_2d |
---|
2469 | #define MPP_TYPE_ real(FLOAT_KIND) |
---|
2470 | #define MPP_RANK_ (:,:) |
---|
2471 | #include <mpp_chksum.h> |
---|
2472 | |
---|
2473 | #define MPP_CHKSUM_ mpp_chksum_r4_3d |
---|
2474 | #define MPP_TYPE_ real(FLOAT_KIND) |
---|
2475 | #define MPP_RANK_ (:,:,:) |
---|
2476 | #include <mpp_chksum.h> |
---|
2477 | |
---|
2478 | #define MPP_CHKSUM_ mpp_chksum_r4_4d |
---|
2479 | #define MPP_TYPE_ real(FLOAT_KIND) |
---|
2480 | #define MPP_RANK_ (:,:,:,:) |
---|
2481 | #include <mpp_chksum.h> |
---|
2482 | |
---|
2483 | #define MPP_CHKSUM_ mpp_chksum_r4_5d |
---|
2484 | #define MPP_TYPE_ real(FLOAT_KIND) |
---|
2485 | #define MPP_RANK_ (:,:,:,:,:) |
---|
2486 | #include <mpp_chksum.h> |
---|
2487 | #endif |
---|
2488 | |
---|
2489 | #ifndef no_4byte_cmplx |
---|
2490 | #define MPP_CHKSUM_ mpp_chksum_c4_0d |
---|
2491 | #define MPP_TYPE_ complex(FLOAT_KIND) |
---|
2492 | #define MPP_RANK_ ! |
---|
2493 | #include <mpp_chksum.h> |
---|
2494 | |
---|
2495 | #define MPP_CHKSUM_ mpp_chksum_c4_1d |
---|
2496 | #define MPP_TYPE_ complex(FLOAT_KIND) |
---|
2497 | #define MPP_RANK_ (:) |
---|
2498 | #include <mpp_chksum.h> |
---|
2499 | |
---|
2500 | #define MPP_CHKSUM_ mpp_chksum_c4_2d |
---|
2501 | #define MPP_TYPE_ complex(FLOAT_KIND) |
---|
2502 | #define MPP_RANK_ (:,:) |
---|
2503 | #include <mpp_chksum.h> |
---|
2504 | |
---|
2505 | #define MPP_CHKSUM_ mpp_chksum_c4_3d |
---|
2506 | #define MPP_TYPE_ complex(FLOAT_KIND) |
---|
2507 | #define MPP_RANK_ (:,:,:) |
---|
2508 | #include <mpp_chksum.h> |
---|
2509 | |
---|
2510 | #define MPP_CHKSUM_ mpp_chksum_c4_4d |
---|
2511 | #define MPP_TYPE_ complex(FLOAT_KIND) |
---|
2512 | #define MPP_RANK_ (:,:,:,:) |
---|
2513 | #include <mpp_chksum.h> |
---|
2514 | |
---|
2515 | #define MPP_CHKSUM_ mpp_chksum_c4_5d |
---|
2516 | #define MPP_TYPE_ complex(FLOAT_KIND) |
---|
2517 | #define MPP_RANK_ (:,:,:,:,:) |
---|
2518 | #include <mpp_chksum.h> |
---|
2519 | #endif |
---|
2520 | |
---|
2521 | end module mpp_mod |
---|
2522 | |
---|
2523 | #ifdef test_mpp |
---|
2524 | #ifdef SYSTEM_CLOCK |
---|
2525 | #undef SYSTEM_CLOCK |
---|
2526 | #endif |
---|
2527 | program test |
---|
2528 | !test various aspects of mpp_mod |
---|
2529 | #ifdef sgi_mipspro |
---|
2530 | use shmem_interface |
---|
2531 | #endif |
---|
2532 | use mpp_mod |
---|
2533 | implicit none |
---|
2534 | integer :: pe, npes, root |
---|
2535 | integer, parameter :: n=1048576 |
---|
2536 | real, allocatable, dimension(:) :: a, b, c |
---|
2537 | integer :: tick, tick0, ticks_per_sec, id |
---|
2538 | integer :: i, j, k, l, m |
---|
2539 | real :: dt |
---|
2540 | |
---|
2541 | call mpp_init() |
---|
2542 | call mpp_set_stack_size(3145746) |
---|
2543 | pe = mpp_pe() |
---|
2544 | npes = mpp_npes() |
---|
2545 | root = mpp_root_pe() |
---|
2546 | |
---|
2547 | call SYSTEM_CLOCK( count_rate=ticks_per_sec ) |
---|
2548 | allocate( a(n), b(n) ) |
---|
2549 | id = mpp_clock_id( 'Random number' ) |
---|
2550 | call mpp_clock_begin(id) |
---|
2551 | call random_number(a) |
---|
2552 | call mpp_clock_end (id) |
---|
2553 | !time transmit, compare against shmem_put and get |
---|
2554 | if( pe.EQ.root )then |
---|
2555 | print *, 'Time mpp_transmit for various lengths...' |
---|
2556 | #ifdef SGICRAY |
---|
2557 | print *, 'For comparison, times for shmem_get and shmem_put are also provided.' |
---|
2558 | #endif |
---|
2559 | print * |
---|
2560 | end if |
---|
2561 | id = mpp_clock_id( 'mpp_transmit' ) |
---|
2562 | call mpp_clock_begin(id) |
---|
2563 | !timing is done for cyclical pass (more useful than ping-pong etc) |
---|
2564 | l = n |
---|
2565 | do while( l.GT.0 ) |
---|
2566 | !mpp_transmit |
---|
2567 | call mpp_sync() |
---|
2568 | call SYSTEM_CLOCK(tick0) |
---|
2569 | do i = 1,npes |
---|
2570 | call mpp_transmit( a, l, modulo(pe+npes-i,npes), b, l, modulo(pe+i,npes) ) |
---|
2571 | ! call mpp_sync_self( (/modulo(pe+npes-i,npes)/) ) |
---|
2572 | end do |
---|
2573 | call mpp_sync() |
---|
2574 | call SYSTEM_CLOCK(tick) |
---|
2575 | dt = real(tick-tick0)/(npes*ticks_per_sec) |
---|
2576 | dt = max( dt, epsilon(dt) ) |
---|
2577 | if( pe.EQ.root )write( stdout(),'(/a,i8,f13.6,f8.2)' )'MPP_TRANSMIT length, time, bw(Mb/s)=', l, dt, l*8e-6/dt |
---|
2578 | #ifdef SGICRAY |
---|
2579 | !shmem_put |
---|
2580 | call mpp_sync() |
---|
2581 | call SYSTEM_CLOCK(tick0) |
---|
2582 | do i = 1,npes |
---|
2583 | call shmem_real_put( b, a, l, modulo(pe+1,npes) ) |
---|
2584 | end do |
---|
2585 | call mpp_sync() |
---|
2586 | call SYSTEM_CLOCK(tick) |
---|
2587 | dt = real(tick-tick0)/(npes*ticks_per_sec) |
---|
2588 | dt = max( dt, epsilon(dt) ) |
---|
2589 | if( pe.EQ.root )write( stdout(),'( a,i8,f13.6,f8.2)' )'SHMEM_PUT length, time, bw(Mb/s)=', l, dt, l*8e-6/dt |
---|
2590 | !shmem_get |
---|
2591 | call mpp_sync() |
---|
2592 | call SYSTEM_CLOCK(tick0) |
---|
2593 | do i = 1,npes |
---|
2594 | call shmem_real_get( b, a, l, modulo(pe+1,npes) ) |
---|
2595 | end do |
---|
2596 | call SYSTEM_CLOCK(tick) |
---|
2597 | dt = real(tick-tick0)/(npes*ticks_per_sec) |
---|
2598 | dt = max( dt, epsilon(dt) ) |
---|
2599 | if( pe.EQ.root )write( stdout(),'( a,i8,f13.6,f8.2)' )'SHMEM_GET length, time, bw(Mb/s)=', l, dt, l*8e-6/dt |
---|
2600 | #endif |
---|
2601 | l = l/2 |
---|
2602 | end do |
---|
2603 | |
---|
2604 | !test mpp_sum |
---|
2605 | if( pe.EQ.root )then |
---|
2606 | print '(/a)', 'Time mpp_sum...' |
---|
2607 | end if |
---|
2608 | a = real(pe+1) |
---|
2609 | call mpp_sync() |
---|
2610 | call SYSTEM_CLOCK(tick0) |
---|
2611 | call mpp_sum(a,n) |
---|
2612 | call SYSTEM_CLOCK(tick) |
---|
2613 | dt = real(tick-tick0)/ticks_per_sec |
---|
2614 | dt = max( dt, epsilon(dt) ) |
---|
2615 | if( pe.EQ.root )write( stdout(),'(a,2i4,f9.1,i8,f13.6,f8.2/)' ) & |
---|
2616 | 'mpp_sum: pe, npes, sum(pe+1), length, time, bw(Mb/s)=', pe, npes, a(1), n, dt, n*8e-6/dt |
---|
2617 | call mpp_clock_end(id) |
---|
2618 | |
---|
2619 | !test mpp_max |
---|
2620 | if( pe.EQ.root )then |
---|
2621 | print * |
---|
2622 | print *, 'Test mpp_max...' |
---|
2623 | end if |
---|
2624 | a = real(pe+1) |
---|
2625 | print *, 'pe, pe+1 =', pe, a(1) |
---|
2626 | call mpp_max( a(1) ) |
---|
2627 | print *, 'pe, max(pe+1)=', pe, a(1) |
---|
2628 | |
---|
2629 | !pelist check |
---|
2630 | call mpp_sync() |
---|
2631 | call flush(stdout()) |
---|
2632 | if( npes.GE.2 )then |
---|
2633 | if( pe.EQ.root )print *, 'Test of pelists: bcast, sum and max using PEs 0...npes-2 (excluding last PE)' |
---|
2634 | call mpp_declare_pelist( (/(i,i=0,npes-2)/) ) |
---|
2635 | |
---|
2636 | a = real(pe+1) |
---|
2637 | if( pe.NE.npes-1 )call mpp_broadcast( a, n, npes-2, (/(i,i=0,npes-2)/) ) |
---|
2638 | print *, 'bcast(npes-1) from 0 to npes-2=', pe, a(1) |
---|
2639 | a = real(pe+1) |
---|
2640 | if( pe.NE.npes-1 )call mpp_sum( a, n, (/(i,i=0,npes-2)/) ) |
---|
2641 | if( pe.EQ.root )print *, 'sum(pe+1) from 0 to npes-2=', a(1) |
---|
2642 | a = real(pe+1) |
---|
2643 | if( pe.NE.npes-1 )call mpp_max( a(1), (/(i,i=0,npes-2)/) ) |
---|
2644 | if( pe.EQ.root )print *, 'max(pe+1) from 0 to npes-2=', a(1) |
---|
2645 | end if |
---|
2646 | #ifdef use_CRI_pointers |
---|
2647 | !test mpp_chksum |
---|
2648 | if( modulo(n,npes).EQ.0 )then !only set up for even division |
---|
2649 | if( pe.EQ.root )call random_number(a) |
---|
2650 | call mpp_sync() |
---|
2651 | call mpp_transmit( a, n, ALL_PES, a, n, root ) |
---|
2652 | m= n/npes |
---|
2653 | allocate( c(m) ) |
---|
2654 | c = a(pe*m+1:pe*m+m) |
---|
2655 | |
---|
2656 | if( pe.EQ.root )then |
---|
2657 | print * |
---|
2658 | print *, 'Test mpp_chksum...' |
---|
2659 | print *, 'This test shows that a whole array and a distributed array give identical checksums.' |
---|
2660 | end if |
---|
2661 | print *, 'chksum(a)=', mpp_chksum(a,(/pe/)) |
---|
2662 | print *, 'chksum(c)=', mpp_chksum(c) |
---|
2663 | end if |
---|
2664 | #endif |
---|
2665 | call mpp_exit() |
---|
2666 | end program test |
---|
2667 | #endif |
---|