1 | MODULE timing |
---|
2 | !!======================================================================== |
---|
3 | !! *** MODULE timing *** |
---|
4 | !!======================================================================== |
---|
5 | !! History : 4.0 ! 2001-05 (R. Benshila) |
---|
6 | !!------------------------------------------------------------------------ |
---|
7 | |
---|
8 | !!------------------------------------------------------------------------ |
---|
9 | !! timming_init : initialize timing process |
---|
10 | !! timing_start : start Timer |
---|
11 | !! timing_stop : stop Timer |
---|
12 | !! timing_reset : end timing variable creation |
---|
13 | !! timing_finalize : compute stats and write output in calling w*_info |
---|
14 | !! timing_ini_var : create timing variables |
---|
15 | !! timing_listing : print instumented subroutines in ocean.output |
---|
16 | !! wcurrent_info : compute and print detailed stats on the current CPU |
---|
17 | !! wave_info : compute and print averaged statson all processors |
---|
18 | !! wmpi_info : compute and write global stats |
---|
19 | !! supress : suppress an element of the timing linked list |
---|
20 | !! insert : insert an element of the timing linked list |
---|
21 | !!------------------------------------------------------------------------ |
---|
22 | USE in_out_manager ! I/O manager |
---|
23 | USE dom_oce ! ocean domain |
---|
24 | USE lib_mpp |
---|
25 | |
---|
26 | IMPLICIT NONE |
---|
27 | PRIVATE |
---|
28 | |
---|
29 | PUBLIC timing_init, timing_finalize ! called in nemogcm module |
---|
30 | PUBLIC timing_reset ! called in step module |
---|
31 | PUBLIC timing_start, timing_stop ! called in each routine to time |
---|
32 | |
---|
33 | |
---|
34 | INCLUDE 'mpif.h' |
---|
35 | |
---|
36 | |
---|
37 | ! Variables for fine grain timing |
---|
38 | TYPE timer |
---|
39 | CHARACTER(LEN=20) :: cname |
---|
40 | REAL(wp) :: t_cpu, t_clock, tsum_cpu, tsum_clock, tmax_cpu, tmax_clock, tmin_cpu, tmin_clock, tsub_cpu, tsub_clock |
---|
41 | INTEGER :: ncount, ncount_max, ncount_rate |
---|
42 | INTEGER :: niter |
---|
43 | LOGICAL :: l_tdone |
---|
44 | TYPE(timer), POINTER :: next => NULL() |
---|
45 | TYPE(timer), POINTER :: prev => NULL() |
---|
46 | TYPE(timer), POINTER :: parent_section => NULL() |
---|
47 | END TYPE timer |
---|
48 | |
---|
49 | TYPE alltimer |
---|
50 | CHARACTER(LEN=20), DIMENSION(:), POINTER :: cname => NULL() |
---|
51 | REAL(wp), DIMENSION(:), POINTER :: tsum_cpu => NULL() |
---|
52 | REAL(wp), DIMENSION(:), POINTER :: tsum_clock => NULL() |
---|
53 | INTEGER, DIMENSION(:), POINTER :: niter => NULL() |
---|
54 | TYPE(alltimer), POINTER :: next => NULL() |
---|
55 | TYPE(alltimer), POINTER :: prev => NULL() |
---|
56 | END TYPE alltimer |
---|
57 | |
---|
58 | TYPE(timer), POINTER :: s_timer_root => NULL() |
---|
59 | TYPE(timer), POINTER :: s_timer => NULL() |
---|
60 | TYPE(timer), POINTER :: s_wrk => NULL() |
---|
61 | REAL(wp) :: t_overclock, t_overcpu |
---|
62 | LOGICAL :: l_initdone = .FALSE. |
---|
63 | INTEGER :: nsize |
---|
64 | |
---|
65 | ! Variables for coarse grain timing |
---|
66 | REAL(wp) :: tot_etime, tot_ctime |
---|
67 | REAL(kind=wp), DIMENSION(2) :: t_elaps, t_cpu |
---|
68 | REAL(wp), ALLOCATABLE, DIMENSION(:) :: all_etime, all_ctime |
---|
69 | INTEGER :: nfinal_count, ncount, ncount_rate, ncount_max |
---|
70 | INTEGER, DIMENSION(8) :: nvalues |
---|
71 | CHARACTER(LEN=8), DIMENSION(2) :: cdate |
---|
72 | CHARACTER(LEN=10), DIMENSION(2) :: ctime |
---|
73 | CHARACTER(LEN=5) :: czone |
---|
74 | |
---|
75 | ! From of ouput file (1/proc or one global) !RB to put in nammpp or namctl |
---|
76 | LOGICAL :: ln_onefile = .TRUE. |
---|
77 | LOGICAL :: lwriter |
---|
78 | !!---------------------------------------------------------------------- |
---|
79 | !! NEMO/OPA 4.0 , NEMO Consortium (2011) |
---|
80 | !! $Id: timing.F90 5120 2015-03-03 16:11:55Z acc $ |
---|
81 | !! Software governed by the CeCILL licence (NEMOGCM/NEMO_CeCILL.txt) |
---|
82 | !!---------------------------------------------------------------------- |
---|
83 | CONTAINS |
---|
84 | |
---|
85 | SUBROUTINE timing_start(cdinfo) |
---|
86 | !!---------------------------------------------------------------------- |
---|
87 | !! *** ROUTINE timing_start *** |
---|
88 | !! ** Purpose : collect execution time |
---|
89 | !!---------------------------------------------------------------------- |
---|
90 | CHARACTER(len=*), INTENT(in) :: cdinfo |
---|
91 | ! |
---|
92 | |
---|
93 | ! Create timing structure at first call |
---|
94 | IF( .NOT. l_initdone ) THEN |
---|
95 | CALL timing_ini_var(cdinfo) |
---|
96 | ELSE |
---|
97 | s_timer => s_timer_root |
---|
98 | DO WHILE( TRIM(s_timer%cname) /= TRIM(cdinfo) ) |
---|
99 | IF( ASSOCIATED(s_timer%next) ) s_timer => s_timer%next |
---|
100 | END DO |
---|
101 | ENDIF |
---|
102 | s_timer%l_tdone = .FALSE. |
---|
103 | s_timer%niter = s_timer%niter + 1 |
---|
104 | s_timer%t_cpu = 0. |
---|
105 | s_timer%t_clock = 0. |
---|
106 | |
---|
107 | ! CPU time collection |
---|
108 | CALL CPU_TIME( s_timer%t_cpu ) |
---|
109 | ! clock time collection |
---|
110 | |
---|
111 | s_timer%t_clock= MPI_Wtime() |
---|
112 | |
---|
113 | |
---|
114 | |
---|
115 | |
---|
116 | ! |
---|
117 | END SUBROUTINE timing_start |
---|
118 | |
---|
119 | |
---|
120 | SUBROUTINE timing_stop(cdinfo, csection) |
---|
121 | !!---------------------------------------------------------------------- |
---|
122 | !! *** ROUTINE timing_stop *** |
---|
123 | !! ** Purpose : finalize timing and output |
---|
124 | !!---------------------------------------------------------------------- |
---|
125 | CHARACTER(len=*), INTENT(in) :: cdinfo |
---|
126 | CHARACTER(len=*), INTENT(in), OPTIONAL :: csection |
---|
127 | ! |
---|
128 | INTEGER :: ifinal_count, iperiods |
---|
129 | REAL(wp) :: zcpu_end, zmpitime |
---|
130 | ! |
---|
131 | s_wrk => NULL() |
---|
132 | |
---|
133 | ! clock time collection |
---|
134 | |
---|
135 | zmpitime = MPI_Wtime() |
---|
136 | |
---|
137 | |
---|
138 | |
---|
139 | ! CPU time collection |
---|
140 | CALL CPU_TIME( zcpu_end ) |
---|
141 | |
---|
142 | s_timer => s_timer_root |
---|
143 | DO WHILE( TRIM(s_timer%cname) /= TRIM(cdinfo) ) |
---|
144 | IF( ASSOCIATED(s_timer%next) ) s_timer => s_timer%next |
---|
145 | END DO |
---|
146 | |
---|
147 | ! CPU time correction |
---|
148 | s_timer%t_cpu = zcpu_end - s_timer%t_cpu - t_overcpu - s_timer%tsub_cpu |
---|
149 | |
---|
150 | ! clock time correction |
---|
151 | |
---|
152 | s_timer%t_clock = zmpitime - s_timer%t_clock - t_overclock - s_timer%tsub_clock |
---|
153 | |
---|
154 | |
---|
155 | |
---|
156 | |
---|
157 | |
---|
158 | |
---|
159 | |
---|
160 | ! Correction of parent section |
---|
161 | IF( .NOT. PRESENT(csection) ) THEN |
---|
162 | s_wrk => s_timer |
---|
163 | DO WHILE ( ASSOCIATED(s_wrk%parent_section ) ) |
---|
164 | s_wrk => s_wrk%parent_section |
---|
165 | s_wrk%tsub_cpu = s_wrk%tsub_cpu + s_timer%t_cpu |
---|
166 | s_wrk%tsub_clock = s_wrk%tsub_clock + s_timer%t_clock |
---|
167 | END DO |
---|
168 | ENDIF |
---|
169 | |
---|
170 | ! time diagnostics |
---|
171 | s_timer%tsum_clock = s_timer%tsum_clock + s_timer%t_clock |
---|
172 | s_timer%tsum_cpu = s_timer%tsum_cpu + s_timer%t_cpu |
---|
173 | !RB to use to get min/max during a time integration |
---|
174 | ! IF( .NOT. l_initdone ) THEN |
---|
175 | ! s_timer%tmin_clock = s_timer%t_clock |
---|
176 | ! s_timer%tmin_cpu = s_timer%t_cpu |
---|
177 | ! ELSE |
---|
178 | ! s_timer%tmin_clock = MIN( s_timer%tmin_clock, s_timer%t_clock ) |
---|
179 | ! s_timer%tmin_cpu = MIN( s_timer%tmin_cpu , s_timer%t_cpu ) |
---|
180 | ! ENDIF |
---|
181 | ! s_timer%tmax_clock = MAX( s_timer%tmax_clock, s_timer%t_clock ) |
---|
182 | ! s_timer%tmax_cpu = MAX( s_timer%tmax_cpu , s_timer%t_cpu ) |
---|
183 | ! |
---|
184 | s_timer%tsub_clock = 0. |
---|
185 | s_timer%tsub_cpu = 0. |
---|
186 | s_timer%l_tdone = .TRUE. |
---|
187 | ! |
---|
188 | END SUBROUTINE timing_stop |
---|
189 | |
---|
190 | |
---|
191 | SUBROUTINE timing_init |
---|
192 | !!---------------------------------------------------------------------- |
---|
193 | !! *** ROUTINE timing_init *** |
---|
194 | !! ** Purpose : open timing output file |
---|
195 | !!---------------------------------------------------------------------- |
---|
196 | INTEGER :: iperiods, istart_count, ifinal_count |
---|
197 | REAL(wp) :: zdum |
---|
198 | LOGICAL :: ll_f |
---|
199 | |
---|
200 | IF( ln_onefile ) THEN |
---|
201 | IF( lwp) CALL ctl_opn( numtime, 'timing.output', 'REPLACE', 'FORMATTED', 'SEQUENTIAL', -1, numout,.TRUE., narea ) |
---|
202 | lwriter = lwp |
---|
203 | ELSE |
---|
204 | CALL ctl_opn( numtime, 'timing.output', 'REPLACE', 'FORMATTED', 'SEQUENTIAL', -1, numout,.FALSE., narea ) |
---|
205 | lwriter = .TRUE. |
---|
206 | ENDIF |
---|
207 | |
---|
208 | IF( lwriter) THEN |
---|
209 | WRITE(numtime,*) |
---|
210 | WRITE(numtime,*) ' CNRS - NERC - Met OFFICE - MERCATOR-ocean - CMCC - INGV' |
---|
211 | WRITE(numtime,*) ' NEMO team' |
---|
212 | WRITE(numtime,*) ' Ocean General Circulation Model' |
---|
213 | WRITE(numtime,*) ' version 3.6 (2015) ' |
---|
214 | WRITE(numtime,*) |
---|
215 | WRITE(numtime,*) ' Timing Informations ' |
---|
216 | WRITE(numtime,*) |
---|
217 | WRITE(numtime,*) |
---|
218 | ENDIF |
---|
219 | |
---|
220 | ! Compute clock function overhead |
---|
221 | |
---|
222 | t_overclock = MPI_WTIME() |
---|
223 | t_overclock = MPI_WTIME() - t_overclock |
---|
224 | |
---|
225 | ! Compute cpu_time function overhead |
---|
226 | CALL CPU_TIME(zdum) |
---|
227 | CALL CPU_TIME(t_overcpu) |
---|
228 | |
---|
229 | ! End overhead omputation |
---|
230 | t_overcpu = t_overcpu - zdum |
---|
231 | t_overclock = t_overcpu + t_overclock |
---|
232 | |
---|
233 | ! Timing on date and time |
---|
234 | CALL DATE_AND_TIME(cdate(1),ctime(1),czone,nvalues) |
---|
235 | |
---|
236 | CALL CPU_TIME(t_cpu(1)) |
---|
237 | ! Start elapsed and CPU time counters |
---|
238 | t_elaps(1) = MPI_WTIME() |
---|
239 | ! |
---|
240 | END SUBROUTINE timing_init |
---|
241 | |
---|
242 | |
---|
243 | SUBROUTINE timing_finalize |
---|
244 | !!---------------------------------------------------------------------- |
---|
245 | !! *** ROUTINE timing_finalize *** |
---|
246 | !! ** Purpose : compute average time |
---|
247 | !! write timing output file |
---|
248 | !!---------------------------------------------------------------------- |
---|
249 | TYPE(timer), POINTER :: s_temp |
---|
250 | INTEGER :: idum, iperiods, icode |
---|
251 | LOGICAL :: ll_ord, ll_averep |
---|
252 | CHARACTER(len=120) :: clfmt |
---|
253 | |
---|
254 | ll_averep = .TRUE. |
---|
255 | |
---|
256 | ! total CPU and elapse |
---|
257 | CALL CPU_TIME(t_cpu(2)) |
---|
258 | t_cpu(2) = t_cpu(2) - t_cpu(1) - t_overcpu |
---|
259 | t_elaps(2) = MPI_WTIME() - t_elaps(1) - t_overclock |
---|
260 | |
---|
261 | ! End of timings on date & time |
---|
262 | CALL DATE_AND_TIME(cdate(2),ctime(2),czone,nvalues) |
---|
263 | |
---|
264 | ! Compute the numer of routines |
---|
265 | nsize = 0 |
---|
266 | s_timer => s_timer_root |
---|
267 | DO WHILE( ASSOCIATED(s_timer) ) |
---|
268 | nsize = nsize + 1 |
---|
269 | s_timer => s_timer%next |
---|
270 | END DO |
---|
271 | idum = nsize |
---|
272 | IF(lk_mpp) CALL mpp_sum(idum) |
---|
273 | IF( idum/jpnij /= nsize ) THEN |
---|
274 | IF( lwriter ) WRITE(numtime,*) ' ===> W A R N I N G: ' |
---|
275 | IF( lwriter ) WRITE(numtime,*) ' Some CPU have different number of routines instrumented for timing' |
---|
276 | IF( lwriter ) WRITE(numtime,*) ' No detailed report on averaged timing can be provided' |
---|
277 | IF( lwriter ) WRITE(numtime,*) ' The following detailed report only deals with the current processor' |
---|
278 | IF( lwriter ) WRITE(numtime,*) |
---|
279 | ll_averep = .FALSE. |
---|
280 | ENDIF |
---|
281 | |
---|
282 | ! in MPI gather some info |
---|
283 | ALLOCATE( all_etime(jpnij), all_ctime(jpnij) ) |
---|
284 | CALL MPI_ALLGATHER(t_elaps(2), 1, MPI_DOUBLE_PRECISION, & |
---|
285 | all_etime , 1, MPI_DOUBLE_PRECISION, & |
---|
286 | MPI_COMM_OPA, icode) |
---|
287 | CALL MPI_ALLGATHER(t_cpu(2) , 1, MPI_DOUBLE_PRECISION, & |
---|
288 | all_ctime, 1, MPI_DOUBLE_PRECISION, & |
---|
289 | MPI_COMM_OPA, icode) |
---|
290 | tot_etime = SUM(all_etime(:)) |
---|
291 | tot_ctime = SUM(all_ctime(:)) |
---|
292 | |
---|
293 | ! write output file |
---|
294 | IF( lwriter ) WRITE(numtime,*) 'Total timing (sum) :' |
---|
295 | IF( lwriter ) WRITE(numtime,*) '--------------------' |
---|
296 | IF( lwriter ) WRITE(numtime,"('Elapsed Time (s) CPU Time (s)')") |
---|
297 | IF( lwriter ) WRITE(numtime,'(5x,f12.3,1x,f12.3)') tot_etime, tot_ctime |
---|
298 | IF( lwriter ) WRITE(numtime,*) |
---|
299 | IF( ll_averep ) CALL waver_info |
---|
300 | CALL wmpi_info |
---|
301 | IF( lwriter ) CALL wcurrent_info |
---|
302 | |
---|
303 | clfmt='(1X,"Timing started on ",2(A2,"/"),A4," at ",2(A2,":"),A2," MET ",A3,":",A2," from GMT")' |
---|
304 | IF( lwriter ) WRITE(numtime, TRIM(clfmt)) & |
---|
305 | & cdate(1)(7:8), cdate(1)(5:6), cdate(1)(1:4), & |
---|
306 | & ctime(1)(1:2), ctime(1)(3:4), ctime(1)(5:6), & |
---|
307 | & czone(1:3), czone(4:5) |
---|
308 | clfmt='(1X, "Timing ended on ",2(A2,"/"),A4," at ",2(A2,":"),A2," MET ",A3,":",A2," from GMT")' |
---|
309 | IF( lwriter ) WRITE(numtime, TRIM(clfmt)) & |
---|
310 | & cdate(2)(7:8), cdate(2)(5:6), cdate(2)(1:4), & |
---|
311 | & ctime(2)(1:2), ctime(2)(3:4), ctime(2)(5:6), & |
---|
312 | & czone(1:3), czone(4:5) |
---|
313 | |
---|
314 | IF( lwriter ) CLOSE(numtime) |
---|
315 | ! |
---|
316 | END SUBROUTINE timing_finalize |
---|
317 | |
---|
318 | |
---|
319 | SUBROUTINE wcurrent_info |
---|
320 | !!---------------------------------------------------------------------- |
---|
321 | !! *** ROUTINE wcurrent_info *** |
---|
322 | !! ** Purpose : compute and write timing output file |
---|
323 | !!---------------------------------------------------------------------- |
---|
324 | LOGICAL :: ll_ord |
---|
325 | CHARACTER(len=2048) :: clfmt |
---|
326 | |
---|
327 | ! reorder the current list by elapse time |
---|
328 | s_wrk => NULL() |
---|
329 | s_timer => s_timer_root |
---|
330 | DO |
---|
331 | ll_ord = .TRUE. |
---|
332 | s_timer => s_timer_root |
---|
333 | DO WHILE ( ASSOCIATED( s_timer%next ) ) |
---|
334 | IF (.NOT. ASSOCIATED(s_timer%next)) EXIT |
---|
335 | IF ( s_timer%tsum_clock < s_timer%next%tsum_clock ) THEN |
---|
336 | ALLOCATE(s_wrk) |
---|
337 | s_wrk = s_timer%next |
---|
338 | CALL insert (s_timer, s_timer_root, s_wrk) |
---|
339 | CALL suppress(s_timer%next) |
---|
340 | ll_ord = .FALSE. |
---|
341 | CYCLE |
---|
342 | ENDIF |
---|
343 | IF( ASSOCIATED(s_timer%next) ) s_timer => s_timer%next |
---|
344 | END DO |
---|
345 | IF( ll_ord ) EXIT |
---|
346 | END DO |
---|
347 | |
---|
348 | ! write current info |
---|
349 | WRITE(numtime,*) 'Detailed timing for proc :', narea-1 |
---|
350 | WRITE(numtime,*) '--------------------------' |
---|
351 | WRITE(numtime,*) 'Section ', & |
---|
352 | & 'Elapsed Time (s) ','Elapsed Time (%) ', & |
---|
353 | & 'CPU Time(s) ','CPU Time (%) ','CPU/Elapsed ','Frequency' |
---|
354 | s_timer => s_timer_root |
---|
355 | clfmt = '(1x,a,4x,f12.3,6x,f12.3,x,f12.3,2x,f12.3,6x,f7.3,2x,i9)' |
---|
356 | DO WHILE ( ASSOCIATED(s_timer) ) |
---|
357 | WRITE(numtime,TRIM(clfmt)) s_timer%cname, & |
---|
358 | & s_timer%tsum_clock,s_timer%tsum_clock*100./t_elaps(2), & |
---|
359 | & s_timer%tsum_cpu ,s_timer%tsum_cpu*100./t_cpu(2) , & |
---|
360 | & s_timer%tsum_cpu/s_timer%tsum_clock, s_timer%niter |
---|
361 | s_timer => s_timer%next |
---|
362 | END DO |
---|
363 | WRITE(numtime,*) |
---|
364 | ! |
---|
365 | END SUBROUTINE wcurrent_info |
---|
366 | |
---|
367 | SUBROUTINE waver_info |
---|
368 | !!---------------------------------------------------------------------- |
---|
369 | !! *** ROUTINE wcurrent_info *** |
---|
370 | !! ** Purpose : compute and write averaged timing informations |
---|
371 | !!---------------------------------------------------------------------- |
---|
372 | TYPE(alltimer), POINTER :: sl_timer_glob_root => NULL() |
---|
373 | TYPE(alltimer), POINTER :: sl_timer_glob => NULL() |
---|
374 | TYPE(timer), POINTER :: sl_timer_ave_root => NULL() |
---|
375 | TYPE(timer), POINTER :: sl_timer_ave => NULL() |
---|
376 | INTEGER :: icode |
---|
377 | INTEGER :: ierr |
---|
378 | LOGICAL :: ll_ord |
---|
379 | CHARACTER(len=200) :: clfmt |
---|
380 | |
---|
381 | ! Initialised the global strucutre |
---|
382 | ALLOCATE(sl_timer_glob_root, Stat=ierr) |
---|
383 | IF(ierr /= 0)THEN |
---|
384 | WRITE(numtime,*) 'Failed to allocate global timing structure in waver_info' |
---|
385 | RETURN |
---|
386 | END IF |
---|
387 | |
---|
388 | ALLOCATE(sl_timer_glob_root%cname (jpnij), & |
---|
389 | sl_timer_glob_root%tsum_cpu (jpnij), & |
---|
390 | sl_timer_glob_root%tsum_clock(jpnij), & |
---|
391 | sl_timer_glob_root%niter (jpnij), Stat=ierr) |
---|
392 | IF(ierr /= 0)THEN |
---|
393 | WRITE(numtime,*) 'Failed to allocate global timing structure in waver_info' |
---|
394 | RETURN |
---|
395 | END IF |
---|
396 | sl_timer_glob_root%cname(:) = '' |
---|
397 | sl_timer_glob_root%tsum_cpu(:) = 0._wp |
---|
398 | sl_timer_glob_root%tsum_clock(:) = 0._wp |
---|
399 | sl_timer_glob_root%niter(:) = 0 |
---|
400 | sl_timer_glob_root%next => NULL() |
---|
401 | sl_timer_glob_root%prev => NULL() |
---|
402 | !ARPDBG - don't need to allocate a pointer that's immediately then |
---|
403 | ! set to point to some other object. |
---|
404 | !ALLOCATE(sl_timer_glob) |
---|
405 | !ALLOCATE(sl_timer_glob%cname (jpnij)) |
---|
406 | !ALLOCATE(sl_timer_glob%tsum_cpu (jpnij)) |
---|
407 | !ALLOCATE(sl_timer_glob%tsum_clock(jpnij)) |
---|
408 | !ALLOCATE(sl_timer_glob%niter (jpnij)) |
---|
409 | sl_timer_glob => sl_timer_glob_root |
---|
410 | ! |
---|
411 | IF( narea .EQ. 1 ) THEN |
---|
412 | ALLOCATE(sl_timer_ave_root) |
---|
413 | sl_timer_ave_root%cname = '' |
---|
414 | sl_timer_ave_root%t_cpu = 0._wp |
---|
415 | sl_timer_ave_root%t_clock = 0._wp |
---|
416 | sl_timer_ave_root%tsum_cpu = 0._wp |
---|
417 | sl_timer_ave_root%tsum_clock = 0._wp |
---|
418 | sl_timer_ave_root%tmax_cpu = 0._wp |
---|
419 | sl_timer_ave_root%tmax_clock = 0._wp |
---|
420 | sl_timer_ave_root%tmin_cpu = 0._wp |
---|
421 | sl_timer_ave_root%tmin_clock = 0._wp |
---|
422 | sl_timer_ave_root%tsub_cpu = 0._wp |
---|
423 | sl_timer_ave_root%tsub_clock = 0._wp |
---|
424 | sl_timer_ave_root%ncount = 0 |
---|
425 | sl_timer_ave_root%ncount_rate = 0 |
---|
426 | sl_timer_ave_root%ncount_max = 0 |
---|
427 | sl_timer_ave_root%niter = 0 |
---|
428 | sl_timer_ave_root%l_tdone = .FALSE. |
---|
429 | sl_timer_ave_root%next => NULL() |
---|
430 | sl_timer_ave_root%prev => NULL() |
---|
431 | ALLOCATE(sl_timer_ave) |
---|
432 | sl_timer_ave => sl_timer_ave_root |
---|
433 | ENDIF |
---|
434 | |
---|
435 | ! Gather info from all processors |
---|
436 | s_timer => s_timer_root |
---|
437 | DO WHILE ( ASSOCIATED(s_timer) ) |
---|
438 | CALL MPI_GATHER(s_timer%cname , 20, MPI_CHARACTER, & |
---|
439 | sl_timer_glob%cname, 20, MPI_CHARACTER, & |
---|
440 | 0, MPI_COMM_OPA, icode) |
---|
441 | CALL MPI_GATHER(s_timer%tsum_clock , 1, MPI_DOUBLE_PRECISION, & |
---|
442 | sl_timer_glob%tsum_clock, 1, MPI_DOUBLE_PRECISION, & |
---|
443 | 0, MPI_COMM_OPA, icode) |
---|
444 | CALL MPI_GATHER(s_timer%tsum_cpu , 1, MPI_DOUBLE_PRECISION, & |
---|
445 | sl_timer_glob%tsum_cpu, 1, MPI_DOUBLE_PRECISION, & |
---|
446 | 0, MPI_COMM_OPA, icode) |
---|
447 | CALL MPI_GATHER(s_timer%niter , 1, MPI_INTEGER, & |
---|
448 | sl_timer_glob%niter, 1, MPI_INTEGER, & |
---|
449 | 0, MPI_COMM_OPA, icode) |
---|
450 | |
---|
451 | IF( narea == 1 .AND. ASSOCIATED(s_timer%next) ) THEN |
---|
452 | ALLOCATE(sl_timer_glob%next) |
---|
453 | ALLOCATE(sl_timer_glob%next%cname (jpnij)) |
---|
454 | ALLOCATE(sl_timer_glob%next%tsum_cpu (jpnij)) |
---|
455 | ALLOCATE(sl_timer_glob%next%tsum_clock(jpnij)) |
---|
456 | ALLOCATE(sl_timer_glob%next%niter (jpnij)) |
---|
457 | sl_timer_glob%next%prev => sl_timer_glob |
---|
458 | sl_timer_glob%next%next => NULL() |
---|
459 | sl_timer_glob => sl_timer_glob%next |
---|
460 | ENDIF |
---|
461 | s_timer => s_timer%next |
---|
462 | END DO |
---|
463 | |
---|
464 | WRITE(*,*) 'ARPDBG: timing: done gathers' |
---|
465 | |
---|
466 | IF( narea == 1 ) THEN |
---|
467 | ! Compute some stats |
---|
468 | sl_timer_glob => sl_timer_glob_root |
---|
469 | DO WHILE( ASSOCIATED(sl_timer_glob) ) |
---|
470 | sl_timer_ave%cname = sl_timer_glob%cname(1) |
---|
471 | sl_timer_ave%tsum_cpu = SUM (sl_timer_glob%tsum_cpu (:)) / jpnij |
---|
472 | sl_timer_ave%tsum_clock = SUM (sl_timer_glob%tsum_clock(:)) / jpnij |
---|
473 | sl_timer_ave%tmax_cpu = MAXVAL(sl_timer_glob%tsum_cpu (:)) |
---|
474 | sl_timer_ave%tmax_clock = MAXVAL(sl_timer_glob%tsum_clock(:)) |
---|
475 | sl_timer_ave%tmin_cpu = MINVAL(sl_timer_glob%tsum_cpu (:)) |
---|
476 | sl_timer_ave%tmin_clock = MINVAL(sl_timer_glob%tsum_clock(:)) |
---|
477 | sl_timer_ave%niter = SUM (sl_timer_glob%niter (:)) |
---|
478 | ! |
---|
479 | IF( ASSOCIATED(sl_timer_glob%next) ) THEN |
---|
480 | ALLOCATE(sl_timer_ave%next) |
---|
481 | sl_timer_ave%next%prev => sl_timer_ave |
---|
482 | sl_timer_ave%next%next => NULL() |
---|
483 | sl_timer_ave => sl_timer_ave%next |
---|
484 | ENDIF |
---|
485 | sl_timer_glob => sl_timer_glob%next |
---|
486 | END DO |
---|
487 | |
---|
488 | WRITE(*,*) 'ARPDBG: timing: done computing stats' |
---|
489 | |
---|
490 | ! reorder the averaged list by CPU time |
---|
491 | s_wrk => NULL() |
---|
492 | sl_timer_ave => sl_timer_ave_root |
---|
493 | DO |
---|
494 | ll_ord = .TRUE. |
---|
495 | sl_timer_ave => sl_timer_ave_root |
---|
496 | DO WHILE( ASSOCIATED( sl_timer_ave%next ) ) |
---|
497 | |
---|
498 | IF( .NOT. ASSOCIATED(sl_timer_ave%next) ) EXIT |
---|
499 | |
---|
500 | IF ( sl_timer_ave%tsum_clock < sl_timer_ave%next%tsum_clock ) THEN |
---|
501 | ALLOCATE(s_wrk) |
---|
502 | ! Copy data into the new object pointed to by s_wrk |
---|
503 | s_wrk = sl_timer_ave%next |
---|
504 | ! Insert this new timer object before our current position |
---|
505 | CALL insert (sl_timer_ave, sl_timer_ave_root, s_wrk) |
---|
506 | ! Remove the old object from the list |
---|
507 | CALL suppress(sl_timer_ave%next) |
---|
508 | ll_ord = .FALSE. |
---|
509 | CYCLE |
---|
510 | ENDIF |
---|
511 | IF( ASSOCIATED(sl_timer_ave%next) ) sl_timer_ave => sl_timer_ave%next |
---|
512 | END DO |
---|
513 | IF( ll_ord ) EXIT |
---|
514 | END DO |
---|
515 | |
---|
516 | ! write averaged info |
---|
517 | WRITE(numtime,"('Averaged timing on all processors :')") |
---|
518 | WRITE(numtime,"('-----------------------------------')") |
---|
519 | WRITE(numtime,"('Section',13x,'Elap. Time(s)',2x,'Elap. Time(%)',2x, & |
---|
520 | & 'CPU Time(s)',2x,'CPU Time(%)',2x,'CPU/Elap',1x, & |
---|
521 | & 'Max elap(%)',2x,'Min elap(%)',2x, & |
---|
522 | & 'Freq')") |
---|
523 | sl_timer_ave => sl_timer_ave_root |
---|
524 | clfmt = '((A),E15.7,2x,f6.2,5x,f12.2,5x,f6.2,5x,f7.2,2x,f12.2,4x,f6.2,2x,f9.2)' |
---|
525 | DO WHILE ( ASSOCIATED(sl_timer_ave) ) |
---|
526 | WRITE(numtime,TRIM(clfmt)) sl_timer_ave%cname(1:18), & |
---|
527 | & sl_timer_ave%tsum_clock,sl_timer_ave%tsum_clock*100.*jpnij/tot_etime, & |
---|
528 | & sl_timer_ave%tsum_cpu ,sl_timer_ave%tsum_cpu*100.*jpnij/tot_ctime , & |
---|
529 | & sl_timer_ave%tsum_cpu/sl_timer_ave%tsum_clock, & |
---|
530 | & sl_timer_ave%tmax_clock*100.*jpnij/tot_etime, & |
---|
531 | & sl_timer_ave%tmin_clock*100.*jpnij/tot_etime, & |
---|
532 | & sl_timer_ave%niter/REAL(jpnij) |
---|
533 | sl_timer_ave => sl_timer_ave%next |
---|
534 | END DO |
---|
535 | WRITE(numtime,*) |
---|
536 | ! |
---|
537 | DEALLOCATE(sl_timer_ave_root) |
---|
538 | ENDIF |
---|
539 | ! |
---|
540 | DEALLOCATE(sl_timer_glob_root) |
---|
541 | ! |
---|
542 | END SUBROUTINE waver_info |
---|
543 | |
---|
544 | |
---|
545 | SUBROUTINE wmpi_info |
---|
546 | !!---------------------------------------------------------------------- |
---|
547 | !! *** ROUTINE wmpi_time *** |
---|
548 | !! ** Purpose : compute and write a summary of MPI infos |
---|
549 | !!---------------------------------------------------------------------- |
---|
550 | ! |
---|
551 | INTEGER :: idum, icode |
---|
552 | INTEGER, ALLOCATABLE, DIMENSION(:) :: iall_rank |
---|
553 | REAL(wp) :: ztot_ratio |
---|
554 | REAL(wp) :: zmax_etime, zmax_ctime, zmax_ratio, zmin_etime, zmin_ctime, zmin_ratio |
---|
555 | REAL(wp) :: zavg_etime, zavg_ctime, zavg_ratio |
---|
556 | REAL(wp), ALLOCATABLE, DIMENSION(:) :: zall_ratio |
---|
557 | CHARACTER(LEN=128), dimension(8) :: cllignes |
---|
558 | CHARACTER(LEN=128) :: clhline, clstart_date, clfinal_date |
---|
559 | CHARACTER(LEN=2048) :: clfmt |
---|
560 | |
---|
561 | ! Gather all times |
---|
562 | ALLOCATE( zall_ratio(jpnij), iall_rank(jpnij) ) |
---|
563 | IF( narea == 1 ) THEN |
---|
564 | iall_rank(:) = (/ (idum,idum=0,jpnij-1) /) |
---|
565 | |
---|
566 | ! Compute elapse user time |
---|
567 | zavg_etime = tot_etime/REAL(jpnij,wp) |
---|
568 | zmax_etime = MAXVAL(all_etime(:)) |
---|
569 | zmin_etime = MINVAL(all_etime(:)) |
---|
570 | |
---|
571 | ! Compute CPU user time |
---|
572 | zavg_ctime = tot_ctime/REAL(jpnij,wp) |
---|
573 | zmax_ctime = MAXVAL(all_ctime(:)) |
---|
574 | zmin_ctime = MINVAL(all_ctime(:)) |
---|
575 | |
---|
576 | ! Compute cpu/elapsed ratio |
---|
577 | zall_ratio(:) = all_ctime(:) / all_etime(:) |
---|
578 | ztot_ratio = SUM(zall_ratio(:)) |
---|
579 | zavg_ratio = ztot_ratio/REAL(jpnij,wp) |
---|
580 | zmax_ratio = MAXVAL(zall_ratio(:)) |
---|
581 | zmin_ratio = MINVAL(zall_ratio(:)) |
---|
582 | |
---|
583 | ! Output Format |
---|
584 | clhline ='1x,13("-"),"|",18("-"),"|",14("-"),"|",18("-"),/,' |
---|
585 | cllignes(1)='(1x,"MPI summary report :",/,' |
---|
586 | cllignes(2)='1x,"--------------------",//,' |
---|
587 | cllignes(3)='1x,"Process Rank |"," Elapsed Time (s) |"," CPU Time (s) |"," Ratio CPU/Elapsed",/,' |
---|
588 | cllignes(4)=' (1x,i4,9x,"|",f12.3,6x,"|",f12.3,2x,"|",4x,f7.3,/),' |
---|
589 | WRITE(cllignes(4)(1:4),'(I4)') jpnij |
---|
590 | cllignes(5)='1x,"Total |",f12.3,6x,"|",F12.3,2x,"|",4x,f7.3,/,' |
---|
591 | cllignes(6)='1x,"Minimum |",f12.3,6x,"|",F12.3,2x,"|",4x,f7.3,/,' |
---|
592 | cllignes(7)='1x,"Maximum |",f12.3,6x,"|",F12.3,2x,"|",4x,f7.3,/,' |
---|
593 | cllignes(8)='1x,"Average |",f12.3,6x,"|",F12.3,2x,"|",4x,f7.3)' |
---|
594 | clfmt=TRIM(cllignes(1))// TRIM(cllignes(2))//TRIM(cllignes(3))// & |
---|
595 | & TRIM(clhline)//TRIM(cllignes(4))//TRIM(clhline)//TRIM(cllignes(5))// & |
---|
596 | & TRIM(clhline)//TRIM(cllignes(6))//TRIM(clhline)//TRIM(cllignes(7))// & |
---|
597 | & TRIM(clhline)//TRIM(cllignes(8)) |
---|
598 | WRITE(numtime, TRIM(clfmt)) & |
---|
599 | (iall_rank(idum),all_etime(idum),all_ctime(idum),zall_ratio(idum),idum=1, jpnij), & |
---|
600 | tot_etime, tot_ctime, ztot_ratio, & |
---|
601 | zmin_etime, zmin_ctime, zmin_ratio, & |
---|
602 | zmax_etime, zmax_ctime, zmax_ratio, & |
---|
603 | zavg_etime, zavg_ctime, zavg_ratio |
---|
604 | WRITE(numtime,*) |
---|
605 | END IF |
---|
606 | ! |
---|
607 | DEALLOCATE(zall_ratio, iall_rank) |
---|
608 | ! |
---|
609 | END SUBROUTINE wmpi_info |
---|
610 | |
---|
611 | |
---|
612 | SUBROUTINE timing_ini_var(cdinfo) |
---|
613 | !!---------------------------------------------------------------------- |
---|
614 | !! *** ROUTINE timing_ini_var *** |
---|
615 | !! ** Purpose : create timing structure |
---|
616 | !!---------------------------------------------------------------------- |
---|
617 | CHARACTER(len=*), INTENT(in) :: cdinfo |
---|
618 | LOGICAL :: ll_section |
---|
619 | |
---|
620 | ! |
---|
621 | IF( .NOT. ASSOCIATED(s_timer_root) ) THEN |
---|
622 | ALLOCATE(s_timer_root) |
---|
623 | s_timer_root%cname = cdinfo |
---|
624 | s_timer_root%t_cpu = 0._wp |
---|
625 | s_timer_root%t_clock = 0._wp |
---|
626 | s_timer_root%tsum_cpu = 0._wp |
---|
627 | s_timer_root%tsum_clock = 0._wp |
---|
628 | s_timer_root%tmax_cpu = 0._wp |
---|
629 | s_timer_root%tmax_clock = 0._wp |
---|
630 | s_timer_root%tmin_cpu = 0._wp |
---|
631 | s_timer_root%tmin_clock = 0._wp |
---|
632 | s_timer_root%tsub_cpu = 0._wp |
---|
633 | s_timer_root%tsub_clock = 0._wp |
---|
634 | s_timer_root%ncount = 0 |
---|
635 | s_timer_root%ncount_rate = 0 |
---|
636 | s_timer_root%ncount_max = 0 |
---|
637 | s_timer_root%niter = 0 |
---|
638 | s_timer_root%l_tdone = .FALSE. |
---|
639 | s_timer_root%next => NULL() |
---|
640 | s_timer_root%prev => NULL() |
---|
641 | s_timer => s_timer_root |
---|
642 | ! |
---|
643 | ALLOCATE(s_wrk) |
---|
644 | s_wrk => NULL() |
---|
645 | |
---|
646 | ELSE |
---|
647 | s_timer => s_timer_root |
---|
648 | ! case of already existing area (typically inside a loop) |
---|
649 | DO WHILE( ASSOCIATED(s_timer) ) |
---|
650 | IF( TRIM(s_timer%cname) .EQ. TRIM(cdinfo) ) RETURN |
---|
651 | s_timer => s_timer%next |
---|
652 | END DO |
---|
653 | |
---|
654 | ! end of the chain |
---|
655 | s_timer => s_timer_root |
---|
656 | DO WHILE( ASSOCIATED(s_timer%next) ) |
---|
657 | s_timer => s_timer%next |
---|
658 | END DO |
---|
659 | |
---|
660 | ALLOCATE(s_timer%next) |
---|
661 | s_timer%next%cname = cdinfo |
---|
662 | s_timer%next%t_cpu = 0._wp |
---|
663 | s_timer%next%t_clock = 0._wp |
---|
664 | s_timer%next%tsum_cpu = 0._wp |
---|
665 | s_timer%next%tsum_clock = 0._wp |
---|
666 | s_timer%next%tmax_cpu = 0._wp |
---|
667 | s_timer%next%tmax_clock = 0._wp |
---|
668 | s_timer%next%tmin_cpu = 0._wp |
---|
669 | s_timer%next%tmin_clock = 0._wp |
---|
670 | s_timer%next%tsub_cpu = 0._wp |
---|
671 | s_timer%next%tsub_clock = 0._wp |
---|
672 | s_timer%next%ncount = 0 |
---|
673 | s_timer%next%ncount_rate = 0 |
---|
674 | s_timer%next%ncount_max = 0 |
---|
675 | s_timer%next%niter = 0 |
---|
676 | s_timer%next%l_tdone = .FALSE. |
---|
677 | s_timer%next%parent_section => NULL() |
---|
678 | s_timer%next%prev => s_timer |
---|
679 | s_timer%next%next => NULL() |
---|
680 | s_timer => s_timer%next |
---|
681 | |
---|
682 | ! are we inside a section |
---|
683 | s_wrk => s_timer%prev |
---|
684 | ll_section = .FALSE. |
---|
685 | DO WHILE( ASSOCIATED(s_wrk) .AND. .NOT. ll_section ) |
---|
686 | IF( .NOT. s_wrk%l_tdone ) THEN |
---|
687 | ll_section = .TRUE. |
---|
688 | s_timer%parent_section => s_wrk |
---|
689 | ENDIF |
---|
690 | s_wrk => s_wrk%prev |
---|
691 | END DO |
---|
692 | ENDIF |
---|
693 | ! |
---|
694 | END SUBROUTINE timing_ini_var |
---|
695 | |
---|
696 | |
---|
697 | SUBROUTINE timing_reset |
---|
698 | !!---------------------------------------------------------------------- |
---|
699 | !! *** ROUTINE timing_reset *** |
---|
700 | !! ** Purpose : go to root of timing tree |
---|
701 | !!---------------------------------------------------------------------- |
---|
702 | l_initdone = .TRUE. |
---|
703 | ! IF(lwp) WRITE(numout,*) |
---|
704 | ! IF(lwp) WRITE(numout,*) 'timing_reset : instrumented routines for timing' |
---|
705 | ! IF(lwp) WRITE(numout,*) '~~~~~~~~~~~~' |
---|
706 | ! CALL timing_list(s_timer_root) |
---|
707 | ! WRITE(numout,*) |
---|
708 | ! |
---|
709 | END SUBROUTINE timing_reset |
---|
710 | |
---|
711 | |
---|
712 | RECURSIVE SUBROUTINE timing_list(ptr) |
---|
713 | |
---|
714 | TYPE(timer), POINTER, INTENT(inout) :: ptr |
---|
715 | ! |
---|
716 | IF( ASSOCIATED(ptr%next) ) CALL timing_list(ptr%next) |
---|
717 | IF(lwp) WRITE(numout,*)' ', ptr%cname |
---|
718 | ! |
---|
719 | END SUBROUTINE timing_list |
---|
720 | |
---|
721 | |
---|
722 | SUBROUTINE insert(sd_current, sd_root ,sd_ptr) |
---|
723 | !!---------------------------------------------------------------------- |
---|
724 | !! *** ROUTINE insert *** |
---|
725 | !! ** Purpose : insert an element in timer structure |
---|
726 | !!---------------------------------------------------------------------- |
---|
727 | TYPE(timer), POINTER, INTENT(inout) :: sd_current, sd_root, sd_ptr |
---|
728 | ! |
---|
729 | |
---|
730 | IF( ASSOCIATED( sd_current, sd_root ) ) THEN |
---|
731 | ! If our current element is the root element then |
---|
732 | ! replace it with the one being inserted |
---|
733 | sd_root => sd_ptr |
---|
734 | ELSE |
---|
735 | sd_current%prev%next => sd_ptr |
---|
736 | END IF |
---|
737 | sd_ptr%next => sd_current |
---|
738 | sd_ptr%prev => sd_current%prev |
---|
739 | sd_current%prev => sd_ptr |
---|
740 | ! Nullify the pointer to the new element now that it is held |
---|
741 | ! within the list. If we don't do this then a subsequent call |
---|
742 | ! to ALLOCATE memory to this pointer will fail. |
---|
743 | sd_ptr => NULL() |
---|
744 | ! |
---|
745 | END SUBROUTINE insert |
---|
746 | |
---|
747 | |
---|
748 | SUBROUTINE suppress(sd_ptr) |
---|
749 | !!---------------------------------------------------------------------- |
---|
750 | !! *** ROUTINE suppress *** |
---|
751 | !! ** Purpose : supress an element in timer structure |
---|
752 | !!---------------------------------------------------------------------- |
---|
753 | TYPE(timer), POINTER, INTENT(inout) :: sd_ptr |
---|
754 | ! |
---|
755 | TYPE(timer), POINTER :: sl_temp |
---|
756 | |
---|
757 | sl_temp => sd_ptr |
---|
758 | sd_ptr => sd_ptr%next |
---|
759 | IF ( ASSOCIATED(sl_temp%next) ) sl_temp%next%prev => sl_temp%prev |
---|
760 | DEALLOCATE(sl_temp) |
---|
761 | sl_temp => NULL() |
---|
762 | ! |
---|
763 | END SUBROUTINE suppress |
---|
764 | |
---|
765 | !!===================================================================== |
---|
766 | END MODULE timing |
---|