1 | !************************************************************************ |
---|
2 | ! ! |
---|
3 | ! Module : MPI_Times (Version : 3.4) ! |
---|
4 | ! ! |
---|
5 | ! Goal : Measure and print on stdout CPU and Elapsed user times ! |
---|
6 | ! and the ratio CPU/Elapsed of MPI programs. ! |
---|
7 | ! ! |
---|
8 | ! Usage : Insert a "USE MPI_TIMES" instruction inside each MPI ! |
---|
9 | ! Fortran program unit to instrument, then make calls to ! |
---|
10 | ! the MPI_TIME subroutine as shown in the example below : ! |
---|
11 | ! ! |
---|
12 | ! PROGRAM foo ! |
---|
13 | ! USE MPI_TIMES ! |
---|
14 | ! ... ! |
---|
15 | ! CALL MPI_INIT(ierr) ! |
---|
16 | ! ! |
---|
17 | ! !... Set elapsed and CPU user times ! |
---|
18 | ! CALL MPI_TIME(0) ! |
---|
19 | ! ... ! |
---|
20 | ! ... Instruction block to instrument ... ! |
---|
21 | ! ... ! |
---|
22 | ! !... Measure and print elapsed and CPU user times ! |
---|
23 | ! CALL MPI_TIME(1) ! |
---|
24 | ! ! |
---|
25 | ! CALL MPI_FINALIZE(ierr) ! |
---|
26 | ! END PROGRAM foo ! |
---|
27 | ! ! |
---|
28 | ! Notes : 1) Standard Fortran 95 compiler has to be used to compile ! |
---|
29 | ! MPI_TIMES module. ! |
---|
30 | ! ! |
---|
31 | ! 2) MPI_TIME subroutine is collective over all processes ! |
---|
32 | ! of MPI_COMM_WORLD communicator. ! |
---|
33 | ! ! |
---|
34 | ! 3) On some machines, default CPU user time may also ! |
---|
35 | ! include MPI wait times on communication to complete. ! |
---|
36 | ! ! |
---|
37 | ! 4) If Te and Tc respectively denote the elapsed and CPU ! |
---|
38 | ! user times, then the ratio R=Tc/Te > 0 may lead to ! |
---|
39 | ! different interpretations depending on R<1 or R=1 ! |
---|
40 | ! or R>1. ! |
---|
41 | ! ! |
---|
42 | ! a) If R<1, then wait time on communications and/or ! |
---|
43 | ! system load could be the reason of such performance. ! |
---|
44 | ! ! |
---|
45 | ! b) If R is close to 1 and no hybrid parallelization ! |
---|
46 | ! (e.g. MPI + OpenMP) is implemented, then wait time ! |
---|
47 | ! on communications and/or system load can be ruled out ! |
---|
48 | ! (except as per note 3), and one can then assume ! |
---|
49 | ! that 99% of the time, processes are busy performing ! |
---|
50 | ! useful computations on dedicated processors. ! |
---|
51 | ! ! |
---|
52 | ! c) If R>1, then the process might have been multi-threaded ! |
---|
53 | ! during execution as what would happen in hybrid ! |
---|
54 | ! parallelization (e.g. MPI + OpenMP) on cluster ! |
---|
55 | ! of SMP nodes. In such case, R may reflect the speed ! |
---|
56 | ! up of the process. ! |
---|
57 | ! ! |
---|
58 | ! 5) On IBM SP machine, do not compile MPI_TIMES module ! |
---|
59 | ! using "-qrealsize=8" switch. This will transform ! |
---|
60 | ! MPI_WTIME function type from 8 to 16 bytes floating ! |
---|
61 | ! point precision. ! |
---|
62 | ! ! |
---|
63 | ! 6) No special switch is needed to compile this file. ! |
---|
64 | ! The following should be sufficient on many platforms: ! |
---|
65 | ! f90 -c -Ipath_to_MPI_header_file MPI_Time.f90 ! |
---|
66 | ! ! |
---|
67 | ! Output : At normal termination of the MPI program, process of ! |
---|
68 | ! rank 0 prints on stdout elapsed time, cpu time and ratio ! |
---|
69 | ! cpu/elapsed of all MPI_COMM_WORLD processes. ! |
---|
70 | ! The following is an output example from an execution ! |
---|
71 | ! with 4 processes: ! |
---|
72 | ! ! |
---|
73 | !............ ! |
---|
74 | ! MPI_Time (release 3.4) summary report: ! |
---|
75 | ! ! |
---|
76 | ! Process Rank | Elapsed Time (s) | CPU Time (s) | Ratio CPU/Elapsed ! |
---|
77 | ! -------------|------------------|--------------|------------------ ! |
---|
78 | ! 0 | 427.098 | 270.393 | 0.633 ! |
---|
79 | ! 1 | 427.099 | 279.818 | 0.655 ! |
---|
80 | ! 2 | 427.099 | 276.064 | 0.646 ! |
---|
81 | ! 3 | 427.182 | 271.001 | 0.634 ! |
---|
82 | ! -------------|------------------|--------------|------------------ ! |
---|
83 | ! Total | 1708.477 | 1097.275 | 2.569 ! |
---|
84 | ! -------------|------------------|--------------|------------------ ! |
---|
85 | ! Minimum | 427.098 | 270.393 | 0.633 ! |
---|
86 | ! -------------|------------------|--------------|------------------ ! |
---|
87 | ! Maximum | 427.182 | 279.818 | 0.655 ! |
---|
88 | ! -------------|------------------|--------------|------------------ ! |
---|
89 | ! Average | 427.119 | 274.319 | 0.642 ! |
---|
90 | ! -------------|------------------|--------------|------------------ ! |
---|
91 | ! ! |
---|
92 | ! MPI_Time started on 13/11/2002 at 16:54:59 MET +01:00 from GMT ! |
---|
93 | ! MPI_Time ended on 13/11/2002 at 17:02:06 MET +01:00 from GMT ! |
---|
94 | !............ ! |
---|
95 | ! ! |
---|
96 | ! ! |
---|
97 | !************************************************************************ |
---|
98 | |
---|
MODULE MPI_TIMES
   !... Measures the elapsed and CPU user times spent by every process of
   !... MPI_COMM_WORLD between a call to MPI_TIME(0) and a call to MPI_TIME(1),
   !... and lets the process of rank 0 print a summary table.
   IMPLICIT NONE
   PRIVATE

   !... Shared variables (state kept between the MPI_TIME(0) and MPI_TIME(1) calls)
   !    p            : real kind of all timings. Values of this kind are sent as
   !                   MPI_DOUBLE_PRECISION, so p is assumed to be the
   !                   DOUBLE PRECISION kind -- TODO confirm on exotic platforms.
   !    report_unit  : unit receiving the report, warnings and errors.
   !                   NOTE(review): the file header announces "stdout" but the
   !                   code has always written to unit 0; destination kept as is.
   !    Eoverhead    : measured cost of one MPI_WTIME call
   !    Coverhead    : measured cost of one CPU_TIME call
   !    Etime, Ctime : (1) = start stamp, (2) = measured duration
   !    date, time   : (1) = start, (2) = end, as returned by DATE_AND_TIME
   INTEGER, PARAMETER                :: p = SELECTED_REAL_KIND(12)
   INTEGER, PARAMETER                :: report_unit = 0
   REAL(kind=p)                      :: Eoverhead, Coverhead
   REAL(kind=p), DIMENSION(2)        :: Etime, Ctime
   INTEGER, DIMENSION(8)             :: values
   CHARACTER(LEN=8), DIMENSION(2)    :: date
   CHARACTER(LEN=10), DIMENSION(2)   :: time
   CHARACTER(LEN=5)                  :: zone

   PUBLIC :: MPI_Time

CONTAINS

   SUBROUTINE MPI_Time(flag)
      !... Start (flag=0) or stop and report (flag=1) the timers.
      !
      !    flag = 0 : estimate the clock overheads, record the start date on
      !               rank 0, then start the elapsed and CPU counters.
      !    flag = 1 : stop the counters, gather every process' times on rank 0
      !               and let rank 0 print the summary table.
      !               Collective over MPI_COMM_WORLD (uses MPI_GATHER).
      !    other    : print an error message and do nothing else.
      IMPLICIT NONE

      !... MPI Header files
      INCLUDE "mpif.h"

      !... Input dummy parameter
      INTEGER, INTENT(IN) :: flag

      !... Format tail shared by every numeric row of the table
      CHARACTER(LEN=*), PARAMETER :: row_tail = &
         ',F12.3,6(" "),"|",F12.3,2(" "),"|",4(" "),F7.3,/'

      !... Local variables
      INTEGER                                    :: rank, nb_procs, nb_recv, i, code
      INTEGER, ALLOCATABLE, DIMENSION(:)         :: All_Rank
      REAL(KIND=p), ALLOCATABLE, DIMENSION(:)    :: All_Etime, All_Ctime, All_Ratio
      REAL(KIND=p) :: Total_Etime, Total_Ctime, Total_Ratio, &
                      Max_Etime,   Max_Ctime,   Max_Ratio,   &
                      Min_Etime,   Min_Ctime,   Min_Ratio,   &
                      Avg_Etime,   Avg_Ctime,   Avg_Ratio,   &
                      dummy
      CHARACTER(LEN=128), DIMENSION(8) :: lignes
      CHARACTER(LEN=128)               :: hline, start_date, final_date
      CHARACTER(LEN=2048)              :: fmt
      CHARACTER(LEN=16)                :: nb_rows

      SELECT CASE(flag)
      CASE(0)

         !... Compute clock overhead: two back-to-back calls of each clock
         Eoverhead = MPI_WTIME()
         Eoverhead = MPI_WTIME() - Eoverhead
         CALL CPU_TIME(dummy)
         CALL CPU_TIME(Coverhead)
         !    CPU_TIME returns a negative value when no CPU clock is available
         IF ( dummy < 0.0_p ) &
            WRITE(report_unit,*) "Warning, MPI_TIME: CPU user time is not available on this machine."
         Coverhead = Coverhead - dummy
         CALL MPI_COMM_RANK(MPI_COMM_WORLD, rank, code)
         !... Start of timings on "date & time" (only rank 0 prints it later)
         IF ( rank == 0 ) &
            CALL DATE_AND_TIME(date(1),time(1),zone,values)
         !... Start elapsed and CPU time counters
         Etime(1) = MPI_WTIME()
         CALL CPU_TIME(Ctime(1))

      CASE(1)
         !... Final CPU and elapsed times.
         !    The elapsed interval also spans the CPU_TIME calls made right
         !    after the start stamp and right before the stop stamp, which is
         !    presumably why Coverhead is removed from the elapsed time too.
         CALL CPU_TIME(Ctime(2))
         Etime(2) = MPI_WTIME() - Etime(1) - Eoverhead - Coverhead
         Ctime(2) = Ctime(2) - Ctime(1) - Coverhead

         !... Gather all times
         CALL MPI_COMM_RANK(MPI_COMM_WORLD, rank, code)
         CALL MPI_COMM_SIZE(MPI_COMM_WORLD, nb_procs, code)
         !    The receive buffers only matter on the root, but an unallocated
         !    array must never be passed as an actual argument: every process
         !    allocates them, with zero size everywhere but on rank 0.
         nb_recv = MERGE(nb_procs, 0, rank == 0)
         ALLOCATE(All_Etime(nb_recv), &
                  All_Ctime(nb_recv), &
                  All_Ratio(nb_recv), &
                  All_Rank(nb_recv) )
         CALL MPI_GATHER(Etime(2),  1, MPI_DOUBLE_PRECISION, &
                         All_Etime, 1, MPI_DOUBLE_PRECISION, &
                         0, MPI_COMM_WORLD, code)
         CALL MPI_GATHER(Ctime(2),  1, MPI_DOUBLE_PRECISION, &
                         All_Ctime, 1, MPI_DOUBLE_PRECISION, &
                         0, MPI_COMM_WORLD, code)

         IF ( rank == 0 ) THEN
            All_Rank(:) = (/ (i,i=0,nb_procs-1) /)

            !... Compute elapse user time
            Total_Etime = SUM(All_Etime(:))
            Avg_Etime   = Total_Etime/REAL(nb_procs,KIND=p)
            Max_Etime   = MAXVAL(All_Etime(:))
            Min_Etime   = MINVAL(All_Etime(:))
            IF ( Min_Etime <= 0.0_p ) THEN
               WRITE(report_unit,*) "Warning, MPI_TIME: Measured elapsed user time seems to be too short"
               WRITE(report_unit,*) "compared to the clock precision. Timings could be erroneous."
            END IF

            !... Compute CPU user time
            Total_Ctime = SUM(All_Ctime(:))
            Avg_Ctime   = Total_Ctime/REAL(nb_procs,KIND=p)
            Max_Ctime   = MAXVAL(All_Ctime(:))
            Min_Ctime   = MINVAL(All_Ctime(:))
            IF ( Min_Ctime <= 0.0_p ) THEN
               WRITE(report_unit,*) "Warning, MPI_TIME: Measured CPU user time seems to be too short"
               WRITE(report_unit,*) "compared to the clock precision. Timings could be erroneous."
            END IF

            !... Compute cpu/elapsed ratio.
            !    A non-positive elapsed time (already warned about above) would
            !    make the division meaningless; report a ratio of 0 instead.
            WHERE ( All_Etime(:) > 0.0_p )
               All_Ratio(:) = All_Ctime(:) / All_Etime(:)
            ELSEWHERE
               All_Ratio(:) = 0.0_p
            END WHERE
            Total_Ratio = SUM(All_Ratio(:))
            Avg_Ratio   = Total_Ratio/REAL(nb_procs,KIND=p)
            Max_Ratio   = MAXVAL(All_Ratio(:))
            Min_Ratio   = MINVAL(All_Ratio(:))

            !... End of timings on "date & time"
            CALL DATE_AND_TIME(date(2),time(2),zone,values)

            !... Output Format
            !    Column widths are 13 | 18 | 14 | 18, as drawn by hline; the
            !    row labels are padded to 13 characters to match.
            hline    ='10X,13("-"),"|",18("-"),"|",14("-"),"|",18("-"),/,'
            lignes(1)='(//,10X,"(C) May 2006, LOCEAN - XLV.",/,'
            lignes(2)='10X,"MPI_Time (release 3.4) summary report:",//,'
            lignes(3)='10X,"Process Rank |"," Elapsed Time (s) |"," CPU Time (s) |"," Ratio CPU/Elapsed",/,'
            !    One table row per process: the repeat count is put in front of
            !    the parenthesised group (I0 never overflows its field).
            WRITE(nb_rows,'(I0)') nb_procs
            lignes(4)=TRIM(nb_rows)//'(10X,I4,9(" "),"|"'//row_tail//'),'
            lignes(5)='10X,"Total        |"'//row_tail//','
            lignes(6)='10X,"Minimum      |"'//row_tail//','
            lignes(7)='10X,"Maximum      |"'//row_tail//','
            lignes(8)='10X,"Average      |"'//row_tail//','
            start_date='/,10X,"MPI_Time started on ",2(A2,"/"),A4," at ",2(A2,":"),A2,'// &
                       '" MET ",A3,":",A2," from GMT",/,'
            final_date='10X, "MPI_Time ended on ",2(A2,"/"),A4," at ",2(A2,":"),A2,'// &
                       '" MET ",A3,":",A2," from GMT",//)'
            fmt=TRIM(lignes(1))//TRIM(lignes(2))//TRIM(lignes(3))//           &
              & TRIM(hline)//TRIM(lignes(4))//TRIM(hline)//TRIM(lignes(5))//  &
              & TRIM(hline)//TRIM(lignes(6))//TRIM(hline)//TRIM(lignes(7))//  &
              & TRIM(hline)//TRIM(lignes(8))//TRIM(hline)//TRIM(start_date)// &
              & TRIM(final_date)

            !    DATE_AND_TIME strings: date = ccyymmdd, time = hhmmss.sss,
            !    zone = +hhmm; they are sliced into dd/mm/yyyy, hh:mm:ss, +hh:mm.
            WRITE(report_unit, TRIM(fmt)) &
               (All_Rank(i),All_Etime(i),All_Ctime(i),All_Ratio(i),i=1, nb_procs), &
               Total_Etime,  Total_Ctime,  Total_Ratio,  &
               Min_Etime,    Min_Ctime,    Min_Ratio,    &
               Max_Etime,    Max_Ctime,    Max_Ratio,    &
               Avg_Etime,    Avg_Ctime,    Avg_Ratio,    &
               date(1)(7:8), date(1)(5:6), date(1)(1:4), &
               time(1)(1:2), time(1)(3:4), time(1)(5:6), &
               zone(1:3),    zone(4:5),                  &
               date(2)(7:8), date(2)(5:6), date(2)(1:4), &
               time(2)(1:2), time(2)(3:4), time(2)(5:6), &
               zone(1:3),    zone(4:5)
         END IF

         !... Buffers were allocated on every process, release them everywhere
         DEALLOCATE(All_Etime, All_Ctime, All_Ratio, All_Rank)

      CASE DEFAULT
         WRITE(report_unit,*) "Error, MPI_TIME: Invalid input parameter"

      END SELECT
   END SUBROUTINE MPI_Time
END MODULE MPI_TIMES
---|