[11] | 1 | !************************************************************************ |
---|
| 2 | ! ! |
---|
| 3 | ! Module : MPI_Times (Version : 3.4) ! |
---|
| 4 | ! ! |
---|
| 5 | ! Goal : Measure and print on stdout CPU and Elapsed user times ! |
---|
| 6 | ! and the ratio CPU/Elapsed of MPI programs. ! |
---|
| 7 | ! ! |
---|
| 8 | ! Usage : Insert a "USE MPI_TIMES" instruction inside each MPI ! |
---|
| 9 | ! Fortran program unit to instrument, then make calls to ! |
---|
| 10 | ! the MPI_TIME subroutine as shown in the example below : ! |
---|
| 11 | ! ! |
---|
| 12 | ! PROGRAM foo ! |
---|
| 13 | ! USE MPI_TIMES ! |
---|
| 14 | ! ... ! |
---|
| 15 | ! CALL MPI_INIT(ierr) ! |
---|
| 16 | ! ! |
---|
| 17 | ! !... Set elapsed and CPU user times ! |
---|
| 18 | ! CALL MPI_TIME(0) ! |
---|
| 19 | ! ... ! |
---|
| 20 | ! ... Instruction block to instrument ... ! |
---|
| 21 | ! ... ! |
---|
| 22 | ! !... Measure and print elapsed and CPU user times ! |
---|
| 23 | ! CALL MPI_TIME(1) ! |
---|
| 24 | ! ! |
---|
| 25 | ! CALL MPI_FINALIZE(ierr) ! |
---|
| 26 | ! END PROGRAM foo ! |
---|
| 27 | ! ! |
---|
| 28 | ! Notes : 1) Standard Fortran 95 compiler has to be used to compile ! |
---|
| 29 | ! MPI_TIMES module. ! |
---|
| 30 | ! ! |
---|
| 31 | ! 2) MPI_TIME subroutine is collective over all processes ! |
---|
| 32 | ! of MPI_COMM_WORLD communicator. ! |
---|
| 33 | ! ! |
---|
| 34 | ! 3) On some machines, default CPU user time may also ! |
---|
| 35 | ! include MPI wait times on communication to complete. ! |
---|
| 36 | ! ! |
---|
| 37 | ! 4) If Te and Tc respectively denote the elapsed and CPU ! |
---|
| 38 | ! user times, then the ratio R=Tc/Te > 0 may lead to ! |
---|
| 39 | ! different interpretations depending on R<1 or R=1 ! |
---|
| 40 | ! or R>1. ! |
---|
| 41 | ! ! |
---|
| 42 | ! a) If R<1, then wait time on communications and/or ! |
---|
| 43 | ! system load could be the reason of such performance. ! |
---|
| 44 | ! ! |
---|
| 45 | ! b) If R is close to 1 and no hybrid parallelization ! |
---|
| 46 | ! (e.g. MPI + OpenMP) is implemented, then wait time ! |
---|
| 47 | ! on communications and/or system load are far to be ! |
---|
| 48 | ! considered unless point 3) and then, one can assume ! |
---|
| 49 | ! that 99% of the time, processes are busy performing ! |
---|
| 50 | ! useful computations on dedicated processors. ! |
---|
| 51 | ! ! |
---|
| 52 | ! c) If R>1, then process might have been multi-threaded ! |
---|
| 53 | ! during execution as what would happen in hybrid ! |
---|
| 54 | ! parallelization (e.g. MPI + OpenMP) on cluster ! |
---|
| 55 | ! of SMP nodes. In such case, R may reflect the speed ! |
---|
| 56 | ! up of the process. ! |
---|
| 57 | ! ! |
---|
| 58 | ! 5) On IBM SP machine, do not compile MPI_TIMES module ! |
---|
| 59 | ! using "-qrealsize=8" switch. This will transform ! |
---|
| 60 | ! MPI_WTIME function type from 8 to 16 bytes floating ! |
---|
| 61 | ! point precision. ! |
---|
| 62 | ! ! |
---|
| 63 | ! 6) No special switch is needed to compile this file. ! |
---|
| 64 | ! The following should be sufficient on many platforms: ! |
---|
| 65 | ! f90 -c -Ipath_to_MPI_header_file MPI_Time.f90 ! |
---|
| 66 | ! ! |
---|
| 67 | ! Output : At normal termination of the MPI program, process of ! |
---|
| 68 | ! rank 0 prints on stdout elapsed time, cpu time and ratio ! |
---|
| 69 | ! cpu/elapsed of all MPI_COMM_WORLD processes. ! |
---|
| 70 | ! The following is an output example from an execution ! |
---|
| 71 | ! with 4 processes: ! |
---|
| 72 | ! ! |
---|
| 73 | !............ ! |
---|
| 74 | ! MPI_Time (release 3.4) summary report: ! |
---|
| 75 | ! ! |
---|
| 76 | ! Process Rank | Elapsed Time (s) | CPU Time (s) | Ratio CPU/Elapsed ! |
---|
| 77 | ! -------------|------------------|--------------|------------------ ! |
---|
| 78 | ! 0 | 427.098 | 270.393 | 0.633 ! |
---|
| 79 | ! 1 | 427.099 | 279.818 | 0.655 ! |
---|
| 80 | ! 2 | 427.099 | 276.064 | 0.646 ! |
---|
| 81 | ! 3 | 427.182 | 271.001 | 0.634 ! |
---|
| 82 | ! -------------|------------------|--------------|------------------ ! |
---|
| 83 | ! Total | 1708.477 | 1097.275 | 2.569 ! |
---|
| 84 | ! -------------|------------------|--------------|------------------ ! |
---|
| 85 | ! Minimum | 427.098 | 270.393 | 0.633 ! |
---|
| 86 | ! -------------|------------------|--------------|------------------ ! |
---|
| 87 | ! Maximum | 427.182 | 279.818 | 0.655 ! |
---|
| 88 | ! -------------|------------------|--------------|------------------ ! |
---|
| 89 | ! Average | 427.119 | 274.319 | 0.642 ! |
---|
| 90 | ! -------------|------------------|--------------|------------------ ! |
---|
| 91 | ! ! |
---|
| 92 | ! MPI_Time started on 13/11/2002 at 16:54:59 MET +01:00 from GMT ! |
---|
| 93 | ! MPI_Time ended on 13/11/2002 at 17:02:06 MET +01:00 from GMT ! |
---|
| 94 | !............ ! |
---|
| 95 | ! ! |
---|
| 96 | ! ! |
---|
| 97 | !************************************************************************ |
---|
| 98 | |
---|
MODULE MPI_TIMES
  !
  ! Purpose : measure, per MPI process, the elapsed (wall-clock) and CPU
  !           times spent between a call to MPI_Time(0) and a call to
  !           MPI_Time(1), then let rank 0 print a summary table (per-rank
  !           values, total, minimum, maximum, average) together with the
  !           start and end dates.
  !
  ! Public entry point : MPI_Time(flag)
  !
  IMPLICIT NONE
  PRIVATE

  !... Real kind used for all timings.
  !    NOTE(review): values of this kind are exchanged as
  !    MPI_DOUBLE_PRECISION and assigned from MPI_WTIME(); this assumes
  !    SELECTED_REAL_KIND(12) is the double precision kind - confirm on
  !    exotic platforms / compiler switches.
  INTEGER, PARAMETER :: p = SELECTED_REAL_KIND(12)

  !... Unit used for warnings and for the summary report.
  !    NOTE(review): the file header says the report goes to stdout, but
  !    the code writes to unit 0 - confirm which one is intended.
  INTEGER, PARAMETER :: report_unit = 0

  !... Shared state, set by MPI_Time(0) and read back by MPI_Time(1).
  !    SAVE guarantees the values persist between the two calls even when
  !    no program unit that uses the module is active in between.
  REAL(kind=p), SAVE                      :: Eoverhead, Coverhead
  REAL(kind=p), DIMENSION(2), SAVE        :: Etime, Ctime
  INTEGER, DIMENSION(8), SAVE             :: values
  CHARACTER(LEN=8), DIMENSION(2), SAVE    :: date
  CHARACTER(LEN=10), DIMENSION(2), SAVE   :: time
  CHARACTER(LEN=5), SAVE                  :: zone

  PUBLIC :: MPI_Time

CONTAINS

  SUBROUTINE MPI_Time(flag)
    !
    ! Start (flag = 0) or stop-and-report (flag = 1) the timers.
    !
    !   flag = 0 : estimate the cost of the clock calls, record the start
    !              date on rank 0 and start the elapsed and CPU counters.
    !   flag = 1 : stop the counters, gather every process' timings on
    !              rank 0 of MPI_COMM_WORLD and print the summary there.
    !              This branch calls MPI_GATHER and is therefore
    !              collective over MPI_COMM_WORLD.
    !   other    : an error message is written and nothing else happens.
    !
    IMPLICIT NONE

    !... MPI Header files
    INCLUDE "mpif.h"

    !... Input dummy parameter
    INTEGER, INTENT(IN) :: flag

    !... Local variables
    INTEGER :: rank, nb_procs, i, code, n_recv
    INTEGER, ALLOCATABLE, DIMENSION(:)      :: All_Rank
    REAL(KIND=p), ALLOCATABLE, DIMENSION(:) :: All_Etime, All_Ctime, All_Ratio
    REAL(KIND=p) :: Total_Etime, Total_Ctime, Total_Ratio, &
                    Max_Etime,   Max_Ctime,   Max_Ratio,   &
                    Min_Etime,   Min_Ctime,   Min_Ratio,   &
                    Avg_Etime,   Avg_Ctime,   Avg_Ratio,   &
                    dummy
    CHARACTER(LEN=128), DIMENSION(8) :: lignes
    CHARACTER(LEN=128)  :: hline, start_date, final_date
    CHARACTER(LEN=2048) :: fmt

    SELECT CASE(flag)
    CASE(0)

       !... Compute clock overhead: time between two back-to-back clock reads
       Eoverhead = MPI_WTIME()
       Eoverhead = MPI_WTIME() - Eoverhead
       CALL CPU_TIME(dummy)
       CALL CPU_TIME(Coverhead)
       IF (dummy < 0.0_p) &
          WRITE(report_unit,*) "Warning, MPI_TIME: CPU user time is not available on this machine."
       Coverhead = Coverhead - dummy
       CALL MPI_COMM_RANK(MPI_COMM_WORLD, rank, code)

       !... Start of timings on "date & time" (only rank 0 prints it later)
       IF ( rank == 0 ) &
          CALL DATE_AND_TIME(date(1),time(1),zone,values)

       !... Start elapsed and CPU time counters
       Etime(1) = MPI_WTIME()
       CALL CPU_TIME(Ctime(1))

    CASE(1)

       !... Final CPU and elapsed times, corrected for the clock overheads
       CALL CPU_TIME(Ctime(2))
       Etime(2) = MPI_WTIME() - Etime(1) - Eoverhead - Coverhead
       Ctime(2) = Ctime(2) - Ctime(1) - Coverhead

       !... Gather all times on rank 0
       CALL MPI_COMM_RANK(MPI_COMM_WORLD, rank, code)
       CALL MPI_COMM_SIZE(MPI_COMM_WORLD, nb_procs, code)

       !    The receive buffers are passed to MPI_GATHER on every rank, so
       !    they must be allocated everywhere: an unallocated array is not
       !    a valid actual argument. Non-root ranks only need a dummy
       !    1-element buffer.
       IF ( rank == 0 ) THEN
          n_recv = nb_procs
       ELSE
          n_recv = 1
       END IF
       ALLOCATE(All_Etime(n_recv), All_Ctime(n_recv))

       CALL MPI_GATHER(Etime(2), 1, MPI_DOUBLE_PRECISION, &
                       All_Etime, 1, MPI_DOUBLE_PRECISION, &
                       0, MPI_COMM_WORLD, code)
       CALL MPI_GATHER(Ctime(2), 1, MPI_DOUBLE_PRECISION, &
                       All_Ctime, 1, MPI_DOUBLE_PRECISION, &
                       0, MPI_COMM_WORLD, code)

       IF ( rank == 0 ) THEN
          ALLOCATE(All_Ratio(nb_procs), All_Rank(nb_procs))
          All_Rank(:) = (/ (i,i=0,nb_procs-1) /)

          !... Compute elapse user time
          Total_Etime = SUM(All_Etime(:))
          Avg_Etime   = Total_Etime/REAL(nb_procs,KIND=p)
          Max_Etime   = MAXVAL(All_Etime(:))
          Min_Etime   = MINVAL(All_Etime(:))
          IF( Min_Etime <= 0.0_p ) THEN
             WRITE(report_unit,*) "Warning, MPI_TIME: Measured elapsed user time seems to be too short"
             WRITE(report_unit,*) "compared to the clock precision. Timings could be erroneous."
          END IF

          !... Compute CPU user time
          Total_Ctime = SUM(All_Ctime(:))
          Avg_Ctime   = Total_Ctime/REAL(nb_procs,KIND=p)
          Max_Ctime   = MAXVAL(All_Ctime(:))
          Min_Ctime   = MINVAL(All_Ctime(:))
          IF( Min_Ctime <= 0.0_p ) THEN
             WRITE(report_unit,*) "Warning, MPI_TIME: Measured CPU user time seems to be too short"
             WRITE(report_unit,*) "compared to the clock precision. Timings could be erroneous."
          END IF

          !... Compute cpu/elapsed ratio.
          !    A non-positive elapsed time (already warned about above)
          !    would give a division by zero or a meaningless ratio, so
          !    the ratio is reported as 0 for such processes.
          WHERE ( All_Etime(:) > 0.0_p )
             All_Ratio(:) = All_Ctime(:) / All_Etime(:)
          ELSEWHERE
             All_Ratio(:) = 0.0_p
          END WHERE
          Total_Ratio = SUM(All_Ratio(:))
          Avg_Ratio   = Total_Ratio/REAL(nb_procs,KIND=p)
          Max_Ratio   = MAXVAL(All_Ratio(:))
          Min_Ratio   = MINVAL(All_Ratio(:))

          !... End of timings on "date & time"
          CALL DATE_AND_TIME(date(2),time(2),zone,values)

          !... Output Format, assembled piece by piece into one format string
          hline ='10X,13("-"),"|",18("-"),"|",14("-"),"|",18("-"),/,'
          lignes(1)='(//,10X,"(C) May 2006, LOCEAN - XLV.",/,'
          lignes(2)='10X,"MPI_Time (release 3.4) summary report:",//,'
          lignes(3)='10X,"Process Rank |"," Elapsed Time (s) |"," CPU Time (s) |"," Ratio CPU/Elapsed",/,'
          !    The 8 leading blanks of lignes(4) are overwritten with the
          !    repeat count (number of per-rank rows) just below.
          lignes(4)='        (10X,I4,9(" "),"|",F12.3,6(" "),"|",F12.3,2(" "),"|",4(" "),F7.3,/),'
          WRITE(lignes(4)(1:8),'(I8)') nb_procs
          !    Labels are padded to the 13-character width of the rank
          !    column so that the "|" separators line up with hline.
          lignes(5)='10X,"Total        |",F12.3,6(" "),"|",F12.3,2(" "),"|",4(" "),F7.3,/,'
          lignes(6)='10X,"Minimum      |",F12.3,6(" "),"|",F12.3,2(" "),"|",4(" "),F7.3,/,'
          lignes(7)='10X,"Maximum      |",F12.3,6(" "),"|",F12.3,2(" "),"|",4(" "),F7.3,/,'
          lignes(8)='10X,"Average      |",F12.3,6(" "),"|",F12.3,2(" "),"|",4(" "),F7.3,/,'
          start_date='/,10X,"MPI_Time started on ",2(A2,"/"),A4," at ",2(A2,":"),A2," MET ",A3,":",A2," from GMT",/,'
          final_date='10X, "MPI_Time ended on ",2(A2,"/"),A4," at ",2(A2,":"),A2," MET ",A3,":",A2," from GMT",//)'
          fmt=TRIM(lignes(1))//TRIM(lignes(2))//TRIM(lignes(3))//           &
            & TRIM(hline)//TRIM(lignes(4))//TRIM(hline)//TRIM(lignes(5))//  &
            & TRIM(hline)//TRIM(lignes(6))//TRIM(hline)//TRIM(lignes(7))//  &
            & TRIM(hline)//TRIM(lignes(8))//TRIM(hline)//TRIM(start_date)// &
            & TRIM(final_date)

          !... Print the report: one row per rank, the four summary rows,
          !    then start and end dates as dd/mm/yyyy hh:mm:ss + zone offset
          WRITE(report_unit, TRIM(fmt)) &
             (All_Rank(i),All_Etime(i),All_Ctime(i),All_Ratio(i),i=1, nb_procs), &
             Total_Etime, Total_Ctime, Total_Ratio, &
             Min_Etime,   Min_Ctime,   Min_Ratio,   &
             Max_Etime,   Max_Ctime,   Max_Ratio,   &
             Avg_Etime,   Avg_Ctime,   Avg_Ratio,   &
             date(1)(7:8), date(1)(5:6), date(1)(1:4), &
             time(1)(1:2), time(1)(3:4), time(1)(5:6), &
             zone(1:3),    zone(4:5),                  &
             date(2)(7:8), date(2)(5:6), date(2)(1:4), &
             time(2)(1:2), time(2)(3:4), time(2)(5:6), &
             zone(1:3),    zone(4:5)

          DEALLOCATE(All_Ratio, All_Rank)
       END IF

       DEALLOCATE(All_Etime, All_Ctime)

    CASE DEFAULT
       WRITE(report_unit,*) "Error, MPI_TIME: Invalid input parameter"

    END SELECT
  END SUBROUTINE MPI_Time
END MODULE MPI_TIMES