1 | ! Conditional compilation : use scorep user task api |
---|
2 | #ifdef CPP_SCOREP |
---|
3 | |
---|
4 | #include "scorep/SCOREP_User.inc" |
---|
5 | #define SCOREP_DEFINE_HANDLE_ARRAY( handle ) SCOREP_USER_REGION_HANDLE, allocatable :: handle(:) |
---|
6 | #define SCOREP_ALLOCATE_HANDLE_ARRAY( handle, size ) allocate( scorep_handle(size) ) |
---|
7 | |
---|
8 | #else |
---|
9 | |
---|
10 | #define SCOREP_DEFINE_HANDLE_ARRAY(handle) |
---|
11 | #define SCOREP_ALLOCATE_HANDLE_ARRAY(handle,size) |
---|
12 | #define SCOREP_USER_REGION_INIT(handle,name,region_type) |
---|
13 | #define SCOREP_USER_REGION_ENTER(handle) |
---|
14 | #define SCOREP_USER_REGION_END(handle) |
---|
15 | |
---|
16 | #endif |
---|
17 | |
---|
18 | MODULE profiling_mod |
---|
19 | use omp_para, only : omp_size, omp_rank |
---|
20 | IMPLICIT NONE |
---|
21 | SAVE |
---|
22 | PRIVATE |
---|
23 | |
---|
24 | ! Shared variables |
---|
25 | LOGICAL, PARAMETER :: check_accuracy = .true. |
---|
26 | INTEGER, PARAMETER :: max_id=20, max_depth=10 |
---|
27 | INTEGER :: nb_id ! Number of timers |
---|
28 | CHARACTER(15), DIMENSION(max_id) :: name ! Timer names |
---|
29 | INTEGER :: id_profile |
---|
30 | |
---|
31 | ! Threadlocal variables |
---|
32 | INTEGER, ALLOCATABLE :: depth(:)!(omp_size) ! Number of current nested timers |
---|
33 | INTEGER, ALLOCATABLE :: current_id(:,:)!(omp_size, max_depth) ! Current nested timer ids |
---|
34 | REAL, ALLOCATABLE :: chrono(:,:)!(omp_size, max_depth) ! Last timer start time |
---|
35 | REAL, ALLOCATABLE :: elapsed(:,:)!(omp_size, max_id) ! Cumulative time for each timer |
---|
36 | SCOREP_DEFINE_HANDLE_ARRAY(scorep_handle) |
---|
37 | |
---|
38 | PUBLIC :: init_profiling, reset_profiling, register_id, enter_profile, exit_profile, print_profile |
---|
39 | |
---|
40 | CONTAINS |
---|
41 | |
---|
42 | SUBROUTINE init_profiling |
---|
43 | use omp_para, only : get_omp_size |
---|
44 | integer :: omp_size |
---|
45 | ! omp_size from module omp_para cannot be used here since it is not initialized yet |
---|
46 | ! TODO : conditional compilation to compile without openmp |
---|
47 | omp_size = get_omp_size() |
---|
48 | !$omp master |
---|
49 | nb_id=0 |
---|
50 | allocate(depth(0:omp_size-1)) |
---|
51 | allocate(current_id(0:omp_size-1, max_depth)) |
---|
52 | allocate(chrono(0:omp_size-1, max_depth)) |
---|
53 | allocate(elapsed(0:omp_size-1, max_id)) |
---|
54 | depth(:)=0 |
---|
55 | call reset_profiling() |
---|
56 | !$omp end master |
---|
57 | SCOREP_ALLOCATE_HANDLE_ARRAY(scorep_handle, max_id) |
---|
58 | !$omp barrier |
---|
59 | call register_id("print_profile", id_profile) |
---|
60 | END SUBROUTINE init_profiling |
---|
61 | |
---|
62 | SUBROUTINE reset_profiling |
---|
63 | use abort_mod |
---|
64 | !$omp barrier |
---|
65 | !$omp master |
---|
66 | if( any( depth /= 0 ) ) call dynamico_abort("Impossible to reset profiling : a profiling region is still open") |
---|
67 | elapsed(:,:)=0 |
---|
68 | !$omp end master |
---|
69 | !$omp barrier |
---|
70 | END SUBROUTINE |
---|
71 | |
---|
72 | SUBROUTINE register_id(thename, id) |
---|
73 | use abort_mod |
---|
74 | CHARACTER(*), INTENT(IN) :: thename |
---|
75 | INTEGER, INTENT(OUT) :: id |
---|
76 | !$OMP MASTER |
---|
77 | nb_id = nb_id+1 |
---|
78 | if(nb_id > max_id) call dynamico_abort("Too many timers") |
---|
79 | id = nb_id |
---|
80 | name(id)=thename |
---|
81 | !$OMP END MASTER |
---|
82 | SCOREP_USER_REGION_INIT( scorep_handle(id), thename, SCOREP_USER_REGION_TYPE_COMMON ) |
---|
83 | !$omp barrier |
---|
84 | END SUBROUTINE register_id |
---|
85 | |
---|
86 | FUNCTION get_elapsed(start) |
---|
87 | INTEGER(kind=8) :: count, count_rate |
---|
88 | REAL :: start,get_elapsed |
---|
89 | CALL SYSTEM_CLOCK(count,count_rate) |
---|
90 | if(check_accuracy .and. count < 10) print *, "Warning : Profiling elapsed time too short to be accurately measured" |
---|
91 | get_elapsed = (1.*count)/(1.*count_rate) - start |
---|
92 | IF(get_elapsed<0.) get_elapsed=0. |
---|
93 | END FUNCTION get_elapsed |
---|
94 | |
---|
95 | SUBROUTINE enter_profile(id) |
---|
96 | INTEGER, INTENT(IN) :: id |
---|
97 | depth(omp_rank) = depth(omp_rank)+1 |
---|
98 | chrono(omp_rank, depth(omp_rank)) = get_elapsed(0.) |
---|
99 | current_id(omp_rank,depth(omp_rank)) = id |
---|
100 | SCOREP_USER_REGION_ENTER(scorep_handle(id)) |
---|
101 | END SUBROUTINE enter_profile |
---|
102 | |
---|
103 | SUBROUTINE exit_profile(id) |
---|
104 | use abort_mod |
---|
105 | INTEGER, INTENT(IN) :: id |
---|
106 | INTEGER :: parent_id |
---|
107 | REAL :: my_elapsed |
---|
108 | |
---|
109 | IF(depth(omp_rank)<=0) call dynamico_abort("exit_profile called without a matching enter_profile (depth=0)") |
---|
110 | IF(id /= current_id(omp_rank, depth(omp_rank))) call dynamico_abort("wrong timer id : exit_profile id doesn't match enter_profile") |
---|
111 | |
---|
112 | SCOREP_USER_REGION_END(scorep_handle(id)) |
---|
113 | |
---|
114 | my_elapsed = get_elapsed(chrono(omp_rank, depth(omp_rank))) |
---|
115 | ! add elapsed to current profile |
---|
116 | elapsed(omp_rank,id) = elapsed(omp_rank,id) + my_elapsed |
---|
117 | depth(omp_rank) = depth(omp_rank)-1 |
---|
118 | ! and substract from parent profile |
---|
119 | IF(depth(omp_rank)>0) THEN |
---|
120 | parent_id = current_id(omp_rank,depth(omp_rank)) |
---|
121 | elapsed(omp_rank,parent_id) = elapsed(omp_rank,parent_id) - my_elapsed |
---|
122 | END IF |
---|
123 | END SUBROUTINE exit_profile |
---|
124 | |
---|
125 | SUBROUTINE print_profile |
---|
126 | use mpi_mod |
---|
127 | use mpipara |
---|
128 | use omp_para |
---|
129 | INTEGER :: i |
---|
130 | REAL :: mean_total, min_total, max_total, mean_total_local, min_total_local, max_total_local |
---|
131 | REAL :: omp_mean_time_local, omp_min_time_local, omp_max_time_local |
---|
132 | REAL :: mean_time, max_process, min_process, percent |
---|
133 | |
---|
134 | call enter_profile(id_profile) |
---|
135 | |
---|
136 | !$OMP MASTER |
---|
137 | ! mean_total : mean of sum of all timers on all threads and all procs |
---|
138 | mean_total_local = SUM(elapsed(:,1:nb_id))/omp_size/mpi_size |
---|
139 | call MPI_Reduce(mean_total_local, mean_total, 1, MPI_DOUBLE, MPI_SUM, 0, comm_icosa, ierr) |
---|
140 | ! min_total : min on all procs of sum of all timers on all threads |
---|
141 | min_total_local = MINVAL( SUM(elapsed(:,1:nb_id), 2) ) |
---|
142 | call MPI_Reduce(min_total_local, min_total, 1, MPI_DOUBLE, MPI_MIN, 0, comm_icosa, ierr) |
---|
143 | ! max_total : max on all procs of sum of all timers on all threads |
---|
144 | max_total_local = MAXVAL( SUM(elapsed(:,1:nb_id), 2) ) |
---|
145 | call MPI_Reduce(max_total_local, max_total, 1, MPI_DOUBLE, MPI_MAX, 0, comm_icosa, ierr) |
---|
146 | |
---|
147 | if(is_master) PRINT *, '---------------------- Profiling -----------------------' |
---|
148 | if(is_master) PRINT ('(A15, F12.3, A, F12.3, A, F12.3, A)'), 'Total (s) : ', mean_total, " [", min_total, ",", max_total, "]" |
---|
149 | if(is_master) PRINT ('(A15, A7, A47, A47)'), " "," % "," Process mean(s)[ min, max, diff/mean(%) ] ", "Process 1 threads(s)[ min, max, diff/mean(%) ]" |
---|
150 | DO i=1,nb_id |
---|
151 | omp_mean_time_local = SUM(elapsed(:,i))/omp_size ! Mean time of timer on local threads |
---|
152 | omp_min_time_local = MINVAL(elapsed(:,i)) ! Max time of timer on local threads |
---|
153 | omp_max_time_local = MAXVAL(elapsed(:,i)) ! Min time of timer on local threads |
---|
154 | !omp_inbalance_local = 100*(max_time-min_time)/min_time |
---|
155 | |
---|
156 | ! mean_time : mean of timer on all threads and all procs |
---|
157 | call MPI_Reduce(omp_mean_time_local, mean_time, 1, MPI_DOUBLE, MPI_SUM, 0, comm_icosa, ierr); mean_time = mean_time/mpi_size |
---|
158 | ! max_process_time : max on procs of mean omp time |
---|
159 | call MPI_Reduce(omp_mean_time_local, max_process, 1, MPI_DOUBLE, MPI_MAX, 0, comm_icosa, ierr) |
---|
160 | ! min_process_time : min on procs of mean omp time |
---|
161 | call MPI_Reduce(omp_mean_time_local, min_process, 1, MPI_DOUBLE, MPI_MIN, 0, comm_icosa, ierr) |
---|
162 | |
---|
163 | percent = 100*mean_time/mean_total |
---|
164 | |
---|
165 | if(is_master) PRINT ('(A15, F5.1, F12.3, A, F12.3, A, F12.3, A, F6.1, A, F12.3, A, F12.3, A, F12.3, A, F6.1, A)'), name(i), percent, mean_time, " [", min_process, ",", max_process, "," , inbalance(mean_time, min_process, max_process), "]", omp_mean_time_local, " [", omp_min_time_local, ",", omp_max_time_local, "," , inbalance(omp_mean_time_local, omp_min_time_local, omp_max_time_local) , "]" |
---|
166 | END DO |
---|
167 | if(is_master) PRINT *, '---------------------- Profiling -----------------------' |
---|
168 | !$OMP END MASTER |
---|
169 | call exit_profile(id_profile) |
---|
170 | contains |
---|
171 | function inbalance( mean, min, max ) |
---|
172 | real :: mean, min, max |
---|
173 | real :: inbalance |
---|
174 | inbalance = 100*(max-min)/(mean+1E-10) |
---|
175 | end function |
---|
176 | END SUBROUTINE print_profile |
---|
177 | |
---|
178 | END MODULE profiling_mod |
---|