source: codes/icosagcm/trunk/src/base/profiling.F90

Last change on this file was 962, checked in by adurocher, 5 years ago

Merge 'profiling' to trunk

File size: 7.3 KB
Line 
1! Conditional compilation : use scorep user task api
2#ifdef CPP_SCOREP 
3
4#include "scorep/SCOREP_User.inc"
5#define SCOREP_DEFINE_HANDLE_ARRAY( handle ) SCOREP_USER_REGION_HANDLE, allocatable :: handle(:)
6#define SCOREP_ALLOCATE_HANDLE_ARRAY( handle, size ) allocate( scorep_handle(size) )
7
8#else
9
10#define SCOREP_DEFINE_HANDLE_ARRAY(handle)
11#define SCOREP_ALLOCATE_HANDLE_ARRAY(handle,size)
12#define SCOREP_USER_REGION_INIT(handle,name,region_type)
13#define SCOREP_USER_REGION_ENTER(handle)
14#define SCOREP_USER_REGION_END(handle)
15
16#endif
17
18MODULE profiling_mod
19  use omp_para, only : omp_size, omp_rank
20  IMPLICIT NONE
21  SAVE
22  PRIVATE
23 
24  ! Shared variables
25  LOGICAL, PARAMETER :: check_accuracy = .true.
26  INTEGER, PARAMETER :: max_id=20, max_depth=10
27  INTEGER :: nb_id ! Number of timers
28  CHARACTER(15), DIMENSION(max_id) :: name ! Timer names
29  INTEGER :: id_profile
30 
31  ! Threadlocal variables
32  INTEGER, ALLOCATABLE :: depth(:)!(omp_size) ! Number of current nested timers
33  INTEGER, ALLOCATABLE :: current_id(:,:)!(omp_size, max_depth) ! Current nested timer ids
34  REAL, ALLOCATABLE :: chrono(:,:)!(omp_size, max_depth) ! Last timer start time
35  REAL, ALLOCATABLE :: elapsed(:,:)!(omp_size, max_id) ! Cumulative time for each timer
36  SCOREP_DEFINE_HANDLE_ARRAY(scorep_handle)
37 
38  PUBLIC :: init_profiling, reset_profiling, register_id, enter_profile, exit_profile, print_profile
39
40CONTAINS
41
42  SUBROUTINE init_profiling
43    use omp_para, only : get_omp_size
44    integer :: omp_size
45    ! omp_size from module omp_para cannot be used here since it is not initialized yet
46    ! TODO : conditional compilation to compile without openmp
47    omp_size = get_omp_size()
48    !$omp master
49    nb_id=0
50    allocate(depth(0:omp_size-1))
51    allocate(current_id(0:omp_size-1, max_depth))
52    allocate(chrono(0:omp_size-1, max_depth))
53    allocate(elapsed(0:omp_size-1, max_id))
54    depth(:)=0   
55    call reset_profiling()
56    !$omp end master
57    SCOREP_ALLOCATE_HANDLE_ARRAY(scorep_handle, max_id)
58    !$omp barrier
59    call register_id("print_profile", id_profile)
60  END SUBROUTINE init_profiling
61 
62  SUBROUTINE reset_profiling
63    use abort_mod
64    !$omp barrier
65    !$omp master
66    if( any( depth /= 0 ) ) call dynamico_abort("Impossible to reset profiling : a profiling region is still open")   
67    elapsed(:,:)=0   
68    !$omp end master
69    !$omp barrier
70  END SUBROUTINE
71
72  SUBROUTINE register_id(thename, id)
73    use abort_mod
74    CHARACTER(*), INTENT(IN) :: thename
75    INTEGER, INTENT(OUT) :: id
76    !$OMP MASTER
77    nb_id = nb_id+1
78    if(nb_id > max_id) call dynamico_abort("Too many timers")
79    id = nb_id
80    name(id)=thename
81    !$OMP END MASTER
82    SCOREP_USER_REGION_INIT( scorep_handle(id), thename, SCOREP_USER_REGION_TYPE_COMMON )
83    !$omp barrier
84  END SUBROUTINE register_id
85
86  FUNCTION get_elapsed(start)
87    INTEGER(kind=8) :: count, count_rate
88    REAL :: start,get_elapsed
89    CALL SYSTEM_CLOCK(count,count_rate)
90    if(check_accuracy .and. count < 10) print *, "Warning : Profiling elapsed time too short to be accurately measured"
91    get_elapsed = (1.*count)/(1.*count_rate) - start
92    IF(get_elapsed<0.) get_elapsed=0.
93  END FUNCTION get_elapsed
94
95  SUBROUTINE enter_profile(id)
96    INTEGER, INTENT(IN) :: id
97    depth(omp_rank) = depth(omp_rank)+1
98    chrono(omp_rank, depth(omp_rank)) = get_elapsed(0.)
99    current_id(omp_rank,depth(omp_rank)) = id
100    SCOREP_USER_REGION_ENTER(scorep_handle(id))
101  END SUBROUTINE enter_profile
102   
103  SUBROUTINE exit_profile(id)
104    use abort_mod
105    INTEGER, INTENT(IN) :: id
106    INTEGER :: parent_id
107    REAL :: my_elapsed
108   
109    IF(depth(omp_rank)<=0) call dynamico_abort("exit_profile called without a matching enter_profile (depth=0)")
110    IF(id /= current_id(omp_rank, depth(omp_rank))) call dynamico_abort("wrong timer id : exit_profile id doesn't match enter_profile")
111   
112    SCOREP_USER_REGION_END(scorep_handle(id))
113   
114    my_elapsed = get_elapsed(chrono(omp_rank, depth(omp_rank)))
115    ! add elapsed to current profile
116    elapsed(omp_rank,id) = elapsed(omp_rank,id) + my_elapsed
117    depth(omp_rank) = depth(omp_rank)-1
118    ! and substract from parent profile
119    IF(depth(omp_rank)>0) THEN
120       parent_id = current_id(omp_rank,depth(omp_rank))
121       elapsed(omp_rank,parent_id) = elapsed(omp_rank,parent_id) - my_elapsed
122    END IF
123  END SUBROUTINE exit_profile
124
125  SUBROUTINE print_profile
126    use mpi_mod
127    use mpipara
128    use omp_para
129    INTEGER :: i
130    REAL :: mean_total, min_total, max_total, mean_total_local, min_total_local, max_total_local 
131    REAL :: omp_mean_time_local, omp_min_time_local, omp_max_time_local
132    REAL :: mean_time, max_process, min_process, percent
133
134    call enter_profile(id_profile)
135
136    !$OMP MASTER
137    ! mean_total : mean of sum of all timers on all threads and all procs
138    mean_total_local = SUM(elapsed(:,1:nb_id))/omp_size/mpi_size
139    call MPI_Reduce(mean_total_local, mean_total, 1, MPI_DOUBLE, MPI_SUM, 0, comm_icosa, ierr)
140    ! min_total : min on all procs of sum of all timers on all threads
141    min_total_local = MINVAL( SUM(elapsed(:,1:nb_id), 2) )
142    call MPI_Reduce(min_total_local, min_total, 1, MPI_DOUBLE, MPI_MIN, 0, comm_icosa, ierr)
143    ! max_total : max on all procs of sum of all timers on all threads
144    max_total_local = MAXVAL( SUM(elapsed(:,1:nb_id), 2) )
145    call MPI_Reduce(max_total_local, max_total, 1, MPI_DOUBLE, MPI_MAX, 0, comm_icosa, ierr)
146
147    if(is_master) PRINT *, '---------------------- Profiling -----------------------'
148    if(is_master) PRINT ('(A15, F12.3, A, F12.3, A, F12.3, A)'), 'Total (s) : ', mean_total, "  [", min_total, ",", max_total, "]"
149    if(is_master) PRINT ('(A15, A7, A47, A47)'), " ","  %  ","  Process mean(s)[ min, max, diff/mean(%) ]  ", "Process 1 threads(s)[ min, max, diff/mean(%) ]"
150    DO i=1,nb_id
151       omp_mean_time_local = SUM(elapsed(:,i))/omp_size ! Mean time of timer on local threads
152       omp_min_time_local = MINVAL(elapsed(:,i)) ! Max time of timer on local threads
153       omp_max_time_local = MAXVAL(elapsed(:,i)) ! Min time of timer on local threads
154       !omp_inbalance_local = 100*(max_time-min_time)/min_time
155       
156       ! mean_time : mean of timer on all threads and all procs
157       call MPI_Reduce(omp_mean_time_local, mean_time, 1, MPI_DOUBLE, MPI_SUM, 0, comm_icosa, ierr); mean_time = mean_time/mpi_size
158       ! max_process_time : max on procs of mean omp time
159       call MPI_Reduce(omp_mean_time_local, max_process, 1, MPI_DOUBLE, MPI_MAX, 0, comm_icosa, ierr)
160       ! min_process_time : min on procs of mean omp time
161       call MPI_Reduce(omp_mean_time_local, min_process, 1, MPI_DOUBLE, MPI_MIN, 0, comm_icosa, ierr)
162       
163       percent = 100*mean_time/mean_total
164
165       if(is_master) PRINT ('(A15, F5.1, F12.3, A, F12.3, A, F12.3, A, F6.1, A, F12.3, A, F12.3, A, F12.3, A, F6.1, A)'), name(i), percent, mean_time, " [", min_process, ",", max_process, "," , inbalance(mean_time, min_process, max_process),  "]", omp_mean_time_local, " [", omp_min_time_local, ",", omp_max_time_local, "," , inbalance(omp_mean_time_local, omp_min_time_local, omp_max_time_local) , "]"
166    END DO
167    if(is_master) PRINT *, '---------------------- Profiling -----------------------'
168    !$OMP END MASTER
169    call exit_profile(id_profile)
170  contains
171    function inbalance( mean, min, max )
172      real :: mean, min, max
173      real :: inbalance
174      inbalance = 100*(max-min)/(mean+1E-10)
175    end function
176  END SUBROUTINE print_profile
177   
178END MODULE profiling_mod
Note: See TracBrowser for help on using the repository browser.