Changeset 1003


Ignore:
Timestamp:
01/10/20 12:20:52 (4 years ago)
Author:
adurocher
Message:

transfert_mpi : Fuse message/points loops in buffer copies.

Every points to copy - regardless of the tile - are stored in a unique array to avoid having multiple submessages with a variable number of points to copy.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • codes/icosagcm/trunk/src/parallel/transfert_mpi.f90

    r1002 r1003  
    2828  end type 
    2929   
     30  type t_compact_submessages 
     31    integer :: npoints 
     32    integer, allocatable, dimension(:) :: field_ind, field_displ, sign, remote_rank, mpi_displ, level_offset 
     33  end type 
     34   
     35  type t_compact_local_submessages 
     36    integer :: npoints 
     37    integer, allocatable, dimension(:) :: field_ind_src, field_displ_src, sign, field_ind_dest, field_displ_dest 
     38  end type 
     39   
    3040  type mpi_buffer_t 
    3141    integer :: n 
     
    4252    type (mpi_buffer_t), pointer :: mpi_buffer_in(:) 
    4353    type (mpi_buffer_t), pointer :: mpi_buffer_out(:) 
     54    type (t_compact_submessages), pointer :: message_in_compact 
     55    type (t_compact_submessages), pointer :: message_out_compact 
     56    type (t_compact_local_submessages), pointer  :: message_local_compact     
    4457    integer, pointer :: mpi_requests_in(:) ! MPI requests used for message_in. 
    4558    integer, pointer :: mpi_requests_out(:) ! MPI requests used for message_out. 
     
    106119    type(t_submessage) :: submessage_in, submessage_out 
    107120    type(t_local_submessage) :: submessage_local 
    108     integer :: dim3, dim4 
    109     integer :: ind, ind_loc, remote_ind_glo, loc_ind_glo, i, remote_rank 
     121    integer :: dim3, dim4, npoints, last_point 
     122    integer :: ind, ind_loc, remote_ind_glo, loc_ind_glo, i, k, remote_rank 
    110123    integer :: message_in_size, message_out_size, message_local_size, buffer_in_size, buffer_out_size 
    111124    type(t_local_submessage), allocatable :: message_local_tmp(:) 
     
    229242                          100, comm_icosa, message%mpi_requests_in(i), ierr ) 
    230243    end do 
     244     
     245    allocate(message%message_in_compact)     
     246    message%message_in_compact%npoints = sum(message%message_in(:)%npoints) 
     247    npoints = message%message_in_compact%npoints 
     248    allocate(message%message_in_compact%field_ind(npoints)) 
     249    allocate(message%message_in_compact%field_displ(npoints)) 
     250    allocate(message%message_in_compact%sign(npoints)) 
     251    allocate(message%message_in_compact%remote_rank(npoints)) 
     252    allocate(message%message_in_compact%mpi_displ(npoints)) 
     253    allocate(message%message_in_compact%level_offset(npoints)) 
     254 
     255    last_point=0 
     256    do i = 1, size( message%message_in ) 
     257      do k = 1, message%message_in(i)%npoints 
     258        last_point = last_point+1 
     259        message%message_in_compact%field_ind(last_point)   = message%message_in(i)%ind_loc 
     260        message%message_in_compact%field_displ(last_point) = message%message_in(i)%displs(k) 
     261        message%message_in_compact%sign(last_point)        = message%message_in(i)%sign(k) 
     262        message%message_in_compact%remote_rank(last_point) = message%message_in(i)%remote_rank 
     263        message%message_in_compact%mpi_displ(last_point)   = message%message_in(i)%mpi_buffer_displ + k 
     264        message%message_in_compact%level_offset(last_point)= message%message_in(i)%npoints 
     265      end do 
     266    end do 
     267     
     268    allocate(message%message_out_compact)  
     269    message%message_out_compact%npoints = sum(message%message_out(:)%npoints) 
     270    npoints = message%message_out_compact%npoints 
     271    allocate(message%message_out_compact%field_ind(npoints)) 
     272    allocate(message%message_out_compact%field_displ(npoints)) 
     273    allocate(message%message_out_compact%sign(npoints)) 
     274    allocate(message%message_out_compact%remote_rank(npoints)) 
     275    allocate(message%message_out_compact%mpi_displ(npoints)) 
     276    allocate(message%message_out_compact%level_offset(npoints)) 
     277 
     278    last_point=0 
     279    do i = 1, size( message%message_out ) 
     280      do k = 1, message%message_out(i)%npoints 
     281        last_point = last_point+1 
     282        message%message_out_compact%field_ind(last_point)   = message%message_out(i)%ind_loc 
     283        message%message_out_compact%field_displ(last_point) = message%message_out(i)%displs(k) 
     284        message%message_out_compact%sign(last_point)        = message%message_out(i)%sign(k) 
     285        message%message_out_compact%remote_rank(last_point) = message%message_out(i)%remote_rank 
     286        message%message_out_compact%mpi_displ(last_point)   = message%message_out(i)%mpi_buffer_displ + k 
     287        message%message_out_compact%level_offset(last_point)= message%message_out(i)%npoints 
     288      end do 
     289    end do  
     290     
     291    allocate(message%message_local_compact)  
     292    message%message_local_compact%npoints = sum(message%message_local(:)%npoints) 
     293    npoints = message%message_local_compact%npoints 
     294    allocate(message%message_local_compact%field_ind_src(npoints)) 
     295    allocate(message%message_local_compact%field_displ_src(npoints)) 
     296    allocate(message%message_local_compact%sign(npoints)) 
     297    allocate(message%message_local_compact%field_ind_dest(npoints)) 
     298    allocate(message%message_local_compact%field_displ_dest(npoints)) 
     299 
     300    last_point=0 
     301    do i = 1, size( message%message_local ) 
     302      do k = 1, message%message_local(i)%npoints 
     303        last_point = last_point+1 
     304        message%message_local_compact%field_ind_src(last_point)   = message%message_local(i)%src_ind_loc 
     305        message%message_local_compact%field_displ_src(last_point) = message%message_local(i)%displ_src(k) 
     306        message%message_local_compact%sign(last_point)            = message%message_local(i)%sign(k) 
     307        message%message_local_compact%field_ind_dest(last_point)  = message%message_local(i)%dest_ind_loc 
     308        message%message_local_compact%field_displ_dest(last_point)= message%message_local(i)%displ_dest(k) 
     309      end do 
     310    end do   
     311     
    231312    !$omp end master 
    232313    !$omp barrier 
     
    310391      !$acc enter data copyin(message%mpi_buffer_out(i)%buff(:)) async 
    311392    end do 
    312     !$acc enter data copyin(message%message_in(:)) async 
    313     do i = 1, size( message%message_in ) 
    314       !$acc enter data copyin(message%message_in(i)%displs(:)) async 
    315       !$acc enter data copyin(message%message_in(i)%sign(:)) async 
    316     end do 
    317     !$acc enter data copyin(message%message_out(:)) async 
    318     do i = 1, size( message%message_out ) 
    319       !$acc enter data copyin(message%message_out(i)%displs(:)) async 
     393    !!$acc enter data copyin(message%message_in(:)) async 
     394    !do i = 1, size( message%message_in ) 
     395    !  !$acc enter data copyin(message%message_in(i)%displs(:)) async 
     396    !  !$acc enter data copyin(message%message_in(i)%sign(:)) async 
     397    !end do 
     398    !!$acc enter data copyin(message%message_out(:)) async 
     399    !do i = 1, size( message%message_out ) 
     400    !  !$acc enter data copyin(message%message_out(i)%displs(:)) async 
    320401      !!$acc enter data copyin(message%message_out(i)%sign(:)) async 
    321     end do 
    322     !$acc enter data copyin(message%message_local(:)) async 
    323     do i = 1, size( message%message_local ) 
    324       !$acc enter data copyin(message%message_local(i)%displ_src(:)) async 
    325       !$acc enter data copyin(message%message_local(i)%displ_dest(:)) async 
    326       !$acc enter data copyin(message%message_local(i)%sign(:)) async 
    327     end do 
     402    !end do 
     403    !!$acc enter data copyin(message%message_local(:)) async 
     404    !do i = 1, size( message%message_local ) 
     405    !  !$acc enter data copyin(message%message_local(i)%displ_src(:)) async 
     406    !  !$acc enter data copyin(message%message_local(i)%displ_dest(:)) async 
     407    !  !$acc enter data copyin(message%message_local(i)%sign(:)) async 
     408    !end do 
    328409    !$acc enter data copyin(message%field(:)) async 
    329410    do i = 1, ndomain 
    330411      !$acc enter data copyin(message%field(i)%rval4d(:,:,:)) async 
    331412    end do 
     413     
     414    !$acc enter data copyin(message%message_in_compact) async 
     415    !$acc enter data copyin(message%message_in_compact%field_ind(:)) async 
     416    !$acc enter data copyin(message%message_in_compact%field_displ(:)) async 
     417    !$acc enter data copyin(message%message_in_compact%sign(:)) async 
     418    !$acc enter data copyin(message%message_in_compact%remote_rank(:)) async 
     419    !$acc enter data copyin(message%message_in_compact%mpi_displ(:)) async 
     420    !$acc enter data copyin(message%message_in_compact%level_offset(:)) async 
     421     
     422    !$acc enter data copyin(message%message_out_compact) async 
     423    !$acc enter data copyin(message%message_out_compact%field_ind(:)) async 
     424    !$acc enter data copyin(message%message_out_compact%field_displ(:)) async 
     425    !$acc enter data copyin(message%message_out_compact%sign(:)) async 
     426    !$acc enter data copyin(message%message_out_compact%remote_rank(:)) async 
     427    !$acc enter data copyin(message%message_out_compact%mpi_displ(:)) async 
     428    !$acc enter data copyin(message%message_out_compact%level_offset(:)) async  
     429     
     430    !$acc enter data copyin(message%message_local_compact) async 
     431    !$acc enter data copyin(message%message_local_compact%field_ind_src(:)) async 
     432    !$acc enter data copyin(message%message_local_compact%field_displ_src(:)) async 
     433    !$acc enter data copyin(message%message_local_compact%sign(:)) async 
     434    !$acc enter data copyin(message%message_local_compact%field_ind_dest(:)) async 
     435    !$acc enter data copyin(message%message_local_compact%field_displ_dest(:)) async 
    332436 
    333437    !$acc wait 
     
    357461    if( .not. message%ondevice ) call dynamico_abort("Message not on device") 
    358462 
    359     do i = 1, size( message%message_in ) 
    360       !$acc exit data delete(message%message_in(i)%displs(:)) async 
    361       !$acc exit data delete(message%message_in(i)%sign(:)) async 
    362     end do 
    363     !$acc exit data delete(message%message_in(:)) async 
    364     do i = 1, size( message%message_out ) 
    365       !$acc exit data delete(message%message_out(i)%displs(:)) async 
    366       !!$acc exit data delete(message%message_out(i)%sign(:)) async 
    367     end do 
    368     !$acc exit data delete(message%message_out(:)) async 
    369     do i = 1, size( message%message_local ) 
    370       !$acc exit data delete(message%message_local(i)%displ_src(:)) async 
    371       !$acc exit data delete(message%message_local(i)%displ_dest(:)) async 
    372       !$acc exit data delete(message%message_local(i)%sign(:)) async 
    373     end do 
    374     !$acc exit data delete(message%message_local(:)) async 
     463    !do i = 1, size( message%message_in ) 
     464    !  !$acc exit data delete(message%message_in(i)%displs(:)) async 
     465    !  !$acc exit data delete(message%message_in(i)%sign(:)) async 
     466    !end do 
     467    !!$acc exit data delete(message%message_in(:)) async 
     468    !do i = 1, size( message%message_out ) 
     469    !  !$acc exit data delete(message%message_out(i)%displs(:)) async 
     470    !  !!$acc exit data delete(message%message_out(i)%sign(:)) async 
     471    !end do 
     472    !!$acc exit data delete(message%message_out(:)) async 
     473    !do i = 1, size( message%message_local ) 
     474    !  !$acc exit data delete(message%message_local(i)%displ_src(:)) async 
     475    !  !$acc exit data delete(message%message_local(i)%displ_dest(:)) async 
     476    !  !$acc exit data delete(message%message_local(i)%sign(:)) async 
     477    !end do 
     478    !!$acc exit data delete(message%message_local(:)) async 
    375479    do i = 0, mpi_size-1 
    376480      !$acc exit data delete(message%mpi_buffer_in(i)%buff(:)) async 
     
    384488    !$acc exit data delete(message%field(:)) async 
    385489    !$acc exit data delete(message) async 
     490     
     491    !$acc exit data delete(message%message_in_compact%field_ind(:)) async 
     492    !$acc exit data delete(message%message_in_compact%field_displ(:)) async 
     493    !$acc exit data delete(message%message_in_compact%sign(:)) async 
     494    !$acc exit data delete(message%message_in_compact%remote_rank(:)) async 
     495    !$acc exit data delete(message%message_in_compact%mpi_displ(:)) async 
     496    !$acc exit data delete(message%message_in_compact%level_offset(:)) async 
     497    !$acc exit data delete(message%message_in_compact) async 
     498     
     499    !$acc exit data delete(message%message_out_compact%field_ind(:)) async 
     500    !$acc exit data delete(message%message_out_compact%field_displ(:)) async 
     501    !$acc exit data delete(message%message_out_compact%sign(:)) async 
     502    !$acc exit data delete(message%message_out_compact%remote_rank(:)) async 
     503    !$acc exit data delete(message%message_out_compact%mpi_displ(:)) async 
     504    !$acc exit data delete(message%message_out_compact%level_offset(:)) async 
     505    !$acc exit data delete(message%message_out_compact) async 
     506     
     507    !$acc exit data delete(message%message_local_compact%field_ind_src(:)) async 
     508    !$acc exit data delete(message%message_local_compact%field_displ_src(:)) async 
     509    !$acc exit data delete(message%message_local_compact%sign(:)) async 
     510    !$acc exit data delete(message%message_local_compact%field_ind_dest(:)) async 
     511    !$acc exit data delete(message%message_local_compact%field_displ_dest(:)) async 
     512    !$acc exit data delete(message%message_local_compact) async 
     513     
    386514    message%ondevice=.false. 
    387515  end subroutine 
     
    410538    deallocate(message%mpi_requests_in) 
    411539    deallocate(message%mpi_requests_out) 
     540    deallocate(message%message_in_compact) 
     541    deallocate(message%message_out_compact) 
     542    deallocate(message%message_local_compact) 
    412543    !$omp end master 
    413544    !$omp barrier 
     
    427558    CALL distrib_level( 1, dim3, d3_begin, d3_end ) 
    428559     
    429     !$acc parallel loop default(none) present(message, assigned_domain) async if(message%ondevice) 
    430     do i = 1, size( message%message_out ) 
    431       if( assigned_domain( message%message_out(i)%ind_loc ) ) then 
    432         !$acc loop collapse(2) 
    433         do d4 = 1, dim4 
    434           do d3 = d3_begin, d3_end 
    435             local_displ = message%message_out(i)%mpi_buffer_displ + message%message_out(i)%npoints*( (d3-1) + dim3*(d4-1) ) 
    436             !$acc loop 
    437              do k = 1, message%message_out(i)%npoints 
    438               message%mpi_buffer_out(message%message_out(i)%remote_rank)%buff(k+local_displ) = message%field(message%message_out(i)%ind_loc)%rval4d(message%message_out(i)%displs(k),d3, d4) 
    439             end do 
    440           end do 
     560    !$acc parallel loop collapse(3) present(message) default(present) async if(message%ondevice) 
     561    do d4 = 1, dim4 
     562      do d3 = d3_begin, d3_end 
     563        do i=1, message%message_out_compact%npoints 
     564          message%mpi_buffer_out( message%message_out_compact%remote_rank(i) )%buff( message%message_out_compact%mpi_displ(i) + message%message_out_compact%level_offset(i)*( (d3-1) + dim3*(d4-1) ) ) & 
     565            = message%field(message%message_out_compact%field_ind(i))%rval4d( message%message_out_compact%field_displ(i), d3, d4 ) 
    441566        end do 
    442       endif 
    443     end do 
     567      end do 
     568    end do 
     569     
    444570  end subroutine 
    445571 
     
    455581    dim3 = size(message%field(1)%rval4d, 2) 
    456582    CALL distrib_level( 1, dim3, d3_begin, d3_end ) 
    457  
    458     !$acc parallel loop default(none) present(message, assigned_domain) async if(message%ondevice) 
    459     do i = 1, size( message%message_local ) 
    460       if( assigned_domain( message%message_local(i)%dest_ind_loc ) ) then 
    461         !$acc loop collapse(2) 
    462         do d4 = 1, dim4 
    463           do d3 = d3_begin, d3_end 
    464             ! Cannot collapse because size(displ_dest) varies with i 
    465             !$acc loop vector 
    466             do k = 1, message%message_local(i)%npoints 
    467               message%field(message%message_local(i)%dest_ind_loc)%rval4d(message%message_local(i)%displ_dest(k),d3,d4) = & 
    468                 message%message_local(i)%sign(k)*message%field(message%message_local(i)%src_ind_loc)%rval4d(message%message_local(i)%displ_src(k),d3,d4) 
    469             end do 
    470           end do 
     583     
     584    ! TODO : too many copies when tiles are distributed among threads 
     585    !$acc parallel loop collapse(3) present(message) default(present) async if(message%ondevice) 
     586    do d4 = 1, dim4 
     587      do d3 = d3_begin, d3_end 
     588        do i=1, message%message_local_compact%npoints 
     589          message%field(message%message_local_compact%field_ind_dest(i))%rval4d( message%message_local_compact%field_displ_dest(i), d3, d4 ) & 
     590            = message%message_local_compact%sign(i)*message%field(message%message_local_compact%field_ind_src(i))%rval4d( message%message_local_compact%field_displ_src(i), d3, d4 ) 
    471591        end do 
    472       endif 
     592      end do 
    473593    end do 
    474594  end subroutine 
     
    481601    integer :: dim3, dim4, d3_begin, d3_end 
    482602    integer :: k, d3, d4, i 
    483     integer :: local_displ 
    484  
     603    integer :: last_point 
     604     
    485605    dim4 = size(message%field(1)%rval4d, 3) 
    486606    dim3 = size(message%field(1)%rval4d, 2) 
    487607    CALL distrib_level( 1, dim3, d3_begin, d3_end ) 
    488608     
    489     !$acc parallel loop default(none) present(message, assigned_domain) async if(message%ondevice) 
    490     do i = 1, size( message%message_in ) 
    491       if( assigned_domain( message%message_in(i)%ind_loc ) ) then 
    492         !$acc loop collapse(2) 
    493         do d4 = 1, dim4 
    494           do d3 = d3_begin, d3_end 
    495             local_displ = message%message_in(i)%mpi_buffer_displ + message%message_in(i)%npoints*( (d3-1) + dim3*(d4-1) ) 
    496             !$acc loop 
    497             do k = 1, message%message_in(i)%npoints  
    498               message%field(message%message_in(i)%ind_loc)%rval4d(message%message_in(i)%displs(k),d3,d4) & 
    499                   = message%message_in(i)%sign(k)*message%mpi_buffer_in(message%message_in(i)%remote_rank)%buff(k+local_displ) 
    500             end do 
    501           end do 
     609    !$acc parallel loop collapse(3) present(message) default(present) async if(message%ondevice) 
     610    do d4 = 1, dim4 
     611      do d3 = d3_begin, d3_end 
     612        do i=1, message%message_in_compact%npoints 
     613          message%field(message%message_in_compact%field_ind(i))%rval4d( message%message_in_compact%field_displ(i), d3, d4 ) & 
     614            = message%message_in_compact%sign(i)*message%mpi_buffer_in( message%message_in_compact%remote_rank(i) )%buff( message%message_in_compact%mpi_displ(i) + message%message_in_compact%level_offset(i)*( (d3-1) + dim3*(d4-1) ) )  
    502615        end do 
    503       endif 
    504     end do 
    505   end subroutine 
     616      end do 
     617    end do 
     618 
     619  end subroutine 
     620     
    506621 
    507622  subroutine send_message(field, message) 
Note: See TracChangeset for help on using the changeset viewer.