#if defined MULTI
   !                          !==  IN: ptab is an array of pointers (one entry per field)  ==!
#   define NAT_IN(k)                cd_nat(k)
#   define SGN_IN(k)                psgn(k)
#   define F_SIZE(ptab)             kfld
#   define LBC_ARG                  (jf)
#   if defined DIM_2d
#      if defined SINGLE_PRECISION
#         define ARRAY_TYPE(i,j,k,l,f)    TYPE(PTR_2D_sp)     , INTENT(inout) ::   ptab(f)
#      else
#         define ARRAY_TYPE(i,j,k,l,f)    TYPE(PTR_2D_dp)     , INTENT(inout) ::   ptab(f)
#      endif
#      define ARRAY_IN(i,j,k,l,f)      ptab(f)%pt2d(i,j)
#      define K_SIZE(ptab)             1
#      define L_SIZE(ptab)             1
#   endif
#   if defined DIM_3d
#      if defined SINGLE_PRECISION
#         define ARRAY_TYPE(i,j,k,l,f)    TYPE(PTR_3D_sp)     , INTENT(inout) ::   ptab(f)
#      else
#         define ARRAY_TYPE(i,j,k,l,f)    TYPE(PTR_3D_dp)     , INTENT(inout) ::   ptab(f)
#      endif
#      define ARRAY_IN(i,j,k,l,f)      ptab(f)%pt3d(i,j,k)
#      define K_SIZE(ptab)             SIZE(ptab(1)%pt3d,3)
#      define L_SIZE(ptab)             1
#   endif
#   if defined DIM_4d
#      if defined SINGLE_PRECISION
#         define ARRAY_TYPE(i,j,k,l,f)    TYPE(PTR_4D_sp)     , INTENT(inout) ::   ptab(f)
#      else
#         define ARRAY_TYPE(i,j,k,l,f)    TYPE(PTR_4D_dp)     , INTENT(inout) ::   ptab(f)
#      endif
#      define ARRAY_IN(i,j,k,l,f)      ptab(f)%pt4d(i,j,k,l)
#      define K_SIZE(ptab)             SIZE(ptab(1)%pt4d,3)
#      define L_SIZE(ptab)             SIZE(ptab(1)%pt4d,4)
#   endif
#else
   !                          !==  IN: ptab is an array  ==!
#   if defined SINGLE_PRECISION
#      define ARRAY_TYPE(i,j,k,l,f)    REAL(sp)         , INTENT(inout) ::   ARRAY_IN(i,j,k,l,f)
#   else
#      define ARRAY_TYPE(i,j,k,l,f)    REAL(dp)         , INTENT(inout) ::   ARRAY_IN(i,j,k,l,f)
#   endif
#   define NAT_IN(k)                cd_nat
#   define SGN_IN(k)                psgn
#   define F_SIZE(ptab)             1
#   define LBC_ARG
#   if defined DIM_2d
#      define ARRAY_IN(i,j,k,l,f)   ptab(i,j)
#      define K_SIZE(ptab)          1
#      define L_SIZE(ptab)          1
#   endif
#   if defined DIM_3d
#      define ARRAY_IN(i,j,k,l,f)   ptab(i,j,k)
#      define K_SIZE(ptab)          SIZE(ptab,3)
#      define L_SIZE(ptab)          1
#   endif
#   if defined DIM_4d
#      define ARRAY_IN(i,j,k,l,f)   ptab(i,j,k,l)
#      define K_SIZE(ptab)          SIZE(ptab,3)
#      define L_SIZE(ptab)          SIZE(ptab,4)
#   endif
#endif
   !                          !==  precision-dependent kinds, send/receive routines and MPI datatype  ==!
#   if defined SINGLE_PRECISION
#      define PRECISION sp
#      define SENDROUTINE mppsend_sp
#      define RECVROUTINE mpprecv_sp
#      define MPI_TYPE MPI_REAL
#      define HUGEVAL(x)   HUGE(x/**/_sp)
#   else
#      define PRECISION dp
#      define SENDROUTINE mppsend_dp
#      define RECVROUTINE mpprecv_dp
#      define MPI_TYPE MPI_DOUBLE_PRECISION
#      define HUGEVAL(x)   HUGE(x/**/_dp)
#   endif

   SUBROUTINE ROUTINE_NFD( ptab, cd_nat, psgn, kfillmode, pfillval, kfld )
      !!----------------------------------------------------------------------
      !!            ***  generic north fold routine (multi-process case)  ***
      !!
      !! ** Purpose :   apply the north fold boundary condition on ptab when the
      !!              northern lines are distributed over several processes
      !!
      !! ** Method  :   two communication strategies, selected by l_north_nogather:
      !!              - no allgather : the lines needed by the fold are packed in a
      !!                buffer, sent to each process listed in isendto, received
      !!                from the same processes, and lbc_nfd_nogather is called
      !!                field by field with the received lines
      !!              - allgather    : the inner-domain part of the last lines is
      !!                gathered over ncomm_north, a global northern band is
      !!                rebuilt, lbc_nfd is applied on it, the east-west halos of
      !!                the band are filled and the last nn_hls+1 lines are copied
      !!                back into ptab
      !!              Columns belonging to a suppressed (land) process, flagged by
      !!              nfproc = -1, are handled according to kfillmode.
      !!
      !! ** Action  :   ptab is updated on its northern lines
      !!                l_full_nf_update is forced to .TRUE. in the no-allgather case
      !!----------------------------------------------------------------------
      ARRAY_TYPE(:,:,:,:,:)   ! array or pointer of arrays on which the boundary condition is applied
      CHARACTER(len=1) , INTENT(in   ) ::   NAT_IN(:)   ! nature of array grid-points
      REAL(wp)         , INTENT(in   ) ::   SGN_IN(:)   ! sign used across the north fold boundary
      INTEGER          , INTENT(in   ) ::   kfillmode   ! filling method for halo over land
      REAL(wp)         , INTENT(in   ) ::   pfillval    ! background value (used at closed boundaries)
      INTEGER, OPTIONAL, INTENT(in   ) ::   kfld        ! number of pt3d arrays
      !                                                 ! NOTE(review): read without a PRESENT test when ptab is an
      !                                                 ! array of pointers; presumably always passed then - confirm
      !
      LOGICAL  ::   ll_add_line
      INTEGER  ::   ji,  jj,  jk,  jl, jf, jr       ! dummy loop indices
      INTEGER  ::   ipi, ipj, ipj2, ipk, ipl, ipf   ! dimension of the input array
      INTEGER  ::   ierr, ibuffsize, iis0, iie0, impp
      INTEGER  ::   ii1, ii2, ij1, ij2
      INTEGER  ::   ipimax, i0max
      INTEGER  ::   iproc, ipni, ijnr
      INTEGER, DIMENSION (jpmaxngh)       ::   ml_req_nf   ! for mpi_isend when avoiding mpi_allgather
      INTEGER                             ::   ml_err      ! for mpi_isend when avoiding mpi_allgather
      INTEGER, DIMENSION(MPI_STATUS_SIZE) ::   ml_stat     ! for mpi_isend when avoiding mpi_allgather
      !                                                    ! Workspace for message transfers avoiding mpi_allgather
      INTEGER                             ::   ipj_b       ! sum of lines for all multi fields
      INTEGER                             ::   i012        ! 0, 1 or 2
      INTEGER , DIMENSION(:,:)        , ALLOCATABLE ::   jj_s  ! position of sent lines
      INTEGER , DIMENSION(:,:)        , ALLOCATABLE ::   jj_b  ! position of buffer lines
      INTEGER , DIMENSION(:)          , ALLOCATABLE ::   ipj_s ! number of sent lines
      REAL(PRECISION), DIMENSION(:,:,:,:)    , ALLOCATABLE ::   ztabb, ztabr, ztabw  ! buffer, receive and work arrays
      REAL(PRECISION), DIMENSION(:,:,:,:,:)  , ALLOCATABLE ::   ztabglo, znorthloc
      REAL(PRECISION), DIMENSION(:,:,:,:,:,:), ALLOCATABLE ::   znorthglo
      !!----------------------------------------------------------------------
      !
      ipk = K_SIZE(ptab)   ! 3rd dimension
      ipl = L_SIZE(ptab)   ! 4th    -
      ipf = F_SIZE(ptab)   ! 5th    -      use in "multi" case (array of pointers)
      !
      IF( l_north_nogather ) THEN   !==  no allgather exchanges  ==!

         !   ---   define number of exchanged lines   ---
         !
         ! In theory we should exchange only nn_hls lines.
         !
         ! However, some other points are duplicated in the north pole folding:
         !  - jperio=[34], grid=T : half of the last line (jpiglo/2+2:jpiglo-nn_hls)
         !  - jperio=[34], grid=U : half of the last line (jpiglo/2+1:jpiglo-nn_hls)
         !  - jperio=[34], grid=V : all the last line nn_hls+1 and (nn_hls+2:jpiglo-nn_hls)
         !  - jperio=[34], grid=F : all the last line (nn_hls+1:jpiglo-nn_hls)
         !  - jperio=[56], grid=T : 2 points of the last line (jpiglo/2+1 and jpiglo-nn_hls)
         !  - jperio=[56], grid=U : no points are duplicated
         !  - jperio=[56], grid=V : half of the last line (jpiglo/2+1:jpiglo-nn_hls)
         !  - jperio=[56], grid=F : half of the last line (jpiglo/2+1:jpiglo-nn_hls-1)
         ! The order of the calculations may differ for these duplicated points (as, for example jj+1 becomes jj-1)
         ! This explains why these duplicated points may have different values even if they are at the exact same location.
         ! In consequence, we may want to force the folding on these points by setting l_full_nf_update = .TRUE.
         ! This is slightly slower but necessary to avoid different values on identical grid points!!
         !
         !!!!!!!!!           temporary switch off this optimisation ==> force TRUE           !!!!!!!!
         !!!!!!!!!  needed to get the same results without agrif and with agrif and no zoom  !!!!!!!!
         !!!!!!!!!                    I do not know why we must do that...                   !!!!!!!!
         l_full_nf_update = .TRUE.
         ! also force it if not restart during the first 2 steps (leap frog?)
         ll_add_line = l_full_nf_update .OR. ( ncom_stp <= nit000+1 .AND. .NOT. ln_rstart )

         ALLOCATE(ipj_s(ipf))                ! how many lines do we exchange?
         IF( ll_add_line ) THEN
            DO jf = 1, ipf                      ! Loop over the number of arrays to be processed
               !                                ! COUNT over a one-element array: adds 1 when the condition holds, else 0
               ipj_s(jf) = nn_hls + COUNT( (/ npolj == 3 .OR. npolj == 4 .OR.   &
                  &                           NAT_IN(jf) == 'V' .OR. NAT_IN(jf) == 'F' /) )
            END DO
         ELSE
            ipj_s(:) = nn_hls
         ENDIF

         ipj   = MAXVAL(ipj_s(:))            ! Max 2nd dimension of message transfers
         ipj_b = SUM(   ipj_s(:))            ! Total number of lines to be exchanged
         ALLOCATE( jj_s(ipj, ipf), jj_b(ipj, ipf) )

         ! Index of modifying lines in input
         ij1 = 0                             ! running line counter in the packed buffer (all fields stacked along j)
         DO jf = 1, ipf                      ! Loop over the number of arrays to be processed
            !
            ! NOTE(review): there is no default case below, so i012 keeps its previous (possibly undefined) value
            !               when npolj is not in 3..6 or the grid nature is not one of T, W, U, V, F - confirm
            !               that callers never reach this routine with other values
            SELECT CASE ( npolj )
            CASE ( 3, 4 )                       ! *  North fold  T-point pivot
               SELECT CASE ( NAT_IN(jf) )
               CASE ( 'T', 'W', 'U' )   ;   i012 = 1   ! T-, U-, W-point
               CASE ( 'V', 'F'      )   ;   i012 = 2   ! V-, F-point
               END SELECT
            CASE ( 5, 6 )                       ! *  North fold  F-point pivot
               SELECT CASE ( NAT_IN(jf) )
               CASE ( 'T', 'W', 'U' )   ;   i012 = 0   ! T-, U-, W-point
               CASE ( 'V', 'F'      )   ;   i012 = 1   ! V-, F-point
               END SELECT
            END SELECT
            !
            DO jj = 1, ipj_s(jf)
               ij1 = ij1 + 1
               jj_b(jj,jf) = ij1                          ! line index in the buffer
               jj_s(jj,jf) = jpj - 2*nn_hls + jj - i012   ! line index in the input array
            END DO
            !
         END DO
         !
         ALLOCATE( ztabb(jpimax,ipj_b,ipk,ipl) )   ! store all the data to be sent in a buffer array
         ibuffsize = jpimax * ipj_b * ipk * ipl
         !
         DO jf = 1, ipf   ;   DO jl = 1, ipl   ;   DO jk = 1, ipk
            DO jj = 1, ipj_s(jf)
               ij1 = jj_b(jj,jf)
               ij2 = jj_s(jj,jf)
               DO ji = 1, jpi
                  ztabb(ji,ij1,jk,jl) = ARRAY_IN(ji,ij2,jk,jl,jf)
               END DO
               DO ji = jpi+1, jpimax
                  ztabb(ji,ij1,jk,jl) = HUGEVAL(0.)   ! avoid sending uninitialized values (make sure we do not use it)
               END DO
            END DO
         END DO   ;   END DO   ;   END DO
         !
         ! start waiting time measurement
         IF( ln_timing ) CALL tic_tac(.TRUE.)
         !
         ! send the data as soon as possible
         DO jr = 1, nsndto
            iproc = nfproc(isendto(jr))
            IF( iproc /= narea-1 .AND. iproc /= -1 ) THEN   ! neither myself nor a suppressed land process
               CALL SENDROUTINE( 5, ztabb, ibuffsize, iproc, ml_req_nf(jr) )
            ENDIF
         END DO
         !
         ipimax = jpimax * jpmaxngh
         ALLOCATE( ztabw(jpimax,ipj_b,ipk,ipl), ztabr(ipimax,ipj_b,ipk,ipl) )
         !
         DO jr = 1, nsndto
            !
            ipni  = isendto(jr)
            iproc = nfproc(ipni)
            ipi   = nfjpi (ipni)
            !
            IF( ipni ==   1  ) THEN   ;   iis0 =   1            ! domain  left side: as e-w comm already done -> from 1st column
            ELSE                      ;   iis0 =   1 + nn_hls   ! default: -> from inner domain
            ENDIF
            IF( ipni == jpni ) THEN   ;   iie0 = ipi            ! domain right side: as e-w comm already done -> until last column
            ELSE                      ;   iie0 = ipi - nn_hls   ! default: -> until inner domain
            ENDIF
            impp = nfimpp(ipni) - nfimpp(isendto(1))            ! column offset relative to the first process of the list
            !
            IF(           iproc == -1 ) THEN   ! No neighbour (land proc that was suppressed)
               !
               SELECT CASE ( kfillmode )
               CASE ( jpfillnothing )               ! no filling
               CASE ( jpfillcopy    )               ! filling with inner domain values
                  DO jf = 1, ipf   ;   DO jl = 1, ipl   ;   DO jk = 1, ipk
                     DO jj = 1, ipj_s(jf)
                        ij1 = jj_b(jj,jf)
                        ij2 = jj_s(jj,jf)
                        DO ji = iis0, iie0
                           ztabr(impp+ji,ij1,jk,jl) = ARRAY_IN(Nis0,ij2,jk,jl,jf)   ! chose to take the 1st inner domain point
                        END DO
                     END DO
                  END DO   ;   END DO   ;   END DO
               CASE ( jpfillcst     )               ! filling with constant value
                  DO jl = 1, ipl   ;   DO jk = 1, ipk
                     DO jj = 1, ipj_b
                        DO ji = iis0, iie0
                           ztabr(impp+ji,jj,jk,jl) = pfillval
                        END DO
                     END DO
                  END DO   ;   END DO
               END SELECT
               !
            ELSE IF( iproc == narea-1 ) THEN   ! get data from myself!
               !
               DO jf = 1, ipf   ;   DO jl = 1, ipl   ;   DO jk = 1, ipk
                  DO jj = 1, ipj_s(jf)
                     ij1 = jj_b(jj,jf)
                     ij2 = jj_s(jj,jf)
                     DO ji = iis0, iie0
                        ztabr(impp+ji,ij1,jk,jl) = ARRAY_IN(ji,ij2,jk,jl,jf)
                     END DO
                  END DO
               END DO   ;   END DO   ;   END DO
               !
            ELSE                               ! get data from a neighbour through communication
               !
               CALL RECVROUTINE(5, ztabw, ibuffsize, iproc)
               DO jl = 1, ipl   ;   DO jk = 1, ipk
                  DO jj = 1, ipj_b
                     DO ji = iis0, iie0
                        ztabr(impp+ji,jj,jk,jl) = ztabw(ji,jj,jk,jl)
                     END DO
                  END DO
               END DO   ;   END DO
            ENDIF
            !
         END DO   ! nsndto
         !
         IF( ln_timing ) CALL tic_tac(.FALSE.)
         !
         ! North fold boundary condition
         !
         DO jf = 1, ipf
            ij1 = jj_b(       1 ,jf)     ! first and last buffer lines holding field jf
            ij2 = jj_b(ipj_s(jf),jf)
            CALL lbc_nfd_nogather( ARRAY_IN(:,:,:,:,jf), ztabr(:,ij1:ij2,:,:), cd_nat LBC_ARG, psgn LBC_ARG )
         END DO
         !
         DEALLOCATE( ztabr, ztabw, jj_s, jj_b, ipj_s )
         !
         DO jr = 1,nsndto
            iproc = nfproc(isendto(jr))
            IF( iproc /= narea-1 .AND. iproc /= -1 ) THEN   ! same test as for the sends above
               CALL mpi_wait( ml_req_nf(jr), ml_stat, ml_err )   ! put the wait at the very end just before the deallocate
            ENDIF
         END DO
         DEALLOCATE( ztabb )   ! send buffer released only once all pending sends are completed
         !
      ELSE                             !==  allgather exchanges  ==!
         !
         ! how many lines do we exchange at max? -> ipj    (no further optimizations in this case...)
         ipj =      nn_hls + 2
         ! how many lines do we     need at max? -> ipj2   (no further optimizations in this case...)
         ipj2 = 2 * nn_hls + 2
         !
         i0max = jpimax - 2 * nn_hls
         ibuffsize = i0max * ipj * ipk * ipl * ipf
         ALLOCATE( znorthloc(i0max,ipj,ipk,ipl,ipf), znorthglo(i0max,ipj,ipk,ipl,ipf,ndim_rank_north) )
         !
         DO jf = 1, ipf   ;   DO jl = 1, ipl   ;   DO jk = 1, ipk   ! put in znorthloc ipj j-lines of ptab
            DO jj = 1, ipj
               ij2 = jpj - ipj2 + jj                        ! the first ipj lines of the last ipj2 lines
               DO ji = 1, Ni_0
                  ii2 = Nis0 - 1 + ji                       ! inner domain: Nis0 to Nie0
                  znorthloc(ji,jj,jk,jl,jf) = ARRAY_IN(ii2,ij2,jk,jl,jf)
               END DO
               DO ji = Ni_0+1, i0max
                  znorthloc(ji,jj,jk,jl,jf) = HUGEVAL(0.)   ! avoid sending uninitialized values (make sure we do not use it)
               END DO
            END DO
         END DO   ;   END DO   ;   END DO
         !
         ! start waiting time measurement
         IF( ln_timing ) CALL tic_tac(.TRUE.)
#if defined key_mpp_mpi
         CALL MPI_ALLGATHER( znorthloc, ibuffsize, MPI_TYPE, znorthglo, ibuffsize, MPI_TYPE, ncomm_north, ierr )
#endif
         ! stop waiting time measurement
         IF( ln_timing ) CALL tic_tac(.FALSE.)
         DEALLOCATE( znorthloc )
         ALLOCATE( ztabglo(jpiglo,ipj2,ipk,ipl,ipf) )
         !
         ! need to fill only the first ipj lines of ztabglo as lbc_nfd does not use the last nn_hls lines
         ijnr = 0                      ! index in znorthglo of the current non-suppressed process
         DO jr = 1, jpni                                                        ! recover the global north array
            iproc = nfproc(jr)
            impp  = nfimpp(jr)
            ipi   = nfjpi( jr) - 2 * nn_hls                       ! corresponds to Ni_0 but for subdomain iproc
            IF( iproc == -1 ) THEN   ! No neighbour (land proc that was suppressed)
               !
               SELECT CASE ( kfillmode )
               CASE ( jpfillnothing )               ! no filling
               CASE ( jpfillcopy    )               ! filling with inner domain values
                  DO jf = 1, ipf   ;   DO jl = 1, ipl   ;   DO jk = 1, ipk
                     DO jj = 1, ipj
                        ij2 = jpj - ipj2 + jj                    ! the first ipj lines of the last ipj2 lines
                        DO ji = 1, ipi
                           ii1 = impp + nn_hls + ji - 1          ! corresponds to mig(nn_hls + ji) but for subdomain iproc
                           ztabglo(ii1,jj,jk,jl,jf) = ARRAY_IN(Nis0,ij2,jk,jl,jf)   ! chose to take the 1st inner domain point
                        END DO
                     END DO
                  END DO   ;   END DO   ;   END DO
               CASE ( jpfillcst     )               ! filling with constant value
                  DO jf = 1, ipf   ;   DO jl = 1, ipl   ;   DO jk = 1, ipk
                     DO jj = 1, ipj
                        DO ji = 1, ipi
                           ii1 = impp + nn_hls + ji - 1          ! corresponds to mig(nn_hls + ji) but for subdomain iproc
                           ztabglo(ii1,jj,jk,jl,jf) = pfillval
                        END DO
                     END DO
                  END DO   ;   END DO   ;   END DO
               END SELECT
               !
            ELSE
               ijnr = ijnr + 1
               DO jf = 1, ipf   ;   DO jl = 1, ipl   ;   DO jk = 1, ipk
                  DO jj = 1, ipj
                     DO ji = 1, ipi
                        ii1 = impp + nn_hls + ji - 1             ! corresponds to mig(nn_hls + ji) but for subdomain iproc
                        ztabglo(ii1,jj,jk,jl,jf) = znorthglo(ji,jj,jk,jl,jf,ijnr)
                     END DO
                  END DO
               END DO   ;   END DO   ;   END DO
            ENDIF
            !
         END DO   ! jpni
         DEALLOCATE( znorthglo )
         !
         DO jf = 1, ipf
            CALL lbc_nfd( ztabglo(:,:,:,:,jf), cd_nat LBC_ARG, psgn LBC_ARG )   ! North fold boundary condition
            DO jl = 1, ipl   ;   DO jk = 1, ipk                  ! e-w periodicity
               DO jj = 1, nn_hls + 1
                  ij1 = ipj2 - (nn_hls + 1) + jj                   ! need only the last nn_hls + 1 lines until ipj2
                  ztabglo(              1:nn_hls,ij1,jk,jl,jf) = ztabglo(jpiglo-2*nn_hls+1:jpiglo-nn_hls,ij1,jk,jl,jf)
                  ztabglo(jpiglo-nn_hls+1:jpiglo,ij1,jk,jl,jf) = ztabglo(         nn_hls+1:     2*nn_hls,ij1,jk,jl,jf)
               END DO
            END DO   ;   END DO
         END DO
         !
         DO jf = 1, ipf   ;   DO jl = 1, ipl   ;   DO jk = 1, ipk   ! Scatter back to ARRAY_IN
            DO jj = 1, nn_hls + 1
               ij1 = jpj  - (nn_hls + 1) + jj   ! last nn_hls + 1 lines until jpj
               ij2 = ipj2 - (nn_hls + 1) + jj   ! last nn_hls + 1 lines until ipj2
               DO ji= 1, jpi
                  ii2 = mig(ji)                 ! global i-index of local column ji
                  ARRAY_IN(ji,ij1,jk,jl,jf) = ztabglo(ii2,ij2,jk,jl,jf)
               END DO
            END DO
         END DO   ;   END DO   ;   END DO
         !
         DEALLOCATE( ztabglo )
         !
      ENDIF   ! l_north_nogather
      !
   END SUBROUTINE ROUTINE_NFD

#undef PRECISION
#undef MPI_TYPE
#undef SENDROUTINE
#undef RECVROUTINE
#undef ARRAY_TYPE
#undef NAT_IN
#undef SGN_IN
#undef ARRAY_IN
#undef K_SIZE
#undef L_SIZE
#undef F_SIZE
#undef LBC_ARG
#undef HUGEVAL