- Timestamp: 07/15/19 12:29:31
- Location: codes/icosagcm/trunk
- Files: 9 added, 18 edited, 6 moved
codes/icosagcm/trunk/bld.cfg
(r903 -> r953) The compiler-provided openacc module is added to the build system's dependency exclusions, alongside netcdf, omp_lib and MPI:

      bld::excl_dep use::netcdf
      bld::excl_dep use::omp_lib
  +   bld::excl_dep use::openacc
      bld::excl_dep use::mpi
      bld::excl_dep inc::mpif.h
codes/icosagcm/trunk/src/base/abort.F90
(r901 -> r953) A new subroutine abort_acc is added at the end of the module; it aborts the run whenever a code path that has not been validated with OpenACC is reached:

      !$omp end single
      end subroutine

  +   !!!Abort execution when openacc is on
  +   subroutine abort_acc( message )
  +     use mpi_mod
  +     implicit none
  +     character(len=*), optional, intent(in) :: message
  + #ifdef _OPENACC
  +     call dynamico_abort( "Not tested with OpenACC ! " // message )
  + #endif
  +   end subroutine
      end module
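For reference, the guard pattern built on abort_acc and used below in check_conserve, diagflux, sponge and caldyn_gcm looks like the following minimal sketch. The module and option names here (guard_example_mod, my_option) are illustrative, not part of the commit; only abort_mod/abort_acc come from this changeset.

      ! Minimal sketch (not from this changeset) of the abort_acc guard pattern.
      ! The option name my_option and this module are illustrative only.
      MODULE guard_example_mod
        IMPLICIT NONE
      CONTAINS
        SUBROUTINE init_my_option(my_option)
          USE abort_mod                 ! provides abort_acc (added by this changeset)
          LOGICAL, INTENT(IN) :: my_option
          ! Reject, at initialisation time, any option whose code path has not
          ! been ported/validated with OpenACC, rather than risking silently
          ! wrong results on the GPU.
          IF (my_option) THEN
             CALL abort_acc("my_option /= .FALSE.")
          END IF
        END SUBROUTINE init_my_option
      END MODULE guard_example_mod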
codes/icosagcm/trunk/src/base/field.f90
(r548 -> r953) field.f90: the t_field type and its management routines become device (GPU) aware.

  In TYPE t_field the real pointers are now nullified by default and a device flag is added:

  -   REAL(rstd),POINTER :: rval2d(:)
  +   REAL(rstd),POINTER :: rval2d(:) => null()
  +   REAL(rstd),POINTER :: rval3d(:,:) => null()
  +   REAL(rstd),POINTER :: rval4d(:,:,:) => null()
      ...
  +   LOGICAL :: ondevice !< flag if field is allocated on device as well

  allocate_field, allocate_fields and allocate_field_ take a new optional argument,

  +   LOGICAL, INTENT(IN), OPTIONAL :: ondevice

  which is passed down to allocate_field_. There, once the host arrays are allocated, the device copy is created on request:

  +   field(ind)%ondevice = .FALSE.
  +   IF (PRESENT(ondevice)) THEN
  +      IF (ondevice) CALL create_device_field(field(ind))
  +   END IF

  allocate_field_glo also initialises field(ind)%ondevice = .FALSE.

  deallocate_field_ now pairs every DEALLOCATE with the matching device deallocation when the field lives on the GPU, for each rank (2d/3d/4d) and data type (integer/real/logical), e.g.:

  +   IF (data_type==type_real) THEN
  +      DEALLOCATE(field(ind)%rval4d)
  +      IF (field(ind)%ondevice) THEN
  +         !$acc exit data delete(field(ind)%rval4d)
  +         CONTINUE
  +      END IF
  +   END IF

  Three new subroutines complete the API, each dispatching on ndim and data_type in the same way:

  - update_device_field(field): creates the device copy if needed, then issues "!$acc update device(...)" for the allocated array of every assigned domain (with an !$OMP BARRIER at the end);
  - update_host_field(field): issues "!$acc update host(...) wait" for every field already present on the device;
  - create_device_field(field): issues "!$acc enter data create(...)" for the allocated array and sets field%ondevice = .TRUE.; if called on a field that is already on the device it prints "Field is already on device !" and stops.
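The following sketch (not part of the commit) illustrates the intended calling pattern of this new device-aware field API. The field name f_work and the wrapping subroutine are hypothetical; allocate_field, update_device_field and update_host_field, and the constants field_t, type_real and llm, are taken from the model as used elsewhere in this changeset.

      ! Illustrative usage sketch of the device-aware field API (names f_work
      ! and device_field_usage_sketch are hypothetical).
      SUBROUTINE device_field_usage_sketch
        USE icosa            ! t_field, allocate_field, field_t, type_real, llm, as used in the model
        IMPLICIT NONE
        TYPE(t_field), POINTER :: f_work(:)

        ! Allocate the field on the host and mirror it on the GPU in one call.
        CALL allocate_field(f_work, field_t, type_real, llm, name='work', ondevice=.TRUE.)

        ! ... fill f_work on the host, e.g. from an initial state ...

        ! Push the host values to the device before device kernels use them.
        CALL update_device_field(f_work)

        ! ... "!$acc parallel loop" kernels update the device copy ...

        ! Bring the results back before host-side I/O or MPI code reads them.
        CALL update_host_field(f_work)
      END SUBROUTINE device_field_usage_sketch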
codes/icosagcm/trunk/src/diagnostics/check_conserve.f90
(r902 -> r953) USE abort_mod is added at module level and in init_check_conserve. After the SELECT CASE that parses check_conservation, only the 'basic' check is accepted under OpenACC:

  +   IF (check_type /= check_basic) THEN
  +      CALL abort_acc("check_conservation /= 'basic'")
  +   END IF

  and the detailed-check branch is guarded as well:

      IF(check_type == check_detailed) THEN
  +      CALL abort_acc("!check_detailed")
         CALL transfert_request(f_ue,req_e1_vect)
         CALL pression(f_ps,f_p)
codes/icosagcm/trunk/src/diagnostics/diagflux.F90
(r604 -> r953) init_diagflux now uses abort_mod and rejects diagflux_on when running with OpenACC:

      SUBROUTINE init_diagflux
        USE getin_mod
  +     USE abort_mod
        INTEGER :: ll
        diagflux_on = .FALSE.
        CALL getin("diagflux", diagflux_on)
  +     IF (diagflux_on) THEN
  +        CALL abort_acc("diagflux /= .FALSE.")
  +     END IF
        ll = MERGE(llm,1,diagflux_on)
codes/icosagcm/trunk/src/diagnostics/observable.f90
(r899 -> r953) In init_observable the potential-temperature field is now mirrored on the device, and the observables copy f_ps back to the host right after its halo exchange:

  -   CALL allocate_field(f_theta, field_t,type_real,llm,nqdyn, name='theta' ) ! potential temperature
  +   CALL allocate_field(f_theta, field_t,type_real,llm,nqdyn, name='theta', ondevice=.TRUE.) ! potential temperature
      ...
      CALL transfert_request(f_ps,req_i1)
  +   CALL update_host_field(f_ps)
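The sketch below (not part of the commit) shows the host/device coherence pattern this changeset applies around halo exchanges; it mirrors what init_dissip in dissip_gcm.F90 does for f_du. The wrapping routine name and the field f_q are hypothetical; transfert_request, req_i1, update_device_field and update_host_field come from the model.

      ! Illustrative sketch: keeping host and device copies coherent across a
      ! halo exchange, as done for f_du in init_dissip below.
      SUBROUTINE exchange_coherence_sketch(f_q)
        USE icosa            ! t_field, transfert_request, req_i1, as used in the model
        IMPLICIT NONE
        TYPE(t_field), POINTER :: f_q(:)

        CALL update_device_field(f_q)        ! push the latest host values to the device
        CALL transfert_request(f_q, req_i1)  ! MPI halo exchange
        CALL update_host_field(f_q)          ! refresh the host copy afterwards, so both
                                             ! sides see the exchanged values
      END SUBROUTINE exchange_coherence_sketch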
codes/icosagcm/trunk/src/dissip/dissip_gcm.F90
(r933 -> r953) dissip_gcm.F90 is the most heavily modified file: the dissipation operators are offloaded with OpenACC.

  Module level: USE abort_mod is added.

  allocate_dissip: f_due_diss1, f_due_diss2 and f_dtheta_rhodz_diss are allocated with ondevice=.TRUE., and the dissipation time scales get a device copy:

  +   !$acc enter data create(tau_graddiv(:),tau_gradrot(:),tau_divgrad(:)) async

  init_dissip (USE abort_mod):
  - every Rayleigh-friction case other than 'none' now starts with CALL abort_acc("rayleigh_friction_type /= 'none'");
  - the work fields f_u, f_du, f_theta and f_dtheta are allocated with ondevice=.TRUE.;
  - in the three eigenvalue-estimation loops (nitergdiv, nitergrot, niterdivgrad) the halo exchanges are bracketed by CALL update_device_field(...) before and CALL update_host_field(...) after, and an "!$acc update host(u(:)) wait" (resp. theta) follows each compute_*_inplace call; the redundant u=f_u(ind) / theta=f_theta(ind) assignments in the normalisation loops are removed. Comments record what stays on the CPU: "This cannot be ported on GPU due to compiler limitations" (random-seed loops), "Not ported on GPU because all the other kernels cannot be ported" (max-search loops), "This should be ported on GPU but we are running into compiler issues..." (normalisations);
  - at the end the time scales are pushed to the device:

  +   !$acc update device(tau_graddiv(:),tau_gradrot(:),tau_divgrad(:)) async

  dissip (USE abort_mod): the main tendency loop is offloaded,

  +   !$acc parallel loop collapse(2) present(due(:,:), dtheta_rhodz(:,:,:), due_diss1(:,:), due_diss2(:,:), dtheta_rhodz_diss(:,:), tau_graddiv(:), tau_gradrot(:), tau_divgrad(:)) async

  Rayleigh friction is guarded by CALL abort_acc("dissip/rayleigh_friction_type>0"), except for the giant-planet branch whose ij loop gets "!$acc parallel loop present(ue(:,:), due(:,:), lat_e(:))".

  gradiv_, gradrot_, divgrad_ and divgrad_theta_rhodz: each l/ij loop nest is annotated with "!$acc parallel loop ... present(...) async" (plus an inner "!$acc loop"). In divgrad_ the array assignment dtheta=theta is replaced by an explicit collapse(2) loop, with the comment "Replace Fortran 90 construct dtheta=theta because it confuses PGI acc kernels...".

  compute_gradiv, compute_divgrad and compute_gradrot become thin wrappers that copy their input and delegate to new in-place versions. Each *_inplace routine imports the metric terms it needs (USE geometry, ONLY : Ai, ne, le, de, or Av for gradrot) and wraps its loops in a data region, e.g. for compute_gradiv_inplace:

  +   ! ue and gradivu_e second dimension is not always llm so use the bounds explicitly
  +   !$acc data present( ue_gradivu_e(:,llb:lle), Ai(:), ne(:,:), le(:), de(:)) create(divu_i(:,:)) async
  +   !$acc parallel loop collapse(2) async
      DO l=llb,lle
         ...
      ENDDO
  +   !$acc end parallel loop
      ...
  +   !$acc end data

  (compute_gradrot_inplace additionally notes that its result array is declared present/copy rather than copyout so that outputs can be compared even where the code reads uninitialised values.)
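A self-contained sketch (not from the commit) of the directive pattern used in the *_inplace routines above: an "!$acc data" region holding the persistent array and a scratch array, with each loop nest offloaded by "!$acc parallel loop collapse(2)". The stencil here is a stand-in, not the model's divergence operator; the program also runs correctly when compiled without OpenACC.

      PROGRAM acc_pattern_sketch
        IMPLICIT NONE
        INTEGER, PARAMETER :: n = 64, llm = 8
        REAL(8) :: u(n,llm), div(n,llm)
        INTEGER :: ij, l

        u = 1.0d0
        !$acc data copy(u) create(div)
        !$acc parallel loop collapse(2)
        DO l = 1, llm
           DO ij = 2, n-1
              div(ij,l) = u(ij+1,l) - u(ij-1,l)   ! simple stencil standing in for the divergence
           END DO
        END DO
        !$acc parallel loop collapse(2)
        DO l = 1, llm
           DO ij = 2, n-1
              u(ij,l) = u(ij,l) - 0.5d0*div(ij,l) ! update in place, as the *_inplace routines do
           END DO
        END DO
        !$acc end data
        PRINT *, 'u(2,1) =', u(2,1)
      END PROGRAM acc_pattern_sketch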
codes/icosagcm/trunk/src/dissip/sponge.f90
(r548 -> r953) init_sponge gains USE abort_mod and rejects an active sponge layer under OpenACC:

  +   IF (iflag_sponge > 0) THEN
  +      CALL abort_acc("iflag_sponge > 0")
  +   END IF
codes/icosagcm/trunk/src/dynamics/caldyn_gcm.F90
(r899 -> r953) init_caldyn (USE abort_mod) only accepts the hydrostatic, non-dysl configuration under OpenACC:

  +   IF (.NOT. hydrostatic) THEN
  +      CALL abort_acc("hydrostatic /= .TRUE.")
  +   END IF
      ...
  +   ! dysl is not supported with openACC
  +   IF (dysl_geopot .OR. dysl_caldyn_fast .OR. dysl_slow_hydro .OR. dysl_pvort_only .OR. dysl_caldyn_coriolis) THEN
  +      CALL abort_acc("dysl not supported")
  +   END IF

  allocate_caldyn now mirrors the caldyn work fields on the device:

  -   CALL allocate_field(f_qu,field_u,type_real,llm)
  +   CALL allocate_field(f_qu,field_u,type_real,llm, ondevice=.TRUE.)
      (and likewise for f_qv, f_pk and f_wwuu)
codes/icosagcm/trunk/src/dynamics/caldyn_hevi.F90
(r933 -> r953) HEVI_scheme (USE abort_mod) guards every branch that is not yet ported, i.e. the non-mass vertical coordinate and all non-hydrostatic paths:

  +   CALL abort_acc("HEVI_scheme/!eta_mass")
  +   CALL abort_acc("HEVI_scheme/!hydrostatic")      (at each of the non-hydrostatic branches)

  and the zeroing of the fast tendency is moved onto the device:

      IF(hydrostatic) THEN
  -      du(:,:)=0.
  +      !$acc kernels present(du) async
  +      du(:,:)=0.0d0
  +      !$acc end kernels
         CALL compute_geopot(mass,theta, ps,pk,geopot)
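A minimal sketch (not from the commit) of the whole-array zeroing shown above: Fortran array syntax inside an "!$acc kernels" region, which lets the compiler generate the device loop. Array size and program name are illustrative; the program also runs when compiled without OpenACC.

      PROGRAM kernels_zero_sketch
        IMPLICIT NONE
        REAL(8) :: du(100, 20)
        !$acc data create(du)
        !$acc kernels present(du)
        du(:,:) = 0.0d0        ! array syntax; "kernels" lets the compiler build the loop nest
        !$acc end kernels
        !$acc update host(du)  ! bring the zeroed array back for the host-side check
        !$acc end data
        PRINT *, 'sum =', SUM(du)
      END PROGRAM kernels_zero_sketch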
codes/icosagcm/trunk/src/dynamics/caldyn_kernels_base.F90
(r899 -> r953) caldyn_kernels_base.F90 (USE abort_mod): compute_geopot and compute_caldyn_vert are offloaded.

  compute_geopot:
  - the dysl variant and the Boussinesq, thermo_entropy and thermo_moist branches are guarded with abort_acc ("HEVI_scheme/!dysl_geopot", "HEVI_scheme/boussinesq", "HEVI_scheme/thermo_entropy", "HEVI_scheme/thermo_moist"); only the default thermo_theta path runs on the GPU;
  - the whole computation is enclosed in

  +   !$acc data present(rhodz(:,:), ps(:), pk(:,:), geopot(:,:), theta(:,:,:)) async

  - the horizontal loops get "!$acc parallel loop async", while the vertical recurrences use "!$acc kernels async" with the explanation

  +   ! We use kernels here instead of "loop seq" + "loop gang vector"
  +   ! to be sure the code really abides the standard. In practice,
  +   ! it seems like the compiler interchanges the loops.

  - for thermo_theta the SELECT CASE is moved outside the level loop so that the whole l/ij nest becomes one kernel:

  +   !$acc kernels async
      DO l = 1,llm
         !DIR$ SIMD
         DO ij=ij_omp_begin_ext,ij_omp_end_ext
            p_ik = pk(ij,l)
            exner_ik = cpp * (p_ik/preff) ** kappa
            pk(ij,l) = exner_ik
            ! specific volume v = kappa*theta*pi/p = dphi/g/rhodz
            geopot(ij,l+1) = geopot(ij,l) + (g*kappa)*rhodz(ij,l)*theta(ij,l,1)*exner_ik/p_ik
         ENDDO
      ENDDO
  +   !$acc end kernels

  compute_caldyn_vert:
  - imports bp from disvert_mod and opens a data region,

  +   !$acc data async &
  +   !$acc present(rhodz(:,:), u(:,:), wwuu(:,:), wflux(:,:), dps(:), convm(:,:), du(:,:), dtheta_rhodz(:,:,:), theta(:,:,:), bp(:))

  - the top-to-bottom accumulation of the mass-flux convergence is wrapped in "!$acc kernels async" (same compiler remark as above), and the dps, wflux, theta-transport, vertical-transport and du loops each get "!$acc parallel loop ... async", with the nqdyn/l/ij nest split into two collapse(3) loops.
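A self-contained sketch (not from the commit) of the vertical-recurrence pattern discussed above: the level loop carries a dependency (each level needs the one below), so it cannot be collapsed; "!$acc kernels" is used and the compiler is left to parallelise only the independent horizontal index. Sizes and names are illustrative; the program also runs without OpenACC.

      PROGRAM vertical_recurrence_sketch
        IMPLICIT NONE
        INTEGER, PARAMETER :: ncol = 1000, llm = 30
        REAL(8), PARAMETER :: g = 9.81d0
        REAL(8) :: rhodz(ncol,llm), geopot(ncol,llm+1)
        INTEGER :: ij, l

        rhodz = 1.0d0
        geopot(:,1) = 0.0d0
        !$acc data copyin(rhodz) copy(geopot)
        !$acc kernels
        DO l = 1, llm              ! sequential: geopot(:,l+1) depends on geopot(:,l)
           DO ij = 1, ncol         ! independent: parallelised by the compiler
              geopot(ij,l+1) = geopot(ij,l) + g*rhodz(ij,l)
           END DO
        END DO
        !$acc end kernels
        !$acc end data
        PRINT *, 'top geopotential =', geopot(1,llm+1)   ! expect llm*g
      END PROGRAM vertical_recurrence_sketch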
codes/icosagcm/trunk/src/dynamics/caldyn_kernels_hevi.F90
(r899 -> r953) caldyn_kernels_hevi.F90 (USE abort_mod): the hydrostatic HEVI kernels are offloaded; every other variant is guarded.

  compute_theta imports mass_dak and mass_dbk from disvert_mod, opens

  +   !$acc data async &
  +   !$acc present(ps(:), theta_rhodz(:,:,:), rhodz(:,:), theta(:,:,:), mass_dak(:), mass_dbk(:))

  offloads the mass loop with "!$acc parallel loop collapse(2) async", and reorders the tracer nest with iq outermost so it can be offloaded with collapse(3).

  compute_pvort_only imports Av, Riv2 and fv from geometry, aborts in the dysl variant ("HEVI_scheme/dysl_pvort_only"), and splits the vorticity computation into two "!$acc parallel loop collapse(2) async" nests inside a data region.

  compute_caldyn_fast: the dysl variant aborts ("HEVI_scheme/dysl_caldyn_fast"). The default configuration (.NOT. boussinesq and caldyn_thermo /= thermo_moist) is fully ported: the Bernoulli term is inlined through a macro and the velocity update becomes a single collapsed kernel,

  +   #define BERNI(ij) .5*(geopot(ij,l)+geopot(ij,l+1))
  +   !$acc parallel loop collapse(2) async
      DO l=ll_begin,ll_end
         DO ij=ij_begin,ij_end
            due_right = 0.5*(theta(ij,l,1)+theta(ij+t_right,l,1))*(pk(ij+t_right,l)-pk(ij,l)) + BERNI(ij+t_right)-BERNI(ij)
            ... (same for lup and ldown) ...
            u(ij+u_right,l) = u(ij+u_right,l) + tau*du(ij+u_right,l)
         END DO
      END DO

  while the Boussinesq, entropy and moist variants stay on the CPU behind CALL abort_acc("HEVI_scheme/compute_caldyn_fast"), with dynamico_abort("Case already dealt with") for the branches that can no longer be reached.

  compute_caldyn_Coriolis imports Ai and wee from geometry; the theta-flux scratch array becomes level-dependent (FTHETA(ij) is now Ftheta(ij,l), created on the device); the flux, divergence and TRiSK loops are split into separate "!$acc parallel loop collapse(2) ... async" nests, for both the energy- and enstrophy-conserving branches.

  compute_caldyn_slow_hydro imports Ai and le_de from geometry; the per-level scratch berni1(ij) is replaced by berni(ij,l) (created on the device, BERNI(ij) now expands to berni(ij,l)), and the mass-flux, kinetic-energy and du loops each become "!$acc parallel loop collapse(2) async" inside a data region.
codes/icosagcm/trunk/src/kernels/advect_horiz.k90
r599 r953 1 1 !-------------------------------------------------------------------------- 2 2 !---------------------------- advect_horiz ---------------------------------- 3 4 !$acc data create(qflux(:,:)) present(qi(:,:), cc(:,:,:), gradq3d(:,:,:), mass(:,:), hfluxt(:,:), Ai(:), xyz_i(:,:)) async 5 3 6 ! evaluate tracer field at point cc using piecewise linear reconstruction 4 7 ! q(cc)= q0 + gradq.(cc-xyz_i) with xi centroid of hexagon 5 8 ! sign*hfluxt>0 iff outgoing 9 !$acc parallel loop collapse(2) async 6 10 DO l = ll_begin, ll_end 7 11 !DIR$ SIMD 8 12 DO ij=ij_begin_ext, ij_end_ext 9 13 IF(ne_right*hfluxt(ij+u_right,l)>0.) THEN 10 qe = qi(ij,l) 11 qe = qe + (cc(ij+u_right,l,1)-xyz_i(ij,1))*gradq3d(ij,l,1) 12 qe = qe + (cc(ij+u_right,l,2)-xyz_i(ij,2))*gradq3d(ij,l,2) 13 qe = qe + (cc(ij+u_right,l,3)-xyz_i(ij,3))*gradq3d(ij,l,3) 14 ij_tmp = ij 14 15 ELSE 15 qe = qi(ij+t_right,l) 16 qe = qe + (cc(ij+u_right,l,1)-xyz_i(ij+t_right,1))*gradq3d(ij+t_right,l,1) 17 qe = qe + (cc(ij+u_right,l,2)-xyz_i(ij+t_right,2))*gradq3d(ij+t_right,l,2) 18 qe = qe + (cc(ij+u_right,l,3)-xyz_i(ij+t_right,3))*gradq3d(ij+t_right,l,3) 16 ij_tmp = ij+t_right 19 17 END IF 18 19 qe = qi(ij_tmp,l) 20 qe = qe + (cc(ij+u_right,l,1)-xyz_i(ij_tmp,1))*gradq3d(ij_tmp,l,1) 21 qe = qe + (cc(ij+u_right,l,2)-xyz_i(ij_tmp,2))*gradq3d(ij_tmp,l,2) 22 qe = qe + (cc(ij+u_right,l,3)-xyz_i(ij_tmp,3))*gradq3d(ij_tmp,l,3) 23 20 24 qflux(ij+u_right,l) = hfluxt(ij+u_right,l)*qe 21 IF(diagflux_on) qfluxt(ij+u_right,l) = qfluxt(ij+u_right,l)+qflux(ij+u_right,l) 25 22 26 IF(ne_lup*hfluxt(ij+u_lup,l)>0.) THEN 23 qe = qi(ij,l) 24 qe = qe + (cc(ij+u_lup,l,1)-xyz_i(ij,1))*gradq3d(ij,l,1) 25 qe = qe + (cc(ij+u_lup,l,2)-xyz_i(ij,2))*gradq3d(ij,l,2) 26 qe = qe + (cc(ij+u_lup,l,3)-xyz_i(ij,3))*gradq3d(ij,l,3) 27 ij_tmp = ij 27 28 ELSE 28 qe = qi(ij+t_lup,l) 29 qe = qe + (cc(ij+u_lup,l,1)-xyz_i(ij+t_lup,1))*gradq3d(ij+t_lup,l,1) 30 qe = qe + (cc(ij+u_lup,l,2)-xyz_i(ij+t_lup,2))*gradq3d(ij+t_lup,l,2) 31 qe = qe + (cc(ij+u_lup,l,3)-xyz_i(ij+t_lup,3))*gradq3d(ij+t_lup,l,3) 29 ij_tmp = ij+t_lup 32 30 END IF 31 32 qe = qi(ij_tmp,l) 33 qe = qe + (cc(ij+u_lup,l,1)-xyz_i(ij_tmp,1))*gradq3d(ij_tmp,l,1) 34 qe = qe + (cc(ij+u_lup,l,2)-xyz_i(ij_tmp,2))*gradq3d(ij_tmp,l,2) 35 qe = qe + (cc(ij+u_lup,l,3)-xyz_i(ij_tmp,3))*gradq3d(ij_tmp,l,3) 36 33 37 qflux(ij+u_lup,l) = hfluxt(ij+u_lup,l)*qe 34 IF(diagflux_on) qfluxt(ij+u_lup,l) = qfluxt(ij+u_lup,l)+qflux(ij+u_lup,l) 38 35 39 IF(ne_ldown*hfluxt(ij+u_ldown,l)>0.) 
THEN 36 qe = qi(ij,l) 37 qe = qe + (cc(ij+u_ldown,l,1)-xyz_i(ij,1))*gradq3d(ij,l,1) 38 qe = qe + (cc(ij+u_ldown,l,2)-xyz_i(ij,2))*gradq3d(ij,l,2) 39 qe = qe + (cc(ij+u_ldown,l,3)-xyz_i(ij,3))*gradq3d(ij,l,3) 40 ij_tmp = ij 40 41 ELSE 41 qe = qi(ij+t_ldown,l) 42 qe = qe + (cc(ij+u_ldown,l,1)-xyz_i(ij+t_ldown,1))*gradq3d(ij+t_ldown,l,1) 43 qe = qe + (cc(ij+u_ldown,l,2)-xyz_i(ij+t_ldown,2))*gradq3d(ij+t_ldown,l,2) 44 qe = qe + (cc(ij+u_ldown,l,3)-xyz_i(ij+t_ldown,3))*gradq3d(ij+t_ldown,l,3) 42 ij_tmp = ij+t_ldown 45 43 END IF 44 45 qe = qi(ij_tmp,l) 46 qe = qe + (cc(ij+u_ldown,l,1)-xyz_i(ij_tmp,1))*gradq3d(ij_tmp,l,1) 47 qe = qe + (cc(ij+u_ldown,l,2)-xyz_i(ij_tmp,2))*gradq3d(ij_tmp,l,2) 48 qe = qe + (cc(ij+u_ldown,l,3)-xyz_i(ij_tmp,3))*gradq3d(ij_tmp,l,3) 49 46 50 qflux(ij+u_ldown,l) = hfluxt(ij+u_ldown,l)*qe 47 IF(diagflux_on) qfluxt(ij+u_ldown,l) = qfluxt(ij+u_ldown,l)+qflux(ij+u_ldown,l)48 51 END DO 49 52 END DO 53 54 IF(diagflux_on) THEN 55 !$acc parallel loop collapse(2) copy(qfluxt(:,:)) async 56 DO l = ll_begin, ll_end 57 !DIR$ SIMD 58 DO ij=ij_begin_ext, ij_end_ext 59 qfluxt(ij+u_right,l) = qfluxt(ij+u_right,l)+qflux(ij+u_right,l) 60 qfluxt(ij+u_lup,l) = qfluxt(ij+u_lup,l)+qflux(ij+u_lup,l) 61 qfluxt(ij+u_ldown,l) = qfluxt(ij+u_ldown,l)+qflux(ij+u_ldown,l) 62 END DO 63 END DO 64 END IF 65 50 66 ! update q and, if update_mass, update mass 67 !$acc parallel loop collapse(2) async 51 68 DO l = ll_begin, ll_end 52 69 !DIR$ SIMD … … 71 88 END DO 72 89 END DO 90 91 !$acc end data 92 73 93 !---------------------------- advect_horiz ---------------------------------- 74 94 !-------------------------------------------------------------------------- -
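The rewrite of advect_horiz.k90 above replaces three duplicated IF/ELSE reconstruction blocks by a donor-cell index (ij_tmp), so each edge keeps a single reconstruction expression, and moves the optional qfluxt accumulation into its own loop. A standalone sketch of the donor-cell idea on a 1-D row of edges; all names and shapes are illustrative, not the model's:

SUBROUTINE upwind_edge_value(n, flux, qi, gradq, x, xe, qe)
  IMPLICIT NONE
  INTEGER, INTENT(IN)  :: n
  REAL(8), INTENT(IN)  :: flux(n)        ! signed mass flux through edge ij -> ij+1
  REAL(8), INTENT(IN)  :: qi(n+1)        ! cell-mean tracer
  REAL(8), INTENT(IN)  :: gradq(n+1)     ! reconstructed slope in each cell
  REAL(8), INTENT(IN)  :: x(n+1), xe(n)  ! cell centres and edge crossing points
  REAL(8), INTENT(OUT) :: qe(n)          ! upwind tracer value at each edge
  INTEGER :: ij, up
  !$acc parallel loop private(up) copyin(flux, qi, gradq, x, xe) copyout(qe)
  DO ij = 1, n
     up = MERGE(ij, ij+1, flux(ij) > 0.d0)   ! donor cell, no duplicated body
     qe(ij) = qi(up) + (xe(ij) - x(up)) * gradq(up)
  END DO
END SUBROUTINE upwind_edge_value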
codes/icosagcm/trunk/src/output/xios_mod.F90
r904 r953 1120 1120 CHARACTER(LEN=*),INTENT(IN) :: name 1121 1121 REAL(rstd), INTENT(OUT) :: field 1122 field=0 1122 1123 END SUBROUTINE 1123 1124 -
codes/icosagcm/trunk/src/parallel/mpipara.F90
r892 r953 14 14 INTEGER,SAVE :: id_mpi ! id for profiling 15 15 16 INTEGER, SAVE :: device_id 17 16 18 INTERFACE allocate_mpi_buffer 17 19 MODULE PROCEDURE allocate_mpi_buffer_r2, allocate_mpi_buffer_r3,allocate_mpi_buffer_r4 … … 46 48 USE xios 47 49 #endif 50 USE abort_mod 48 51 IMPLICIT NONE 49 52 CHARACTER(LEN=256) :: required_mode_str … … 75 78 END SELECT 76 79 80 IF (required_mode==MPI_THREAD_SERIALIZED .OR. required_mode==MPI_THREAD_MULTIPLE) THEN 81 CALL abort_acc("mpi_threading_mode /= 'single' .AND. mpi_threading_mode /= 'funneled'") 82 ENDIF 77 83 78 84 IF (required_mode==MPI_THREAD_SINGLE) PRINT*,'MPI_INIT_THREAD : MPI_SINGLE_THREAD required' … … 120 126 ENDIF 121 127 128 129 #ifdef _OPENACC 130 device_id = setDevice(mpi_size, mpi_rank) 131 PRINT *, 'GPU device ', device_id 132 #else 133 device_id = -1 134 #endif 135 122 136 END SUBROUTINE init_mpipara 123 137 … … 233 247 END SUBROUTINE free_mpi_buffer_r4 234 248 249 #ifdef _OPENACC 250 FUNCTION setDevice(nprocs, myrank) 251 use iso_c_binding 252 use openacc 253 USE mpi_mod 254 implicit none 255 256 interface 257 function gethostid() bind(C) 258 use iso_c_binding 259 integer(C_INT) :: gethostid 260 end function gethostid 261 end interface 262 263 integer, intent(in) :: nprocs, myrank 264 integer :: hostids(nprocs), localprocs(nprocs) 265 integer :: hostid, ierr, numdev, mydev, i, numlocal 266 integer :: setDevice 267 268 ! get the hostids so we can determine what other processes are on this node 269 hostid = gethostid() 270 call mpi_allgather(hostid,1,MPI_INTEGER,hostids,1,MPI_INTEGER, MPI_COMM_WORLD, ierr) 271 272 ! determine which processors are on this node 273 numlocal = 0 274 localprocs(:) = 0 275 do i = 1, nprocs 276 if (hostid == hostids(i)) then 277 localprocs(i) = numlocal 278 numlocal = numlocal + 1 279 end if 280 end do 281 282 ! get the number of device on this node 283 numdev = acc_get_num_devices(ACC_DEVICE_NVIDIA) 284 285 if (numdev < 1) then 286 print *, "Error: there are no devices available on this host. ABORTING", myrank 287 stop 288 end if 289 290 ! print a warning if the number of devices is less than the number of processes on this node. Having multiple processes share a devices is not recommended 291 if (numdev < numlocal) then 292 if (localprocs(myrank+1) == 1) then 293 ! print warning message only once per node 294 print *, "WARNING: the number of process is greater than the number of GPUs.", myrank 295 end if 296 mydev = mod(localprocs(myrank+1), numdev) 297 else 298 mydev = localprocs(myrank+1) 299 end if 300 301 call acc_set_device_num(mydev,ACC_DEVICE_NVIDIA) 302 call acc_init(ACC_DEVICE_NVIDIA) 303 setDevice = acc_get_device_num(ACC_DEVICE_NVIDIA) 304 END FUNCTION setDevice 305 306 #endif 307 308 309 235 310 END MODULE mpipara -
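The setDevice helper above derives the node-local rank from gethostid plus MPI_ALLGATHER before binding each rank to a GPU. An alternative, not used by this changeset, is MPI-3's MPI_Comm_split_type, which avoids the C interoperability. A sketch, assuming an MPI-3 library and the OpenACC runtime API with NVIDIA devices as above:

SUBROUTINE bind_device_to_rank(mydev)
  USE mpi
  USE openacc
  IMPLICIT NONE
  INTEGER, INTENT(OUT) :: mydev
  INTEGER :: node_comm, node_rank, numdev, ierr
  ! all ranks sharing a node end up in the same communicator
  CALL MPI_Comm_split_type(MPI_COMM_WORLD, MPI_COMM_TYPE_SHARED, 0, &
                           MPI_INFO_NULL, node_comm, ierr)
  CALL MPI_Comm_rank(node_comm, node_rank, ierr)
  numdev = acc_get_num_devices(acc_device_nvidia)
  IF (numdev < 1) CALL MPI_Abort(MPI_COMM_WORLD, 1, ierr)
  mydev = MOD(node_rank, numdev)           ! round-robin local ranks over GPUs
  CALL acc_set_device_num(mydev, acc_device_nvidia)
  CALL MPI_Comm_free(node_comm, ierr)
END SUBROUTINE bind_device_to_rank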
codes/icosagcm/trunk/src/parallel/transfert_mpi.f90
r901 r953 5 5 6 6 TYPE array 7 INTEGER,POINTER :: value(:) 8 INTEGER,POINTER :: sign(:) 7 INTEGER,POINTER :: value(:)=>null() 8 INTEGER,POINTER :: sign(:)=>null() 9 9 INTEGER :: domain 10 10 INTEGER :: rank … … 13 13 INTEGER :: offset 14 14 INTEGER :: ireq 15 INTEGER,POINTER :: buffer(:) 16 REAL,POINTER :: buffer_r(:) 17 INTEGER,POINTER :: src_value(:) 15 INTEGER,POINTER :: buffer(:)=>null() 16 REAL,POINTER :: buffer_r(:)=>null() 17 INTEGER,POINTER :: src_value(:)=>null() 18 18 END TYPE array 19 19 … … 77 77 LOGICAL :: open ! for debug 78 78 INTEGER :: number 79 LOGICAL :: ondevice=.false. !<Ready to transfer ondevice field 79 80 END TYPE t_message 80 81 … … 942 943 !$OMP BARRIER 943 944 !$OMP MASTER 945 !init off-device 946 message%ondevice=.false. 947 944 948 IF(PRESENT(name)) THEN 945 949 message%name = TRIM(name) … … 1058 1062 TYPE(t_message) :: message 1059 1063 1060 INTEGER :: ireq 1064 INTEGER :: ireq, ibuff 1061 1065 1062 1066 !$OMP BARRIER … … 1084 1088 ENDDO 1085 1089 1086 ENDIF 1090 ENDIF 1087 1091 ENDIF 1088 1092 1093 !deallocate device data if ondevice 1094 if(message%ondevice) then 1095 do ireq=1, ndomain 1096 do ibuff=1,message%request(ireq)%nsend 1097 !$acc exit data delete(message%request(ireq)%send(ibuff)%buffer(:)) 1098 !$acc exit data delete(message%request(ireq)%send(ibuff)%buffer_r(:)) 1099 !$acc exit data delete(message%request(ireq)%send(ibuff)%sign(:)) 1100 !$acc exit data delete(message%request(ireq)%send(ibuff)%src_value(:)) 1101 !$acc exit data delete(message%request(ireq)%send(ibuff)%value(:)) 1102 end do 1103 do ibuff=1,message%request(ireq)%nrecv 1104 !$acc exit data delete(message%request(ireq)%recv(ibuff)%buffer(:)) 1105 !$acc exit data delete(message%request(ireq)%recv(ibuff)%buffer_r(:)) 1106 !$acc exit data delete(message%request(ireq)%recv(ibuff)%sign(:)) 1107 !$acc exit data delete(message%request(ireq)%recv(ibuff)%src_value(:)) 1108 !$acc exit data delete(message%request(ireq)%recv(ibuff)%value(:)) 1109 end do 1110 end do 1111 DO ireq=1,message%nreq 1112 !$acc exit data delete(message%buffers(ireq)%r) 1113 ENDDO 1114 message%ondevice=.false. 1115 end if 1116 1089 1117 DEALLOCATE(message%mpi_req) 1090 1118 DEALLOCATE(message%buffers) … … 1120 1148 1121 1149 1150 !!!Update buffers on device for 'message' 1151 !!! does create_device_message when not already ondevice 1152 SUBROUTINE update_device_message_mpi(message) 1153 USE domain_mod 1154 IMPLICIT NONE 1155 TYPE(t_message), intent(inout) :: message 1156 INTEGER :: ireq, ibuff 1157 1158 !if(.not. 
message%ondevice) call create_device_message_mpi(message) 1159 1160 do ireq=1, ndomain 1161 do ibuff=1,message%request(ireq)%nsend 1162 !device buffers updated even if pointers not attached : 1163 !non allocated buffers in 'message' must be set to NULL() 1164 !$acc enter data copyin(message%request(ireq)%send(ibuff)%buffer(:)) async 1165 !$acc enter data copyin(message%request(ireq)%send(ibuff)%buffer_r(:)) async 1166 !$acc enter data copyin(message%request(ireq)%send(ibuff)%sign(:)) async 1167 !$acc enter data copyin(message%request(ireq)%send(ibuff)%src_value(:)) async 1168 !$acc enter data copyin(message%request(ireq)%send(ibuff)%value(:)) async 1169 end do 1170 do ibuff=1,message%request(ireq)%nrecv 1171 !$acc enter data copyin(message%request(ireq)%recv(ibuff)%buffer(:)) async 1172 !$acc enter data copyin(message%request(ireq)%recv(ibuff)%buffer_r(:)) async 1173 !$acc enter data copyin(message%request(ireq)%recv(ibuff)%sign(:)) async 1174 !$acc enter data copyin(message%request(ireq)%recv(ibuff)%src_value(:)) async 1175 !$acc enter data copyin(message%request(ireq)%recv(ibuff)%value(:)) async 1176 end do 1177 end do 1178 DO ireq=1,message%nreq 1179 !$acc enter data copyin(message%buffers(ireq)%r) async 1180 ENDDO 1181 message%ondevice=.true. 1182 END SUBROUTINE 1183 1184 !TODO : add openacc with multiple process 1122 1185 SUBROUTINE send_message_mpi(field,message) 1123 1186 USE abort_mod … … 1129 1192 USE omp_para 1130 1193 USE trace 1194 USE abort_mod 1131 1195 IMPLICIT NONE 1132 1196 TYPE(t_field),POINTER :: field(:) … … 1145 1209 INTEGER :: dim3,dim4,d3,d4 1146 1210 INTEGER,POINTER :: src_value(:) 1147 INTEGER :: offset,msize, rank1211 INTEGER :: offset,msize,moffset,rank 1148 1212 INTEGER :: lbegin, lend 1149 1213 INTEGER :: max_req … … 1152 1216 1153 1217 CALL enter_profile(id_mpi) 1218 1219 !Prepare 'message' for on-device copies if field is on device 1220 if( field(1)%ondevice .AND. .NOT. 
message%ondevice ) call update_device_message_mpi(message) 1154 1221 1155 1222 !$OMP BARRIER … … 1195 1262 buffer_r=>message%buffers(ireq)%r 1196 1263 offset=send%offset 1197 1198 DO n=1,send%size 1264 msize=send%size 1265 !$acc parallel loop default(present) async if (field(ind)%ondevice) 1266 DO n=1,msize 1199 1267 buffer_r(offset+n)=rval2d(value(n)) 1200 1268 ENDDO 1201 1269 1202 1270 IF (mpi_threading_mode==MPI_THREAD_SERIALIZED) THEN 1271 CALL abort_acc("mpi_threading_mode==MPI_THREAD_SERIALIZED") 1203 1272 !$OMP CRITICAL 1204 1273 CALL MPI_ISEND(buffer_r,send%size,MPI_REAL8,send%rank, & … … 1206 1275 !$OMP END CRITICAL 1207 1276 ELSE IF (mpi_threading_mode==MPI_THREAD_MULTIPLE) THEN 1277 CALL abort_acc("mpi_threading_mode==MPI_THREAD_MULTIPLE") 1208 1278 CALL MPI_ISEND(buffer_r,send%size,MPI_REAL8,send%rank, & 1209 1279 send%tag+1000*message%number,comm_icosa, message%mpi_req(ireq),ierr) … … 1228 1298 src_rval2d=>field(recv%domain)%rval2d 1229 1299 sgn=>recv%sign 1230 1231 DO n=1,recv%size 1300 msize=recv%size 1301 !$acc parallel loop default(present) async if (field(ind)%ondevice) 1302 DO n=1,msize 1232 1303 rval2d(value(n))=src_rval2d(src_value(n))*sgn(n) 1233 1304 ENDDO … … 1239 1310 buffer_r=>message%buffers(ireq)%r 1240 1311 IF (mpi_threading_mode==MPI_THREAD_SERIALIZED) THEN 1312 CALL abort_acc("mpi_threading_mode==MPI_THREAD_SERIALIZED") 1241 1313 !$OMP CRITICAL 1242 1314 CALL MPI_IRECV(buffer_r,recv%size,MPI_REAL8,recv%rank, & … … 1244 1316 !$OMP END CRITICAL 1245 1317 ELSE IF (mpi_threading_mode==MPI_THREAD_MULTIPLE) THEN 1318 CALL abort_acc("mpi_threading_mode==MPI_THREAD_MULTIPLE") 1246 1319 CALL MPI_IRECV(buffer_r,recv%size,MPI_REAL8,recv%rank, & 1247 1320 recv%tag+1000*message%number,comm_icosa, message%mpi_req(ireq),ierr) … … 1278 1351 buffer_r=>message%buffers(ireq)%r 1279 1352 1280 offset=send%offset*dim3 + (lbegin-1)*send%size1281 1353 msize=send%size 1354 moffset=send%offset 1282 1355 CALL trace_start("copy_to_buffer") 1283 1356 1357 !$acc parallel loop default(present) async if (field(ind)%ondevice) 1284 1358 DO d3=lbegin,lend 1285 DO n=1,send%size 1359 offset=moffset*dim3 + (d3-1)*msize 1360 !$acc loop 1361 DO n=1,msize 1286 1362 buffer_r(n+offset)=rval3d(value(n),d3) 1287 1363 ENDDO 1288 offset=offset+send%size1289 1364 ENDDO 1290 1365 CALL trace_end("copy_to_buffer") … … 1296 1371 IF (is_omp_level_master) THEN 1297 1372 IF (mpi_threading_mode==MPI_THREAD_SERIALIZED) THEN 1373 CALL abort_acc("mpi_threading_mode==MPI_THREAD_SERIALIZED") 1298 1374 !$OMP CRITICAL 1299 1375 CALL MPI_ISEND(buffer_r,send%size*dim3,MPI_REAL8,send%rank, & … … 1301 1377 !$OMP END CRITICAL 1302 1378 ELSE IF (mpi_threading_mode==MPI_THREAD_MULTIPLE) THEN 1379 CALL abort_acc("mpi_threading_mode==MPI_THREAD_MULTIPLE") 1303 1380 CALL MPI_ISEND(buffer_r,send%size*dim3,MPI_REAL8,send%rank, & 1304 1381 send%tag+1000*message%number,comm_icosa, message%mpi_req(ireq),ierr) … … 1335 1412 src_rval3d=>field(recv%domain)%rval3d 1336 1413 sgn=>recv%sign 1414 msize=recv%size 1337 1415 1338 1416 CALL trace_start("copy_data") 1339 1417 !$acc parallel loop collapse(2) default(present) async if (field(ind)%ondevice) 1340 1418 DO d3=lbegin,lend 1341 DO n=1, recv%size1419 DO n=1,msize 1342 1420 rval3d(value(n),d3)=src_rval3d(src_value(n),d3)*sgn(n) 1343 1421 ENDDO … … 1351 1429 IF (is_omp_level_master) THEN 1352 1430 IF (mpi_threading_mode==MPI_THREAD_SERIALIZED) THEN 1431 CALL abort_acc("mpi_threading_mode==MPI_THREAD_SERIALIZED") 1353 1432 !$OMP CRITICAL 1354 1433 CALL 
MPI_IRECV(buffer_r,recv%size*dim3,MPI_REAL8,recv%rank, & … … 1356 1435 !$OMP END CRITICAL 1357 1436 ELSE IF (mpi_threading_mode==MPI_THREAD_MULTIPLE) THEN 1437 CALL abort_acc("mpi_threading_mode==MPI_THREAD_MULTIPLE") 1358 1438 CALL MPI_IRECV(buffer_r,recv%size*dim3,MPI_REAL8,recv%rank, & 1359 1439 recv%tag+1000*message%number,comm_icosa, message%mpi_req(ireq),ierr) … … 1390 1470 ireq=send%ireq 1391 1471 buffer_r=>message%buffers(ireq)%r 1472 msize=send%size 1473 moffset=send%offset 1392 1474 1393 1475 CALL trace_start("copy_to_buffer") 1476 !$acc parallel loop default(present) collapse(2) async if (field(ind)%ondevice) 1394 1477 DO d4=1,dim4 1395 offset=send%offset*dim3*dim4 + dim3*send%size*(d4-1) + &1396 (lbegin-1)*send%size1397 1398 1478 DO d3=lbegin,lend 1399 DO n=1,send%size 1479 offset=moffset*dim3*dim4 + dim3*msize*(d4-1) + & 1480 (d3-1)*msize 1481 !$acc loop 1482 DO n=1,msize 1400 1483 buffer_r(n+offset)=rval4d(value(n),d3,d4) 1401 1484 ENDDO 1402 offset=offset+send%size1403 1485 ENDDO 1404 1486 ENDDO … … 1411 1493 IF (is_omp_level_master) THEN 1412 1494 IF (mpi_threading_mode==MPI_THREAD_SERIALIZED) THEN 1495 CALL abort_acc("mpi_threading_mode==MPI_THREAD_SERIALIZED") 1413 1496 !$OMP CRITICAL 1414 1497 CALL MPI_ISEND(buffer_r,send%size*dim3*dim4,MPI_REAL8,send%rank, & … … 1416 1499 !$OMP END CRITICAL 1417 1500 ELSE IF (mpi_threading_mode==MPI_THREAD_MULTIPLE) THEN 1501 CALL abort_acc("mpi_threading_mode==MPI_THREAD_MULTIPLE") 1418 1502 CALL MPI_ISEND(buffer_r,send%size*dim3*dim4,MPI_REAL8,send%rank, & 1419 1503 send%tag+1000*message%number,comm_icosa, message%mpi_req(ireq),ierr) … … 1451 1535 src_rval4d=>field(recv%domain)%rval4d 1452 1536 sgn=>recv%sign 1453 1537 msize=recv%size 1454 1538 CALL trace_start("copy_data") 1539 1540 !$acc parallel loop collapse(3) default(present) async if (field(ind)%ondevice) 1455 1541 DO d4=1,dim4 1456 1542 DO d3=lbegin,lend 1457 DO n=1, recv%size1543 DO n=1,msize 1458 1544 rval4d(value(n),d3,d4)=src_rval4d(src_value(n),d3,d4)*sgn(n) 1459 1545 ENDDO … … 1469 1555 IF (is_omp_level_master) THEN 1470 1556 IF (mpi_threading_mode==MPI_THREAD_SERIALIZED) THEN 1557 CALL abort_acc("mpi_threading_mode==MPI_THREAD_SERIALIZED") 1471 1558 !$OMP CRITICAL 1472 1559 CALL MPI_IRECV(buffer_r,recv%size*dim3*dim4,MPI_REAL8,recv%rank, & … … 1474 1561 !$OMP END CRITICAL 1475 1562 ELSE IF (mpi_threading_mode==MPI_THREAD_MULTIPLE) THEN 1563 CALL abort_acc("mpi_threading_mode==MPI_THREAD_MULTIPLE") 1476 1564 CALL MPI_IRECV(buffer_r,recv%size*dim3*dim4,MPI_REAL8,recv%rank, & 1477 1565 recv%tag+1000*message%number,comm_icosa, message%mpi_req(ireq),ierr) … … 1485 1573 1486 1574 IF (mpi_threading_mode==MPI_THREAD_FUNNELED .OR. mpi_threading_mode==MPI_THREAD_SINGLE) THEN 1575 !$acc wait 1487 1576 !$OMP BARRIER 1488 1577 !$OMP MASTER … … 1492 1581 msize=message%buffers(ireq)%size 1493 1582 rank=message%buffers(ireq)%rank 1494 CALL MPI_ISEND(buffer_r,msize,MPI_REAL8,rank,1000*message%number, & 1495 comm_icosa, message%mpi_req(ireq),ierr) 1583 ! Using the "if" clause on the "host_data" directive seems to cause a crash at compilation 1584 IF (message%ondevice) THEN 1585 !$acc host_data use_device(buffer_r) ! 
if (message%ondevice) 1586 CALL MPI_ISEND(buffer_r,msize,MPI_REAL8,rank,1000*message%number, & 1587 comm_icosa, message%mpi_req(ireq),ierr) 1588 !$acc end host_data 1589 ELSE 1590 CALL MPI_ISEND(buffer_r,msize,MPI_REAL8,rank,1000*message%number, & 1591 comm_icosa, message%mpi_req(ireq),ierr) 1592 ENDIF 1496 1593 ENDDO 1497 1594 … … 1500 1597 msize=message%buffers(ireq)%size 1501 1598 rank=message%buffers(ireq)%rank 1502 CALL MPI_IRECV(buffer_r,msize,MPI_REAL8,rank,1000*message%number, & 1503 comm_icosa, message%mpi_req(ireq),ierr) 1599 ! Using the "if" clause on the "host_data" directive seems to cause a crash at compilation 1600 IF (message%ondevice) THEN 1601 !$acc host_data use_device(buffer_r) ! if (message%ondevice) 1602 CALL MPI_IRECV(buffer_r,msize,MPI_REAL8,rank,1000*message%number, & 1603 comm_icosa, message%mpi_req(ireq),ierr) 1604 !$acc end host_data 1605 ELSE 1606 CALL MPI_IRECV(buffer_r,msize,MPI_REAL8,rank,1000*message%number, & 1607 comm_icosa, message%mpi_req(ireq),ierr) 1608 ENDIF 1504 1609 ENDDO 1505 1610 … … 1552 1657 INTEGER :: ind,n 1553 1658 INTEGER :: dim3,dim4,d3,d4,lbegin,lend 1554 INTEGER :: offset 1659 INTEGER :: offset, msize, moffset 1555 1660 1556 1661 message%open=.FALSE. … … 1586 1691 sgn=>recv%sign 1587 1692 offset=recv%offset 1588 DO n=1,recv%size 1693 msize=recv%size 1694 !$acc parallel loop default(present) async if (field(ind)%ondevice) 1695 DO n=1,msize 1589 1696 rval2d(value(n))=buffer_r(n+offset)*sgn(n) 1590 1697 ENDDO … … 1621 1728 1622 1729 CALL distrib_level(1,dim3, lbegin,lend) 1623 offset=recv%offset*dim3 + (lbegin-1)*recv%size 1730 msize=recv%size 1731 moffset=recv%offset 1624 1732 CALL trace_start("copy_from_buffer") 1625 1733 1626 1734 IF (req%vector) THEN 1735 !$acc parallel loop default(present) async if (field(ind)%ondevice) 1627 1736 DO d3=lbegin,lend 1628 DO n=1,recv%size 1737 offset=moffset*dim3 + (d3-1)*msize 1738 !$acc loop 1739 DO n=1,msize 1629 1740 rval3d(value(n),d3)=buffer_r(n+offset)*sgn(n) 1630 1741 ENDDO 1631 offset=offset+recv%size1632 1742 ENDDO 1633 1743 ELSE 1744 !$acc parallel loop default(present) async if (field(ind)%ondevice) 1634 1745 DO d3=lbegin,lend 1635 DO n=1,recv%size 1746 offset=moffset*dim3 + (d3-1)*msize 1747 !$acc loop 1748 DO n=1,msize 1636 1749 rval3d(value(n),d3)=buffer_r(n+offset) 1637 1750 ENDDO 1638 offset=offset+recv%size1639 1751 ENDDO 1640 1752 ENDIF … … 1670 1782 CALL distrib_level(1,dim3, lbegin,lend) 1671 1783 dim4=size(rval4d,3) 1784 msize=recv%size 1785 moffset=recv%offset 1672 1786 CALL trace_start("copy_from_buffer") 1787 !$acc parallel loop default(present) collapse(2) async if (field(ind)%ondevice) 1673 1788 DO d4=1,dim4 1674 offset=recv%offset*dim3*dim4 + dim3*recv%size*(d4-1) + &1675 (lbegin-1)*recv%size1676 1789 DO d3=lbegin,lend 1677 DO n=1,recv%size 1790 offset=moffset*dim3*dim4 + dim3*msize*(d4-1) + & 1791 (d3-1)*msize 1792 !$acc loop 1793 DO n=1,msize 1678 1794 rval4d(value(n),d3,d4)=buffer_r(n+offset)*sgn(n) 1679 1795 ENDDO 1680 offset=offset+recv%size1681 1796 ENDDO 1682 1797 ENDDO … … 1697 1812 ! CALL trace_end("wait_message_mpi") 1698 1813 !$OMP BARRIER 1699 1814 1700 1815 CALL exit_profile(id_mpi) 1701 1816 -
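The send/recv hunks above pass device-resident halo buffers straight to MPI through "host_data use_device", which requires a GPU-aware (CUDA-aware) MPI library; the comment in the diff also notes that an "if" clause on host_data crashed the compiler, hence the explicit IF/ELSE around it. A minimal sketch of the pattern with an illustrative buffer, assumed to be already present on the device:

SUBROUTINE isend_device_buffer(buf, n, dest, tag, req)
  USE mpi
  IMPLICIT NONE
  INTEGER, INTENT(IN)    :: n, dest, tag
  REAL(8), INTENT(INOUT) :: buf(n)     ! already created on the device
  INTEGER, INTENT(OUT)   :: req
  INTEGER :: ierr
  !$acc wait                            ! make sure the packing kernels have finished
  !$acc host_data use_device(buf)
  ! inside host_data, 'buf' resolves to its device address, so MPI reads
  ! GPU memory directly instead of a host staging copy
  CALL MPI_Isend(buf, n, MPI_REAL8, dest, tag, MPI_COMM_WORLD, req, ierr)
  !$acc end host_data
END SUBROUTINE isend_device_buffer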
codes/icosagcm/trunk/src/physics/physics.f90
r910 r953 27 27 SUBROUTINE init_physics 28 28 USE mpipara 29 USE abort_mod 29 30 USE etat0_mod 30 31 USE physics_dcmip_mod, ONLY : init_physics_dcmip=>init_physics … … 118 119 119 120 !$OMP END PARALLEL 121 IF (phys_type /= phys_none) THEN 122 CALL abort_acc("physics /= 'none'") 123 END IF 120 124 END SUBROUTINE init_physics 121 125 122 126 SUBROUTINE zero_du_phys() 127 USE abort_mod 123 128 REAL(rstd), DIMENSION(:,:), POINTER :: du 124 129 INTEGER :: ind … … 154 159 USE etat0_heldsz_mod 155 160 USE etat0_venus_mod, ONLY : phys_venus => physics 161 USE abort_mod 156 162 INTEGER, INTENT(IN) :: it 157 163 TYPE(t_field),POINTER :: f_phis(:) -
codes/icosagcm/trunk/src/sphere/geometry.f90
r899 r953 266 266 INTEGER :: nb_it=0 267 267 TYPE(t_domain),POINTER :: d 268 INTEGER :: ind,it,i,j,n,k 269 REAL(rstd) :: x1(3),x2(3) 268 INTEGER :: ind,it,i,j,n 270 269 REAL(rstd) :: vect(3,6) 271 270 REAL(rstd) :: centr(3) … … 690 689 ! CALL surf_triangle(d%xyz(:,ii_begin,jj_begin),d%xyz(:,ii_begin,jj_end),d%xyz(:,ii_end,jj_begin),S) 691 690 691 CALL update_device_field(geom%Ai) 692 693 CALL update_device_field(geom%xyz_i) 694 CALL update_device_field(geom%lon_i) 695 CALL update_device_field(geom%lat_i) 696 CALL update_device_field(geom%elon_i) 697 CALL update_device_field(geom%elat_i) 698 CALL update_device_field(geom%centroid) 699 700 CALL update_device_field(geom%xyz_e) 701 CALL update_device_field(geom%lon_e) 702 CALL update_device_field(geom%lat_e) 703 CALL update_device_field(geom%elon_e) 704 CALL update_device_field(geom%elat_e) 705 CALL update_device_field(geom%ep_e) 706 CALL update_device_field(geom%et_e) 707 708 CALL update_device_field(geom%xyz_v) 709 CALL update_device_field(geom%de) 710 CALL update_device_field(geom%le) 711 CALL update_device_field(geom%le_de) 712 CALL update_device_field(geom%bi) 713 CALL update_device_field(geom%Av) 714 CALL update_device_field(geom%S1) 715 CALL update_device_field(geom%S2) 716 CALL update_device_field(geom%Riv) 717 CALL update_device_field(geom%Riv2) 718 CALL update_device_field(geom%ne) 719 CALL update_device_field(geom%Wee) 720 CALL update_device_field(geom%bi) 721 CALL update_device_field(geom%fv) 722 692 723 END SUBROUTINE set_geometry 693 724 -
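set_geometry now pushes every precomputed metric field to the GPU once initialization is finished, so the dynamics kernels can rely on present() for the grid geometry. The update_device_field wrapper itself is not part of this changeset; presumably it performs the equivalent of the sketch below for each domain's array of a t_field:

SUBROUTINE refresh_metric_on_device(Ai)
  ! Sketch only: refresh the device mirror of a precomputed metric array.
  ! Assumes the array was created on the device when it was allocated
  ! (e.g. with an earlier !$acc enter data create).
  IMPLICIT NONE
  REAL(8), INTENT(IN) :: Ai(:)
  !$acc update device(Ai)
END SUBROUTINE refresh_metric_on_device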
codes/icosagcm/trunk/src/time/euler_scheme.F90
r933 r953 1 1 MODULE euler_scheme_mod 2 2 USE field_mod 3 USE abort_mod 3 4 IMPLICIT NONE 4 5 PRIVATE … … 45 46 46 47 IF(with_dps) THEN ! update ps/mass 48 CALL abort_acc("Euler_scheme/with_dps") 47 49 IF(caldyn_eta==eta_mass) THEN ! update ps 48 50 ps=f_ps(ind) ; dps=f_dps(ind) ; … … 71 73 du=f_du(ind) ; dtheta_rhodz=f_dtheta_rhodz(ind) 72 74 73 DO l=ll_begin,ll_end 75 CALL compute_euler_scheme(u, du, theta_rhodz, dtheta_rhodz) 76 ENDDO 77 78 CALL trace_end("Euler_scheme") 79 80 CONTAINS 81 82 SUBROUTINE compute_euler_scheme(u, du, theta_rhodz, dtheta_rhodz) 83 REAL(rstd),INTENT(INOUT) :: u(iim*3*jjm,llm) 84 REAL(rstd),INTENT(IN) :: du(iim*3*jjm,llm) 85 REAL(rstd),INTENT(INOUT) :: theta_rhodz(iim*jjm,llm,nqdyn) 86 REAL(rstd),INTENT(IN) :: dtheta_rhodz(iim*jjm,llm,nqdyn) 87 88 !$acc data present(theta_rhodz(:,:,:), u(:,:),du(:,:), dtheta_rhodz(:,:,:)) async 89 90 !$acc parallel loop async 91 DO l=ll_begin,ll_end 92 !$acc loop 74 93 !DIR$ SIMD 75 94 DO ij=ij_begin,ij_end … … 80 99 ENDDO 81 100 ENDDO 82 ENDDO83 84 CALL trace_end("Euler_scheme")101 102 !$acc end data 103 END SUBROUTINE compute_euler_scheme 85 104 86 105 END SUBROUTINE Euler_scheme … … 97 116 INTEGER :: l,ij 98 117 118 !$acc data present(hflux(:,:), wflux(:,:), hfluxt(:,:), wfluxt(:,:)) async 119 99 120 IF(fluxt_zero) THEN 100 121 101 122 fluxt_zero=.FALSE. 102 103 DO l=ll_begin,ll_end 123 !$acc parallel loop async 124 DO l=ll_begin,ll_end 125 !$acc loop 104 126 !DIR$ SIMD 105 127 DO ij=ij_begin_ext,ij_end_ext … … 111 133 112 134 IF(caldyn_eta==eta_mass) THEN ! no need for vertical fluxes if eta_lag 135 !$acc parallel loop async 113 136 DO l=ll_begin,ll_endp1 137 !$acc loop 114 138 !DIR$ SIMD 115 139 DO ij=ij_begin,ij_end … … 120 144 121 145 ELSE 122 123 DO l=ll_begin,ll_end 146 !$acc parallel loop async 147 DO l=ll_begin,ll_end 148 !$acc loop 124 149 !DIR$ SIMD 125 150 DO ij=ij_begin_ext,ij_end_ext … … 131 156 132 157 IF(caldyn_eta==eta_mass) THEN ! no need for vertical fluxes if eta_lag 158 !$acc parallel loop async 133 159 DO l=ll_begin,ll_endp1 160 !$acc loop 134 161 !DIR$ SIMD 135 162 DO ij=ij_begin,ij_end … … 138 165 ENDDO 139 166 END IF 140 141 167 END IF 142 168 !$acc end data 143 169 END SUBROUTINE accumulate_fluxes 144 170 145 171 SUBROUTINE legacy_to_DEC(f_ps, f_u) 172 USE icosa 173 USE disvert_mod 174 USE omp_para 175 USE trace 176 TYPE(t_field),POINTER :: f_ps(:), f_u(:) 177 REAL(rstd), POINTER :: ps(:), u(:,:) 178 INTEGER :: ind,ij,l 179 180 CALL trace_start("legacy_to_DEC") 181 182 DO ind=1,ndomain 183 IF (.NOT. assigned_domain(ind)) CYCLE 184 CALL swap_dimensions(ind) 185 CALL swap_geometry(ind) 186 187 IF(caldyn_eta==eta_mass .AND. is_omp_first_level) THEN ! update ps 188 ps=f_ps(ind) 189 !$acc parallel loop async default(present) 190 !DIR$ SIMD 191 DO ij=ij_begin,ij_end 192 ps(ij)=(ps(ij)-ptop)/g ! convert ps to column-integrated mass 193 ENDDO 194 END IF 195 196 u=f_u(ind) 197 !$acc parallel loop async default(present) 198 DO l=ll_begin,ll_end 199 !$acc loop 200 !DIR$ SIMD 201 DO ij=ij_begin,ij_end 202 u(ij+u_right,l)=u(ij+u_right,l)*de(ij+u_right) 203 u(ij+u_lup,l)=u(ij+u_lup,l)*de(ij+u_lup) 204 u(ij+u_ldown,l)=u(ij+u_ldown,l)*de(ij+u_ldown) 205 ENDDO 206 ENDDO 207 ENDDO 208 209 CALL trace_end("legacy_to_DEC") 210 211 END SUBROUTINE Legacy_to_DEC 212 213 SUBROUTINE DEC_to_legacy(f_ps, f_u) 146 214 USE icosa 147 215 USE disvert_mod … … 158 226 CALL swap_geometry(ind) 159 227 160 IF(caldyn_eta==eta_mass .AND. is_omp_first_level) THEN ! 
update ps161 ps=f_ps(ind)162 !DIR$ SIMD163 DO ij=ij_begin,ij_end164 ps(ij)=(ps(ij)-ptop)/g ! convert ps to column-integrated mass165 ENDDO166 END IF167 168 u=f_u(ind)169 DO l=ll_begin,ll_end170 !DIR$ SIMD171 DO ij=ij_begin,ij_end172 u(ij+u_right,l)=u(ij+u_right,l)*de(ij+u_right)173 u(ij+u_lup,l)=u(ij+u_lup,l)*de(ij+u_lup)174 u(ij+u_ldown,l)=u(ij+u_ldown,l)*de(ij+u_ldown)175 ENDDO176 ENDDO177 ENDDO178 179 CALL trace_end("legacy_to_DEC")180 END SUBROUTINE Legacy_to_DEC181 182 SUBROUTINE DEC_to_legacy(f_ps, f_u)183 USE icosa184 USE disvert_mod185 USE omp_para186 USE trace187 TYPE(t_field),POINTER :: f_ps(:), f_u(:)188 REAL(rstd), POINTER :: ps(:), u(:,:)189 INTEGER :: ind,ij,l190 CALL trace_start("legacy_to_DEC")191 192 DO ind=1,ndomain193 IF (.NOT. assigned_domain(ind)) CYCLE194 CALL swap_dimensions(ind)195 CALL swap_geometry(ind)196 197 228 IF(caldyn_eta==eta_mass .AND. is_omp_first_level) THEN 198 229 ps=f_ps(ind) 230 !$acc parallel loop async default(present) 199 231 !DIR$ SIMD 200 232 DO ij=ij_begin,ij_end … … 204 236 205 237 u=f_u(ind) 206 DO l=ll_begin,ll_end 238 !$acc parallel loop async default(present) 239 DO l=ll_begin,ll_end 240 !$acc loop 207 241 !DIR$ SIMD 208 242 DO ij=ij_begin,ij_end -
codes/icosagcm/trunk/src/time/hevi_scheme.F90
r933 r953 4 4 USE field_mod 5 5 USE euler_scheme_mod 6 USE abort_mod 6 7 IMPLICIT NONE 7 8 PRIVATE … … 13 14 14 15 CONTAINS 15 16 16 SUBROUTINE set_coefs_ark23(dt) 17 17 ! ARK2 scheme by Giraldo, Kelly, Constantinescu 2013 … … 60 60 61 61 CALL legacy_to_DEC(f_ps, f_u) 62 62 63 DO j=1,nb_stage 63 64 CALL caldyn_hevi((j==1) .AND. (MOD(it,itau_out)==0), taujj(j), & … … 76 77 wflux=f_wflux(ind); wfluxt=f_wfluxt(ind) 77 78 CALL accumulate_fluxes(hflux,wflux, hfluxt,wfluxt, wj(j), fluxt_zero(ind)) 79 78 80 END DO 79 81 ! update model state … … 88 90 CALL update_3D(cjl(l,j), f_u, f_du_fast(:,l)) 89 91 IF(.NOT. hydrostatic) THEN 92 CALL abort_acc("HEVI_scheme/!hydrostatic") 90 93 CALL update_3D(bjl(l,j), f_W, f_dW_slow(:,l)) 91 94 CALL update_3D(cjl(l,j), f_W, f_dW_fast(:,l)) … … 93 96 CALL update_3D(cjl(l,j), f_geopot, f_dPhi_fast(:,l)) 94 97 END IF 98 95 99 !$OMP BARRIER 96 100 END DO … … 142 146 INTENT(IN) :: dy 143 147 INTEGER :: l 148 !$acc kernels async default(present) 144 149 DO l=ll_begin,ll_end 145 150 y(:,l)=y(:,l)+w*dy(:,l) 146 151 ENDDO 152 !$acc end kernels 147 153 END SUBROUTINE compute_update_3D 148 154 … … 164 170 INTENT(INOUT) :: y 165 171 INTENT(IN) :: dy 172 !$acc kernels async default(present) 166 173 y(:)=y(:)+w*dy(:) 174 !$acc end kernels 167 175 END SUBROUTINE compute_update_2D 168 176 -
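For the whole-slab updates in hevi_scheme (y(:,l) = y(:,l) + w*dy(:,l)) the changeset uses the kernels construct rather than an explicit loop directive, letting the compiler map the array syntax itself. A standalone sketch with illustrative names; the model relies on default(present) instead of the copy clauses shown here:

SUBROUTINE axpy_levels(n, llm, l1, l2, w, dy, y)
  IMPLICIT NONE
  INTEGER, INTENT(IN)    :: n, llm, l1, l2
  REAL(8), INTENT(IN)    :: w, dy(n, llm)
  REAL(8), INTENT(INOUT) :: y(n, llm)
  INTEGER :: l
  !$acc kernels copyin(dy) copy(y)
  DO l = l1, l2
     y(:, l) = y(:, l) + w * dy(:, l)   ! array syntax mapped by the compiler
  END DO
  !$acc end kernels
END SUBROUTINE axpy_levels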
codes/icosagcm/trunk/src/time/timeloop_gcm.F90
r933 r953 94 94 END SELECT 95 95 96 IF (scheme_family /= hevi) THEN 97 CALL abort_acc("scheme_family /= hevi") 98 END IF 99 96 100 ! Time-independant orography 97 101 CALL allocate_field(f_phis,field_t,type_real,name='phis') … … 103 107 CALL allocate_field(f_u,field_u,type_real,llm,name='u') 104 108 CALL allocate_field(f_geopot,field_t,type_real,llm+1,name='geopot') 105 CALL allocate_field(f_W,field_t,type_real,llm+1,name='W') 109 CALL allocate_field(f_W,field_t,type_real,llm+1,name='W') ! used only if .not. hydrostatic 106 110 CALL allocate_field(f_q,field_t,type_real,llm,nqtot,'q') 107 111 ! Mass fluxes 108 CALL allocate_field(f_hflux,field_u,type_real,llm ) ! instantaneous mass fluxes109 CALL allocate_field(f_hfluxt,field_u,type_real,llm ) ! mass "fluxes" accumulated in time112 CALL allocate_field(f_hflux,field_u,type_real,llm, ondevice=.TRUE.) ! instantaneous mass fluxes 113 CALL allocate_field(f_hfluxt,field_u,type_real,llm,ondevice=.TRUE.) ! mass "fluxes" accumulated in time 110 114 CALL allocate_field(f_wflux,field_t,type_real,llm+1) ! vertical mass fluxes 111 CALL allocate_field(f_wfluxt,field_t,type_real,llm+1,name='wfluxt' )115 CALL allocate_field(f_wfluxt,field_t,type_real,llm+1,name='wfluxt',ondevice=.TRUE.) 112 116 113 117 SELECT CASE(scheme_family) … … 125 129 CASE(hevi) 126 130 ! Trends 127 CALL allocate_fields(nb_stage,f_dps_slow, field_t,type_real,name='dps_slow' )128 CALL allocate_fields(nb_stage,f_dmass_slow, field_t,type_real,llm, name='dmass_slow' )129 CALL allocate_fields(nb_stage,f_dtheta_rhodz_slow, field_t,type_real,llm,nqdyn,name='dtheta_rhodz_fast' )130 CALL allocate_fields(nb_stage,f_du_slow, field_u,type_real,llm,name='du_slow' )131 CALL allocate_fields(nb_stage,f_du_fast, field_u,type_real,llm,name='du_fast' )131 CALL allocate_fields(nb_stage,f_dps_slow, field_t,type_real,name='dps_slow', ondevice=.TRUE.) 132 CALL allocate_fields(nb_stage,f_dmass_slow, field_t,type_real,llm, name='dmass_slow', ondevice=.TRUE.) 133 CALL allocate_fields(nb_stage,f_dtheta_rhodz_slow, field_t,type_real,llm,nqdyn,name='dtheta_rhodz_fast', ondevice=.TRUE.) 134 CALL allocate_fields(nb_stage,f_du_slow, field_u,type_real,llm,name='du_slow', ondevice=.TRUE.) 135 CALL allocate_fields(nb_stage,f_du_fast, field_u,type_real,llm,name='du_fast', ondevice=.TRUE.) 132 136 CALL allocate_fields(nb_stage,f_dW_slow, field_t,type_real,llm+1,name='dW_slow') 133 137 CALL allocate_fields(nb_stage,f_dW_fast, field_t,type_real,llm+1,name='dW_fast') … … 172 176 173 177 SUBROUTINE timeloop 178 USE abort_mod 174 179 USE dissip_gcm_mod 175 180 USE sponge_mod … … 211 216 rhodz=f_rhodz(ind); mass=f_mass(ind); ps=f_ps(ind) 212 217 IF(caldyn_eta==eta_mass) THEN 213 CALL compute_rhodz(.TRUE., ps, rhodz ) ! save rhodz for transport scheme before dynamics update ps218 CALL compute_rhodz(.TRUE., ps, rhodz, ondevice=.FALSE.) ! 
save rhodz for transport scheme before dynamics update ps 214 219 ELSE 215 220 DO l=ll_begin,ll_end … … 244 249 CALL SYSTEM_CLOCK(start_clock, rate_clock) 245 250 !$OMP END MASTER 251 call update_device_field(f_ps) 252 call update_device_field(f_mass) 253 CALL update_device_field(f_theta_rhodz) 254 CALL update_device_field(f_u) 255 CALL update_device_field(f_q) 256 CALL update_device_field(f_geopot) 257 CALL update_device_field(f_wflux) 258 CALL update_device_field(f_rhodz) 259 246 260 247 261 DO it=itau0+1,itau0+itaumax … … 263 277 CALL wait_message(req_mass0) 264 278 CALL send_message(f_theta_rhodz,req_theta_rhodz0) 265 CALL wait_message(req_theta_rhodz0) 279 CALL wait_message(req_theta_rhodz0) 266 280 CALL send_message(f_u,req_u0) 267 281 CALL wait_message(req_u0) … … 281 295 SELECT CASE(scheme_family) 282 296 CASE(explicit) 297 CALL abort_acc("explicit_scheme") 283 298 CALL explicit_scheme(it, fluxt_zero) 284 299 CASE(hevi) … … 298 313 CALL swap_geometry(ind) 299 314 mass=f_mass(ind); ps=f_ps(ind); 300 CALL compute_rhodz(.TRUE., ps, mass )315 CALL compute_rhodz(.TRUE., ps, mass, ondevice=.TRUE.) 301 316 END DO 302 317 ENDIF … … 311 326 CALL euler_scheme(.FALSE.) ! update only u, theta 312 327 IF (iflag_sponge > 0) THEN 328 CALL abort_acc("iflag_sponge>0") 313 329 CALL sponge(f_u,f_du,f_theta_rhodz,f_dtheta_rhodz) 314 330 CALL euler_scheme(.FALSE.) ! update only u, theta … … 321 337 END IF 322 338 CALL exit_profile(id_dissip) 323 339 324 340 CALL enter_profile(id_adv) 325 341 IF(MOD(it,itau_adv)==0) THEN … … 329 345 ! At this point advect_tracer has obtained the halos of u and rhodz, 330 346 ! needed for correct computation of kinetic energy 347 IF(diagflux_on) CALL abort_acc("diagflux_on") 331 348 IF(diagflux_on) CALL diagflux_energy(adv_over_out, f_phis,f_rhodz,f_theta_rhodz,f_u, f_geopot,f_theta,f_buf_i, f_hfluxt) 332 349 … … 340 357 END DO 341 358 ENDIF 359 IF(positive_theta) CALL abort_acc("positive_theta") 342 360 IF(positive_theta) CALL copy_q_to_theta(f_theta_rhodz,f_rhodz,f_q) 343 361 END IF 344 362 CALL exit_profile(id_adv) 345 363 346 364 CALL enter_profile(id_diags) 347 365 ! IF (MOD(it,itau_physics)==0) THEN … … 360 378 361 379 IF (MOD(it,itau_check_conserv)==0) THEN 380 CALL update_host_field(f_ps) 381 CALL update_host_field(f_theta_rhodz) 382 CALL update_host_field(f_u) 383 CALL update_host_field(f_dps) 384 CALL update_host_field(f_q) 362 385 CALL check_conserve_detailed(it, AAM_dyn, & 363 386 f_ps,f_dps,f_u,f_theta_rhodz,f_phis) … … 367 390 IF (mod(it,itau_out)==0 ) THEN 368 391 CALL transfert_request(f_u,req_e1_vect) 392 CALL update_host_field(f_ps) 393 CALL update_host_field(f_mass) 394 CALL update_host_field(f_theta_rhodz) 395 CALL update_host_field(f_geopot) 396 CALL update_host_field(f_u) 397 CALL update_host_field(f_q) 369 398 CALL write_output_fields_basic(.FALSE.,f_phis, f_ps, f_mass, f_geopot, f_theta_rhodz, f_u, f_W, f_q) 370 399 ENDIF … … 374 403 END DO 375 404 405 CALL update_host_field(f_ps) 406 CALL update_host_field(f_theta_rhodz) 407 CALL update_host_field(f_u) 408 CALL update_host_field(f_q) 409 CALL update_host_field(f_geopot) 410 376 411 ! CALL write_etat0(itau0+itaumax,f_ps, f_phis,f_theta_rhodz,f_u,f_q) 377 412 CALL write_etat0(itau0+itaumax,f_ps, f_phis,f_theta_rhodz,f_u,f_q, f_geopot, f_W) 378 413 414 CALL update_host_field(f_dps) 379 415 CALL check_conserve_detailed(it, AAM_dyn, & 380 416 f_ps,f_dps,f_u,f_theta_rhodz,f_phis) -
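The timeloop hunks above spell out the intended data lifecycle: mirror the prognostic fields on the device once before the loop (update_device_field), keep the dynamics entirely on the GPU, and copy back to the host only when an output, conservation check or restart actually needs the data (update_host_field). A runnable sketch of that traffic pattern on a single array; the "dynamics step" and the output are placeholders:

PROGRAM timeloop_sketch
  IMPLICIT NONE
  INTEGER, PARAMETER :: n = 1000, itaumax = 100, itau_out = 25
  REAL(8) :: u(n)
  INTEGER :: it, i
  u = 0.d0
  !$acc enter data copyin(u)            ! push the state once before the loop
  DO it = 1, itaumax
     !$acc parallel loop present(u)     ! the dynamics stays on the device
     DO i = 1, n
        u(i) = u(i) + 1.d0              ! stand-in for one dynamics step
     END DO
     IF (MOD(it, itau_out) == 0) THEN
        !$acc update host(u)            ! copy back only when output is due
        PRINT *, 'it =', it, ' mean(u) =', SUM(u)/n
     END IF
  END DO
  !$acc update host(u)                  ! final copy, e.g. before writing a restart
  !$acc exit data delete(u)
END PROGRAM timeloop_sketch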
codes/icosagcm/trunk/src/transport/advect.F90
r899 r953 49 49 !======================================================================================= 50 50 51 SUBROUTINE compute_gradq3d(qi,sqrt_leng,gradq3d,xyz_i, xyz_v)51 SUBROUTINE compute_gradq3d(qi,sqrt_leng,gradq3d,xyz_i, xyz_v) 52 52 USE trace 53 53 USE omp_para … … 68 68 REAL(rstd) :: x1,x2,x3 69 69 REAL(rstd) :: dq(3) 70 70 71 71 CALL trace_start("compute_gradq3d1") 72 72 … … 86 86 ! END DO 87 87 88 DO l = ll_begin,ll_end 88 !$acc data create(gradtri(:,:,:), arr(:), ar(:)) present(sqrt_leng(:), xyz_i(:,:), xyz_v(:,:), qi(:,:), gradq3d(:,:,:)) async 89 90 !$acc parallel loop collapse(2) async private(A, dq) 91 DO l = ll_begin,ll_end 89 92 !DIR$ SIMD 90 93 DO ij=ij_begin_ext,ij_end_ext … … 151 154 152 155 ENDDO 156 ENDDO 153 157 158 !$acc parallel loop collapse(2) async private(A, dq) 159 DO l = ll_begin,ll_end 154 160 DO ij=ij_begin_ext,ij_end_ext 155 161 … … 219 225 220 226 !DIR$ SIMD 227 !$acc parallel loop async 221 228 DO ij=ij_begin,ij_end 222 229 ar(ij) = arr(ij+z_up)+arr(ij+z_lup)+arr(ij+z_ldown)+arr(ij+z_down)+arr(ij+z_rdown)+arr(ij+z_rup)+1.e-50 … … 225 232 CALL trace_start2("compute_gradq3d2") 226 233 234 !$acc parallel loop collapse(3) async 227 235 DO k=1,3 228 236 DO l =ll_begin,ll_end … … 239 247 240 248 !============================================================================================= LIMITING 249 !$acc parallel loop collapse(2) async 241 250 DO l =ll_begin,ll_end 242 251 !DIR$ SIMD … … 251 260 minq = min(qi(ij,l),qi(ij+t_right,l),qi(ij+t_lup,l),qi(ij+t_rup,l),qi(ij+t_left,l), & 252 261 qi(ij+t_rdown,l),qi(ij+t_ldown,l)) 253 alphamx = (maxq - qi(ij,l)) ; alphamx = alphamx/(maxq_c - qi(ij,l) ) 254 alphamx = max(alphamx,0.0) 255 alphami = (minq - qi(ij,l)); alphami = alphami/(minq_c - qi(ij,l)) 256 alphami = max(alphami,0.0) 262 IF ((maxq_c - qi(ij,l)) /= 0.0) THEN 263 alphamx = (maxq - qi(ij,l)) ; alphamx = alphamx/(maxq_c - qi(ij,l) ) 264 alphamx = max(alphamx,0.0) 265 ELSE 266 alphamx = 0.0 267 ENDIF 268 IF ((minq_c - qi(ij,l)) /= 0.0) THEN 269 alphami = (minq - qi(ij,l)); alphami = alphami/(minq_c - qi(ij,l)) 270 alphami = max(alphami,0.0) 271 ELSE 272 alphami = 0.0 273 ENDIF 257 274 alpha = min(alphamx,alphami,1.0) 258 275 ! gradq3d(ij,l,:) = alpha*gradq3d(ij,l,:) … … 264 281 265 282 CALL trace_end("compute_gradq3d3") 283 284 !$acc end data 266 285 267 286 CONTAINS … … 329 348 330 349 ! Backward trajectories, for use with Miura approach 331 SUBROUTINE compute_backward_traj(normal,tangent,ue,tau, cc) 350 SUBROUTINE compute_backward_traj(normal,tangent,ue,tau,cc) 351 USE geometry, ONLY : xyz_e, de, wee, le 332 352 USE trace 333 353 USE omp_para … … 345 365 346 366 ! TODO : compute normal displacement ue*tau as hfluxt / mass(upwind) then reconstruct tangential displacement 347 367 368 !$acc data present(ue(:,:), cc(:,:,:), normal(:,:), tangent(:,:), xyz_e(:,:), de(:), wee(:,:,:), le(:)) async 369 348 370 ! reconstruct tangential wind then 3D wind at edge then cc = edge midpoint - u*tau 371 !$acc parallel loop private(up_e, v_e) collapse(2) gang vector async 349 372 DO l = ll_begin,ll_end 350 373 !DIR$ SIMD … … 397 420 ENDDO 398 421 END DO 399 422 !$acc end data 400 423 CALL trace_end("compute_backward_traj") 401 424 … … 404 427 ! Horizontal transport (S. Dubey, T. Dubos) 405 428 ! 
Slope-limited van Leer approach with hexagons 406 SUBROUTINE compute_advect_horiz(update_mass,diagflux_on, hfluxt,cc,gradq3d, mass, qi,qfluxt)429 SUBROUTINE compute_advect_horiz(update_mass,diagflux_on, hfluxt,cc,gradq3d, mass, qi, qfluxt) 407 430 USE trace 408 431 USE omp_para 432 USE abort_mod 433 USE geometry, only : Ai, xyz_i 409 434 IMPLICIT NONE 410 435 LOGICAL, INTENT(IN) :: update_mass, diagflux_on … … 415 440 REAL(rstd), INTENT(INOUT) :: qi(iim*jjm,llm) 416 441 REAL(rstd), INTENT(INOUT) :: qfluxt(3*iim*jjm,MERGE(llm,1,diagflux_on)) ! time-integrated tracer flux 417 418 REAL(rstd) :: dq,dmass,qe, newmass 442 ! metrics terms 443 444 REAL(rstd) :: dq,dmass,qe,newmass 419 445 REAL(rstd) :: qflux(3*iim*jjm,llm) 420 INTEGER :: ij,l 446 INTEGER :: ij,l,ij_tmp 447 448 IF(diagflux_on) CALL abort_acc("compute_advect_horiz : diagflux_on") 421 449 422 450 CALL trace_start("compute_advect_horiz") 423 451 #include "../kernels/advect_horiz.k90" 424 452 CALL trace_end("compute_advect_horiz") 453 425 454 END SUBROUTINE compute_advect_horiz 426 455 -
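Two details of the compute_gradq3d port above are worth flagging: the small per-cell work arrays (A, dq) are listed in a private() clause so every loop iteration gets its own copy, and the slope limiter now guards against division by zero when the centred reconstruction equals the cell mean. A standalone sketch of the private() usage; the "geometry" and "gradient" here are stand-ins, not the model's least-squares fit:

SUBROUTINE gradient_with_private_sketch(n, q, grad)
  IMPLICIT NONE
  INTEGER, INTENT(IN)  :: n
  REAL(8), INTENT(IN)  :: q(n)
  REAL(8), INTENT(OUT) :: grad(n)
  REAL(8) :: a(3), dq(3)                 ! small per-iteration work arrays
  INTEGER :: ij, k
  !$acc parallel loop private(a, dq) copyin(q) copyout(grad)
  DO ij = 2, n-1
     DO k = 1, 3
        a(k)  = REAL(k, 8)               ! stand-in for local geometry terms
        dq(k) = q(MIN(ij+k, n)) - q(ij)  ! stand-in for neighbour differences
     END DO
     grad(ij) = (a(1)*dq(1) + a(2)*dq(2) + a(3)*dq(3)) / 3.d0
  END DO
  grad(1) = 0.d0
  grad(n) = 0.d0
END SUBROUTINE gradient_with_private_sketch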
codes/icosagcm/trunk/src/transport/advect_tracer.F90
r933 r953 32 32 INTEGER :: ind 33 33 34 CALL allocate_field(f_normal,field_u,type_real,3, name='normal' )35 CALL allocate_field(f_tangent,field_u,type_real,3, name='tangent' )36 CALL allocate_field(f_gradq3d,field_t,type_real,llm,3, name='gradq3d' )37 CALL allocate_field(f_cc,field_u,type_real,llm,3, name='cc' )38 CALL allocate_field(f_sqrt_leng,field_t,type_real, name='sqrt_leng' )39 CALL allocate_field(f_dzqw, field_t, type_real, llm, name='dzqw' )40 CALL allocate_field(f_adzqw, field_t, type_real, llm, name='adzqw' )41 CALL allocate_field(f_dzq, field_t, type_real, llm, name='dzq' )42 CALL allocate_field(f_wq, field_t, type_real, llm+1, name='wq' )34 CALL allocate_field(f_normal,field_u,type_real,3, name='normal',ondevice=.TRUE.) 35 CALL allocate_field(f_tangent,field_u,type_real,3, name='tangent',ondevice=.TRUE.) 36 CALL allocate_field(f_gradq3d,field_t,type_real,llm,3, name='gradq3d',ondevice=.TRUE.) 37 CALL allocate_field(f_cc,field_u,type_real,llm,3, name='cc',ondevice=.TRUE.) 38 CALL allocate_field(f_sqrt_leng,field_t,type_real, name='sqrt_leng',ondevice=.TRUE.) 39 CALL allocate_field(f_dzqw, field_t, type_real, llm, name='dzqw',ondevice=.TRUE.) 40 CALL allocate_field(f_adzqw, field_t, type_real, llm, name='adzqw',ondevice=.TRUE.) 41 CALL allocate_field(f_dzq, field_t, type_real, llm, name='dzq',ondevice=.TRUE.) 42 CALL allocate_field(f_wq, field_t, type_real, llm+1, name='wq',ondevice=.TRUE.) 43 43 44 44 DO ind=1,ndomain … … 52 52 END DO 53 53 54 CALL update_device_field(f_tangent) 55 CALL update_device_field(f_normal) 56 CALL update_device_field(f_sqrt_leng) 57 54 58 END SUBROUTINE init_advect_tracer 55 59 … … 60 64 USE write_field_mod 61 65 USE tracer_mod 66 USE abort_mod 62 67 TYPE(t_field),POINTER :: f_hfluxt(:) ! time-integrated horizontal mass flux 63 68 TYPE(t_field),POINTER :: f_wfluxt(:) ! time-integrated vertical mass flux … … 138 143 END DO 139 144 140 CALL compute_backward_traj(tangent,normal,u,0.5*dt*itau_adv, cc) 141 145 CALL compute_backward_traj(tangent,normal,u,0.5*dt*itau_adv, cc) 142 146 END DO 143 147 … … 174 178 175 179 IF(frac>0.) THEN ! accumulate mass, mass flux and tracer mass 180 CALL abort_acc("frac>0.") 176 181 qmasst = f_qmasst(ind) 182 !$acc kernels default(present) async 177 183 qmasst(:,ll_begin:ll_end,k) = qmasst(:,ll_begin:ll_end,k) + & 178 184 frac*rhodz(:,ll_begin:ll_end)*q(:,ll_begin:ll_end,k) 185 !$acc end kernels 179 186 IF(k==nq_last) THEN 180 187 masst = f_masst(ind) 181 188 massfluxt = f_massfluxt(ind) 189 !$acc kernels default(present) async 182 190 masst(:,ll_begin:ll_end) = masst(:,ll_begin:ll_end)+frac*rhodz(:,ll_begin:ll_end) 183 191 massfluxt(:,ll_begin:ll_end) = massfluxt(:,ll_begin:ll_end)+hfluxt(:,ll_begin:ll_end) 192 !$acc end kernels 184 193 END IF 185 194 END IF … … 189 198 ENDIF 190 199 END DO 191 200 192 201 ! 1/2 vertical transport 193 202 !!$OMP BARRIER … … 208 217 IF (advection_scheme(k)==advect_vanleer) CALL vlz(k==nq_last, 0.5,wfluxt,rhodz, q(:,:,k),0, dzqw, adzqw, dzq, wq) 209 218 END DO 210 211 219 END DO 212 220 … … 251 259 ! finite difference of q 252 260 261 !$acc data present(q(:,:), mass(:,:), wfluxt(:,:), dzqw(:,:), adzqw(:,:), dzq(:,:), wq(:,:)) async 262 263 !$acc parallel loop async collapse(2) 253 264 DO l=ll_beginp1,ll_end 254 265 !DIR$ SIMD … … 265 276 ! dzq = slope*dz, i.e. the reconstructed q varies by dzq inside level l 266 277 278 !$acc parallel loop async collapse(2) 267 279 DO l=ll_beginp1,ll_endm1 268 280 !DIR$ SIMD … … 281 293 ! 
0 slope in top and bottom layers 282 294 IF (is_omp_first_level) THEN 295 !$acc parallel loop async 283 296 DO ij=ijb,ije 284 297 dzq(ij,1)=0. … … 287 300 288 301 IF (is_omp_last_level) THEN 302 !$acc parallel loop async 289 303 DO ij=ijb,ije 290 304 dzq(ij,llm)=0. … … 297 311 ! sigw = fraction of mass that leaves level l/l+1 298 312 ! then amount of q leaving level l/l+1 = wq = w * qq 313 !$acc parallel loop collapse(2) async 299 314 DO l=ll_beginp1,ll_end 300 315 !DIR$ SIMD … … 313 328 ! wq = 0 at top and bottom 314 329 IF (is_omp_first_level) THEN 330 !$acc parallel loop async 315 331 DO ij=ijb,ije 316 332 wq(ij,1)=0. … … 319 335 320 336 IF (is_omp_last_level) THEN 337 !$acc parallel loop async 321 338 DO ij=ijb,ije 322 339 wq(ij,llm+1)=0. … … 329 346 330 347 ! update q, mass is updated only after all q's have been updated 348 !$acc parallel loop collapse(2) async 331 349 DO l=ll_begin,ll_end 332 350 !DIR$ SIMD … … 337 355 ENDDO 338 356 END DO 339 357 !$acc end data 340 358 CALL trace_end("vlz") 341 359 -
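In the vlz port above, the slope, interface-flux and update stages are all launched on the same (default) async queue, so they execute in order on the device and the host never has to synchronize between stages. A self-contained sketch of that staging on a single column; the discretization is deliberately simplified and is not the model's van Leer scheme:

SUBROUTINE vertical_update_sketch(llm, w, q)
  IMPLICIT NONE
  INTEGER, INTENT(IN)    :: llm
  REAL(8), INTENT(IN)    :: w(llm+1)     ! vertical mass flux at interfaces
  REAL(8), INTENT(INOUT) :: q(llm)       ! tracer in the column
  REAL(8) :: dq(llm), wq(llm+1)
  INTEGER :: l
  !$acc data copyin(w) copy(q) create(dq, wq)
  !$acc parallel loop async                       ! stage 1: slopes
  DO l = 1, llm
     IF (l == 1 .OR. l == llm) THEN
        dq(l) = 0.d0                              ! 0 slope in top and bottom layers
     ELSE
        dq(l) = 0.5d0 * (q(l+1) - q(l-1))
     END IF
  END DO
  !$acc parallel loop async                       ! stage 2: interface fluxes
  DO l = 1, llm+1
     IF (l == 1 .OR. l == llm+1) THEN
        wq(l) = 0.d0                              ! no flux through top and bottom
     ELSE
        wq(l) = w(l) * (q(l-1) + 0.5d0 * dq(l-1))
     END IF
  END DO
  !$acc parallel loop async                       ! stage 3: update the column
  DO l = 1, llm
     q(l) = q(l) + wq(l+1) - wq(l)
  END DO
  !$acc wait
  !$acc end data
END SUBROUTINE vertical_update_sketch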
codes/icosagcm/trunk/src/vertical/disvert.f90
r898 r953 1 1 MODULE disvert_mod 2 2 USE icosa 3 USE abort_mod 3 4 REAL(rstd), SAVE, POINTER :: ap(:) 4 5 !$OMP THREADPRIVATE(ap) … … 146 147 END IF 147 148 149 !$acc enter data copyin(ap(:), bp(:), mass_dak(:), mass_dbk(:)) async 150 148 151 END SUBROUTINE init_disvert 149 152 150 SUBROUTINE compute_rhodz(comp, ps, rhodz )153 SUBROUTINE compute_rhodz(comp, ps, rhodz, ondevice) 151 154 USE icosa 152 155 USE omp_para … … 154 157 REAL(rstd), INTENT(IN) :: ps(iim*jjm) 155 158 REAL(rstd), INTENT(INOUT) :: rhodz(iim*jjm,llm) 159 LOGICAL, INTENT(IN), OPTIONAL :: ondevice ! .TRUE. compute on device, .FALSE. stay on host 156 160 REAL(rstd) :: m, err 157 161 INTEGER :: l,i,j,ij,dd 162 LOGICAL :: ondevice_ 163 158 164 err=0. 165 166 ! by default, do not compute on device 167 ondevice_ = .FALSE. 168 IF (PRESENT(ondevice)) ondevice_ = ondevice 159 169 160 170 IF(comp) THEN … … 164 174 END IF 165 175 176 !$acc data present(ps(:), rhodz(:,:), ap(:), bp(:)) async if (ondevice_) 177 166 178 IF(comp) THEN 167 DO l = ll_begin, ll_end 168 DO j=jj_begin-dd,jj_end+dd 169 DO i=ii_begin-dd,ii_end+dd 170 ij=(j-1)*iim+i 171 m = ( ap(l) - ap(l+1) + (bp(l)-bp(l+1))*ps(ij) )/g 172 rhodz(ij,l) = m 173 ENDDO 174 ENDDO 175 ENDDO 176 ELSE 179 !$acc parallel loop collapse(3) async if (ondevice_) 177 180 DO l = ll_begin, ll_end 178 181 DO j=jj_begin-dd,jj_end+dd … … 180 183 ij=(j-1)*iim+i 181 184 m = ( ap(l) - ap(l+1) + (bp(l)-bp(l+1))*ps(ij) )/g 182 err = MAX(err,abs(m-rhodz(ij,l)))185 rhodz(ij,l) = m 183 186 ENDDO 184 ENDDO 185 ENDDO 186 187 IF(err>1e-10) THEN 187 ENDDO 188 ENDDO 189 ELSE 190 !$acc parallel loop reduction(max:err) collapse(3) async if (ondevice_) 191 DO l = ll_begin, ll_end 192 DO j=jj_begin-dd,jj_end+dd 193 DO i=ii_begin-dd,ii_end+dd 194 ij=(j-1)*iim+i 195 m = ( ap(l) - ap(l+1) + (bp(l)-bp(l+1))*ps(ij) )/g 196 err = MAX(err,abs(m-rhodz(ij,l))) 197 ENDDO 198 ENDDO 199 ENDDO 200 201 !$acc wait 202 IF(err>1e-10) THEN 188 203 PRINT *, 'Discrepancy between ps and rhodz detected', err 189 204 STOP … … 191 206 ENDIF 192 207 208 !$acc end data 193 209 END SUBROUTINE compute_rhodz 194 210 195 211 196 212 SUBROUTINE write_apbp 197 213 USE icosa
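compute_rhodz above illustrates two further idioms of this port: a device-side maximum reduction must be followed by an explicit !$acc wait before its result is tested on the host, and the "if (ondevice_)" clause keeps the same source usable for host-only execution. A standalone sketch of both, with illustrative arrays and tolerance:

SUBROUTINE check_consistency(n, a, b, ondevice)
  IMPLICIT NONE
  INTEGER, INTENT(IN) :: n
  REAL(8), INTENT(IN) :: a(n), b(n)
  LOGICAL, INTENT(IN) :: ondevice        ! .FALSE. -> run the same loop on the host
  REAL(8) :: err
  INTEGER :: i
  err = 0.d0
  !$acc parallel loop reduction(max:err) copyin(a, b) async if (ondevice)
  DO i = 1, n
     err = MAX(err, ABS(a(i) - b(i)))
  END DO
  !$acc wait                              ! reduction result needed on the host below
  IF (err > 1.d-10) THEN
     PRINT *, 'Discrepancy detected', err
     STOP 1
  END IF
END SUBROUTINE check_consistency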