Changeset 962 for codes/icosagcm/trunk/src
- Timestamp:
- 07/25/19 11:36:36 (5 years ago)
- Location:
- codes/icosagcm/trunk/src
- Files:
-
- 1 added
- 1 deleted
- 3 edited
Legend:
- Unmodified
- Added
- Removed
-
codes/icosagcm/trunk/src/parallel/omp_para.F90
r604 r962 59 59 60 60 CONTAINS 61 62 FUNCTION get_omp_size() result(omp_size) 63 #ifdef CPP_USING_OMP 64 use omp_lib, only : omp_get_max_threads 65 integer :: omp_size 66 omp_size = omp_get_max_threads() 67 #else 68 integer :: omp_size 69 omp_size = 1 70 #endif 71 END FUNCTION 61 72 62 73 SUBROUTINE init_omp_para(is_mpi_master) -
codes/icosagcm/trunk/src/parallel/transfert_mpi.f90
r953 r962 88 88 END INTERFACE 89 89 90 integer :: profile_mpi_copies, profile_mpi_waitall, profile_mpi_omp_barrier 91 90 92 CONTAINS 91 93 … … 103 105 104 106 CALL register_id('MPI', id_mpi) 107 CALL register_id('MPI_copies', profile_mpi_copies) 108 CALL register_id('MPI_waitall', profile_mpi_waitall) 109 CALL register_id('MPI_omp_barrier', profile_mpi_omp_barrier) 105 110 106 111 CALL create_request(field_t,req_i1) … … 1068 1073 1069 1074 1070 IF (field(1)%data_type==type_real) THEN 1071 1072 IF (field(1)%ndim==2) THEN 1073 1074 DO ireq=1,message%nreq 1075 CALL free_mpi_buffer(message%buffers(ireq)%r) 1076 ENDDO 1077 1078 ELSE IF (field(1)%ndim==3) THEN 1079 1080 DO ireq=1,message%nreq 1081 CALL free_mpi_buffer(message%buffers(ireq)%r) 1082 ENDDO 1083 1084 ELSE IF (field(1)%ndim==4) THEN 1085 1086 DO ireq=1,message%nreq 1087 CALL free_mpi_buffer(message%buffers(ireq)%r) 1088 ENDDO 1089 1090 ENDIF 1075 IF (message%field(1)%data_type==type_real) THEN 1076 DO ireq=1,message%nreq 1077 CALL free_mpi_buffer(message%buffers(ireq)%r) 1078 ENDDO 1091 1079 ENDIF 1092 1080 … … 1220 1208 if( field(1)%ondevice .AND. .NOT. message%ondevice ) call update_device_message_mpi(message) 1221 1209 1210 CALL enter_profile(profile_mpi_omp_barrier) 1222 1211 !$OMP BARRIER 1212 CALL exit_profile(profile_mpi_omp_barrier) 1223 1213 1224 1214 … … 1241 1231 ENDIF 1242 1232 !$OMP END MASTER 1233 CALL enter_profile(profile_mpi_omp_barrier) 1243 1234 !$OMP BARRIER 1235 CALL exit_profile(profile_mpi_omp_barrier) 1244 1236 1245 1237 IF (field(1)%data_type==type_real) THEN … … 1263 1255 offset=send%offset 1264 1256 msize=send%size 1257 call enter_profile(profile_mpi_copies) 1265 1258 !$acc parallel loop default(present) async if (field(ind)%ondevice) 1266 1259 DO n=1,msize 1267 1260 buffer_r(offset+n)=rval2d(value(n)) 1268 1261 ENDDO 1262 call exit_profile(profile_mpi_copies) 1269 1263 1270 1264 IF (mpi_threading_mode==MPI_THREAD_SERIALIZED) THEN … … 1299 1293 sgn=>recv%sign 1300 1294 msize=recv%size 1295 call enter_profile(profile_mpi_copies) 1301 1296 !$acc parallel loop default(present) async if (field(ind)%ondevice) 1302 1297 DO n=1,msize 1303 1298 rval2d(value(n))=src_rval2d(src_value(n))*sgn(n) 1304 1299 ENDDO 1305 1300 call exit_profile(profile_mpi_copies) 1306 1301 1307 1302 ELSE … … 1353 1348 msize=send%size 1354 1349 moffset=send%offset 1355 CALL trace_start("copy_to_buffer")1350 call enter_profile(profile_mpi_copies) 1356 1351 1357 1352 !$acc parallel loop default(present) async if (field(ind)%ondevice) … … 1363 1358 ENDDO 1364 1359 ENDDO 1365 CALL trace_end("copy_to_buffer")1360 call exit_profile(profile_mpi_copies) 1366 1361 1367 1362 IF (mpi_threading_mode==MPI_THREAD_SERIALIZED.OR.mpi_threading_mode==MPI_THREAD_MULTIPLE .AND. omp_level_size>1) THEN 1368 !$OMP BARRIER 1363 CALL enter_profile(profile_mpi_omp_barrier) 1364 !$OMP BARRIER 1365 CALL exit_profile(profile_mpi_omp_barrier) 1366 1369 1367 ENDIF 1370 1368 … … 1384 1382 ELSE 1385 1383 IF (mpi_threading_mode==MPI_THREAD_SERIALIZED.OR.mpi_threading_mode==MPI_THREAD_MULTIPLE .AND. omp_level_size>1) THEN 1386 !$OMP BARRIER 1384 CALL enter_profile(profile_mpi_omp_barrier) 1385 !$OMP BARRIER 1386 CALL exit_profile(profile_mpi_omp_barrier) 1387 1387 1388 ENDIF 1388 1389 ENDIF … … 1391 1392 IF (mpi_threading_mode==MPI_THREAD_SERIALIZED.OR.mpi_threading_mode==MPI_THREAD_MULTIPLE .AND. omp_level_size>1) THEN 1392 1393 DO isend=req%nsend+1,max_req 1393 !$OMP BARRIER 1394 CALL enter_profile(profile_mpi_omp_barrier) 1395 !$OMP BARRIER 1396 CALL exit_profile(profile_mpi_omp_barrier) 1397 1394 1398 ENDDO 1395 1399 ENDIF … … 1414 1418 msize=recv%size 1415 1419 1420 call enter_profile(profile_mpi_copies) 1416 1421 CALL trace_start("copy_data") 1417 1422 !$acc parallel loop collapse(2) default(present) async if (field(ind)%ondevice) … … 1421 1426 ENDDO 1422 1427 ENDDO 1428 call exit_profile(profile_mpi_copies) 1423 1429 CALL trace_end("copy_data") 1424 1430 … … 1473 1479 moffset=send%offset 1474 1480 1481 call enter_profile(profile_mpi_copies) 1475 1482 CALL trace_start("copy_to_buffer") 1476 1483 !$acc parallel loop default(present) collapse(2) async if (field(ind)%ondevice) … … 1486 1493 ENDDO 1487 1494 CALL trace_end("copy_to_buffer") 1495 call exit_profile(profile_mpi_copies) 1488 1496 1489 1497 IF (mpi_threading_mode==MPI_THREAD_SERIALIZED.OR.mpi_threading_mode==MPI_THREAD_MULTIPLE .AND. omp_level_size>1) THEN 1490 !$OMP BARRIER 1498 CALL enter_profile(profile_mpi_omp_barrier) 1499 !$OMP BARRIER 1500 CALL exit_profile(profile_mpi_omp_barrier) 1501 1491 1502 ENDIF 1492 1503 … … 1506 1517 ELSE 1507 1518 IF (mpi_threading_mode==MPI_THREAD_SERIALIZED.OR.mpi_threading_mode==MPI_THREAD_MULTIPLE .AND. omp_level_size>1) THEN 1508 !$OMP BARRIER 1519 CALL enter_profile(profile_mpi_omp_barrier) 1520 !$OMP BARRIER 1521 CALL exit_profile(profile_mpi_omp_barrier) 1522 1509 1523 ENDIF 1510 1524 ENDIF … … 1513 1527 IF (mpi_threading_mode==MPI_THREAD_SERIALIZED.OR.mpi_threading_mode==MPI_THREAD_MULTIPLE .AND. omp_level_size>1) THEN 1514 1528 DO isend=req%nsend+1,max_req 1515 !$OMP BARRIER 1529 CALL enter_profile(profile_mpi_omp_barrier) 1530 !$OMP BARRIER 1531 CALL exit_profile(profile_mpi_omp_barrier) 1532 1516 1533 ENDDO 1517 1534 ENDIF … … 1527 1544 rval4d=>field(ind)%rval4d 1528 1545 req=>message%request(ind) 1529 1546 1530 1547 DO irecv=1,req%nrecv 1531 1548 recv=>req%recv(irecv) … … 1536 1553 sgn=>recv%sign 1537 1554 msize=recv%size 1555 call enter_profile(profile_mpi_copies) 1538 1556 CALL trace_start("copy_data") 1539 1540 1557 !$acc parallel loop collapse(3) default(present) async if (field(ind)%ondevice) 1541 1558 DO d4=1,dim4 … … 1546 1563 ENDDO 1547 1564 ENDDO 1548 1565 call exit_profile(profile_mpi_copies) 1549 1566 CALL trace_end("copy_data") 1550 1567 … … 1573 1590 1574 1591 IF (mpi_threading_mode==MPI_THREAD_FUNNELED .OR. mpi_threading_mode==MPI_THREAD_SINGLE) THEN 1592 CALL enter_profile(profile_mpi_omp_barrier) 1575 1593 !$acc wait 1576 1594 !$OMP BARRIER 1595 CALL exit_profile(profile_mpi_omp_barrier) 1577 1596 !$OMP MASTER 1578 1597 … … 1612 1631 ENDIF 1613 1632 ENDIF 1614 1633 CALL enter_profile(profile_mpi_omp_barrier) 1615 1634 !$OMP BARRIER 1635 CALL exit_profile(profile_mpi_omp_barrier) 1636 1616 1637 ! CALL trace_end("send_message_mpi") 1617 1638 … … 1672 1693 IF (field(1)%ndim==2) THEN 1673 1694 1674 !$OMP MASTER 1675 IF (.NOT. message%completed) CALL MPI_WAITALL(nreq,message%mpi_req, & 1695 call enter_profile(profile_mpi_waitall) 1696 !$OMP MASTER 1697 IF (.NOT. message%completed) CALL MPI_WAITALL(nreq,message%mpi_req, & 1676 1698 message%status,ierr) 1677 1699 !$OMP END MASTER 1678 1700 !$OMP BARRIER 1679 1701 call exit_profile(profile_mpi_waitall) 1702 call enter_profile(profile_mpi_copies) 1680 1703 DO ind=1,ndomain 1681 1704 IF (.NOT. assigned_domain(ind) .OR. .NOT. is_omp_level_master) CYCLE … … 1701 1724 1702 1725 ENDDO 1726 call exit_profile(profile_mpi_copies) 1703 1727 1704 1728 1705 1729 ELSE IF (field(1)%ndim==3) THEN 1706 1730 call enter_profile(profile_mpi_waitall) 1707 1731 !$OMP MASTER 1708 IF (.NOT. message%completed) CALL MPI_WAITALL(nreq,message%mpi_req, &1709 message%status,ierr)1732 IF (.NOT. message%completed) CALL MPI_WAITALL(nreq,message%mpi_req, & 1733 message%status,ierr) 1710 1734 !$OMP END MASTER 1711 1735 !$OMP BARRIER 1736 call exit_profile(profile_mpi_waitall) 1712 1737 1713 1738 … … 1730 1755 msize=recv%size 1731 1756 moffset=recv%offset 1757 call enter_profile(profile_mpi_copies) 1732 1758 CALL trace_start("copy_from_buffer") 1733 1759 … … 1753 1779 1754 1780 CALL trace_end("copy_from_buffer") 1781 call exit_profile(profile_mpi_copies) 1755 1782 ENDIF 1756 1783 ENDDO … … 1759 1786 1760 1787 ELSE IF (field(1)%ndim==4) THEN 1788 call enter_profile(profile_mpi_waitall) 1761 1789 !$OMP MASTER 1762 1790 IF (.NOT. message%completed) CALL MPI_WAITALL(nreq,message%mpi_req, & … … 1764 1792 !$OMP END MASTER 1765 1793 !$OMP BARRIER 1794 call exit_profile(profile_mpi_waitall) 1766 1795 1767 1796 … … 1784 1813 msize=recv%size 1785 1814 moffset=recv%offset 1815 call enter_profile(profile_mpi_copies) 1786 1816 CALL trace_start("copy_from_buffer") 1787 1817 !$acc parallel loop default(present) collapse(2) async if (field(ind)%ondevice) … … 1797 1827 ENDDO 1798 1828 CALL trace_end("copy_from_buffer") 1829 call exit_profile(profile_mpi_copies) 1799 1830 ENDIF 1800 1831 ENDDO -
codes/icosagcm/trunk/src/time/timeloop_gcm.F90
r953 r962 45 45 IF (.NOT. enable_io) itau_out=HUGE(itau_out) 46 46 47 itau_prof=100 47 itau_prof=1000 48 48 CALL getin('itau_profiling',itau_prof) 49 49 … … 257 257 CALL update_device_field(f_wflux) 258 258 CALL update_device_field(f_rhodz) 259 call reset_profiling() 259 260 260 261 261 262 DO it=itau0+1,itau0+itaumax 262 IF (is_master)CALL print_iteration(it, itau0, itaumax, start_clock, rate_clock)263 CALL print_iteration(it, itau0, itaumax, start_clock, rate_clock) 263 264 264 265 CALL enter_profile(id_timeloop) … … 432 433 INTEGER(kind=8) :: start_clock, stop_clock, rate_clock 433 434 REAL :: per_step,total, elapsed 434 WRITE(*,'(A,I7,A,F14.1)') "It No :",it," t :",dt*it 435 IF(MOD(it,10)==0) THEN 436 CALL SYSTEM_CLOCK(stop_clock) 437 elapsed = (stop_clock-start_clock)*1./rate_clock 438 per_step = elapsed/(it-itau0) 439 throughput = INT(dt/per_step) 440 total = per_step*itaumax 441 WRITE(*,'(A,I5,A,F6.2,A,I6)') 'Time spent (s):',INT(elapsed), & 442 ' -- ms/step : ', 1000*per_step, & 443 ' -- Throughput :', throughput 444 WRITE(*,'(A,I5,A,I5)') 'Whole job (min) :', INT(total/60.), & 445 ' -- Completion in (min) : ', INT((total-elapsed)/60.) 435 436 IF(is_master) THEN 437 WRITE(*,'(A,I7,A,F14.1)') "It No :",it," t :",dt*it 438 IF(MOD(it,10)==0) THEN 439 CALL SYSTEM_CLOCK(stop_clock) 440 elapsed = (stop_clock-start_clock)*1./rate_clock 441 per_step = elapsed/(it-itau0) 442 throughput = INT(dt/per_step) 443 total = per_step*itaumax 444 WRITE(*,'(A,I5,A,F6.2,A,I6)') 'Time spent (s):',INT(elapsed), & 445 ' -- ms/step : ', 1000*per_step, & 446 ' -- Throughput :', throughput 447 WRITE(*,'(A,I5,A,I5)') 'Whole job (min) :', INT(total/60.), & 448 ' -- Completion in (min) : ', INT((total-elapsed)/60.) 449 END IF 446 450 END IF 447 451 IF(MOD(it,itau_prof)==0) CALL print_profile
Note: See TracChangeset
for help on using the changeset viewer.