Changeset 570 for branches/libIGCM_MPI_OpenMP
- Timestamp: 03/06/12 18:27:23 (12 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
branches/libIGCM_MPI_OpenMP/libIGCM_sys/libIGCM_sys_titane.ksh
r534 r570 170 170 NUM_COREPERNODE=8 171 171 172 #==================================================== 173 #- Default number of MPI task for IPSL coupled model 174 #- required for backward compatibility 175 #- 176 DEFAULT_NUM_PROC_OCE=5 177 DEFAULT_NUM_PROC_CPL=1 178 DEFAULT_NUM_PROC_ATM=26 179 DEFAULT_NUM_PROC_TOTAL=32 172 180 173 181 #D-#================================================== … … 1331 1339 } 1332 1340 1333 1334 # function IGCM_sys_build_run_file {1335 1341 ############################################################ 1336 # Build run file 1337 1338 # IGCM_debug_PushStack "IGCM_sys_build_run_file" $@ 1339 # if ( $DEBUG_sys ) ; then 1340 # echo "IGCM_sys_build_run_file " $@ 1341 # fi 1342 1343 # # set Number of processors for OCE here 1344 # NUM_PROC_OCE=5 1345 1346 # (( NUM_PROC_ATM = $BATCH_NUM_PROC_TOT - NUM_PROC_OCE - 1)) 1347 # (( nb_tot_m1 = $BATCH_NUM_PROC_TOT - NUM_PROC_OCE )) 1348 # if [ $1 = MPI1 ]; then 1349 # cat <<EOF > run_file 1350 # -np 1 ./oasis 1351 # -np ${NUM_PROC_ATM} ./lmdz.x 1352 # -np ${NUM_PROC_OCE} ./opa.xx 1353 # EOF 1354 # config_UserChoices_JobRunOptions='"--app"' 1355 # IGCM_sys_Chmod u+x run_file 1356 # fi 1357 # IGCM_debug_PopStack "IGCM_sys_build_run_file" 1358 1359 # } 1342 # Build MPI/OMP scripts run file (dummy function) 1343 1344 function IGCM_sys_build_run_file { 1345 1346 IGCM_debug_Print 3 " dummy function : IGCM_sys_build_run_file " 1347 1348 } 1360 1349 1361 1350 ############################################################ 1362 1351 # Build MPI/OMP scripts 1352 1363 1353 function IGCM_sys_build_execution_scripts 1364 1354 { 1365 1355 IGCM_debug_PushStack "IGCM_sys_build_execution_scripts" $@ 1366 1356 if ( $DEBUG_sys ) ; then 1367 echo "IGCM_sys_build_execution_scripts " $@ 1368 fi 1369 1370 typeset nodes listnodes init_node node_num_current start_num init_exec comp ExeNameIn ExeNameOut 1371 typeset nombre_restant_node node_num_current node_current comp_proc_mpi_loc comp_proc_omp_loc 1357 echo 
"IGCM_sys_build_execution_scripts " $@ 1358 fi 1359 typeset nodes listnodes init_node start_num init_exec comp ExeNameIn ExeNameOut 1360 typeset node_num_current node_current comp_proc_mpi_loc comp_proc_omp_loc 1372 1361 typeset num_corempi nombre_restant_node nombre_restant_comp 1373 1362 1374 # Verification with PBS parameter 1375 if [ X${BATCH_NUM_PROC_TOT} != X ] ; then 1376 1377 if ( ${OK_PARA_MPI} ) ; then 1378 EXECUTION=${HOST_MPIRUN_COMMAND} 1379 else 1380 if [ ${BATCH_NUM_PROC_TOT} -eq 1 ] ; then 1381 EXECUTION=${MPIRUN_COMMAND:="time "} 1382 fi 1383 fi 1384 else 1385 if ( ! ${OK_PARA_MPI} ) ; then 1386 EXECUTION=${MPIRUN_COMMAND:="time "} 1387 fi 1388 fi 1389 1390 if [ ${PROCESSUS_NUMBER} -gt 1 ] ; then 1363 if [ ! -f ${SUBMIT_DIR}/Job_${config_UserChoices_JobName} ] ; then 1364 IGCM_debug_Exit "IGCM_sys_titane build_execution_scripts : Job_${config_UserChoices_JobName} doesn't exist in SUBMIT_DIR : ${SUBMIT_DIR} " 1365 fi 1366 1367 if ( ${OK_PARA_MPMD} ) ; then 1368 1369 if [ -f run_file ] ; then 1370 IGCM_sys_Rm -f run_file 1371 fi 1372 touch run_file 1373 1374 if ( ${OK_PARA_OMP} ) ; then 1391 1375 1392 1376 # Hosts treatment 1393 1377 1394 ${EXECUTION} hostname | sort | uniq > hosts.tmp1378 ${HOST_MPIRUN_COMMAND} hostname | sort | uniq > hosts.tmp 1395 1379 1396 i=01397 rm -f hosts1398 IGCM_debug_Print 1 "sys Titane, Hosts avaible :"1399 for nodes in `cat hosts.tmp`1400 do1401 1402 1403 1404 1405 done1406 rm -f hosts.tmp1407 1408 listnodes=${host[*]}1409 1410 EXECUTION="${EXECUTION} -hostfile hosts"1411 1412 1380 i=0 1381 rm -f hosts 1382 IGCM_debug_Print 1 "sys Titane, Hosts avaible :" 1383 for nodes in `cat hosts.tmp` 1384 do 1385 host[$i]=$nodes 1386 echo "${host[$i]} slots=8 max_slots=8" >> hosts 1387 IGCM_debug_Print 1 ${host[$i]} 1388 i=$((i+1)) 1389 done 1390 rm -f hosts.tmp 1391 1392 listnodes=${host[*]} 1393 1394 EXECUTION="${HOST_MPIRUN_COMMAND} -hostfile hosts" 1395 # fi 1396 1413 1397 # Initialisation 1414 1398 1415 init_node=y1416 
node_num_current=01417 start_num=01418 init_exec=n1399 init_node=y 1400 node_num_current=0 1401 start_num=0 1402 init_exec=n 1419 1403 1420 1404 1421 1405 # Test : if oasis is there, we put it at the first position 1406 1407 for comp in ${config_ListOfComponents[*]} ; do 1422 1408 1423 for comp in ${config_ListOfComponents[*]} ; do 1424 1425 if [ "X${comp}" = "XCPL" ] ; then 1409 if [ "X${comp}" = "XCPL" ] ; then 1410 1411 eval ExeNameIn=\${config_Executable_${comp}[0]} 1412 eval ExeNameOut=\${config_Executable_${comp}[1]} 1413 1414 echo "#!/bin/ksh" > script_${ExeNameOut}.ksh 1415 echo "" >> script_${ExeNameOut}.ksh 1416 echo "export KMP_STACKSIZE=3g" >> script_${ExeNameOut}.ksh 1417 echo "export KMP_LIBRARY=turnaround" >> script_${ExeNameOut}.ksh 1418 echo "export MKL_SERIAL=YES" >> script_${ExeNameOut}.ksh 1419 echo "./${ExeNameOut} > out_${ExeNameOut}.out 2>out_${ExeNameOut}.err" >> script_${ExeNameOut}.ksh 1420 IGCM_sys_Chmod u+x script_${ExeNameOut}.ksh 1421 1422 init_node=n 1423 1424 (( nombre_restant_node = NUM_COREPERNODE - 1 )) 1425 node_num_current=0 1426 node_current=${host[${node_num_current}]} 1427 1428 EXECUTION="${EXECUTION} -H ${node_current} -np 1 ./script_${ExeNameOut}.ksh" 1429 1430 init_exec=y 1431 start_num=1 1432 1433 fi 1434 1435 done 1426 1436 1427 eval ExeNameIn=\${config_Executable_${comp}[0]}1428 eval ExeNameOut=\${config_Executable_${comp}[1]}1429 1430 echo "#!/bin/ksh" > script_${ExeNameOut}.ksh1431 echo "" >> script_${ExeNameOut}.ksh1432 echo "export KMP_STACKSIZE=3g" >> script_${ExeNameOut}.ksh1433 echo "export KMP_LIBRARY=turnaround" >> script_${ExeNameOut}.ksh1434 echo "export MKL_SERIAL=YES" >> script_${ExeNameOut}.ksh1435 echo "./${ExeNameOut} > out_${ExeNameOut}.out 2>out_${ExeNameOut}.err" >> script_${ExeNameOut}.ksh1436 IGCM_sys_Chmod u+x script_${ExeNameOut}.ksh1437 1438 init_node=n1439 1440 (( nombre_restant_node = NUM_COREPERNODE - 1 ))1441 node_num_current=01442 node_current=${host[${node_num_current}]}1443 1444 
EXECUTION="${EXECUTION} -H ${node_current} -np 1 ./script_${ExeNameOut}.ksh"1445 1446 init_exec=y1447 start_num=11448 1449 fi1450 1451 done1452 1453 1437 # Then loop on the components (except for oasis) 1454 1438 1455 for comp in ${config_ListOfComponents[*]} ; do1456 1457 eval ExeNameIn=\${config_Executable_${comp}[0]}1458 eval ExeNameOut=\${config_Executable_${comp}[1]}1459 1460 1439 for comp in ${config_ListOfComponents[*]} ; do 1440 1441 eval ExeNameIn=\${config_Executable_${comp}[0]} 1442 eval ExeNameOut=\${config_Executable_${comp}[1]} 1443 1444 1461 1445 # Only if we really have an executable for the component : 1462 if ( [ "X${ExeNameOut}" != X\"\" ] && [ "X${comp}" != "XCPL" ] ) ; then 1463 1464 eval comp_proc_mpi_loc=\${${comp}_PROC_MPI} 1465 eval comp_proc_omp_loc=\${${comp}_PROC_OMP} 1466 1467 if ( ${OK_PARA_MPI} ) ; then 1468 1469 echo "#!/bin/ksh" > script_${ExeNameOut}.ksh 1446 if ( [ "X${ExeNameOut}" != X\"\" ] && [ "X${comp}" != "XCPL" ] ) ; then 1447 1448 eval comp_proc_mpi_loc=\${${comp}_PROC_MPI} 1449 eval comp_proc_omp_loc=\${${comp}_PROC_OMP} 1450 1451 echo "#!/bin/ksh" > script_${ExeNameOut}.ksh 1470 1452 # echo "set -vx" >> script_${ExeNameOut}.ksh 1471 echo "" >> script_${ExeNameOut}.ksh1472 echo "export KMP_STACKSIZE=3g" >> script_${ExeNameOut}.ksh1473 echo "export KMP_LIBRARY=turnaround" >> script_${ExeNameOut}.ksh1474 echo "export MKL_SERIAL=YES" >> script_${ExeNameOut}.ksh1475 echo "OMP_NUM_THREADS=${comp_proc_omp_loc}" >> script_${ExeNameOut}.ksh1476 echo "(( MYMPIRANK = OMPI_COMM_WORLD_RANK - ${start_num})) " >> script_${ExeNameOut}.ksh1477 echo "./${ExeNameOut} > out_${ExeNameOut}.out.\${MYMPIRANK} 2>out_${ExeNameOut}.err.\${MYMPIRANK}" >> script_${ExeNameOut}.ksh1478 IGCM_sys_Chmod u+x script_${ExeNameOut}.ksh1479 1480 node_num=01481 1453 echo "" >> script_${ExeNameOut}.ksh 1454 echo "export KMP_STACKSIZE=3g" >> script_${ExeNameOut}.ksh 1455 echo "export KMP_LIBRARY=turnaround" >> script_${ExeNameOut}.ksh 1456 echo "export 
MKL_SERIAL=YES" >> script_${ExeNameOut}.ksh 1457 echo "OMP_NUM_THREADS=${comp_proc_omp_loc}" >> script_${ExeNameOut}.ksh 1458 echo "(( MYMPIRANK = OMPI_COMM_WORLD_RANK - ${start_num})) " >> script_${ExeNameOut}.ksh 1459 echo "./${ExeNameOut} > out_${ExeNameOut}.out.\${MYMPIRANK} 2>out_${ExeNameOut}.err.\${MYMPIRANK}" >> script_${ExeNameOut}.ksh 1460 IGCM_sys_Chmod u+x script_${ExeNameOut}.ksh 1461 1462 node_num=0 1463 1482 1464 # We define the number of MPI process to be assigned for the component 1483 1465 1484 nombre_restant_comp=${comp_proc_mpi_loc}1485 1466 nombre_restant_comp=${comp_proc_mpi_loc} 1467 1486 1468 # Loop on the allocated nodes 1487 1469 1488 for node in ${listnodes} ; do1470 for node in ${listnodes} ; do 1489 1471 1490 1472 # We go to the current node 1491 1473 1492 1493 1494 node_current=${host[${node_num_current}]}1474 if [ ${node_num} = ${node_num_current} ] ; then 1475 1476 node_current=${host[${node_num_current}]} 1495 1477 1496 1478 # If first time on the node : initialisation 1497 1479 1498 if [ ${init_node} = y ] ; then 1499 nombre_restant_node=${NUM_COREPERNODE} 1480 if [ ${init_node} = y ] ; then 1481 nombre_restant_node=${NUM_COREPERNODE} 1482 fi 1483 1484 # Test on the number of OMP threads 1485 1486 if [ ${comp_proc_omp_loc} -gt ${nombre_restant_node} ] ; then 1487 (( node_num = node_num + 1 )) 1488 node_num_current=${node_num} 1489 init_node=y 1490 continue 1491 fi 1492 1493 # Number of MPI process to assign 1494 1495 (( num_corempi = nombre_restant_node / comp_proc_omp_loc )) 1496 1497 if [ ${num_corempi} -gt ${nombre_restant_comp} ] ; then 1498 num_corempi=${nombre_restant_comp} 1499 fi 1500 1501 (( nombre_restant_node = nombre_restant_node - num_corempi * comp_proc_omp_loc )) 1502 (( nombre_restant_comp = nombre_restant_comp - num_corempi )) 1503 1504 if [ ${init_exec} = y ] ; then 1505 EXECUTION="${EXECUTION} : -H ${node_current} -np ${num_corempi} ./script_${ExeNameOut}.ksh" 1506 else 1507 EXECUTION="${EXECUTION} -H 
${node_current} -np ${num_corempi} ./script_${ExeNameOut}.ksh" 1508 init_exec=y 1509 fi 1510 1511 (( start_num = num_corempi + start_num )) 1512 1513 else 1514 1515 (( node_num = node_num + 1 )) 1516 continue 1500 1517 fi 1501 1502 # Test on the number of OMP threads1503 1504 if [ ${ comp_proc_omp_loc} -gt ${nombre_restant_node}] ; then1518 1519 # Test on the number of core/process remaining on the node/component 1520 1521 if [ ${nombre_restant_node} = 0 ] ; then 1505 1522 (( node_num = node_num + 1 )) 1506 1523 node_num_current=${node_num} 1507 1524 init_node=y 1508 continue 1525 1526 if [ ${nombre_restant_comp} = 0 ] ; then 1527 break 1 1528 fi 1529 else 1530 1531 node_num_current=${node_num} 1532 init_node=n 1533 1534 if [ ${nombre_restant_comp} = 0 ] ; then 1535 break 1 1536 fi 1509 1537 fi 1510 1511 # Number of MPI process to assign 1512 1513 (( num_corempi = nombre_restant_node / comp_proc_omp_loc )) 1514 1515 if [ ${num_corempi} -gt ${nombre_restant_comp} ] ; then 1516 num_corempi=${nombre_restant_comp} 1517 fi 1518 1519 (( nombre_restant_node = nombre_restant_node - num_corempi * comp_proc_omp_loc )) 1520 (( nombre_restant_comp = nombre_restant_comp - num_corempi )) 1538 done 1539 fi 1540 done 1541 1542 else 1543 1544 # Then first loop on the components for the coupler ie oasis 1545 1546 ### the coupler ie oasis must be the first one 1547 for comp in ${config_ListOfComponents[*]} ; do 1548 1549 eval ExeNameOut=\${config_Executable_${comp}[1]} 1550 1551 # for CPL component only 1552 if [ "X${comp}" = "XCPL" ] ; then 1553 1554 eval comp_proc_mpi_loc=\${${comp}_PROC_MPI} 1521 1555 1522 if [ ${init_exec} = y ] ; then 1523 EXECUTION="${EXECUTION} : -H ${node_current} -np ${num_corempi} ./script_${ExeNameOut}.ksh" 1524 else 1525 EXECUTION="${EXECUTION} -H ${node_current} -np ${num_corempi} ./script_${ExeNameOut}.ksh" 1526 init_exec=y 1527 fi 1528 1529 (( start_num = num_corempi + start_num )) 1530 1556 if ( ${OK_PARA_MPI} ) ; then 1557 (( mpi_count = 1 )) 1558 
until [ ${mpi_count} -gt ${comp_proc_mpi_loc} ] ; do 1559 echo "-np ${comp_proc_mpi_loc} ./${ExeNameOut}" >> run_file 1560 (( mpi_count = mpi_count + 1 )) 1561 done 1531 1562 else 1532 1533 (( node_num = node_num + 1 )) 1534 continue 1563 echo "-np ${comp_proc_mpi_loc} ./${ExeNameOut} " >> run_file 1535 1564 fi 1565 fi 1566 done 1567 1568 # Then second loop on the components 1569 1570 for comp in ${config_ListOfComponents[*]} ; do 1536 1571 1537 # Test on the number of core/process remaining on the node/component 1572 eval ExeNameOut=\${config_Executable_${comp}[1]} 1538 1573 1539 if [ ${nombre_restant_node} = 0 ] ; then 1540 (( node_num = node_num + 1 )) 1541 node_num_current=${node_num} 1542 init_node=y 1543 1544 if [ ${nombre_restant_comp} = 0 ] ; then 1545 break 1 1546 fi 1574 # Only if we really have an executable for the component and not the coupler ie oasis: 1575 if ( [ "X${ExeNameOut}" != X\"\" ] && [ "X${comp}" != "XCPL" ] ) ; then 1576 1577 eval comp_proc_mpi_loc=\${${comp}_PROC_MPI} 1578 1579 if ( ${OK_PARA_MPI} ) ; then 1580 1581 (( mpi_count = 1 )) 1582 until [ ${mpi_count} -gt ${comp_proc_mpi_loc} ] ; do 1583 echo "-np ${comp_proc_mpi_loc} ./${ExeNameOut}" >> run_file 1584 (( mpi_count = mpi_count + 1 )) 1585 done 1547 1586 else 1548 1549 node_num_current=${node_num} 1550 init_node=n 1551 1552 if [ ${nombre_restant_comp} = 0 ] ; then 1553 break 1 1554 fi 1587 echo "-np ${comp_proc_mpi_loc} ./${ExeNameOut} " >> run_file 1555 1588 fi 1556 done 1557 else 1558 EXECUTION="${EXECUTION} ./${ExeNameOut}" 1589 fi 1590 done 1591 IGCM_sys_Chmod u+x run_file 1592 1593 EXECUTION="${HOST_MPIRUN_COMMAND} --app ./run_file" 1594 1595 fi 1596 1597 else # Only one executable. launch it. 
1598 1599 for comp in ${config_ListOfComponents[*]} ; do 1600 1601 # Only if we really have an executable for the component : 1602 eval ExeNameOut=\${config_Executable_${comp}[1]} 1603 if ( [ "X${ExeNameOut}" != X\"\" ] ) ; then 1604 1605 echo "#!/bin/ksh" > script_${ExeNameOut}.ksh 1606 echo "" >> script_${ExeNameOut}.ksh 1607 if ( ${OK_PARA_OMP} ) ; then 1608 echo "OMP_NUM_THREADS=${comp_proc_omp_loc}" >> script_${ExeNameOut}.ksh 1609 fi 1610 if ( ${OK_PARA_MPI} ) ; then 1611 # Attention : a voir si sur 1 proc mais non // 1612 echo "./${ExeNameOut} > out_${ExeNameOut}.out.\${OMPI_COMM_WORLD_RANK} 2>out_${ExeNameOut}.err.\${OMPI_COMM_WORLD_RANK}" >> script_${ExeNameOut}.ksh 1613 IGCM_sys_Chmod u+x script_${ExeNameOut}.ksh 1614 EXECUTION="${HOST_MPIRUN_COMMAND} ./script_${ExeNameOut}.ksh" 1615 else 1616 EXECUTION="time ./${ExeNameOut}" 1617 fi 1559 1618 fi 1560 fi 1561 done 1562 1563 IGCM_debug_Print 1 "sys Titane : La commande d execution est " 1564 IGCM_debug_Print 1 $EXECUTION 1565 1566 IGCM_debug_PopStack "IGCM_sys_build_execution_scripts" 1619 done 1620 1621 fi 1622 1623 IGCM_debug_Print 1 "sys Titane : La commande d execution est " 1624 IGCM_debug_Print 1 $EXECUTION 1625 1626 IGCM_debug_PopStack "IGCM_sys_build_execution_scripts" 1567 1627 } 1568 1628
Note: See TracChangeset for help on using the changeset viewer.