Changeset 490 for branches/libIGCM_MPI_OpenMP
- Timestamp: 06/16/11 12:31:47 (13 years ago)
- Location: branches/libIGCM_MPI_OpenMP
- Files: 3 edited
Legend:
- Unmodified
- Added
- Removed
-
branches/libIGCM_MPI_OpenMP/AA_job
r476 r490 440 440 441 441 echo "========================================================================" 442 if ( [ X${BATCH_NUM_PROC_TOT} != X ] && [ "${BATCH_NUM_PROC_TOT}" -gt 1 ] ) ; then443 MPIRUN_COMMAND=${HOST_MPIRUN_COMMAND}444 eval MPIRUN_OPTIONS=${config_UserChoices_JobRunOptions}445 else446 MPIRUN_COMMAND=${MPIRUN_COMMAND:="time "}447 MPIRUN_OPTIONS=${MPIRUN_OPTIONS:=""}448 fi449 450 442 if [ ${DRYRUN} -le 1 ] ; then 451 443 REAL_DATE_INIT=$( date ) 452 echo "EXECUTION of : ${MPIRUN_COMMAND} ${MPIRUN_OPTIONS} ./${config_Executable_Name} > ${Exe_Output} 2>&1"453 444 echo > ${Exe_Output} 454 445 echo "#######################################" >> ${Exe_Output} 455 echo "EXECUTION of : ${ MPIRUN_COMMAND} ${MPIRUN_OPTIONS} ./${config_Executable_Name}">> ${Exe_Output}446 echo "EXECUTION of : ${EXECUTION}" >> ${Exe_Output} 456 447 echo >> ${Exe_Output} 457 448 typeset RET 458 449 RUN_DATE_BEGIN=$( date '+%Y-%m-%dT%H:%M:%S' ) 459 ${ MPIRUN_COMMAND} ${MPIRUN_OPTIONS} ./${config_Executable_Name}>> ${Exe_Output} 2>&1450 ${EXECUTION} >> ${Exe_Output} 2>&1 460 451 RET=$? 461 452 RUN_DATE_END=$( date '+%Y-%m-%dT%H:%M:%S' ) … … 473 464 echo >> ${Exe_Output} 474 465 else 475 echo "EXECUTION of : ${MPIRUN_COMMAND} ${MPIRUN_OPTIONS} ./${config_Executable_Name} simulated for DRYRUN = " $DRYRUN 466 echo "EXECUTION of : ${EXECUTION} simulated for DRYRUN = " $DRYRUN 467 echo "EXECUTION of : ${EXECUTION} simulated for DRYRUN = " $DRYRUN > ${Exe_Output} 476 468 if ( $DEBUG_debug ) ; then 477 469 echo "FOR EXECUTION DRYRUN mode = " $DRYRUN >> stack -
branches/libIGCM_MPI_OpenMP/libIGCM_config/libIGCM_config.ksh
r473 r490 108 108 eval Script_Output_Prefix=${config_UserChoices_Script_Output_Prefix:='Script_Output'} 109 109 IGCM_debug_Print 3 "Script_Output_Prefix = ${Script_Output_Prefix}" 110 eval Exe_Output=out_${config_Executable_Name} 111 IGCM_debug_Print 3 "Exe_Output = ${Exe_Output}" 110 #MM : obsolete ! 111 # eval Exe_Output=out_${config_Executable_Name} 112 eval Exe_Output=out_execution 113 # IGCM_debug_Print 3 "Exe_Output = ${Exe_Output}" 112 114 113 115 #================================== … … 536 538 fi 537 539 540 typeset ExeNameIn 541 typeset comp i 542 typeset tempvar tempvarMPI tempvarNOD NbElts j 543 544 (( PROCESSUS_NUMBER = 0 )) 545 (( i=0 )) 546 547 OK_PARA_MPI=false 548 OK_PARA_OMP=false 549 OK_PARA_NOD=false 550 for comp in ${config_ListOfComponents[*]} ; do 551 552 IGCM_debug_Print 1 ${comp} 553 554 eval ExeNameIn=\${config_Executable_${comp}[0]} 555 556 # NO order in config.card for parallelized values ! 557 # just use suffix : MPI , OMP and NOD (for number of NODes.) 558 559 # NOD is the number of NODes allocated 560 eval ${comp}_PROC_NOD=0 561 562 # MPI is the number of MPI processus per nodes 563 eval ${comp}_PROC_MPI=0 564 565 # OMP is the number of OpenMP threads per MPI processus 566 eval ${comp}_PROC_OMP=0 567 568 # Only if we really have an executable for the component : 569 if [ X${ExeNameIn} != X\"\" ] ; then 570 eval NbElts=\${#config_Executable_${comp}[@]} 571 if [ ${NbElts} -gt 2 ] ; then 572 (( j = 2 )) 573 eval ${comp}_PROC_MPI=1 574 eval ${comp}_PROC_OMP=1 575 eval ${comp}_PROC_NOD=1 576 577 while [ $j -lt ${NbElts} ] ; do 578 eval tempvar=\${config_Executable_${comp}[${j}]} 579 580 echo 581 #MM BUG impossible ?! 582 if [ X${tempvar} == X ] ; then 583 echo "Error reading MPI/OMP parameters !!!" 584 echo "Check your config.card." 
585 # MM : à enlever : 586 exit 1 587 #break 588 fi 589 590 # pb eval ${comp}_PROC_MPI=1 591 # pb eval ${comp}_PROC_OMP=1 592 # pb eval ${comp}_PROC_NOD=1 593 594 eval IGCM_debug_Print 2 ${tempvar} 595 596 case ${tempvar} in 597 *MPI) 598 # Read MPI parameter for composante 599 eval ${comp}_PROC_MPI=$( echo ${tempvar} | sed -e "s/MPI//" ) 600 OK_PARA_MPI=true;; 601 *OMP) 602 # Read OMP parameter for composante 603 eval ${comp}_PROC_OMP=$( echo ${tempvar} | sed -e "s/OMP//" ) 604 OK_PARA_OMP=true;; 605 *NOD) 606 # Read NOD (NumBer of Nodes) parameter for composante 607 eval ${comp}_PROC_NOD=$( echo ${tempvar} | sed -e "s/NOD//" ) 608 OK_PARA_NOD=true 609 OK_PARA_MPI=true;; 610 esac 611 612 (( j = j + 1 )) 613 done 614 615 eval tempvarMPI=\${${comp}_PROC_MPI} 616 eval tempvarNOD=\${${comp}_PROC_NOD} 617 eval tempvarOMP=\${${comp}_PROC_OMP} 618 (( PROCESSUS_NUMBER = PROCESSUS_NUMBER + tempvarMPI * tempvarNOD * tempvarOMP )) 619 fi 620 fi 621 622 (( i=i+1 )) 623 done 624 625 # sequential case ! 626 if [ ${PROCESSUS_NUMBER} -eq 0 ] ; then 627 (( PROCESSUS_NUMBER = 1 )) 628 echo "PROCESSUS_NUMBER is all 0 (sequential use of old definition in config->Executable->list)." 629 echo "We set it to 1." 630 fi 631 632 IGCM_debug_Print 1 "MPI/OMP treatment PROCESSUS_NUMBER = ${PROCESSUS_NUMBER}" 633 634 # Verification with PBS parameter 635 if [ X${BATCH_NUM_PROC_TOT} != X ] ; then 636 if [ ${BATCH_NUM_PROC_TOT} -ne ${PROCESSUS_NUMBER} ] ; then 637 echo "Warning with parallelization parameters !" 638 echo "PBS variable BATCH_NUM_PROC_TOT = ${BATCH_NUM_PROC_TOT} " 639 echo " is the total number of _processors_ reserved." 640 echo "It is not equal to the sum of _processus_ in config.card = ${PROCESSUS_NUMBER}." 641 fi 642 if ( ! ${OK_PARA_MPI} ) ; then 643 if [ ${BATCH_NUM_PROC_TOT} -gt 1 ] ; then 644 echo "You have given a reservation of ${BATCH_NUM_PROC_TOT} processors," 645 echo "but you havn't filled the parallel parameter in config->Executable->list." 
646 echo "This is the old method and it is no more supported." 647 echo "Please read documentation or config specific comments." 648 echo "We stop now." 649 exit 1 650 else 651 PROCESSUS_NUMBER=1 652 fi 653 fi 654 else 655 if ( ${OK_PARA_MPI} ) ; then 656 echo "You have not given a value of ${BATCH_NUM_PROC_TOT} processors," 657 echo "but you have filled the parallel parameter in config->Executable->list." 658 echo "This is the old method and it is no more supported." 659 echo "Please read documentation or config specific comments." 660 echo "We stop now." 661 exit 1 662 fi 663 fi 664 665 # if ( ${OK_PARA_MPI} || ${OK_PARA_OMP} || ${OK_PARA_NOD} ) ; then 666 IGCM_sys_build_execution_scripts 667 # fi 668 538 669 ExecutionFail=false 539 670 -
branches/libIGCM_MPI_OpenMP/libIGCM_sys/libIGCM_sys_titane.ksh
r468 r490 167 167 #- Max number of arguments passed to nco operator or demigration command 168 168 UNIX_MAX_LIMIT=360 169 170 NUM_COREPERNODE=8 171 169 172 170 173 #D-#================================================== … … 1266 1269 echo "IGCM_sys_activ_variables" 1267 1270 fi 1268 ulimit -s 2097152 1271 # ulimit -s 2097152 1272 1273 typeset max_omp 1274 1275 ulimit -s unlimited 1276 1277 (( max_omp = 0 )) 1278 for comp in ${config_ListOfComponents[*]} ; do 1279 1280 eval ExeNameIn=\${config_Executable_${comp}[0]} 1281 eval ExeNameOut=\${config_Executable_${comp}[1]} 1282 1283 1284 # Only if we really have an executable for the component : 1285 if ( [ "X${ExeNameOut}" != X\"\" ] && [ "X${comp}" != "XCPL" ] ) ; then 1286 eval comp_proc_omp_loc=\${${comp}_PROC_OMP} 1287 1288 if [ ${comp_proc_omp_loc} -gt ${max_omp} ] ; then 1289 1290 (( max_omp = comp_proc_omp_loc )) 1291 fi 1292 fi 1293 done 1294 if [ ${max_omp} -gt 1 ] ; then 1295 module load openmp/${max_omp}thds 1296 fi 1297 1269 1298 IGCM_debug_PopStack "IGCM_sys_activ_variables" 1270 1299 } … … 1281 1310 } 1282 1311 1312 1313 # function IGCM_sys_build_run_file { 1283 1314 ############################################################ 1284 1315 # Build run file 1285 1316 1286 function IGCM_sys_build_run_file { 1287 IGCM_debug_PushStack "IGCM_sys_build_run_file" $@ 1288 if ( $DEBUG_sys ) ; then 1289 echo "IGCM_sys_build_run_file " $@ 1290 fi 1291 1292 # set Number of processors for OCE here 1293 NUM_PROC_OCE=5 1294 1295 (( NUM_PROC_ATM = $BATCH_NUM_PROC_TOT - NUM_PROC_OCE - 1)) 1296 (( nb_tot_m1 = $BATCH_NUM_PROC_TOT - NUM_PROC_OCE )) 1297 if [ $1 = MPI1 ]; then 1298 cat <<EOF > run_file 1299 -np 1 ./oasis 1300 -np ${NUM_PROC_ATM} ./lmdz.x 1301 -np ${NUM_PROC_OCE} ./opa.xx 1302 EOF 1303 config_UserChoices_JobRunOptions='"--app"' 1304 IGCM_sys_Chmod u+x run_file 1305 fi 1306 IGCM_debug_PopStack "IGCM_sys_build_run_file" 1317 # IGCM_debug_PushStack "IGCM_sys_build_run_file" $@ 1318 # if ( $DEBUG_sys ) ; then 1319 # 
echo "IGCM_sys_build_run_file " $@ 1320 # fi 1321 1322 # # set Number of processors for OCE here 1323 # NUM_PROC_OCE=5 1324 1325 # (( NUM_PROC_ATM = $BATCH_NUM_PROC_TOT - NUM_PROC_OCE - 1)) 1326 # (( nb_tot_m1 = $BATCH_NUM_PROC_TOT - NUM_PROC_OCE )) 1327 # if [ $1 = MPI1 ]; then 1328 # cat <<EOF > run_file 1329 # -np 1 ./oasis 1330 # -np ${NUM_PROC_ATM} ./lmdz.x 1331 # -np ${NUM_PROC_OCE} ./opa.xx 1332 # EOF 1333 # config_UserChoices_JobRunOptions='"--app"' 1334 # IGCM_sys_Chmod u+x run_file 1335 # fi 1336 # IGCM_debug_PopStack "IGCM_sys_build_run_file" 1307 1337 1308 } 1338 # } 1339 1340 ############################################################ 1341 # Build MPI/OMP scripts 1342 function IGCM_sys_build_execution_scripts 1343 { 1344 IGCM_debug_PushStack "IGCM_sys_build_execution_scripts" $@ 1345 if ( $DEBUG_sys ) ; then 1346 echo "IGCM_sys_build_execution_scripts " $@ 1347 fi 1348 1349 # Verification with PBS parameter 1350 if [ X${BATCH_NUM_PROC_TOT} != X ] ; then 1351 1352 if ( ${OK_PARA_MPI} ) ; then 1353 EXECUTION=${HOST_MPIRUN_COMMAND} 1354 else 1355 if [ ${BATCH_NUM_PROC_TOT} -eq 1 ] ; then 1356 EXECUTION=${MPIRUN_COMMAND:="time "} 1357 fi 1358 fi 1359 else 1360 if ( ! 
${OK_PARA_MPI} ) ; then 1361 EXECUTION=${MPIRUN_COMMAND:="time "} 1362 fi 1363 fi 1364 1365 if [ ${PROCESSUS_NUMBER} -gt 1 ] ; then 1366 1367 # Hosts treatment 1368 1369 ${EXECUTION} hostname | sort | uniq > hosts.tmp 1370 1371 i=0 1372 rm -f hosts 1373 IGCM_debug_Print 1 "sys Titane, Hosts avaible :" 1374 for nodes in `cat hosts.tmp` 1375 do 1376 host[$i]=$nodes 1377 echo "${host[$i]} slots=8 max_slots=8" >> hosts 1378 IGCM_debug_Print 1 ${host[$i]} 1379 i=$((i+1)) 1380 done 1381 rm -f hosts.tmp 1382 1383 listnodes=${host[*]} 1384 1385 EXECUTION="${EXECUTION} -hostfile hosts" 1386 fi 1387 1388 # Initialisation 1389 1390 init_node=y 1391 node_num_current=0 1392 start_num=0 1393 init_exec=n 1394 1395 1396 # Test : if oasis is there, we put it at the first position 1397 1398 for comp in ${config_ListOfComponents[*]} ; do 1399 1400 if [ "X${comp}" = "XCPL" ] ; then 1401 1402 eval ExeNameIn=\${config_Executable_${comp}[0]} 1403 eval ExeNameOut=\${config_Executable_${comp}[1]} 1404 1405 echo "#!/bin/ksh" > script_${ExeNameOut}.ksh 1406 echo "" >> script_${ExeNameOut}.ksh 1407 echo "export KMP_STACKSIZE=3g" >> script_${ExeNameOut}.ksh 1408 echo "export KMP_LIBRARY=turnaround" >> script_${ExeNameOut}.ksh 1409 echo "export MKL_SERIAL=YES" >> script_${ExeNameOut}.ksh 1410 echo "./${ExeNameOut} > out_${ExeNameOut}.out 2>out_${ExeNameOut}.err" >> script_${ExeNameOut}.ksh 1411 IGCM_sys_Chmod u+x script_${ExeNameOut}.ksh 1412 1413 init_node=n 1414 1415 (( nombre_restant_node = NUM_COREPERNODE - 1 )) 1416 node_num_current=0 1417 node_current=${host[${node_num_current}]} 1418 1419 EXECUTION="${EXECUTION} -H ${node_current} -np 1 ./script_${ExeNameOut}.ksh" 1420 1421 init_exec=y 1422 start_num=1 1423 1424 fi 1425 1426 done 1427 1428 # Then loop on the components (except for oasis) 1429 1430 for comp in ${config_ListOfComponents[*]} ; do 1431 1432 eval ExeNameIn=\${config_Executable_${comp}[0]} 1433 eval ExeNameOut=\${config_Executable_${comp}[1]} 1434 1435 1436 # Only if we really 
have an executable for the component : 1437 if ( [ "X${ExeNameOut}" != X\"\" ] && [ "X${comp}" != "XCPL" ] ) ; then 1438 1439 eval comp_proc_mpi_loc=\${${comp}_PROC_MPI} 1440 eval comp_proc_omp_loc=\${${comp}_PROC_OMP} 1441 1442 if ( ${OK_PARA_MPI} ) ; then 1443 1444 echo "#!/bin/ksh" > script_${ExeNameOut}.ksh 1445 # echo "set -vx" >> script_${ExeNameOut}.ksh 1446 echo "" >> script_${ExeNameOut}.ksh 1447 echo "export KMP_STACKSIZE=3g" >> script_${ExeNameOut}.ksh 1448 echo "export KMP_LIBRARY=turnaround" >> script_${ExeNameOut}.ksh 1449 echo "export MKL_SERIAL=YES" >> script_${ExeNameOut}.ksh 1450 echo "OMP_NUM_THREADS=${comp_proc_omp_loc}" >> script_${ExeNameOut}.ksh 1451 echo "(( MYMPIRANK = OMPI_COMM_WORLD_RANK - ${start_num})) " >> script_${ExeNameOut}.ksh 1452 echo "./${ExeNameOut} > out_${ExeNameOut}.out.\${MYMPIRANK} 2>out_${ExeNameOut}.err.\${MYMPIRANK}" >> script_${ExeNameOut}.ksh 1453 IGCM_sys_Chmod u+x script_${ExeNameOut}.ksh 1454 1455 node_num=0 1456 1457 # We define the number of MPI process to be assigned for the component 1458 1459 nombre_restant_comp=${comp_proc_mpi_loc} 1460 1461 # Loop on the allocated nodes 1462 1463 for node in ${listnodes} ; do 1464 1465 # We go to the current node 1466 1467 if [ ${node_num} = ${node_num_current} ] ; then 1468 1469 node_current=${host[${node_num_current}]} 1470 1471 # If first time on the node : initialisation 1472 1473 if [ ${init_node} = y ] ; then 1474 nombre_restant_node=${NUM_COREPERNODE} 1475 fi 1476 1477 # Test on the number of OMP threads 1478 1479 if [ ${comp_proc_omp_loc} -gt ${nombre_restant_node} ] ; then 1480 (( node_num = node_num + 1 )) 1481 node_num_current=${node_num} 1482 init_node=y 1483 continue 1484 fi 1485 1486 # Number of MPI process to assign 1487 1488 (( num_corempi = nombre_restant_node / comp_proc_omp_loc )) 1489 1490 if [ ${num_corempi} -gt ${nombre_restant_comp} ] ; then 1491 num_corempi=${nombre_restant_comp} 1492 fi 1493 1494 (( nombre_restant_node = nombre_restant_node - 
num_corempi * comp_proc_omp_loc )) 1495 (( nombre_restant_comp = nombre_restant_comp - num_corempi )) 1496 1497 if [ ${init_exec} = y ] ; then 1498 EXECUTION="${EXECUTION} : -H ${node_current} -np ${num_corempi} ./script_${ExeNameOut}.ksh" 1499 else 1500 EXECUTION="${EXECUTION} -H ${node_current} -np ${num_corempi} ./script_${ExeNameOut}.ksh" 1501 init_exec=y 1502 fi 1503 1504 (( start_num = num_corempi + start_num )) 1505 1506 else 1507 1508 (( node_num = node_num + 1 )) 1509 continue 1510 fi 1511 1512 # Test on the number of core/process remaining on the node/component 1513 1514 if [ ${nombre_restant_node} = 0 ] ; then 1515 (( node_num = node_num + 1 )) 1516 node_num_current=${node_num} 1517 init_node=y 1518 1519 if [ ${nombre_restant_comp} = 0 ] ; then 1520 break 1 1521 fi 1522 else 1523 1524 node_num_current=${node_num} 1525 init_node=n 1526 1527 if [ ${nombre_restant_comp} = 0 ] ; then 1528 break 1 1529 fi 1530 fi 1531 done 1532 else 1533 EXECUTION="${EXECUTION} ./${ExeNameOut}" 1534 fi 1535 fi 1536 done 1537 1538 IGCM_debug_Print 1 "sys Titane : La commande d execution est " 1539 IGCM_debug_Print 1 $EXECUTION 1540 1541 IGCM_debug_PopStack "IGCM_sys_build_execution_scripts" 1542 } 1543 1544 1309 1545 1310 1546 ##############################################################
Note: See TracChangeset for help on using the changeset viewer.