Changeset 1628
- Timestamp: 02/20/24 14:44:49 (3 months ago)
- Location: branches/libIGCM_concurrent
- Files: 4 edited
Legend:
- Unmodified
- Added
- Removed
branches/libIGCM_concurrent/AA_job
r1619 r1628 35 35 #-Q- irene #MSUB -c ::openMPthreads:: # Number of openMP threads. To specify only for SMPD 36 36 #-Q- irene #MSUB -x # exclusive node. To specify only for MPMD together with the one below 37 #-Q- irene #MSUB -N ::NodeNumber:: 37 38 #-Q- irene #MSUB -E '--cpu_bind=none' 38 39 #-Q- irene #MSUB -T ::WallTime:: # Wall clock limit (seconds) … … 547 548 typeset RET 548 549 RUN_DATE_BEGIN=$( date '+%Y-%m-%dT%H:%M:%S' ) 550 if [ -f EXECUTION.exe ] ; then 551 cat EXECUTION.exe 552 ./EXECUTION.exe 553 IGCM_sys_Cd ${RUN_DIR} 554 else 549 555 ${EXECUTION} >> ${Exe_Output} 2>&1 556 fi 550 557 RET=$? 551 558 RUN_DATE_END=$( date '+%Y-%m-%dT%H:%M:%S' ) … … 583 590 echo "#######################################" 584 591 echo 592 if [ -f EXECUTION.exe ] ; then 593 echo "Main DIR" 585 594 ls -lrt 595 echo "2nd RUNDIR" 596 ls -lrt RUNDIR_2 597 else 598 ls -lrt 599 fi 586 600 587 601 #D- -
branches/libIGCM_concurrent/libIGCM_comp/libIGCM_comp.ksh
r1624 r1628 316 316 typeset file_in_ file_in file_out_ file_out do_init 317 317 for comp in ${config_ListOfComponents[*]} ; do 318 number_rundir=$(echo ${comp} | sed 's/[^0-9]*//g') 319 if [ X${number_rundir} != X ] ; then 320 [ ! -d RUNDIR_${number_rundir} ] && mkdir RUNDIR_${number_rundir} 321 cd RUNDIR_${number_rundir} ; 322 fi 318 323 # Initialize 319 324 do_init="y" … … 366 371 done 367 372 fi 373 fi 374 if [ X${number_rundir} != X ] ; then 375 cd $RUN_DIR 368 376 fi 369 377 done … … 440 448 441 449 for comp in ${config_ListOfComponents[*]} ; do 450 number_rundir=$(echo ${comp} | sed 's/[^0-9]*//g') 451 if [ X${number_rundir} != X ] ; then 452 [ ! -d RUNDIR_${number_rundir} ] && mkdir RUNDIR_${number_rundir} 453 cd RUNDIR_${number_rundir} ; 454 fi 442 455 # Define component 443 456 eval compname=\${config_ListOfComponents_${comp}[0]} > /dev/null 2>&1 … … 490 503 done 491 504 fi 505 if [ X${number_rundir} != X ] ; then 506 cd $RUN_DIR 507 fi 492 508 done 493 509 … … 513 529 514 530 for comp in ${config_ListOfComponents[*]} ; do 531 number_rundir=$(echo ${comp} | sed 's/[^0-9]*//g') 532 if [ X${number_rundir} != X ] ; then 533 [ ! -d RUNDIR_${number_rundir} ] && mkdir RUNDIR_${number_rundir} 534 cd RUNDIR_${number_rundir} ; 535 fi 515 536 516 537 # Define component … … 586 607 fi 587 608 fi 609 if [ X${number_rundir} != X ] ; then 610 cd $RUN_DIR 611 fi 588 612 done 589 613 … … 619 643 typeset comp compname comptagname card ListFilesName FileName0 NbFiles i i_ file_in file_out 620 644 for comp in ${config_ListOfComponents[*]} ; do 645 number_rundir=$(echo ${comp} | sed 's/[^0-9]*//g') 646 if [ X${number_rundir} != X ] ; then 647 [ ! 
-d RUNDIR_${number_rundir} ] && mkdir RUNDIR_${number_rundir} 648 cd RUNDIR_${number_rundir} ; 649 fi 621 650 # Define component 622 651 eval compname=\${config_ListOfComponents_${comp}[0]} > /dev/null 2>&1 … … 654 683 done 655 684 fi 685 if [ X${number_rundir} != X ] ; then 686 cd $RUN_DIR 687 fi 656 688 657 689 done … … 680 712 681 713 for comp in ${config_ListOfComponents[*]} ; do 714 number_rundir=$(echo ${comp} | sed 's/[^0-9]*//g') 715 if [ X${number_rundir} != X ] ; then 716 [ ! -d RUNDIR_${number_rundir} ] && mkdir RUNDIR_${number_rundir} 717 cd RUNDIR_${number_rundir} ; 718 fi 719 682 720 # Define component 683 721 eval compname=\${config_ListOfComponents_${comp}[0]} > /dev/null 2>&1 … … 1047 1085 fi 1048 1086 fi 1087 if [ X${number_rundir} != X ] ; then 1088 cd $RUN_DIR 1089 fi 1049 1090 done 1050 1091 … … 1074 1115 byPass=false 1075 1116 for comp in ${config_ListOfComponents[*]} ; do 1117 number_rundir=$(echo ${comp} | sed 's/[^0-9]*//g') 1118 if [ X${number_rundir} != X ] ; then 1119 [ ! -d RUNDIR_${number_rundir} ] && mkdir RUNDIR_${number_rundir} 1120 cd RUNDIR_${number_rundir} ; 1121 fi 1122 1076 1123 # Define component 1077 1124 … … 1111 1158 IGCM_debug_Print 1 "Compiler is ${compilerVersion}" 1112 1159 fi 1160 if [ X${number_rundir} != X ] ; then 1161 cd $RUN_DIR 1162 fi 1163 1113 1164 done 1114 1165 … … 1606 1657 typeset comp compname comptagname 1607 1658 for comp in ${config_ListOfComponents[*]} ; do 1659 number_rundir=$(echo ${comp} | sed 's/[^0-9]*//g') 1660 if [ X${number_rundir} != X ] ; then 1661 [ ! 
-d RUNDIR_${number_rundir} ] && mkdir RUNDIR_${number_rundir} 1662 cd RUNDIR_${number_rundir} ; 1663 fi 1608 1664 # Define component 1609 1665 eval compname=\${config_ListOfComponents_${comp}[0]} > /dev/null 2>&1 … … 1657 1713 fi 1658 1714 fi 1715 if [ X${number_rundir} != X ] ; then 1716 cd $RUN_DIR 1717 fi 1659 1718 done 1660 1719 … … 1709 1768 fi 1710 1769 fi 1711 1712 for comp in ${config_ListOfComponents[*]} ; do 1770 for comp in ${config_ListOfComponents[*]} ; do 1771 number_rundir=$(echo ${comp} | sed 's/[^0-9]*//g') 1772 if [ X${number_rundir} != X ] ; then 1773 [ ! -d RUNDIR_${number_rundir} ] && mkdir RUNDIR_${number_rundir} 1774 cd RUNDIR_${number_rundir} ; 1775 fi 1776 1713 1777 # Define component 1714 1778 eval compname=\${config_ListOfComponents_${comp}[0]} > /dev/null 2>&1 … … 2042 2106 fi 2043 2107 echo 2108 if [ X${number_rundir} != X ] ; then 2109 cd $RUN_DIR 2110 fi 2044 2111 done 2045 2112 # Append the sync call and the copy sequence to the IGCM_FlushRebuild function if needed -
branches/libIGCM_concurrent/libIGCM_config/libIGCM_config.ksh
r1609 r1628 805 805 openMPthreads=0 806 806 NbExec=0 807 first_slurm_comp=0 807 808 808 809 OK_PARA_MPI=false … … 812 813 813 814 for comp in ${config_ListOfComponents[*]} ; do 815 816 # Add a node in case of many execution in concurrent mode 817 number_rundir=$(echo ${comp} | sed 's/[^0-9]*//g') 818 if [ X${number_rundir} != X ] ; then 819 if [ ${first_slurm_comp} = "0" ] ; then 820 if [ $(( $coreNumber % $NB_CORE_PER_NODE )) -ne 0 ] ; then 821 (( coreNumber = coreNumber + NB_CORE_PER_NODE - coreNumber % NB_CORE_PER_NODE )) 822 fi 823 echo $coreNumber 824 first_slurm_comp=1 ; 825 fi 826 fi 814 827 815 828 # Manage component executable -
branches/libIGCM_concurrent/libIGCM_sys/libIGCM_sys_irene.ksh
r1624 r1628 1181 1181 typeset file 1182 1182 file=$1 1183 1184 if [ ${executionType} -eq 1 ] ; then 1185 # MPMD + MPI 1186 sed -e "/::openMPthreads::/d" \1187 -e "s/::JobNumProcTot::/${ coreNumber}/"\1188 -e "/#MSUB -x/d"\1183 # MPMD + MPI + OMP : mpirun/ccc_mprun/error 1184 (( nodeNumber = coreNumber / NB_CORE_PER_NODE )) 1185 [ $(( ${coreNumber} % ${NB_CORE_PER_NODE} )) -ne 0 ] && (( nodeNumber = nodeNumber + 1 )) 1186 sed -e "/::openMPthreads::/d" \ 1187 -e "s/::JobNumProcTot::/${mpiTasks}/" \ 1188 -e "s/::NodeNumber::/${nodeNumber}/" \ 1189 1189 -e "/--cpu_bind=none/d" \ 1190 1190 ${file} > ${file}.tmp 1191 1192 elif [ ${executionType} -eq 2 ] ; then1193 # MPMD + MPI + OMP : mpirun/ccc_mprun/error1194 if ( [ "X${config_UserChoices_ExecutionModeOnCurie}" = "Xmpirun" ] ) ; then1195 sed -e "/::openMPthreads::/d" \1196 -e "s/::JobNumProcTot::/${coreNumber}/" \1197 ${file} > ${file}.tmp1198 elif ( [ "X${config_UserChoices_ExecutionModeOnCurie}" = "X" ] || [ "X${config_UserChoices_ExecutionModeOnCurie}" = "Xccc_mprun" ] ) ; then1199 sed -e "/::openMPthreads::/d" \1200 -e "s/::JobNumProcTot::/${coreNumber}/" \1201 -e "/--cpu_bind=none/d" \1202 ${file} > ${file}.tmp1203 else1204 IGCM_debug_Print 1 "You have to set ExecutionModeOnCurie=ccc_mprun or mpirun in config.card"1205 IGCM_debug_Exit "IGCM_sys_updateHeaders"1206 fi1207 elif [ ${executionType} -eq 3 ] ; then1208 # SPMD + MPI/OMP1209 sed -e "s/::openMPthreads::/${openMPthreads}/" \1210 -e "s/::JobNumProcTot::/${mpiTasks}/" \1211 -e "/#MSUB -x/d" \1212 -e "/--cpu_bind=none/d" \1213 ${file} > ${file}.tmp1214 1215 elif [ ${executionType} -eq 4 ] ; then1216 # SPMD + MPI only1217 sed -e "s/::JobNumProcTot::/${mpiTasks}/" \1218 -e "/::openMPthreads::/d" \1219 -e "/#MSUB -x/d" \1220 -e "/--cpu_bind=none/d" \1221 ${file} > ${file}.tmp1222 1223 elif [ ${executionType} -eq 5 ] ; then1224 # SPMD + OMP only1225 sed -e "s/::openMPthreads::/${openMPthreads}/" \1226 -e "/::JobNumProcTot::/d" \1227 -e "/#MSUB -x/d" \1228 -e 
"/--cpu_bind=none/d" \1229 ${file} > ${file}.tmp1230 1231 elif [ ${executionType} -eq 6 ] ; then1232 # SEQUENTIAL THEN1233 sed -e "s/::JobNumProcTot::/1/" \1234 -e "/::openMPthreads::/d" \1235 -e "/#MSUB -x/d" \1236 -e "/--cpu_bind=none/d" \1237 ${file} > ${file}.tmp1238 1239 fi1240 1241 1191 IGCM_sys_Mv ${file}.tmp ${file} 1242 1192 … … 1270 1220 echo "IGCM_sys_build_execution_scripts " $@ 1271 1221 fi 1222 if ( [ "X${config_UserChoices_ExecutionMode}" = "Xslurm" ] ) ; then 1223 1224 EXECUTION="/usr/bin/time srun " 1225 1226 if ( ${OK_PARA_MPMD} ) ; then 1227 1228 # MPMD mode 1229 # 1 MPI only : executionType=1 1230 # 2 MPI/OpenMP : executionType=2 1231 1232 if [ -f run_file ] ; then 1233 IGCM_sys_Rm -f run_file 1234 fi 1235 if [ -f RUNDIR_2/run_file ] ; then 1236 IGCM_sys_Rm -f RUNDIR_2/run_file 1237 fi 1238 touch run_file 1239 1240 1241 1242 # 2 MPI/OpenMP : executionType=2 1243 1244 # MPI-OpenMP (MPMD) 1245 # export SLURM_HOSTFILE=./hostlist 1246 # srun --cpu-bind=none --distribution=arbitrary --multi-prog ./run_file 1247 # example of hostlist file : 1248 # r3i3n33 1249 # r3i3n33 1250 # ... 1251 # example of run_file : 1252 # 0-70 ./prog_lmdz.x.sh %o %t 1253 # 71-430 ./prog_opa.xx.sh %o %t 1254 # 431-431 ./prog_xios.x.sh %o %t 1255 # examples of prog_file : 1256 # prog_lmdz.x.sh : 1257 # (( init = 0 + $1 )) 1258 # (( index = init * 10 )) 1259 # (( slot = index % 40 )) 1260 # taskset -c $slot-$((slot + 10 - 1)) ./script_lmdz.x.ksh 1261 # that will become 1262 # taskset -c 0-9 ./script_lmdz.x.ksh 1263 # ... 
1264 # with script_lmdz.x.ksh 1265 # export OMP_STACKSIZE=3g 1266 # export OMP_PLACES=cores 1267 # export OMP_NUM_THREADS=10 1268 # ./lmdz.x > out_lmdz.x.out.${SLURM_PROCID} 2>out_lmdz.x.err.${SLURM_PROCID} 1269 1270 # Hosts treatment 1271 _bkIFS=$IFS; 1272 IFS=$'\n'; set -f 1273 listnodes=($(< <( scontrol show hostnames $SLURM_JOB_NODELIST ))) 1274 IFS=$_bkIFS; set +f 1275 rm -f hostlist 1276 1277 # Loop on the components to build run_file and script_exec files 1278 rank=0 1279 current_core=0 1280 current_core_mpi=0 1281 current_core_tmp=0 1282 current_core_mpi_tmp=0 1283 first_slurm_comp=0 1284 1285 for comp in ${config_ListOfComponents[*]} ; do 1286 1287 number_rundir=$(echo ${comp} | sed 's/[^0-9]*//g') 1288 if [ X${number_rundir} != X ] ; then 1289 [ ! -d RUNDIR_${number_rundir} ] && mkdir RUNDIR_${number_rundir} 1290 cd RUNDIR_${number_rundir} 1291 if [ ${first_slurm_comp} = "0" ] ; then 1292 (( NbMPItasks_run1 = current_core_mpi_tmp )) 1293 ### On change de noeud pour le prochain srun 1294 if [ $(( $current_core % $NB_CORE_PER_NODE )) -ne 0 ] ; then 1295 (( current_core = current_core + NB_CORE_PER_NODE - current_core % NB_CORE_PER_NODE )) 1296 fi 1297 current_core_tmp=0 ; current_core_mpi_tmp=0 ; first_slurm_comp=1 ; 1298 fi 1299 fi 1300 1301 eval ExeNameIn=\${config_Executable_${comp}[0]} 1302 eval ExeNameOut=\${config_Executable_${comp}[1]} 1303 1304 # Not possible if oasis has an executable (i.e old version of oasis3) 1305 if ( [ "X${ExeNameOut}" != X\"\" ] && [ "X${comp}" = "XCPL" ] ) ; then 1306 IGCM_debug_Exit "ERROR MPMD with hybrid MPI-OpenMP is not available with oasis3 version" 1307 IGCM_debug_Print 2 "Only available with oasis3-MCT version coupler" 1308 IGCM_debug_Verif_Exit 1309 fi 1310 1311 # Only if we really have an executable for the component : 1312 if [ "X${ExeNameOut}" != X\"\" ] ; then 1313 1314 eval comp_proc_mpi_loc=\${${comp}_PROC_MPI} 1315 eval comp_proc_omp_loc=\${${comp}_PROC_OMP} 1316 eval comp_proc_nod_loc=\${${comp}_PROC_NOD} 
1317 1318 1319 # Build script files 1320 1321 echo "#!/bin/ksh" > script_${ExeNameOut}.ksh 1322 echo "" >> script_${ExeNameOut}.ksh 1323 if [ ${comp_proc_omp_loc} -gt 1 ] ; then 1324 echo "export OMP_STACKSIZE=3g" >> script_${ExeNameOut}.ksh 1325 echo "export OMP_PLACES=cores" >> script_${ExeNameOut}.ksh 1326 echo "export OMP_NUM_THREADS=${comp_proc_omp_loc}" >> script_${ExeNameOut}.ksh 1327 fi 1328 1329 # to have out/err per process on different files 1330 echo "./${ExeNameOut} > out_${ExeNameOut}.out.\${SLURM_PROCID} 2>out_${ExeNameOut}.err.\${SLURM_PROCID}" >> script_${ExeNameOut}.ksh 1331 1332 IGCM_sys_Chmod u+x script_${ExeNameOut}.ksh 1333 1334 # Build run_file 1335 # Only if the component has an executable 1336 if ( [ "X${ExeNameOut}" != X\"\" ] ) ; then 1337 1338 eval comp_proc_mpi_loc=\${${comp}_PROC_MPI} 1339 (( end_core = ${current_core_mpi_tmp} + ${comp_proc_mpi_loc} - 1 )) 1340 echo "${current_core_mpi_tmp}-${end_core} ./prog_${ExeNameOut}.sh %o %t" >> run_file 1341 (( current_core_mpi_tmp = ${end_core} + 1 )) 1342 fi 1343 1344 if [ ${comp_proc_nod_loc} -gt 1 ] ; then 1345 (( offset_comp_proc_loc = NB_CORE_PER_NODE / (comp_proc_mpi_loc / comp_proc_nod_loc) )) 1346 else 1347 (( offset_comp_proc_loc = comp_proc_omp_loc )) 1348 fi 1349 1350 # Build configuration file 1351 1352 echo "#!/bin/sh" > prog_${ExeNameOut}.sh 1353 echo "(( init = $current_core_tmp + \$1 ))" >> prog_${ExeNameOut}.sh 1354 echo "(( index = init * $comp_proc_omp_loc ))" >> prog_${ExeNameOut}.sh 1355 echo "(( slot = index % 40 ))" >> prog_${ExeNameOut}.sh 1356 echo "echo ${ExeNameOut} taskset -c \$slot"-"\$((slot + $comp_proc_omp_loc - 1))" >> prog_${ExeNameOut}.sh 1357 echo "taskset -c \$slot"-"\$((slot + $comp_proc_omp_loc - 1)) ./script_${ExeNameOut}.ksh" >> prog_${ExeNameOut}.sh 1358 1359 IGCM_sys_Chmod u+x prog_${ExeNameOut}.sh 1360 1361 # Build hostlist file 1362 1363 for nb_proc_mpi in `seq 0 $(($comp_proc_mpi_loc-1))`; do 1364 (( index_host = current_core / NB_CORE_PER_NODE )) 
1365 host_value=${listnodes[${index_host}]} 1366 echo "$host_value" >> hostlist 1367 if [ ${DRYRUN_DEBUG} = 4 ] ; then 1368 echo "node_${index_host}_X" >> hostlist_template 1369 fi 1370 (( current_core = current_core + offset_comp_proc_loc )) 1371 (( current_core_tmp = current_core_tmp + offset_comp_proc_loc )) 1372 done 1373 fi 1374 if [ X${number_rundir} != X ] ; then 1375 cd $RUN_DIR ; 1376 fi 1377 done 1378 1379 ## variable added to stop after 60s instead of 600s by default. 1380 ## This is used when no error comes from executables and when something stopped an executable without notice. 1381 export SLURM_WAIT=60 1382 1383 if [ X${number_rundir} != X ] ; then 1384 echo "cd $RUN_DIR ; export SLURM_HOSTFILE=./hostlist ; /usr/bin/time srun --ntasks=${NbMPItasks_run1} --cpu-bind=none --distribution=arbitrary --multi-prog ./run_file > out_execution 2>&1 &" > EXECUTION.exe 1385 echo "cd RUNDIR_2 ; export SLURM_HOSTFILE=./hostlist ; /usr/bin/time srun --ntasks=${current_core_mpi_tmp} --cpu-bind=none --distribution=arbitrary --multi-prog ./run_file > out_execution 2>&1 &" >> EXECUTION.exe 1386 echo "wait" >> EXECUTION.exe 1387 chmod u+x EXECUTION.exe 1388 else 1389 EXECUTION="/usr/bin/time srun --cpu-bind=none --distribution=arbitrary --multi-prog ./run_file" 1390 fi 1391 IGCM_sys_Chmod u+x run_file 1392 if ( $DEBUG_sys ) ; then 1393 echo "run_file contains : " 1394 cat run_file 1395 fi 1396 1397 # fi # if ${OK_PARA_MPMD} 1398 1399 else 1400 # Only one executable (SPMD mode): executionType=3, 4, 5 and 6 1401 1402 for comp in ${config_ListOfComponents[*]} ; do 1403 1404 # Only if we really have an executable for the component : 1405 eval ExeNameOut=\${config_Executable_${comp}[1]} 1406 if ( [ "X${ExeNameOut}" != X\"\" ] && [ "X${ExeNameOut}" != "Xinca.dat" ] ) ; then 1407 1408 # Build script files 1409 1410 echo "#!/bin/ksh" > script_${ExeNameOut}.ksh 1411 echo "" >> script_${ExeNameOut}.ksh 1412 IGCM_sys_Chmod u+x script_${ExeNameOut}.ksh 1413 1414 if ( ${OK_PARA_OMP} 
) ; then 1415 eval comp_proc_omp_loc=\${${comp}_PROC_OMP} 1416 # Check if the number of threads is correct 1417 case ${comp_proc_omp_loc} in 1418 2|4|5|10|20) 1419 IGCM_debug_Print 1 "You run ${ExeNameOut} on ${comp_proc_omp_loc} OMP threads" 1420 ;; 1421 *) 1422 IGCM_debug_Exit "ERROR with OMP parameters !" 1423 IGCM_debug_Print 2 "${comp_proc_omp_loc} is not possible as number of OMP threads" 1424 IGCM_debug_Print 2 "Only 2,4,5,10,20 as number of OMP threads are possible " 1425 IGCM_debug_Verif_Exit 1426 ;; 1427 esac 1428 echo "" >> script_${ExeNameOut}.ksh 1429 echo "export OMP_STACKSIZE=3g" >> script_${ExeNameOut}.ksh 1430 echo "export OMP_PLACES=cores" >> script_${ExeNameOut}.ksh 1431 echo "OMP_NUM_THREADS=${comp_proc_omp_loc}" >> script_${ExeNameOut}.ksh 1432 fi 1433 1434 eval comp_proc_mpi_loc=\${${comp}_PROC_MPI} 1435 1436 # To have out/err per process on different files 1437 echo "./${ExeNameOut} > out_${ExeNameOut}.out.\${SLURM_PROCID} 2>out_${ExeNameOut}.err.\${SLURM_PROCID}" >> script_${ExeNameOut}.ksh 1438 EXECUTION="/usr/bin/time srun ./script_${ExeNameOut}.ksh" 1439 1440 IGCM_debug_Print 1 "sys Jean-Zay : script_${ExeNameOut}.ksh contains" 1441 cat script_${ExeNameOut}.ksh 1442 1443 fi 1444 1445 done 1446 1447 fi # ${OK_PARA_MPMD} 1448 1449 else 1272 1450 1273 1451 EXECUTION=${HOST_MPIRUN_COMMAND} … … 1590 1768 fi # ${OK_PARA_MPMD} 1591 1769 1770 fi 1592 1771 IGCM_debug_Print 1 "sys Irene : execution command is " 1593 1772 IGCM_debug_Print 1 "$EXECUTION"
Note: See TracChangeset for help on using the changeset viewer.