- Timestamp: 02/20/24 14:44:49 (3 months ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
branches/libIGCM_concurrent/libIGCM_sys/libIGCM_sys_irene.ksh
r1624 r1628 1181 1181 typeset file 1182 1182 file=$1 1183 1184 if [ ${executionType} -eq 1 ] ; then 1185 # MPMD + MPI 1186 sed -e "/::openMPthreads::/d" \1187 -e "s/::JobNumProcTot::/${ coreNumber}/"\1188 -e "/#MSUB -x/d"\1183 # MPMD + MPI + OMP : mpirun/ccc_mprun/error 1184 (( nodeNumber = coreNumber / NB_CORE_PER_NODE )) 1185 [ $(( ${coreNumber} % ${NB_CORE_PER_NODE} )) -ne 0 ] && (( nodeNumber = nodeNumber + 1 )) 1186 sed -e "/::openMPthreads::/d" \ 1187 -e "s/::JobNumProcTot::/${mpiTasks}/" \ 1188 -e "s/::NodeNumber::/${nodeNumber}/" \ 1189 1189 -e "/--cpu_bind=none/d" \ 1190 1190 ${file} > ${file}.tmp 1191 1192 elif [ ${executionType} -eq 2 ] ; then1193 # MPMD + MPI + OMP : mpirun/ccc_mprun/error1194 if ( [ "X${config_UserChoices_ExecutionModeOnCurie}" = "Xmpirun" ] ) ; then1195 sed -e "/::openMPthreads::/d" \1196 -e "s/::JobNumProcTot::/${coreNumber}/" \1197 ${file} > ${file}.tmp1198 elif ( [ "X${config_UserChoices_ExecutionModeOnCurie}" = "X" ] || [ "X${config_UserChoices_ExecutionModeOnCurie}" = "Xccc_mprun" ] ) ; then1199 sed -e "/::openMPthreads::/d" \1200 -e "s/::JobNumProcTot::/${coreNumber}/" \1201 -e "/--cpu_bind=none/d" \1202 ${file} > ${file}.tmp1203 else1204 IGCM_debug_Print 1 "You have to set ExecutionModeOnCurie=ccc_mprun or mpirun in config.card"1205 IGCM_debug_Exit "IGCM_sys_updateHeaders"1206 fi1207 elif [ ${executionType} -eq 3 ] ; then1208 # SPMD + MPI/OMP1209 sed -e "s/::openMPthreads::/${openMPthreads}/" \1210 -e "s/::JobNumProcTot::/${mpiTasks}/" \1211 -e "/#MSUB -x/d" \1212 -e "/--cpu_bind=none/d" \1213 ${file} > ${file}.tmp1214 1215 elif [ ${executionType} -eq 4 ] ; then1216 # SPMD + MPI only1217 sed -e "s/::JobNumProcTot::/${mpiTasks}/" \1218 -e "/::openMPthreads::/d" \1219 -e "/#MSUB -x/d" \1220 -e "/--cpu_bind=none/d" \1221 ${file} > ${file}.tmp1222 1223 elif [ ${executionType} -eq 5 ] ; then1224 # SPMD + OMP only1225 sed -e "s/::openMPthreads::/${openMPthreads}/" \1226 -e "/::JobNumProcTot::/d" \1227 -e "/#MSUB -x/d" \1228 -e 
"/--cpu_bind=none/d" \1229 ${file} > ${file}.tmp1230 1231 elif [ ${executionType} -eq 6 ] ; then1232 # SEQUENTIAL THEN1233 sed -e "s/::JobNumProcTot::/1/" \1234 -e "/::openMPthreads::/d" \1235 -e "/#MSUB -x/d" \1236 -e "/--cpu_bind=none/d" \1237 ${file} > ${file}.tmp1238 1239 fi1240 1241 1191 IGCM_sys_Mv ${file}.tmp ${file} 1242 1192 … … 1270 1220 echo "IGCM_sys_build_execution_scripts " $@ 1271 1221 fi 1222 if ( [ "X${config_UserChoices_ExecutionMode}" = "Xslurm" ] ) ; then 1223 1224 EXECUTION="/usr/bin/time srun " 1225 1226 if ( ${OK_PARA_MPMD} ) ; then 1227 1228 # MPMD mode 1229 # 1 MPI only : executionType=1 1230 # 2 MPI/OpenMP : executionType=2 1231 1232 if [ -f run_file ] ; then 1233 IGCM_sys_Rm -f run_file 1234 fi 1235 if [ -f RUNDIR_2/run_file ] ; then 1236 IGCM_sys_Rm -f RUNDIR_2/run_file 1237 fi 1238 touch run_file 1239 1240 1241 1242 # 2 MPI/OpenMP : executionType=2 1243 1244 # MPI-OpenMP (MPMD) 1245 # export SLURM_HOSTFILE=./hostlist 1246 # srun --cpu-bind=none --distribution=arbitrary --multi-prog ./run_file 1247 # example of hostlist file : 1248 # r3i3n33 1249 # r3i3n33 1250 # ... 1251 # example of run_file : 1252 # 0-70 ./prog_lmdz.x.sh %o %t 1253 # 71-430 ./prog_opa.xx.sh %o %t 1254 # 431-431 ./prog_xios.x.sh %o %t 1255 # examples of prog_file : 1256 # prog_lmdz.x.sh : 1257 # (( init = 0 + $1 )) 1258 # (( index = init * 10 )) 1259 # (( slot = index % 40 )) 1260 # taskset -c $slot-$((slot + 10 - 1)) ./script_lmdz.x.ksh 1261 # that will become 1262 # taskset -c 0-9 ./script_lmdz.x.ksh 1263 # ... 
1264 # with script_lmdz.x.ksh 1265 # export OMP_STACKSIZE=3g 1266 # export OMP_PLACES=cores 1267 # export OMP_NUM_THREADS=10 1268 # ./lmdz.x > out_lmdz.x.out.${SLURM_PROCID} 2>out_lmdz.x.err.${SLURM_PROCID} 1269 1270 # Hosts treatment 1271 _bkIFS=$IFS; 1272 IFS=$'\n'; set -f 1273 listnodes=($(< <( scontrol show hostnames $SLURM_JOB_NODELIST ))) 1274 IFS=$_bkIFS; set +f 1275 rm -f hostlist 1276 1277 # Loop on the components to build run_file and script_exec files 1278 rank=0 1279 current_core=0 1280 current_core_mpi=0 1281 current_core_tmp=0 1282 current_core_mpi_tmp=0 1283 first_slurm_comp=0 1284 1285 for comp in ${config_ListOfComponents[*]} ; do 1286 1287 number_rundir=$(echo ${comp} | sed 's/[^0-9]*//g') 1288 if [ X${number_rundir} != X ] ; then 1289 [ ! -d RUNDIR_${number_rundir} ] && mkdir RUNDIR_${number_rundir} 1290 cd RUNDIR_${number_rundir} 1291 if [ ${first_slurm_comp} = "0" ] ; then 1292 (( NbMPItasks_run1 = current_core_mpi_tmp )) 1293 ### On change de noeud pour le prochain srun 1294 if [ $(( $current_core % $NB_CORE_PER_NODE )) -ne 0 ] ; then 1295 (( current_core = current_core + NB_CORE_PER_NODE - current_core % NB_CORE_PER_NODE )) 1296 fi 1297 current_core_tmp=0 ; current_core_mpi_tmp=0 ; first_slurm_comp=1 ; 1298 fi 1299 fi 1300 1301 eval ExeNameIn=\${config_Executable_${comp}[0]} 1302 eval ExeNameOut=\${config_Executable_${comp}[1]} 1303 1304 # Not possible if oasis has an executable (i.e old version of oasis3) 1305 if ( [ "X${ExeNameOut}" != X\"\" ] && [ "X${comp}" = "XCPL" ] ) ; then 1306 IGCM_debug_Exit "ERROR MPMD with hybrid MPI-OpenMP is not available with oasis3 version" 1307 IGCM_debug_Print 2 "Only available with oasis3-MCT version coupler" 1308 IGCM_debug_Verif_Exit 1309 fi 1310 1311 # Only if we really have an executable for the component : 1312 if [ "X${ExeNameOut}" != X\"\" ] ; then 1313 1314 eval comp_proc_mpi_loc=\${${comp}_PROC_MPI} 1315 eval comp_proc_omp_loc=\${${comp}_PROC_OMP} 1316 eval comp_proc_nod_loc=\${${comp}_PROC_NOD} 
1317 1318 1319 # Build script files 1320 1321 echo "#!/bin/ksh" > script_${ExeNameOut}.ksh 1322 echo "" >> script_${ExeNameOut}.ksh 1323 if [ ${comp_proc_omp_loc} -gt 1 ] ; then 1324 echo "export OMP_STACKSIZE=3g" >> script_${ExeNameOut}.ksh 1325 echo "export OMP_PLACES=cores" >> script_${ExeNameOut}.ksh 1326 echo "export OMP_NUM_THREADS=${comp_proc_omp_loc}" >> script_${ExeNameOut}.ksh 1327 fi 1328 1329 # to have out/err per process on different files 1330 echo "./${ExeNameOut} > out_${ExeNameOut}.out.\${SLURM_PROCID} 2>out_${ExeNameOut}.err.\${SLURM_PROCID}" >> script_${ExeNameOut}.ksh 1331 1332 IGCM_sys_Chmod u+x script_${ExeNameOut}.ksh 1333 1334 # Build run_file 1335 # Only if the component has an executable 1336 if ( [ "X${ExeNameOut}" != X\"\" ] ) ; then 1337 1338 eval comp_proc_mpi_loc=\${${comp}_PROC_MPI} 1339 (( end_core = ${current_core_mpi_tmp} + ${comp_proc_mpi_loc} - 1 )) 1340 echo "${current_core_mpi_tmp}-${end_core} ./prog_${ExeNameOut}.sh %o %t" >> run_file 1341 (( current_core_mpi_tmp = ${end_core} + 1 )) 1342 fi 1343 1344 if [ ${comp_proc_nod_loc} -gt 1 ] ; then 1345 (( offset_comp_proc_loc = NB_CORE_PER_NODE / (comp_proc_mpi_loc / comp_proc_nod_loc) )) 1346 else 1347 (( offset_comp_proc_loc = comp_proc_omp_loc )) 1348 fi 1349 1350 # Build configuration file 1351 1352 echo "#!/bin/sh" > prog_${ExeNameOut}.sh 1353 echo "(( init = $current_core_tmp + \$1 ))" >> prog_${ExeNameOut}.sh 1354 echo "(( index = init * $comp_proc_omp_loc ))" >> prog_${ExeNameOut}.sh 1355 echo "(( slot = index % 40 ))" >> prog_${ExeNameOut}.sh 1356 echo "echo ${ExeNameOut} taskset -c \$slot"-"\$((slot + $comp_proc_omp_loc - 1))" >> prog_${ExeNameOut}.sh 1357 echo "taskset -c \$slot"-"\$((slot + $comp_proc_omp_loc - 1)) ./script_${ExeNameOut}.ksh" >> prog_${ExeNameOut}.sh 1358 1359 IGCM_sys_Chmod u+x prog_${ExeNameOut}.sh 1360 1361 # Build hostlist file 1362 1363 for nb_proc_mpi in `seq 0 $(($comp_proc_mpi_loc-1))`; do 1364 (( index_host = current_core / NB_CORE_PER_NODE )) 
1365 host_value=${listnodes[${index_host}]} 1366 echo "$host_value" >> hostlist 1367 if [ ${DRYRUN_DEBUG} = 4 ] ; then 1368 echo "node_${index_host}_X" >> hostlist_template 1369 fi 1370 (( current_core = current_core + offset_comp_proc_loc )) 1371 (( current_core_tmp = current_core_tmp + offset_comp_proc_loc )) 1372 done 1373 fi 1374 if [ X${number_rundir} != X ] ; then 1375 cd $RUN_DIR ; 1376 fi 1377 done 1378 1379 ## variable added to stop after 60s instead of 600s by default. 1380 ## This is used when no error comes from executables and when something stopped an executable without notice. 1381 export SLURM_WAIT=60 1382 1383 if [ X${number_rundir} != X ] ; then 1384 echo "cd $RUN_DIR ; export SLURM_HOSTFILE=./hostlist ; /usr/bin/time srun --ntasks=${NbMPItasks_run1} --cpu-bind=none --distribution=arbitrary --multi-prog ./run_file > out_execution 2>&1 &" > EXECUTION.exe 1385 echo "cd RUNDIR_2 ; export SLURM_HOSTFILE=./hostlist ; /usr/bin/time srun --ntasks=${current_core_mpi_tmp} --cpu-bind=none --distribution=arbitrary --multi-prog ./run_file > out_execution 2>&1 &" >> EXECUTION.exe 1386 echo "wait" >> EXECUTION.exe 1387 chmod u+x EXECUTION.exe 1388 else 1389 EXECUTION="/usr/bin/time srun --cpu-bind=none --distribution=arbitrary --multi-prog ./run_file" 1390 fi 1391 IGCM_sys_Chmod u+x run_file 1392 if ( $DEBUG_sys ) ; then 1393 echo "run_file contains : " 1394 cat run_file 1395 fi 1396 1397 # fi # if ${OK_PARA_MPMD} 1398 1399 else 1400 # Only one executable (SPMD mode): executionType=3, 4, 5 and 6 1401 1402 for comp in ${config_ListOfComponents[*]} ; do 1403 1404 # Only if we really have an executable for the component : 1405 eval ExeNameOut=\${config_Executable_${comp}[1]} 1406 if ( [ "X${ExeNameOut}" != X\"\" ] && [ "X${ExeNameOut}" != "Xinca.dat" ] ) ; then 1407 1408 # Build script files 1409 1410 echo "#!/bin/ksh" > script_${ExeNameOut}.ksh 1411 echo "" >> script_${ExeNameOut}.ksh 1412 IGCM_sys_Chmod u+x script_${ExeNameOut}.ksh 1413 1414 if ( ${OK_PARA_OMP} 
) ; then 1415 eval comp_proc_omp_loc=\${${comp}_PROC_OMP} 1416 # Check if the number of threads is correct 1417 case ${comp_proc_omp_loc} in 1418 2|4|5|10|20) 1419 IGCM_debug_Print 1 "You run ${ExeNameOut} on ${comp_proc_omp_loc} OMP threads" 1420 ;; 1421 *) 1422 IGCM_debug_Exit "ERROR with OMP parameters !" 1423 IGCM_debug_Print 2 "${comp_proc_omp_loc} is not possible as number of OMP threads" 1424 IGCM_debug_Print 2 "Only 2,4,5,10,20 as number of OMP threads are possible " 1425 IGCM_debug_Verif_Exit 1426 ;; 1427 esac 1428 echo "" >> script_${ExeNameOut}.ksh 1429 echo "export OMP_STACKSIZE=3g" >> script_${ExeNameOut}.ksh 1430 echo "export OMP_PLACES=cores" >> script_${ExeNameOut}.ksh 1431 echo "OMP_NUM_THREADS=${comp_proc_omp_loc}" >> script_${ExeNameOut}.ksh 1432 fi 1433 1434 eval comp_proc_mpi_loc=\${${comp}_PROC_MPI} 1435 1436 # To have out/err per process on different files 1437 echo "./${ExeNameOut} > out_${ExeNameOut}.out.\${SLURM_PROCID} 2>out_${ExeNameOut}.err.\${SLURM_PROCID}" >> script_${ExeNameOut}.ksh 1438 EXECUTION="/usr/bin/time srun ./script_${ExeNameOut}.ksh" 1439 1440 IGCM_debug_Print 1 "sys Jean-Zay : script_${ExeNameOut}.ksh contains" 1441 cat script_${ExeNameOut}.ksh 1442 1443 fi 1444 1445 done 1446 1447 fi # ${OK_PARA_MPMD} 1448 1449 else 1272 1450 1273 1451 EXECUTION=${HOST_MPIRUN_COMMAND} … … 1590 1768 fi # ${OK_PARA_MPMD} 1591 1769 1770 fi 1592 1771 IGCM_debug_Print 1 "sys Irene : execution command is " 1593 1772 IGCM_debug_Print 1 "$EXECUTION"
Note: See TracChangeset for help on using the changeset viewer.