Changeset 1500 for trunk/libIGCM
- Timestamp: 11/19/19 16:35:09 (4 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/libIGCM/libIGCM_sys/libIGCM_sys_jeanzay.ksh
r1498 r1500 1136 1136 elif [ ${executionType} -eq 2 ] ; then 1137 1137 # MPMD + MPI + OMP 1138 sed -e "s/::openMPthreads::/${openMPthreads}/" \ 1139 -e "s/::JobNumProcTot::/${mpiTasks}/" \ 1140 ${file} > ${file}.tmp 1141 1138 (( nodeNumber = coreNumber / NB_CORE_PER_NODE )) 1139 [ $(( ${coreNumber} % ${NB_CORE_PER_NODE} )) -ne 0 ] && (( nodeNumber = nodeNumber + 1 )) 1140 sed -e "/::openMPthreads::/d" \ 1141 -e "s/::JobNumProcTot::/${mpiTasks}/" \ 1142 -e "/ntasks/i\#SBATCH --nodes=${nodeNumber}"\ 1143 -e "/ntasks/i\#SBATCH --exclusive" \ 1144 ${file} > ${file}.tmp 1142 1145 elif [ ${executionType} -eq 3 ] ; then 1143 1146 # SPMD + MPI/OMP … … 1245 1248 1246 1249 # MPI-OpenMP (MPMD) 1247 # example of run_file 1248 # 0-70 env OMP_NUM_THREADS=8 ./script_lmdz.x.ksh 1249 # 71-71 env OMP_NUM_THREADS=1 ./script_xios.x.ksh 1250 1251 # Loop on the components to build run_file and script_exec files 1252 current_core=0 1250 # export SLURM_HOSTFILE=./hostlist 1251 # srun --cpu-bind=none --distribution=arbitrary --multi-prog ./run_file 1252 # example of hostlist file : 1253 # r3i3n33 1254 # r3i3n33 1255 # ... 1256 # example of run_file : 1257 # 0 taskset -c 0-4 ./script_lmdz.x.ksh 1258 # 1 taskset -c 5-9 ./script_lmdz.x.ksh 1259 #... 1260 # 32 taskset -c 0 ./script_opa.xx.ksh 1261 # 33 taskset -c 1 ./script_opa.xx.ksh 1262 #... 
1263 # 62 taskset -c 30 ./script_xios.x.ksh 1264 1265 # Hosts treatment 1266 IFS=$'\n'; set -f 1267 listnodes=($(< <( scontrol show hostnames $SLURM_JOB_NODELIST ))) 1268 unset IFS; set +f 1269 rm -f hostlist 1270 1271 # Loop on the components to build run_file and script_exec files 1272 rank=0 1273 current_core=0 1274 1253 1275 for comp in ${config_ListOfComponents[*]} ; do 1254 1276 … … 1268 1290 eval comp_proc_mpi_loc=\${${comp}_PROC_MPI} 1269 1291 eval comp_proc_omp_loc=\${${comp}_PROC_OMP} 1292 eval comp_proc_nod_loc=\${${comp}_PROC_NOD} 1293 1270 1294 1271 1295 # Build script files … … 1274 1298 echo "" >> script_${ExeNameOut}.ksh 1275 1299 if [ ${comp_proc_omp_loc} -gt 1 ] ; then 1300 # Check if the number of threads is correct 1301 case ${comp_proc_omp_loc} in 1302 2|4|5|10|20) 1303 IGCM_debug_Print 1 "You run ${ExeNameOut} on ${comp_proc_omp_loc} OMP threads" 1304 ;; 1305 *) 1306 IGCM_debug_Exit "ERROR with OMP parameters !" 1307 IGCM_debug_Print 2 "${comp_proc_omp_loc} is not possible as number of OMP threads" 1308 IGCM_debug_Print 2 "Only 2,4,5,10,20 as number of OMP threads are possible " 1309 IGCM_debug_Verif_Exit 1310 ;; 1311 esac 1276 1312 echo "export OMP_STACKSIZE=3g" >> script_${ExeNameOut}.ksh 1277 1313 echo "export OMP_PLACES=cores" >> script_${ExeNameOut}.ksh 1314 echo "export OMP_NUM_THREADS=${comp_proc_omp_loc}" >> script_${ExeNameOut}.ksh 1315 1278 1316 fi 1279 1317 … … 1284 1322 1285 1323 # Complete run_file 1286 eval comp_proc_mpi_loc=\${${comp}_PROC_MPI} 1287 (( end_core = ${current_core} + ${comp_proc_mpi_loc} - 1 )) 1288 echo "${current_core}-${end_core} env OMP_NUM_THREADS=${comp_proc_omp_loc} ./script_${ExeNameOut}.ksh " >> run_file 1289 (( current_core = ${end_core} + 1 )) 1290 1324 if [ ${comp_proc_nod_loc} -gt 1 ] ; then 1325 (( offset_comp_proc_loc = NB_CORE_PER_NODE / (comp_proc_mpi_loc / comp_proc_nod_loc) )) 1326 else 1327 (( offset_comp_proc_loc = comp_proc_omp_loc )) 1328 fi 1329 1330 for nb_proc_mpi in `seq 0 
$(($comp_proc_mpi_loc-1))`; do 1331 (( index_host = current_core / NB_CORE_PER_NODE )) 1332 host_value=${listnodes[${index_host}]} 1333 (( slot = current_core % NB_CORE_PER_NODE )) 1334 echo "$host_value" >> hostlist 1335 echo "$rank taskset -c $slot-$((slot+comp_proc_omp_loc-1)) ./script_${ExeNameOut}.ksh" >> run_file 1336 (( rank = rank + 1 )) 1337 (( current_core = current_core + offset_comp_proc_loc )) 1338 done 1291 1339 fi 1292 1340 done … … 1295 1343 ## This is used when no error comes from executables and when something stopped an executable without notice. 1296 1344 export SLURM_WAIT=60 1297 1298 EXECUTION="${HOST_MPIRUN_COMMAND} --multi-prog ./run_file" 1345 export SLURM_HOSTFILE=./hostlist 1346 1347 EXECUTION="${HOST_MPIRUN_COMMAND} --cpu-bind=none --distribution=arbitrary --multi-prog ./run_file" 1299 1348 1300 1349 IGCM_sys_Chmod u+x run_file
Note: See TracChangeset for help on using the changeset viewer.