Changeset 1381 for trunk/libIGCM


Ignore:
Timestamp:
03/03/17 17:52:43 (7 years ago)
Author:
mafoipsl
Message:

See ticket #323. Use ccc_mprun instead of mpirun for MPMD/MPI/OpenMP execution on curie, as suggested by the hotline. It is more reliable and simpler to use.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/libIGCM/libIGCM_sys/libIGCM_sys_curie.ksh

    r1380 r1381  
    178178#==================================================== 
    179179#- HOST_MPIRUN_COMMAND 
    180 typeset -r HOST_MPIRUN_COMMAND=${HOST_MPIRUN_COMMAND:="/usr/bin/time ccc_mprun -E-K1"} 
     180typeset -r HOST_MPIRUN_COMMAND=${HOST_MPIRUN_COMMAND:="/usr/bin/time ccc_mprun "} 
    181181 
    182182#==================================================== 
     
    11871187  if ( ${OK_PARA_MPMD} ) ; then 
    11881188 
     1189    if [ -f run_file ] ; then 
     1190      IGCM_sys_Rm -f run_file 
     1191    fi 
     1192    touch run_file 
     1193 
    11891194    # Only MPI (MPMD) 
    11901195    if  ( ! ${OK_PARA_OMP} ) ; then 
    1191  
    1192       if [ -f run_file ] ; then 
    1193         IGCM_sys_Rm -f run_file 
    1194       fi 
    1195       touch run_file 
    11961196 
    11971197      # Build run_file 
     
    12281228      done 
    12291229 
    1230       EXECUTION="${HOST_MPIRUN_COMMAND} -f ./run_file" 
    1231  
    1232       IGCM_sys_Chmod u+x run_file 
    1233       if ( $DEBUG_sys ) ; then 
    1234         echo "run_file contains : " 
    1235         cat run_file 
    1236       fi 
    1237  
    12381230    # MPI-OpenMP (MPMD) 
     1231    # example of run_file 
     1232    # 71-8 env OMP_NUM_THREADS=8 ./script_lmdz.x.ksh 
     1233    # 480-1 env OMP_NUM_THREADS=1 ./script_opa.xx.ksh 
     1234    # 1-1 env OMP_NUM_THREADS=1 ./script_xios.x.ksh 
     1235 
    12391236    else 
    1240  
    1241       # Use of mpirun instead of ccc_mprun 
    1242       EXECUTION="time mpirun" 
    1243  
    1244       #  Hosts treatment 
    1245       ${EXECUTION} hostname | sort | uniq > hosts.tmp 
    1246  
    1247       i=0 
    1248       rm -f hosts rankfile 
    1249       IGCM_debug_Print 1 "sys Curie, Hosts available :" 
    1250       for nodes in `cat hosts.tmp` 
    1251       do 
    1252         host[$i]=$nodes 
    1253         echo "${host[$i]}" >> hosts 
    1254         IGCM_debug_Print 1 ${host[$i]} 
    1255         i=$((i+1)) 
    1256       done 
    1257       rm -f hosts.tmp 
    1258  
    1259       listnodes=${host[*]} 
    1260  
    1261       EXECUTION="${EXECUTION} -hostfile hosts -rankfile rankfile" 
    1262  
    1263       # Initialisation 
    1264       rank=0 
    1265       current_core=0 
    1266       core_per_node=16 
    1267       init_exec=n 
    12681237 
    12691238      # Loop on the components 
     
    12851254          eval comp_proc_mpi_loc=\${${comp}_PROC_MPI} 
    12861255          eval comp_proc_omp_loc=\${${comp}_PROC_OMP} 
     1256 
     1257          # Build script files 
    12871258 
    12881259          echo "#!/bin/ksh" > script_${ExeNameOut}.ksh 
     
    13021273              ;; 
    13031274            esac 
     1275            ### only for LMDZ? 
    13041276            echo "export KMP_STACKSIZE=3g"  >> script_${ExeNameOut}.ksh 
    13051277            echo "export KMP_LIBRARY=turnaround"  >> script_${ExeNameOut}.ksh 
    13061278            echo "export MKL_SERIAL=YES"  >> script_${ExeNameOut}.ksh 
    13071279            echo "OMP_NUM_THREADS=${comp_proc_omp_loc}" >> script_${ExeNameOut}.ksh 
     1280 
    13081281          fi 
    13091282 
     1283          # to have out/err per process on different files 
     1284          echo "./${ExeNameOut} > out_${ExeNameOut}.out.\${SLURM_PROCID} 2>out_${ExeNameOut}.err.\${SLURM_PROCID}" >> script_${ExeNameOut}.ksh 
    13101285          #echo "./${ExeNameOut}" >> script_${ExeNameOut}.ksh 
    1311           echo "(( MYMPIRANK = OMPI_COMM_WORLD_RANK )) " >> script_${ExeNameOut}.ksh 
    1312           echo "MYMPIRANK=\$(printf '%3.3d\n' \${MYMPIRANK})" >> script_${ExeNameOut}.ksh 
    1313           echo "./${ExeNameOut} > out_${ExeNameOut}.out.\${MYMPIRANK} 2>out_${ExeNameOut}.err.\${MYMPIRANK}" >> script_${ExeNameOut}.ksh 
    1314           IGCM_sys_Chmod u+x script_${ExeNameOut}.ksh 
    1315  
    1316           if [ ${init_exec} = y ] ; then 
    1317             EXECUTION="${EXECUTION} : -np ${comp_proc_mpi_loc} ./script_${ExeNameOut}.ksh" 
    1318           else 
    1319             EXECUTION="${EXECUTION} -np ${comp_proc_mpi_loc} ./script_${ExeNameOut}.ksh" 
    1320             init_exec=y 
    1321           fi 
    1322  
    1323           # Build rankfile : method used to assign cores and nodes for the MPI process 
    1324           # Ex : 
    1325           #rank 0=curie5296 slot=0,1,2,3 
    1326           #rank 1=curie5296 slot=4,5,6,7 
    1327           # Example of final command : 
    1328           # mpirun -hostfile hosts -rankfile rankfile -np 27 ./script_lmdz.x.ksh : -np 5 ./script_opa.xx.ksh 
    1329           # with script_lmdz.x.ksh : 
    1330           # #!/bin/ksh 
    1331           #export KMP_STACKSIZE=3g 
    1332           #export KMP_LIBRARY=turnaround 
    1333           #export MKL_SERIAL=YES 
    1334           #OMP_NUM_THREADS=4 
    1335           #./lmdz.x 
    1336  
    1337           for nb_proc_mpi in `seq 0 $(($comp_proc_mpi_loc-1))`; do 
    1338             (( index_host = current_core / core_per_node )) 
    1339             host_value=${host[${index_host}]} 
    1340             (( slot =  current_core % core_per_node )) 
    1341             virg="," 
    1342             string_final="" 
    1343             for index in `seq $slot $(($slot+$comp_proc_omp_loc-1))`; do 
    1344               string=$index$virg 
    1345               string_final=$string_final$string 
    1346             done 
    1347             string_final=$( echo $string_final | sed "s/.$//" ) 
    1348             echo "rank $rank=$host_value slot=$string_final" >> rankfile 
    1349             (( rank = rank + 1 )) 
    1350             (( current_core = current_core + comp_proc_omp_loc )) 
    1351           done 
     1286 
     1287          IGCM_sys_Chmod u+x script_${ExeNameOut}.ksh 
     1288 
     1289          # Complete run_file 
     1290 
     1291          echo "${comp_proc_mpi_loc}-${comp_proc_omp_loc} env OMP_NUM_THREADS=${comp_proc_omp_loc} ./script_${ExeNameOut}.ksh " >>run_file 
     1292 
    13521293        fi 
    13531294      done 
     1295 
     1296      # export system variables required with MPMD/MPI/OpenMP ccc_mprun method 
     1297 
     1298      export BRIDGE_HETEROGENEOUS_MPMD=1 
     1299      export OMPI_MCA_ess=pmi                  # Necessaire pour bullxmpi 
     1300 
     1301    fi 
     1302 
     1303    # for both MPMD/MPI and MPMD/MPI/OpenMP mode 
     1304 
     1305    EXECUTION="${HOST_MPIRUN_COMMAND} -f ./run_file" 
     1306 
     1307    IGCM_sys_Chmod u+x run_file 
     1308    if ( $DEBUG_sys ) ; then 
     1309      echo "run_file contains : " 
     1310      cat run_file 
    13541311    fi 
    13551312 
     
    13621319      eval ExeNameOut=\${config_Executable_${comp}[1]} 
    13631320      if ( [ "X${ExeNameOut}" != X\"\" ] && [ "X${ExeNameOut}" != "Xinca.dat" ] ) ; then 
     1321 
     1322        # Build script files 
    13641323 
    13651324        echo "#!/bin/ksh" > script_${ExeNameOut}.ksh 
Note: See TracChangeset for help on using the changeset viewer.