Changeset 1558 for trunk


Ignore:
Timestamp:
09/15/22 14:42:26 (20 months ago)
Author:
jgipsl
Message:

First version that can be used at spirit and spiritx on the IPSL ESPRI MESO cluster.

The execution part in libIGCM_sys_mesoipsl.ksh comes from libIGCM_sys_jeanzay.ksh. It has only been tested for the MPMD MPI-only and SPMD MPI-only cases.

Location:
trunk/libIGCM
Files:
5 edited
1 copied

Legend:

Unmodified
Added
Removed
  • trunk/libIGCM/AA_create_se

    r1525 r1558  
    8181#-Q- lxiv8 #PBS -o SE.$$ 
    8282#-Q- lxiv8 #PBS -S /bin/ksh 
     83#-Q- mesoipsl #!/bin/ksh 
     84#-Q- mesoipsl ###################### 
     85#-Q- mesoipsl ## MESO ESPRI IPSL  ## 
     86#-Q- mesoipsl ###################### 
     87#-Q- mesoipsl #SBATCH --job-name=SE         # Job Name 
     88#-Q- mesoipsl #SBATCH --output=SE.out_%I    # standard output 
     89#-Q- mesoipsl #SBATCH --error=SE.out_%I     # error output 
     90#-Q- mesoipsl #SBATCH -N  1                        # Number of core 
     91#-Q- mesoipsl #SBATCH --time=10:00:00       # Wall clock limit (seconds) 
    8392#-Q- ifort_CICLAD ###################### 
    8493#-Q- ifort_CICLAD ##   CICLAD    IPSL ## 
  • trunk/libIGCM/AA_create_ts

    r1549 r1558  
    7979#-Q- lxiv8 #PBS -o TS.$$ 
    8080#-Q- lxiv8 #PBS -S /bin/ksh 
     81#-Q- mesoipsl #!/bin/ksh 
     82#-Q- mesoipsl ###################### 
     83#-Q- mesoipsl ## MESO ESPRI IPSL  ## 
     84#-Q- mesoipsl ###################### 
     85#-Q- mesoipsl #SBATCH --job-name=TS         # Job Name 
     86#-Q- mesoipsl #SBATCH --output=TS.out_%I    # standard output 
     87#-Q- mesoipsl #SBATCH --error=TS.out_%I     # error output 
     88#-Q- mesoipsl #SBATCH -N  1                        # Number of core 
     89#-Q- mesoipsl #SBATCH --time=10:00:00       # Wall clock limit (seconds) 
    8190#-Q- ifort_CICLAD ###################### 
    8291#-Q- ifort_CICLAD ##   CICLAD    IPSL ## 
  • trunk/libIGCM/AA_job

    r1555 r1558  
    110110#-Q- lxiv8 #PBS -v BATCH_NUM_PROC_TOT=::JobNumProcTot:: 
    111111#-Q- lxiv8 #PBS -l nodes=1:ppn=::JobNumProcTot:: 
     112#-Q- mesoipsl #!/bin/ksh 
     113#-Q- mesoipsl ###################### 
     114#-Q- mesoipsl ## MESO ESPRI IPSL  ## 
     115#-Q- mesoipsl ###################### 
     116#-Q- mesoipsl #SBATCH --job-name=::Jobname::        # Job Name 
     117#-Q- mesoipsl #SBATCH --output=Script_Output_::Jobname::.000001    # standard output 
     118#-Q- mesoipsl #SBATCH --error=Script_Output_::Jobname::.000001    # error output 
     119#-Q- mesoipsl #SBATCH --ntasks=::JobNumProcTot::   # Number of MPI tasks 
     120#-Q- mesoipsl #SBATCH --cpus-per-task=::openMPthreads::  # Number of openMP threads. 
     121#-Q- mesoipsl #SBATCH --hint=nomultithread         # 1 MPI process per physical core (no hyperthreading)  
     122#-Q- mesoipsl #SBATCH --time=30                    # Wall clock limit (minutes) 
     123#-Q- mesoipsl set +x 
    112124#-Q- ifort_CICLAD ###################### 
    113125#-Q- ifort_CICLAD ##   CICLAD    IPSL ## 
  • trunk/libIGCM/AA_monitoring

    r1525 r1558  
    8282#-Q- lxiv8 #PBS -o MONITORING.$$ 
    8383#-Q- lxiv8 #PBS -S /bin/ksh 
     84#-Q- mesoipsl #!/bin/ksh  
     85#-Q- mesoipsl ###################### 
     86#-Q- mesoipsl ## MESO ESPRI IPSL  ## 
     87#-Q- mesoipsl ###################### 
     88#-Q- mesoipsl #SBATCH --job-name=MONITORING         # Job Name 
     89#-Q- mesoipsl #SBATCH --output=MONITORING.out_%J    # standard output 
     90#-Q- mesoipsl #SBATCH --error=MONITORING.out_%J     # error output 
     91#-Q- mesoipsl #SBATCH --ntasks=1                    # Number of core 
     92#-Q- mesoipsl #SBATCH --hint=nomultithread          # 1 MPI process per physical core (no hyperthreading) 
     93#-Q- mesoipsl #SBATCH --time=10:00:00               # Wall clock limit (seconds) 
     94#-Q- mesoipsl set +x 
    8495#-Q- ifort_CICLAD ###################### 
    8596#-Q- ifort_CICLAD ##   CICLAD    IPSL ## 
  • trunk/libIGCM/libIGCM_sys/libIGCM_sys.ksh

    r1521 r1558  
    8383        SYSTEM=lxiv8 
    8484        . ${libIGCM}/libIGCM_sys/libIGCM_sys_obelix.ksh;; 
     85    spiritx*) 
     86            [ ! X${TaskType} = Xchecking ] && echo "Source machine dependent settings for spiritx at MESO ESPRI IPSL cluster." 
     87        CENTER=spiritx 
     88        SYSTEM=mesoipsl 
     89        . ${libIGCM}/libIGCM_sys/libIGCM_sys_mesoipsl.ksh;; 
     90    spirit*) 
     91            [ ! X${TaskType} = Xchecking ] && echo "Source machine dependent settings for spirit at MESO ESPRI IPSL cluster." 
     92        CENTER=spirit 
     93        SYSTEM=mesoipsl 
     94        . ${libIGCM}/libIGCM_sys/libIGCM_sys_mesoipsl.ksh;; 
    8595    ciclad*) 
    8696        [ ! X${TaskType} = Xchecking ] && echo "Source machine dependent settings for ciclad for running at ciclad." 
  • trunk/libIGCM/libIGCM_sys/libIGCM_sys_mesoipsl.ksh

    r1552 r1558  
    1919 
    2020#D-#================================================== 
    21 #D-LibIGCM_sys for ciclad 
     21#D-LibIGCM_sys for IPSL ESPRI MESO cluster: spirit and spiritx 
    2222#D-#================================================== 
    2323#D- 
     
    7272typeset PROJECT=NONE 
    7373# jobWarningDelay in seconds 
    74 typeset jobWarningDelay=${PBS_WALLTIME} 
     74#typeset jobWarningDelay=${PBS_WALLTIME} 
    7575 
    7676#D- 
     
    8080 
    8181# Submit command 
    82 typeset SUBMIT=${SUBMIT:=qsub} 
     82typeset SUBMIT=${SUBMIT:=sbatch} 
    8383# rsync with path 
    8484typeset -r RSYNC=/usr/bin/rsync 
     
    9191# Access to module command 
    9292#==================================================== 
    93 . /usr/share/Modules/init/ksh 
     93. /etc/profile.d/modules.sh 
    9494 
    9595#==================================================== 
     
    9999    IGCM_debug_Print 1 "Modules will be loaded later in IGCM_sys_activ_variables." 
    100100else 
    101     if [ $CENTER == IPSL-ciclad ] ; then 
    102         # At ciclad 
    103         . /home/igcmg/MachineEnvironment/ciclad/atlas_env_ciclad  
    104     else 
    105         # At climserv use the same files stored at ciclad but adapt the path 
    106         . /ciclad-home/igcmg/MachineEnvironment/climserv/atlas_env_climserv 
     101    if [ $CENTER == spirit ] ; then 
     102        # At spirit 
     103        . /home/igcmg/MachineEnvironment/mesoipsl/atlas_env_mesoipsl 
     104    elif [ $CENTER == spiritx ] ; then 
     105        # At spiritx 
     106        . /ciclad-home/igcmg/MachineEnvironment/mesoipsl/atlas_env_mesoipsl 
    107107    fi 
    108108fi 
    109  
    110 # Load python 
    111 module load python/2.7-anaconda > /dev/null 2>&1 
    112109 
    113110[ ! X${TaskType} = Xchecking ] && IGCM_debug_Print 1 "List of loaded modules:" 
     
    118115#==================================================== 
    119116# For rebuild 
    120 if [ $CENTER == IPSL-ciclad ] ; then 
    121     export PATH=${PATH}:/home/igcmg/rebuild/src_X64_CICLAD/modipsl_v2_2_3_netcdf4.2/bin/ 
     117if [ $CENTER == spirit ] ; then 
     118    export PATH=${PATH}:/home/igcmg/rebuild/spirit/modipsl/modeles/IOIPSL/bin/ 
    122119else 
    123     export PATH=${PATH}:/ciclad-home/igcmg/rebuild/src_X64_CICLAD/modipsl_v2_2_3_netcdf4.2/bin/ 
     120    export PATH=${PATH}:/ciclad-home/igcmg/rebuild/spirit/modipsl/modeles/IOIPSL/bin/ 
    124121fi 
    125122#==================================================== 
     
    137134#==================================================== 
    138135#- SUBMIT_DIR : submission dir 
    139 if [ X${PBS_O_WORKDIR} != X ] ; then 
    140   typeset -x SUBMIT_DIR=${SUBMIT_DIR:=${PBS_O_WORKDIR}} 
     136if [ X${SLURM_SUBMIT_DIR} != X ] ; then 
     137  typeset -x SUBMIT_DIR=${SUBMIT_DIR:=${SLURM_SUBMIT_DIR}} 
    141138else 
    142139  typeset -x SUBMIT_DIR=${SUBMIT_DIR:=${PWD}} 
     
    148145 
    149146#==================================================== 
    150 #- RUN_DIR_PATH : Temporary working directory (=> TMP) 
    151 if [ X${PBS_JOBID} != X ] ; then 
    152     if [ $CENTER == IPSL-ciclad ] ; then 
    153         typeset -r RUN_DIR_PATH=${RUN_DIR_PATH:=/data/${LOGIN}/RUN_DIR/${PBS_JOBID}_${$}} 
    154     else 
    155         typeset -r RUN_DIR_PATH=${RUN_DIR_PATH:=/homedata/${LOGIN}/RUN_DIR/${PBS_JOBID}_${$}} 
    156     fi 
     147#- RUN_DIR_PATH : Temporary working directory 
     148 
     149if [ $CENTER == spirit ] ; then 
     150    RUN_DIR_PATH=${RUN_DIR_PATH:=/scratchu/${LOGIN}/RUN_DIR} 
    157151else 
    158   typeset -r RUN_DIR_PATH=${RUN_DIR_PATH:=/tmp/tmp$$} 
     152    RUN_DIR_PATH=${RUN_DIR_PATH:=/scratchx/${LOGIN}/RUN_DIR} 
    159153fi 
     154if [ X${SLURM_JOBID} != X ] ; then 
     155    typeset -r RUN_DIR_PATH=${RUN_DIR_PATH}/${SLURM_JOBID}_${$} 
     156else 
     157    typeset -r RUN_DIR_PATH=${RUN_DIR_PATH}/${$} 
     158fi 
    160159 
    161160#==================================================== 
    162161#- OUTCOMMAND_PATH : tmp place to store command lines standard error and outputs 
    163 typeset -r OUTCOMMAND_PATH=/tmp 
     162# Note that typeset -r can not be set at spirit because it sets the path as read-only 
     163typeset OUTCOMMAND_PATH=/tmp 
    164164 
    165165#==================================================== 
     
    172172 
    173173#==================================================== 
    174 #- set PackDefault to false on ciclad 
     174#- set PackDefault to false on meso-ipsl  
    175175PackDefault=false 
    176176 
     
    204204    #==================================================== 
    205205    #- ARCHIVE (dedicated to large files) 
    206     if [ $CENTER == IPSL-ciclad ] ; then 
    207         # At ciclad 
     206    if [ $CENTER == spirit ] ; then 
     207        # At spirit 
    208208        ARCHIVE=${ARCHIVE:=/data/${LOGIN}} 
    209209    else 
    210         # At climserv 
     210        # At spiritx 
    211211        ARCHIVE=${ARCHIVE:=/homedata/${LOGIN}} 
    212212    fi 
     
    223223  fi 
    224224 
    225   # ON CICLAD NO SPECIAL CASE WHEN X${config_UserChoices_SpaceName} = XTEST 
     225  # ON MESO-IPSL NO SPECIAL CASE WHEN X${config_UserChoices_SpaceName} = XTEST 
    226226 
    227227  #==================================================== 
     
    392392  fi 
    393393  typeset options status 
    394   options="-o ${SUBMIT_DIR}/${Script_Output}" 
    395   /usr/bin/qsub ${options} $1 > ${OUTCOMMAND_PATH}/out_command_${LOGIN}.$$ 2>&1 
     394  options="-o ${SUBMIT_DIR}/${Script_Output} -e ${SUBMIT_DIR}/${Script_Output}" 
     395  /usr/bin/time sbatch ${options} $1 > ${OUTCOMMAND_PATH}/out_command_${LOGIN}.$$ 2>&1 
    396396  status=$? 
    397397 
     
    401401    IGCM_debug_Exit "IGCM_sys_Qsub" 
    402402  else 
    403     JobID=$( gawk {'print $1'} ${OUTCOMMAND_PATH}/out_command_${LOGIN}.$$ ) 
     403    JobID=$( gawk {'print $4'} ${OUTCOMMAND_PATH}/out_command_${LOGIN}.$$ ) 
    404404  fi 
    405405  IGCM_debug_PopStack "IGCM_sys_Qsub" 
     
    417417  fi 
    418418  typeset options status 
    419   options="-o ${POST_DIR}/${Script_Post_Output}.out -v ${listVarEnv}" 
    420   /usr/bin/qsub ${options} ${libIGCM_POST}/$1.job > ${OUTCOMMAND_PATH}/out_command_${LOGIN}.$$ 2>&1 
     419  options="-o ${POST_DIR}/${Script_Post_Output}.out  -e ${POST_DIR}/${Script_Post_Output}.out" 
     420  sbatch ${options} ${libIGCM_POST}/$1.job > ${OUTCOMMAND_PATH}/out_command_${LOGIN}.$$ 2>&1 
    421421  status=$? 
    422422 
     
    426426    IGCM_debug_Exit "IGCM_sys_QsubPost" 
    427427  else 
    428     JobID=$( gawk {'print $1'} ${OUTCOMMAND_PATH}/out_command_${LOGIN}.$$ ) 
     428    JobID=$( gawk {'print $4'} ${OUTCOMMAND_PATH}/out_command_${LOGIN}.$$ ) 
    429429  fi 
    430430  IGCM_debug_PopStack "IGCM_sys_QsubPost" 
     
    792792  else 
    793793      IGCM_debug_Print 1 "IGCM_sys_active_variables : Default modules will be used" 
    794       if [ $CENTER == IPSL-ciclad ] ; then 
    795           # At ciclad 
    796           EnvFile=/home/igcmg/.atlas_env_ciclad_ksh  
     794      if [ $CENTER == spirit ] ; then 
     795          # At spirit 
     796          EnvFile=/home/igcmg/MachineEnvironment/mesoipsl/atlas_env_mesoipsl 
    797797      else 
    798           # At climserv use the same files stored at ciclad but adapt the path 
    799           EnvFile=/ciclad-home/igcmg/.atlas_env_ciclad_ksh 
      798          # At spiritx 
     799          EnvFile=/ciclad-home/igcmg/MachineEnvironment/mesoipsl/atlas_env_mesoipsl 
    800800      fi 
    801801  fi 
     
    804804  . ${EnvFile} 
    805805 
    806   IGCM_debug_Print 1 "IGCM_sys_active_variables : Now loaded modules for ciclad/climserv. " 
     806 
     807  IGCM_debug_Print 1 "IGCM_sys_active_variables : Now loaded modules for meso-ipsl . " 
    807808  module list 
     809   
    808810 
    809811# -------------------------------------------------------------------- 
     
    868870  elif [ ${executionType} -eq 2 ] ; then 
    869871    # MPMD + MPI + OMP 
    870     sed -e "s/::openMPthreads::/${openMPthreads}/" \ 
    871         -e "s/::JobNumProcTot::/${coreNumber}/"    \ 
    872       ${file} > ${file}.tmp 
    873  
     872      (( nodeNumber = coreNumber / NB_CORE_PER_NODE )) 
     873      [ $(( ${coreNumber} % ${NB_CORE_PER_NODE} )) -ne 0 ] && (( nodeNumber = nodeNumber + 1 )) 
     874      sed -e "/::openMPthreads::/d"                 \ 
     875        -e "s/::JobNumProcTot::/${mpiTasks}/"       \ 
     876        -e "/ntasks/i\#SBATCH --nodes=${nodeNumber}"\ 
     877        -e "/ntasks/i\#SBATCH --exclusive"          \ 
     878         ${file} > ${file}.tmp 
    874879  elif [ ${executionType} -eq 3 ] ; then 
    875880    # SPMD + MPI/OMP 
     
    929934    echo "IGCM_sys_build_execution_scripts " $@ 
    930935  fi 
    931  
     936  IGCM_debug_Print 1 "executionType= ${executionType}" 
    932937  EXECUTION=${HOST_MPIRUN_COMMAND} 
    933938 
    934   # MPMD mode 
    935939  if ( ${OK_PARA_MPMD} ) ; then 
    936940 
    937     # Only MPI (MPMD) 
     941    # MPMD mode 
     942    # 1 MPI only : executionType=1 
     943    # 2 MPI/OpenMP : executionType=2 
     944 
     945    if [ -f run_file ] ; then 
     946      IGCM_sys_Rm -f run_file 
     947    fi 
     948    touch run_file 
     949 
     950    # case 1 : Only MPI (MPMD) 
    938951    if  ( ! ${OK_PARA_OMP} ) ; then 
    939  
    940       if [ -f run_file ] ; then 
    941         IGCM_sys_Rm -f run_file 
    942       fi 
    943       touch run_file 
    944  
    945952      # Build run_file 
    946  
     953      current_core=0 
    947954      # First loop on the components for the coupler ie oasis (only if oasis3) 
    948955      # the coupler ie oasis3 must be the first one 
     
    952959        eval ExeNameOut=\${config_Executable_${comp}[1]} 
    953960 
    954         # for CPL component only 
    955         if [ "X${comp}" = "XCPL" ]  && [ "X${ExeNameOut}" != X\"\" ] ; then 
    956           eval comp_proc_mpi_loc=\${${comp}_PROC_MPI} 
    957           eval comp_proc_omp_loc=\${${comp}_PROC_OMP} 
    958           echo "-np ${comp_proc_mpi_loc} ./${ExeNameOut} " >> run_file 
    959         fi 
    960       done 
    961  
    962       # Then second loop on the components 
    963       for comp in ${config_ListOfComponents[*]} ; do 
    964  
    965         eval ExeNameIn=\${config_Executable_${comp}[0]} 
    966         eval ExeNameOut=\${config_Executable_${comp}[1]} 
    967  
    968         # Only if we really have an executable for the component and not the coupler ie oasis: 
    969         if ( [ "X${ExeNameOut}" != X\"\" ] && [ "X${comp}" != "XCPL" ] ) ; then 
    970  
     961        # Only if the component has an executable 
     962        if ( [ "X${ExeNameOut}" != X\"\" ] ) ; then 
     963 
     964#          eval comp_proc_mpi_loc=\${${comp}_PROC_MPI} 
     965#          (( end_core = ${current_core} + ${comp_proc_mpi_loc} - 1 )) 
     966#          echo "${current_core}-${end_core} ./${ExeNameOut}" >> run_file 
     967#          (( current_core = ${end_core} + 1 )) 
    971968          eval comp_proc_mpi_loc=\${${comp}_PROC_MPI} 
    972969          eval comp_proc_omp_loc=\${${comp}_PROC_OMP} 
    973970          echo "-np ${comp_proc_mpi_loc} ./${ExeNameOut}" >> run_file 
    974         fi 
     971 
     972        fi 
    975973      done 
    976974 
     
    982980        cat run_file 
    983981      fi 
    984   
     982 
     983    else  
     984 
     985    # 2 MPI/OpenMP : executionType=2 
     986 
    985987    # MPI-OpenMP (MPMD) 
    986     else 
    987  
    988       #  Hosts treatment 
    989       ${HOST_MPIRUN_COMMAND} hostname | sort | uniq > hosts.tmp 
    990  
    991       i=0 
    992       rm -f hosts 
    993       IGCM_debug_Print 1 "sys Ciclad, Hosts available :" 
    994       for nodes in `cat hosts.tmp` ; do 
    995         host[$i]=$nodes 
    996         echo "${host[$i]} slots=1 max_slots=1" >> hosts 
    997         IGCM_debug_Print 1 ${host[$i]} 
    998         i=$((i+1)) 
    999       done 
    1000       rm -f hosts.tmp 
    1001  
    1002       listnodes=${host[*]} 
    1003  
    1004       EXECUTION="${HOST_MPIRUN_COMMAND} -hostfile hosts" 
    1005  
    1006       # Initialisation 
    1007       rank=0 
    1008       current_core=0 
    1009       core_per_node=8 
    1010       init_exec=n 
    1011  
    1012       # Loop on the components 
     988    # export SLURM_HOSTFILE=./hostlist  
     989    # srun --cpu-bind=none --distribution=arbitrary --multi-prog ./run_file 
     990    # example of  hostlist file : 
     991    # r3i3n33 
     992    # r3i3n33 
     993    # ... 
     994    # example of run_file : 
     995    # 0-70 ./prog_lmdz.x.sh %o %t 
     996    # 71-430 ./prog_opa.xx.sh %o %t 
     997    # 431-431 ./prog_xios.x.sh %o %t 
     998    # examples of prog_file : 
     999    # prog_lmdz.x.sh : 
     1000    # (( init = 0 + $1 )) 
     1001    # (( index = init * 10 )) 
     1002    # (( slot = index % 40 )) 
     1003    # taskset -c $slot-$((slot + 10 - 1)) ./script_lmdz.x.ksh 
     1004    # that will become 
     1005    # taskset -c 0-9 ./script_lmdz.x.ksh 
     1006    # ... 
     1007    # with script_lmdz.x.ksh 
     1008    # export OMP_STACKSIZE=3g 
     1009    # export OMP_PLACES=cores 
     1010    # export OMP_NUM_THREADS=10 
     1011    # ./lmdz.x > out_lmdz.x.out.${SLURM_PROCID} 2>out_lmdz.x.err.${SLURM_PROCID} 
     1012 
     1013    #  Hosts treatment 
     1014    _bkIFS=$IFS; 
     1015        IFS=$'\n'; set -f 
     1016        listnodes=($(< <( scontrol show hostnames $SLURM_JOB_NODELIST ))) 
     1017        IFS=$_bkIFS; set +f 
     1018        rm -f hostlist 
     1019 
     1020     # Loop on the components to build run_file and script_exec files 
     1021        rank=0 
     1022        current_core=0 
     1023        current_core_mpi=0 
     1024 
    10131025      for comp in ${config_ListOfComponents[*]} ; do 
    10141026 
     
    10281040          eval comp_proc_mpi_loc=\${${comp}_PROC_MPI} 
    10291041          eval comp_proc_omp_loc=\${${comp}_PROC_OMP} 
     1042          eval comp_proc_nod_loc=\${${comp}_PROC_NOD} 
     1043      
     1044 
     1045          # Build script files 
    10301046 
    10311047          echo "#!/bin/ksh" > script_${ExeNameOut}.ksh 
    10321048          echo ""  >> script_${ExeNameOut}.ksh 
    10331049          if [ ${comp_proc_omp_loc} -gt 1 ] ; then 
    1034  
    1035             # Check if the number of threads is correct 
    1036             case ${comp_proc_omp_loc} in 
    1037             2|4|8) 
    1038               IGCM_debug_Print 1 "You run ${ExeNameOut} on ${comp_proc_omp_loc} OMP threads" 
    1039               ;; 
    1040             *) 
    1041               IGCM_debug_Exit "ERROR with OMP parameters !" 
    1042               IGCM_debug_Print 2 "${comp_proc_omp_loc} is not possible as number of OMP threads" 
    1043               IGCM_debug_Print 2 "Only 2,4,8 as number of OMP threads are possible " 
    1044               IGCM_debug_Verif_Exit 
    1045               ;; 
    1046             esac 
    1047             #echo "export KMP_STACKSIZE=3g"  >> script_${ExeNameOut}.ksh 
    1048             #echo "export KMP_LIBRARY=turnaround"  >> script_${ExeNameOut}.ksh 
    1049             #echo "export MKL_SERIAL=YES"  >> script_${ExeNameOut}.ksh 
    1050             echo "export OMP_STACKSIZE=200M" >> script_${ExeNameOut}.ksh  
    1051             echo "export OMP_NUM_THREADS=${comp_proc_omp_loc}" >> script_${ExeNameOut}.ksh 
     1050          # Check if the number of threads is correct 
     1051              case ${comp_proc_omp_loc} in 
     1052                  2|4|5|10|20) 
     1053                      IGCM_debug_Print 1 "You run ${ExeNameOut} on ${comp_proc_omp_loc} OMP threads" 
     1054                      ;; 
     1055                  *) 
     1056                      IGCM_debug_Exit "ERROR with OMP parameters !" 
     1057                      IGCM_debug_Print 2 "${comp_proc_omp_loc} is not possible as number of OMP threads" 
     1058                      IGCM_debug_Print 2 "Only 2,4,5,10,20 as number of OMP threads are possible " 
     1059                      IGCM_debug_Verif_Exit 
     1060                      ;; 
     1061              esac 
     1062            echo "export OMP_STACKSIZE=3g"  >> script_${ExeNameOut}.ksh 
     1063            echo "export OMP_PLACES=cores"  >> script_${ExeNameOut}.ksh 
     1064            echo "export OMP_NUM_THREADS=${comp_proc_omp_loc}"  >> script_${ExeNameOut}.ksh 
     1065 
    10521066          fi 
    10531067 
    1054           #echo "./${ExeNameOut}" >> script_${ExeNameOut}.ksh 
    1055           echo "(( MYMPIRANK = OMPI_COMM_WORLD_RANK )) " >> script_${ExeNameOut}.ksh 
    1056           echo "MYMPIRANK=\$(printf '%3.3d\n' \${MYMPIRANK})" >> script_${ExeNameOut}.ksh 
    1057           echo "./${ExeNameOut} > out_${ExeNameOut}.out.\${MYMPIRANK} 2>out_${ExeNameOut}.err.\${MYMPIRANK}" >> script_${ExeNameOut}.ksh 
     1068          # to have out/err per process on different files 
     1069          echo "./${ExeNameOut} > out_${ExeNameOut}.out.\${SLURM_PROCID} 2>out_${ExeNameOut}.err.\${SLURM_PROCID}" >> script_${ExeNameOut}.ksh 
     1070 
    10581071          IGCM_sys_Chmod u+x script_${ExeNameOut}.ksh 
    10591072 
    1060           if [ ${init_exec} = y ] ; then 
    1061             EXECUTION="${EXECUTION} : -np ${comp_proc_mpi_loc} ./script_${ExeNameOut}.ksh" 
     1073        # Build run_file 
     1074        # Only if the component has an executable 
     1075        if ( [ "X${ExeNameOut}" != X\"\" ] ) ; then 
     1076 
     1077          eval comp_proc_mpi_loc=\${${comp}_PROC_MPI} 
     1078          (( end_core = ${current_core_mpi} + ${comp_proc_mpi_loc} - 1 )) 
     1079          echo "${current_core_mpi}-${end_core} ./prog_${ExeNameOut}.sh %o %t" >> run_file 
     1080          (( current_core_mpi = ${end_core} + 1 )) 
     1081        fi 
     1082 
     1083          if [ ${comp_proc_nod_loc} -gt 1 ] ; then 
     1084              (( offset_comp_proc_loc =  NB_CORE_PER_NODE / (comp_proc_mpi_loc / comp_proc_nod_loc) )) 
    10621085          else 
    1063             EXECUTION="${EXECUTION} -np ${comp_proc_mpi_loc} ./script_${ExeNameOut}.ksh" 
    1064             init_exec=y 
     1086              (( offset_comp_proc_loc =  comp_proc_omp_loc )) 
    10651087          fi 
    10661088 
    1067           # Build rankfile : method used to assign cores and nodes for the MPI process 
    1068           # Ex : 
    1069           #rank 0=curie5296 slot=0,1,2,3 
    1070           #rank 1=curie5296 slot=4,5,6,7 
    1071           # Example of final command : 
    1072           # mpirun -hostfile hosts -rankfile rankfile -np 27 ./script_lmdz.x.ksh : -np 5 ./script_opa.xx.ksh 
    1073           # with script_lmdz.x.ksh : 
    1074           # #!/bin/ksh 
    1075           #export OMP_STACKSIZE=200M 
    1076           #export OMP_NUM_THREADS=4 
    1077           #./lmdz.x 
     1089          # Build configuration file 
     1090  
     1091              echo "#!/bin/sh" > prog_${ExeNameOut}.sh 
     1092          echo "(( init = $current_core + \$1 ))" >> prog_${ExeNameOut}.sh 
     1093          echo "(( index = init * $comp_proc_omp_loc ))" >> prog_${ExeNameOut}.sh 
     1094          echo "(( slot = index % 40 ))" >> prog_${ExeNameOut}.sh 
     1095              echo "echo ${ExeNameOut} taskset -c \$slot"-"\$((slot + $comp_proc_omp_loc - 1))" >> prog_${ExeNameOut}.sh 
     1096          echo "taskset -c \$slot"-"\$((slot + $comp_proc_omp_loc - 1)) ./script_${ExeNameOut}.ksh" >> prog_${ExeNameOut}.sh 
     1097 
     1098          IGCM_sys_Chmod u+x prog_${ExeNameOut}.sh 
     1099 
     1100        # Build hostlist file 
    10781101 
    10791102          for nb_proc_mpi in `seq 0 $(($comp_proc_mpi_loc-1))`; do 
    1080             (( index_host = current_core / core_per_node )) 
    1081             host_value=${host[${index_host}]} 
    1082             (( slot =  current_core % core_per_node )) 
    1083             virg="," 
    1084             string_final="" 
    1085             for index in `seq $slot $(($slot+$comp_proc_omp_loc-1))`; do 
    1086               string=$index$virg 
    1087               string_final=$string_final$string 
    1088             done 
    1089             string_final=$( echo $string_final | sed "s/.$//" ) 
    1090             echo "rank $rank=$host_value slot=$string_final" >> rankfile 
    1091             (( rank = rank + 1 )) 
    1092             (( current_core = current_core + comp_proc_omp_loc )) 
     1103             (( index_host = current_core / NB_CORE_PER_NODE )) 
     1104             host_value=${listnodes[${index_host}]} 
     1105               echo "$host_value" >> hostlist 
     1106             (( current_core = current_core + offset_comp_proc_loc )) 
    10931107          done 
    10941108        fi 
    10951109      done 
    1096     fi 
    1097  
    1098   # Only one executable (SPMD mode). 
     1110 
     1111      ## variable added to stop after 60s instead of 600s by default.  
     1112      ## This is used when no error comes from executables and when something stopped an executable without notice.  
     1113      export SLURM_WAIT=60 
     1114 
     1115      EXECUTION="${HOST_MPIRUN_COMMAND} --cpu-bind=none --distribution=arbitrary --multi-prog ./run_file" 
     1116 
     1117      IGCM_sys_Chmod u+x run_file 
     1118      if ( $DEBUG_sys ) ; then 
     1119        echo "run_file contains : " 
     1120        cat run_file 
     1121      fi 
     1122 
     1123    fi # if ${OK_PARA_MPMD} 
     1124 
    10991125  else 
     1126  # Only one executable (SPMD mode):  executionType=3, 4, 5 and 6 
    11001127 
    11011128    for comp in ${config_ListOfComponents[*]} ; do 
     
    11051132      if ( [ "X${ExeNameOut}" != X\"\" ] && [ "X${ExeNameOut}" != "Xinca.dat" ] ) ; then 
    11061133 
     1134        # Build script files 
     1135 
    11071136        echo "#!/bin/ksh" > script_${ExeNameOut}.ksh 
    11081137        echo ""  >> script_${ExeNameOut}.ksh 
     1138        IGCM_sys_Chmod u+x script_${ExeNameOut}.ksh 
     1139 
    11091140        if ( ${OK_PARA_OMP} ) ; then 
    1110           eval comp_proc_omp_loc=\${${comp}_PROC_OMP} 
    1111           echo "export OMP_STACKSIZE=200M" >> script_${ExeNameOut}.ksh 
    1112           echo "export OMP_NUM_THREADS=${comp_proc_omp_loc}" >> script_${ExeNameOut}.ksh 
     1141            eval comp_proc_omp_loc=\${${comp}_PROC_OMP} 
     1142            # Check if the number of threads is correct 
     1143            case ${comp_proc_omp_loc} in 
     1144                2|4|5|10|20) 
     1145                    IGCM_debug_Print 1 "You run ${ExeNameOut} on ${comp_proc_omp_loc} OMP threads" 
     1146                    ;; 
     1147                *) 
     1148                    IGCM_debug_Exit "ERROR with OMP parameters !" 
     1149                    IGCM_debug_Print 2 "${comp_proc_omp_loc} is not possible as number of OMP threads" 
     1150                    IGCM_debug_Print 2 "Only 2,4,5,10,20 as number of OMP threads are possible " 
     1151                    IGCM_debug_Verif_Exit 
     1152                    ;; 
     1153            esac  
     1154            echo ""  >> script_${ExeNameOut}.ksh 
     1155            echo "export OMP_STACKSIZE=3g"  >> script_${ExeNameOut}.ksh 
     1156            echo "export OMP_PLACES=cores"  >> script_${ExeNameOut}.ksh 
     1157            echo "OMP_NUM_THREADS=${comp_proc_omp_loc}" >> script_${ExeNameOut}.ksh 
    11131158        fi 
    1114         if  ( ${OK_PARA_MPI} ) ; then 
    1115           # Default : mpirun used if nb_proc gt 1 
    1116           # pour sortie out/err par process 
    1117           echo "./${ExeNameOut} > out_${ExeNameOut}.out.\${OMPI_COMM_WORLD_RANK} 2>out_${ExeNameOut}.err.\${OMPI_COMM_WORLD_RANK}"  >> script_${ExeNameOut}.ksh 
    1118           #echo "./${ExeNameOut}" >> script_${ExeNameOut}.ksh 
    1119           IGCM_sys_Chmod u+x script_${ExeNameOut}.ksh 
    1120           EXECUTION="${HOST_MPIRUN_COMMAND} ./script_${ExeNameOut}.ksh" 
    1121         else 
    1122           # Default : mpirun is NOT used if nb_proc eq 1 
    1123           # pour sortie out/err par process 
    1124           echo "./${ExeNameOut} > out_${ExeNameOut}.out 2>out_${ExeNameOut}.err" >> script_${ExeNameOut}.ksh 
    1125           #echo "./${ExeNameOut}" >> script_${ExeNameOut}.ksh 
    1126           IGCM_sys_Chmod u+x script_${ExeNameOut}.ksh 
    1127           EXECUTION="time ./script_${ExeNameOut}.ksh" 
    1128         fi 
    1129  
    1130         IGCM_debug_Print 1 "sys Ciclad : script_${ExeNameOut}.ksh contains" 
     1159 
     1160        eval comp_proc_mpi_loc=\${${comp}_PROC_MPI} 
     1161         
     1162        # To have out/err per process on different files 
     1163        echo "./${ExeNameOut} > out_${ExeNameOut}.out.\${SLURM_PROCID} 2>out_${ExeNameOut}.err.\${SLURM_PROCID}"  >> script_${ExeNameOut}.ksh 
     1164        EXECUTION="${HOST_MPIRUN_COMMAND} ./script_${ExeNameOut}.ksh" 
     1165 
     1166        IGCM_debug_Print 1 "sys Jean-Zay : script_${ExeNameOut}.ksh contains" 
    11311167        cat script_${ExeNameOut}.ksh 
    11321168 
     
    11351171    done 
    11361172 
    1137   fi 
    1138  
    1139   IGCM_debug_Print 1 "sys Ciclad : execution command is" 
     1173  fi # ${OK_PARA_MPMD} 
     1174 
     1175  IGCM_debug_Print 1 "sys meso-ipsl : execution command is " 
    11401176  IGCM_debug_Print 1 "$EXECUTION" 
    11411177 
Note: See TracChangeset for help on using the changeset viewer.