Changeset r1558

- Timestamp: 09/15/22 14:42:26 (20 months ago)
- Location: trunk/libIGCM
- Files: 5 edited, 1 copied
Legend: in the diffs below, lines prefixed with "+" were added in r1558, lines prefixed with "-" were removed, indented unprefixed lines are unmodified context, and "…" marks elided unchanged lines.
trunk/libIGCM/AA_create_se
r1525 → r1558

    #-Q- lxiv8 #PBS -o SE.$$
    #-Q- lxiv8 #PBS -S /bin/ksh
+   #-Q- mesoipsl #!/bin/ksh
+   #-Q- mesoipsl ######################
+   #-Q- mesoipsl ##  MESO ESPRI IPSL ##
+   #-Q- mesoipsl ######################
+   #-Q- mesoipsl #SBATCH --job-name=TS        # Job Name
+   #-Q- mesoipsl #SBATCH --output=TS.out_%I   # standard output
+   #-Q- mesoipsl #SBATCH --error=TS.out_%I    # error output
+   #-Q- mesoipsl #SBATCH -N 1                 # Number of core
+   #-Q- mesoipsl #SBATCH --time=10:00:00      # Wall clock limit (seconds)
    #-Q- ifort_CICLAD ######################
    #-Q- ifort_CICLAD ##   CICLAD    IPSL  ##
trunk/libIGCM/AA_create_ts
r1549 → r1558

    #-Q- lxiv8 #PBS -o TS.$$
    #-Q- lxiv8 #PBS -S /bin/ksh
+   #-Q- mesoipsl #!/bin/ksh
+   #-Q- mesoipsl ######################
+   #-Q- mesoipsl ##  MESO ESPRI IPSL ##
+   #-Q- mesoipsl ######################
+   #-Q- mesoipsl #SBATCH --job-name=TS        # Job Name
+   #-Q- mesoipsl #SBATCH --output=TS.out_%I   # standard output
+   #-Q- mesoipsl #SBATCH --error=TS.out_%I    # error output
+   #-Q- mesoipsl #SBATCH -N 1                 # Number of core
+   #-Q- mesoipsl #SBATCH --time=10:00:00      # Wall clock limit (seconds)
    #-Q- ifort_CICLAD ######################
    #-Q- ifort_CICLAD ##   CICLAD    IPSL  ##
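The same header block is added to AA_create_se above. These are Slurm batch directives: --job-name labels the job, --output/--error redirect stdout and stderr, -N 1 requests one node, and --time sets the wall-clock limit. The #-Q- mesoipsl tags are stripped when the jobs are generated for the mesoipsl target. A minimal usage sketch, assuming a generated post-processing job file named create_ts.job (file name illustrative):

    sbatch create_ts.job   # prints: Submitted batch job <jobid>
    squeue -u $USER        # the TS job should appear in the queue

The "Submitted batch job <jobid>" line is exactly what the library parses later in this changeset (gawk '{print $4}' in IGCM_sys_Qsub/IGCM_sys_QsubPost) to recover the job ID.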
trunk/libIGCM/AA_job
r1555 → r1558

    #-Q- lxiv8 #PBS -v BATCH_NUM_PROC_TOT=::JobNumProcTot::
    #-Q- lxiv8 #PBS -l nodes=1:ppn=::JobNumProcTot::
+   #-Q- mesoipsl #!/bin/ksh
+   #-Q- mesoipsl ######################
+   #-Q- mesoipsl ##  MESO ESPRI IPSL ##
+   #-Q- mesoipsl ######################
+   #-Q- mesoipsl #SBATCH --job-name=::Jobname::                     # Job Name
+   #-Q- mesoipsl #SBATCH --output=Script_Output_::Jobname::.000001  # standard output
+   #-Q- mesoipsl #SBATCH --error=Script_Output_::Jobname::.000001   # error output
+   #-Q- mesoipsl #SBATCH --ntasks=::JobNumProcTot::                 # Number of MPI tasks
+   #-Q- mesoipsl #SBATCH --cpus-per-task=::openMPthreads::          # Number of openMP threads.
+   #-Q- mesoipsl #SBATCH --hint=nomultithread                       # 1 processus MPI par par physical core (no hyperthreading)
+   #-Q- mesoipsl #SBATCH --time=30                                  # Wall clock limit (minutes)
+   #-Q- mesoipsl set +x
    #-Q- ifort_CICLAD ######################
    #-Q- ifort_CICLAD ##   CICLAD    IPSL  ##
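At job-creation time the ::Jobname::, ::JobNumProcTot:: and ::openMPthreads:: placeholders are substituted with the experiment's settings. A sketch of the resulting header, assuming a hypothetical job MYEXP running 80 MPI tasks with 1 OpenMP thread:

    #!/bin/ksh
    ######################
    ##  MESO ESPRI IPSL ##
    ######################
    #SBATCH --job-name=MYEXP                      # Job Name
    #SBATCH --output=Script_Output_MYEXP.000001   # standard output
    #SBATCH --error=Script_Output_MYEXP.000001    # error output
    #SBATCH --ntasks=80                           # Number of MPI tasks
    #SBATCH --cpus-per-task=1                     # Number of OpenMP threads per task
    #SBATCH --hint=nomultithread                  # 1 MPI process per physical core (no hyperthreading)
    #SBATCH --time=30                             # Wall clock limit (minutes)
    set +x

Note that for MPMD runs (executionType=2) the libIGCM_sys_mesoipsl.ksh changes below rewrite this header again: the --cpus-per-task line is deleted and --nodes/--exclusive directives are inserted before the --ntasks line.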
trunk/libIGCM/AA_monitoring
r1525 → r1558

    #-Q- lxiv8 #PBS -o MONITORING.$$
    #-Q- lxiv8 #PBS -S /bin/ksh
+   #-Q- mesoipsl #!/bin/ksh
+   #-Q- mesoipsl ######################
+   #-Q- mesoipsl ##  MESO ESPRI IPSL ##
+   #-Q- mesoipsl ######################
+   #-Q- mesoipsl #SBATCH --job-name=MONITORING        # Job Name
+   #-Q- mesoipsl #SBATCH --output=MONITORING.out_%J   # standard output
+   #-Q- mesoipsl #SBATCH --error=MONITORING.out_%J    # error output
+   #-Q- mesoipsl #SBATCH --ntasks=1                   # Number of core
+   #-Q- mesoipsl #SBATCH --hint=nomultithread         # 1 processus MPI par coeur physique (pas d'hyperthreading)
+   #-Q- mesoipsl #SBATCH --time=10:00:00              # Wall clock limit (seconds)
+   #-Q- mesoipsl set +x
    #-Q- ifort_CICLAD ######################
    #-Q- ifort_CICLAD ##   CICLAD    IPSL  ##
trunk/libIGCM/libIGCM_sys/libIGCM_sys.ksh
r1521 → r1558

      SYSTEM=lxiv8
      . ${libIGCM}/libIGCM_sys/libIGCM_sys_obelix.ksh;;
+   spiritx*)
+     [ ! X${TaskType} = Xchecking ] && echo "Source machine dependent settings for spiritx at MESO ESPRI IPSL cluster."
+     CENTER=spiritx
+     SYSTEM=mesoipsl
+     . ${libIGCM}/libIGCM_sys/libIGCM_sys_mesoipsl.ksh;;
+   spirit*)
+     [ ! X${TaskType} = Xchecking ] && echo "Source machine dependent settings for spirit at MESO ESPRI IPSL cluster."
+     CENTER=spirit
+     SYSTEM=mesoipsl
+     . ${libIGCM}/libIGCM_sys/libIGCM_sys_mesoipsl.ksh;;
    ciclad*)
      [ ! X${TaskType} = Xchecking ] && echo "Source machine dependent settings for ciclad for running at ciclad."
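Pattern order matters here: a ksh case statement takes the first matching branch, and spirit* also matches spiritx hostnames, so the spiritx* entry must stay above spirit*. A minimal sketch of the mechanism (libIGCM_sys.ksh matches on its own host variable; hostnames illustrative):

    case $( hostname ) in
      spiritx*) CENTER=spiritx ;;   # matches e.g. spiritx1.ipsl.fr
      spirit*)  CENTER=spirit ;;    # matches e.g. spirit2.ipsl.fr; would also swallow spiritx1 if listed first
    esac

Both branches source the same libIGCM_sys_mesoipsl.ksh; only CENTER differs, and the sourced file uses it to pick per-machine paths (e.g. /scratchu vs /scratchx in the diff below).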
trunk/libIGCM/libIGCM_sys/libIGCM_sys_mesoipsl.ksh
r1552 → r1558

    #D-#==================================================
-   #D-LibIGCM_sys for ciclad
+   #D-LibIGCM_sys for IPSL ESPRI MESO cluster: spirit and spiritx
    #D-#==================================================
    #D-
…
    typeset PROJECT=NONE
    # jobWarningDelay in seconds
-   typeset jobWarningDelay=${PBS_WALLTIME}
+   #typeset jobWarningDelay=${PBS_WALLTIME}

    #D-
…
    # Submit command
-   typeset SUBMIT=${SUBMIT:=qsub}
+   typeset SUBMIT=${SUBMIT:=sbatch}
    # rsync with path
    typeset -r RSYNC=/usr/bin/rsync
…
    # Access to module command
    #====================================================
-   . /usr/share/Modules/init/ksh
+   . /etc/profile.d/modules.sh

    #====================================================
…
      IGCM_debug_Print 1 "Modules will be loaded later in IGCM_sys_activ_variables."
    else
-     if [ $CENTER == IPSL-ciclad ] ; then
-       # At ciclad
-       . /home/igcmg/MachineEnvironment/ciclad/atlas_env_ciclad
-     else
-       # At climserv use the same files stored at ciclad but adapt the path
-       . /ciclad-home/igcmg/MachineEnvironment/climserv/atlas_env_climserv
+     if [ $CENTER == spirit ] ; then
+       # At spirit
+       . /home/igcmg/MachineEnvironment/mesoipsl/atlas_env_mesoipsl
+     elif [ $CENTER == spiritx ] ; then
+       # At spiritx
+       . /ciclad-home/igcmg/MachineEnvironment/mesoipsl/atlas_env_mesoipsl
      fi
    fi
-
-   # Load python
-   module load python/2.7-anaconda > /dev/null 2>&1

    [ ! X${TaskType} = Xchecking ] && IGCM_debug_Print 1 "List of loaded modules:"
…
    #====================================================
    # For rebuild
-   if [ $CENTER == IPSL-ciclad ] ; then
-     export PATH=${PATH}:/home/igcmg/rebuild/src_X64_CICLAD/modipsl_v2_2_3_netcdf4.2/bin/
+   if [ $CENTER == spirit ] ; then
+     export PATH=${PATH}:/home/igcmg/rebuild/spirit/modipsl/modeles/IOIPSL/bin/
    else
-     export PATH=${PATH}:/ciclad-home/igcmg/rebuild/src_X64_CICLAD/modipsl_v2_2_3_netcdf4.2/bin/
+     export PATH=${PATH}:/ciclad-home/igcmg/rebuild/spirit/modipsl/modeles/IOIPSL/bin/
    fi
    #====================================================
…
    #====================================================
    #- SUBMIT_DIR : submission dir
-   if [ X${PBS_O_WORKDIR} != X ] ; then
-     typeset -x SUBMIT_DIR=${SUBMIT_DIR:=${PBS_O_WORKDIR}}
+   if [ X${SLURM_SUMBIT_DIR} != X ] ; then
+     typeset -x SUBMIT_DIR=${SUBMIT_DIR:=${SLURM_SUBMIT_DIR}}
    else
      typeset -x SUBMIT_DIR=${SUBMIT_DIR:=${PWD}}
…
    #====================================================
-   #- RUN_DIR_PATH : Temporary working directory (=> TMP)
-   if [ X${PBS_JOBID} != X ] ; then
-     if [ $CENTER == IPSL-ciclad ] ; then
-       typeset -r RUN_DIR_PATH=${RUN_DIR_PATH:=/data/${LOGIN}/RUN_DIR/${PBS_JOBID}_${$}}
-     else
-       typeset -r RUN_DIR_PATH=${RUN_DIR_PATH:=/homedata/${LOGIN}/RUN_DIR/${PBS_JOBID}_${$}}
-     fi
-   else
-     typeset -r RUN_DIR_PATH=${RUN_DIR_PATH:=/tmp/tmp$$}
-   fi
+   #- RUN_DIR_PATH : Temporary working directory
+
+   if [ $CENTER == spirit ] ; then
+     RUN_DIR_PATH=${RUN_DIR_PATH:=/scratchu/${LOGIN}/RUN_DIR}
+   else
+     RUN_DIR_PATH=${RUN_DIR_PATH:=/scratchx/${LOGIN}/RUN_DIR}
+   fi
+   if [ X${SLURM_JOBID} != X ] ; then
+     typeset -r RUN_DIR_PATH=${RUN_DIR_PATH}/${SLURM_JOBID}_${$}
+   else
+     typeset -r RUN_DIR_PATH=${RUN_DIR_PATH}/${$}
+   fi
…
    #====================================================
    #- OUTCOMMAND_PATH : tmp place to store command lines standard error and outputs
-   typeset -r OUTCOMMAND_PATH=/tmp
+   # Note that typeset -r can not be set at spirit because it sets the path as read-only
+   typeset OUTCOMMAND_PATH=/tmp
…
    #====================================================
-   #- set PackDefault to false on ciclad
+   #- set PackDefault to false on meso-ipsl
    PackDefault=false
…
    #====================================================
    #- ARCHIVE (dedicated to large files)
-   if [ $CENTER == IPSL-ciclad ] ; then
-     # At ciclad
+   if [ $CENTER == spirit ] ; then
+     # At spirit
      ARCHIVE=${ARCHIVE:=/data/${LOGIN}}
    else
-     # At climserv
+     # At spiritx
      ARCHIVE=${ARCHIVE:=/homedata/${LOGIN}}
    fi
…
    fi

-   # ON CICLAD NO SPECIAL CASE WHEN X${config_UserChoices_SpaceName} = XTEST
+   # ON MESO-IPSL NO SPECIAL CASE WHEN X${config_UserChoices_SpaceName} = XTEST

    #====================================================
…
    fi
    typeset options status
-   options="-o ${SUBMIT_DIR}/${Script_Output}"
-   /usr/bin/qsub ${options} $1 > ${OUTCOMMAND_PATH}/out_command_${LOGIN}.$$ 2>&1
+   options="-o ${SUBMIT_DIR}/${Script_Output} -e ${SUBMIT_DIR}/${Script_Output}"
+   /usr/bin/time sbatch ${options} $1 > ${OUTCOMMAND_PATH}/out_command_${LOGIN}.$$ 2>&1
    status=$?
…
      IGCM_debug_Exit "IGCM_sys_Qsub"
    else
-     JobID=$( gawk {'print $1'} ${OUTCOMMAND_PATH}/out_command_${LOGIN}.$$ )
+     JobID=$( gawk {'print $4'} ${OUTCOMMAND_PATH}/out_command_${LOGIN}.$$ )
    fi
    IGCM_debug_PopStack "IGCM_sys_Qsub"
…
    fi
    typeset options status
-   options="-o ${POST_DIR}/${Script_Post_Output}.out -v ${listVarEnv}"
-   /usr/bin/qsub ${options} ${libIGCM_POST}/$1.job > ${OUTCOMMAND_PATH}/out_command_${LOGIN}.$$ 2>&1
+   options="-o ${POST_DIR}/${Script_Post_Output}.out -e ${POST_DIR}/${Script_Post_Output}.out"
+   sbatch ${options} ${libIGCM_POST}/$1.job > ${OUTCOMMAND_PATH}/out_command_${LOGIN}.$$ 2>&1
    status=$?
…
      IGCM_debug_Exit "IGCM_sys_QsubPost"
    else
-     JobID=$( gawk {'print $1'} ${OUTCOMMAND_PATH}/out_command_${LOGIN}.$$ )
+     JobID=$( gawk {'print $4'} ${OUTCOMMAND_PATH}/out_command_${LOGIN}.$$ )
    fi
    IGCM_debug_PopStack "IGCM_sys_QsubPost"
…
    else
      IGCM_debug_Print 1 "IGCM_sys_active_variables : Default modules will be used"
-     if [ $CENTER == IPSL-ciclad ] ; then
-       # At ciclad
-       EnvFile=/home/igcmg/.atlas_env_ciclad_ksh
+     if [ $CENTER == spirit ] ; then
+       # At spirit
+       EnvFile=/home/igcmg/MachineEnvironment/mesoipsl/atlas_env_mesoipsl
      else
-       # At climserv use the same files stored at ciclad but adapt the path
-       EnvFile=/ciclad-home/igcmg/.atlas_env_ciclad_ksh
+       # At spirit
+       EnvFile=/ciclad-home/igcmg/MachineEnvironment/mesoipsl/atlas_env_mesoipsl
      fi
    fi
…
    . ${EnvFile}

-   IGCM_debug_Print 1 "IGCM_sys_active_variables : Now loaded modules for ciclad/climserv."
+   IGCM_debug_Print 1 "IGCM_sys_active_variables : Now loaded modules for meso-ipsl."
    module list
…
    elif [ ${executionType} -eq 2 ] ; then
      # MPMD + MPI + OMP
-     sed -e "s/::openMPthreads::/${openMPthreads}/" \
-         -e "s/::JobNumProcTot::/${coreNumber}/" \
-       ${file} > ${file}.tmp
-
+     (( nodeNumber = coreNumber / NB_CORE_PER_NODE ))
+     [ $(( ${coreNumber} % ${NB_CORE_PER_NODE} )) -ne 0 ] && (( nodeNumber = nodeNumber + 1 ))
+     sed -e "/::openMPthreads::/d" \
+         -e "s/::JobNumProcTot::/${mpiTasks}/" \
+         -e "/ntasks/i\#SBATCH --nodes=${nodeNumber}" \
+         -e "/ntasks/i\#SBATCH --exclusive" \
+       ${file} > ${file}.tmp
    elif [ ${executionType} -eq 3 ] ; then
      # SPMD + MPI/OMP
…
      echo "IGCM_sys_build_execution_scripts " $@
    fi
    IGCM_debug_Print 1 "executionType= ${executionType}"
    EXECUTION=${HOST_MPIRUN_COMMAND}

-   # MPMD mode
    if ( ${OK_PARA_MPMD} ) ; then

-     # Only MPI (MPMD)
+     # MPMD mode
+     # 1 MPI only     : executionType=1
+     # 2 MPI/OpenMP   : executionType=2
+
+     if [ -f run_file ] ; then
+       IGCM_sys_Rm -f run_file
+     fi
+     touch run_file
+
+     # case 1 : Only MPI (MPMD)
      if ( ! ${OK_PARA_OMP} ) ; then
-
-       if [ -f run_file ] ; then
-         IGCM_sys_Rm -f run_file
-       fi
-       touch run_file
-
        # Build run_file
        current_core=0
        # First loop on the components for the coupler ie oasis (only if oasis3)
        # the coupler ie oasis3 must be the first one
…
          eval ExeNameOut=\${config_Executable_${comp}[1]}

-         # for CPL component only
-         if [ "X${comp}" = "XCPL" ] && [ "X${ExeNameOut}" != X\"\" ] ; then
-           eval comp_proc_mpi_loc=\${${comp}_PROC_MPI}
-           eval comp_proc_omp_loc=\${${comp}_PROC_OMP}
-           echo "-np ${comp_proc_mpi_loc} ./${ExeNameOut} " >> run_file
-         fi
-       done
-
-       # Then second loop on the components
-       for comp in ${config_ListOfComponents[*]} ; do
-
-         eval ExeNameIn=\${config_Executable_${comp}[0]}
-         eval ExeNameOut=\${config_Executable_${comp}[1]}
-
-         # Only if we really have an executable for the component and not the coupler ie oasis:
-         if ( [ "X${ExeNameOut}" != X\"\" ] && [ "X${comp}" != "XCPL" ] ) ; then
-
+         # Only if the component has an executable
+         if ( [ "X${ExeNameOut}" != X\"\" ] ) ; then
+
+           # eval comp_proc_mpi_loc=\${${comp}_PROC_MPI}
+           # (( end_core = ${current_core} + ${comp_proc_mpi_loc} - 1 ))
+           # echo "${current_core}-${end_core} ./${ExeNameOut}" >> run_file
+           # (( current_core = ${end_core} + 1 ))
            eval comp_proc_mpi_loc=\${${comp}_PROC_MPI}
            eval comp_proc_omp_loc=\${${comp}_PROC_OMP}
            echo "-np ${comp_proc_mpi_loc} ./${ExeNameOut}" >> run_file
+
          fi
        done
…
          cat run_file
        fi

+     else
+
+       # 2 MPI/OpenMP : executionType=2
+
        # MPI-OpenMP (MPMD)
-     else
-
-       # Hosts treatment
-       ${HOST_MPIRUN_COMMAND} hostname | sort | uniq > hosts.tmp
-
-       i=0
-       rm -f hosts
-       IGCM_debug_Print 1 "sys Ciclad, Hosts available :"
-       for nodes in `cat hosts.tmp` ; do
-         host[$i]=$nodes
-         echo "${host[$i]} slots=1 max_slots=1" >> hosts
-         IGCM_debug_Print 1 ${host[$i]}
-         i=$((i+1))
-       done
-       rm -f hosts.tmp
-
-       listnodes=${host[*]}
-
-       EXECUTION="${HOST_MPIRUN_COMMAND} -hostfile hosts"
-
-       # Initialisation
-       rank=0
-       current_core=0
-       core_per_node=8
-       init_exec=n
-
-       # Loop on the components
+       # export SLURM_HOSTFILE=./hostlist
+       # srun --cpu-bind=none --distribution=arbitrary --multi-prog ./run_file
+       # example of hostlist file :
+       # r3i3n33
+       # r3i3n33
+       # ...
+       # example of run_file :
+       # 0-70 ./prog_lmdz.x.sh %o %t
+       # 71-430 ./prog_opa.xx.sh %o %t
+       # 431-431 ./prog_xios.x.sh %o %t
+       # examples of prog_file :
+       # prog_lmdz.x.sh :
+       # (( init = 0 + $1 ))
+       # (( index = init * 10 ))
+       # (( slot = index % 40 ))
+       # taskset -c $slot-$((slot + 10 - 1)) ./script_lmdz.x.ksh
+       # that will become
+       # taskset -c 0-9 ./script_lmdz.x.ksh
+       # ...
+       # with script_lmdz.x.ksh
+       # export OMP_STACKSIZE=3g
+       # export OMP_PLACES=cores
+       # export OMP_NUM_THREADS=10
+       # ./lmdz.x > out_lmdz.x.out.${SLURM_PROCID} 2>out_lmdz.x.err.${SLURM_PROCID}
+
+       # Hosts treatment
+       _bkIFS=$IFS;
+       IFS=$'\n'; set -f
+       listnodes=($(< <( scontrol show hostnames $SLURM_JOB_NODELIST )))
+       IFS=$_bkIFS; set +f
+       rm -f hostlist
+
+       # Loop on the components to build run_file and script_exec files
+       rank=0
+       current_core=0
+       current_core_mpi=0
+
        for comp in ${config_ListOfComponents[*]} ; do
…
          eval comp_proc_mpi_loc=\${${comp}_PROC_MPI}
          eval comp_proc_omp_loc=\${${comp}_PROC_OMP}
+         eval comp_proc_nod_loc=\${${comp}_PROC_NOD}
+
+         # Build script files

          echo "#!/bin/ksh" > script_${ExeNameOut}.ksh
          echo "" >> script_${ExeNameOut}.ksh
          if [ ${comp_proc_omp_loc} -gt 1 ] ; then
-
-           # Check if the number of threads is correct
-           case ${comp_proc_omp_loc} in
-             2|4|8)
-               IGCM_debug_Print 1 "You run ${ExeNameOut} on ${comp_proc_omp_loc} OMP threads"
-               ;;
-             *)
-               IGCM_debug_Exit "ERROR with OMP parameters !"
-               IGCM_debug_Print 2 "${comp_proc_omp_loc} is not possible as number of OMP threads"
-               IGCM_debug_Print 2 "Only 2,4,8 as number of OMP threads are possible "
-               IGCM_debug_Verif_Exit
-               ;;
-           esac
-           #echo "export KMP_STACKSIZE=3g" >> script_${ExeNameOut}.ksh
-           #echo "export KMP_LIBRARY=turnaround" >> script_${ExeNameOut}.ksh
-           #echo "export MKL_SERIAL=YES" >> script_${ExeNameOut}.ksh
-           echo "export OMP_STACKSIZE=200M" >> script_${ExeNameOut}.ksh
-           echo "export OMP_NUM_THREADS=${comp_proc_omp_loc}" >> script_${ExeNameOut}.ksh
+           # Check if the number of threads is correct
+           case ${comp_proc_omp_loc} in
+             2|4|5|10|20)
+               IGCM_debug_Print 1 "You run ${ExeNameOut} on ${comp_proc_omp_loc} OMP threads"
+               ;;
+             *)
+               IGCM_debug_Exit "ERROR with OMP parameters !"
+               IGCM_debug_Print 2 "${comp_proc_omp_loc} is not possible as number of OMP threads"
+               IGCM_debug_Print 2 "Only 2,4,5,10,20 as number of OMP threads are possible "
+               IGCM_debug_Verif_Exit
+               ;;
+           esac
+           echo "export OMP_STACKSIZE=3g" >> script_${ExeNameOut}.ksh
+           echo "export OMP_PLACES=cores" >> script_${ExeNameOut}.ksh
+           echo "export OMP_NUM_THREADS=${comp_proc_omp_loc}" >> script_${ExeNameOut}.ksh
+
          fi

-         #echo "./${ExeNameOut}" >> script_${ExeNameOut}.ksh
-         echo "(( MYMPIRANK = OMPI_COMM_WORLD_RANK ))" >> script_${ExeNameOut}.ksh
-         echo "MYMPIRANK=\$(printf '%3.3d\n' \${MYMPIRANK})" >> script_${ExeNameOut}.ksh
-         echo "./${ExeNameOut} > out_${ExeNameOut}.out.\${MYMPIRANK} 2>out_${ExeNameOut}.err.\${MYMPIRANK}" >> script_${ExeNameOut}.ksh
+         # to have out/err per process on different files
+         echo "./${ExeNameOut} > out_${ExeNameOut}.out.\${SLURM_PROCID} 2>out_${ExeNameOut}.err.\${SLURM_PROCID}" >> script_${ExeNameOut}.ksh
+
          IGCM_sys_Chmod u+x script_${ExeNameOut}.ksh

-         if [ ${init_exec} = y ] ; then
-           EXECUTION="${EXECUTION} : -np ${comp_proc_mpi_loc} ./script_${ExeNameOut}.ksh"
+         # Build run_file
+         # Only if the component has an executable
+         if ( [ "X${ExeNameOut}" != X\"\" ] ) ; then
+
+           eval comp_proc_mpi_loc=\${${comp}_PROC_MPI}
+           (( end_core = ${current_core_mpi} + ${comp_proc_mpi_loc} - 1 ))
+           echo "${current_core_mpi}-${end_core} ./prog_${ExeNameOut}.sh %o %t" >> run_file
+           (( current_core_mpi = ${end_core} + 1 ))
+         fi
+
+         if [ ${comp_proc_nod_loc} -gt 1 ] ; then
+           (( offset_comp_proc_loc = NB_CORE_PER_NODE / (comp_proc_mpi_loc / comp_proc_nod_loc) ))
          else
-           EXECUTION="${EXECUTION} -np ${comp_proc_mpi_loc} ./script_${ExeNameOut}.ksh"
-           init_exec=y
+           (( offset_comp_proc_loc = comp_proc_omp_loc ))
          fi

-         # Build rankfile : method used to assign cores and nodes for the MPI process
-         # Ex :
-         #rank 0=curie5296 slot=0,1,2,3
-         #rank 1=curie5296 slot=4,5,6,7
-         # Example of final command :
-         # mpirun -hostfile hosts -rankfile rankfile -np 27 ./script_lmdz.x.ksh : -np 5 ./script_opa.xx.ksh
-         # with script_lmdz.x.ksh :
-         # #!/bin/ksh
-         #export OMP_STACKSIZE=200M
-         #export OMP_NUM_THREADS=4
-         #./lmdz.x
+         # Build configuration file
+
+         echo "#!/bin/sh" > prog_${ExeNameOut}.sh
+         echo "(( init = $current_core + \$1 ))" >> prog_${ExeNameOut}.sh
+         echo "(( index = init * $comp_proc_omp_loc ))" >> prog_${ExeNameOut}.sh
+         echo "(( slot = index % 40 ))" >> prog_${ExeNameOut}.sh
+         echo "echo ${ExeNameOut} taskset -c \$slot"-"\$((slot + $comp_proc_omp_loc - 1))" >> prog_${ExeNameOut}.sh
+         echo "taskset -c \$slot"-"\$((slot + $comp_proc_omp_loc - 1)) ./script_${ExeNameOut}.ksh" >> prog_${ExeNameOut}.sh
+
+         IGCM_sys_Chmod u+x prog_${ExeNameOut}.sh
+
+         # Build hostlist file

          for nb_proc_mpi in `seq 0 $(($comp_proc_mpi_loc-1))`; do
-           (( index_host = current_core / core_per_node ))
-           host_value=${host[${index_host}]}
-           (( slot = current_core % core_per_node ))
-           virg=","
-           string_final=""
-           for index in `seq $slot $(($slot+$comp_proc_omp_loc-1))`; do
-             string=$index$virg
-             string_final=$string_final$string
-           done
-           string_final=$( echo $string_final | sed "s/.$//" )
-           echo "rank $rank=$host_value slot=$string_final" >> rankfile
-           (( rank = rank + 1 ))
-           (( current_core = current_core + comp_proc_omp_loc ))
+           (( index_host = current_core / NB_CORE_PER_NODE ))
+           host_value=${listnodes[${index_host}]}
+           echo "$host_value" >> hostlist
+           (( current_core = current_core + offset_comp_proc_loc ))
          done
        fi
      done
-     fi
-
-   # Only one executable (SPMD mode).
+
+       ## variable added to stop after 60s instead of 600s by default.
+       ## This is used when no error comes from executables and when something stopped an executable without notice.
+       export SLURM_WAIT=60
+
+       EXECUTION="${HOST_MPIRUN_COMMAND} --cpu-bind=none --distribution=arbitrary --multi-prog ./run_file"
+
+       IGCM_sys_Chmod u+x run_file
+       if ( $DEBUG_sys ) ; then
+         echo "run_file contains : "
+         cat run_file
+       fi
+
+     fi # if ${OK_PARA_MPMD}
+
    else
+     # Only one executable (SPMD mode): executionType=3, 4, 5 and 6

      for comp in ${config_ListOfComponents[*]} ; do
…
        if ( [ "X${ExeNameOut}" != X\"\" ] && [ "X${ExeNameOut}" != "Xinca.dat" ] ) ; then

+         # Build script files
+
          echo "#!/bin/ksh" > script_${ExeNameOut}.ksh
          echo "" >> script_${ExeNameOut}.ksh
+         IGCM_sys_Chmod u+x script_${ExeNameOut}.ksh
+
          if ( ${OK_PARA_OMP} ) ; then
            eval comp_proc_omp_loc=\${${comp}_PROC_OMP}
-           echo "export OMP_STACKSIZE=200M" >> script_${ExeNameOut}.ksh
-           echo "export OMP_NUM_THREADS=${comp_proc_omp_loc}" >> script_${ExeNameOut}.ksh
+           # Check if the number of threads is correct
+           case ${comp_proc_omp_loc} in
+             2|4|5|10|20)
+               IGCM_debug_Print 1 "You run ${ExeNameOut} on ${comp_proc_omp_loc} OMP threads"
+               ;;
+             *)
+               IGCM_debug_Exit "ERROR with OMP parameters !"
+               IGCM_debug_Print 2 "${comp_proc_omp_loc} is not possible as number of OMP threads"
+               IGCM_debug_Print 2 "Only 2,4,5,10,20 as number of OMP threads are possible "
+               IGCM_debug_Verif_Exit
+               ;;
+           esac
+           echo "" >> script_${ExeNameOut}.ksh
+           echo "export OMP_STACKSIZE=3g" >> script_${ExeNameOut}.ksh
+           echo "export OMP_PLACES=cores" >> script_${ExeNameOut}.ksh
+           echo "OMP_NUM_THREADS=${comp_proc_omp_loc}" >> script_${ExeNameOut}.ksh
          fi
-         if ( ${OK_PARA_MPI} ) ; then
-           # Default : mpirun used if nb_proc gt 1
-           # pour sortie out/err par process
-           echo "./${ExeNameOut} > out_${ExeNameOut}.out.\${OMPI_COMM_WORLD_RANK} 2>out_${ExeNameOut}.err.\${OMPI_COMM_WORLD_RANK}" >> script_${ExeNameOut}.ksh
-           #echo "./${ExeNameOut}" >> script_${ExeNameOut}.ksh
-           IGCM_sys_Chmod u+x script_${ExeNameOut}.ksh
-           EXECUTION="${HOST_MPIRUN_COMMAND} ./script_${ExeNameOut}.ksh"
-         else
-           # Default : mpirun is NOT used if nb_proc eq 1
-           # pour sortie out/err par process
-           echo "./${ExeNameOut} > out_${ExeNameOut}.out 2>out_${ExeNameOut}.err" >> script_${ExeNameOut}.ksh
-           #echo "./${ExeNameOut}" >> script_${ExeNameOut}.ksh
-           IGCM_sys_Chmod u+x script_${ExeNameOut}.ksh
-           EXECUTION="time ./script_${ExeNameOut}.ksh"
-         fi
-
-         IGCM_debug_Print 1 "sys Ciclad : script_${ExeNameOut}.ksh contains"
+
+         eval comp_proc_mpi_loc=\${${comp}_PROC_MPI}
+
+         # To have out/err per process on different files
+         echo "./${ExeNameOut} > out_${ExeNameOut}.out.\${SLURM_PROCID} 2>out_${ExeNameOut}.err.\${SLURM_PROCID}" >> script_${ExeNameOut}.ksh
+         EXECUTION="${HOST_MPIRUN_COMMAND} ./script_${ExeNameOut}.ksh"
+
+         IGCM_debug_Print 1 "sys Jean-Zay : script_${ExeNameOut}.ksh contains"
          cat script_${ExeNameOut}.ksh
…
      done

-   fi
-
-   IGCM_debug_Print 1 "sys Ciclad : execution command is"
+   fi # ${OK_PARA_MPMD}
+
+   IGCM_debug_Print 1 "sys meso-ipsl : execution command is "
    IGCM_debug_Print 1 "$EXECUTION"
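To make the generated MPMD plumbing concrete, here is a worked example of the core-binding arithmetic, assuming the LMDZ settings from the comments above (10 OpenMP threads per MPI task, component starting at current_core=0) and the hard-coded 40 cores per node. srun --multi-prog invokes each prog_<exe>.sh with %o (task offset within the component's range) and %t (global task number); for offset 3 the generated prog_lmdz.x.sh evaluates to:

    (( init = 0 + 3 ))        # current_core + offset -> 3
    (( index = init * 10 ))   # one 10-thread slot per MPI task -> 30
    (( slot = index % 40 ))   # fold onto a 40-core node -> 30
    taskset -c 30-39 ./script_lmdz.x.ksh   # pin this task's 10 threads to cores 30-39

Offset 4 wraps back to cores 0-9 (index 40, slot 0) on the next node; the hostlist file, which repeats each node name once per MPI task placed on it, combined with srun --distribution=arbitrary, is what ensures that task actually lands there.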