#!/bin/ksh
# $Id$
######################################################
# Original : C. Talandier for NEMO team
# Contact  : nemo_st@locean-ipsl.upmc.fr
#
# INPUT ARGS: 9 (the last three are optional)
#   - CONF  : configuration name (ORCA2_LIM, GYRE ..)
#   - MTIME : timing/no timing (timing or notiming)
#   - JOBTOL: jobs to launch: all, long, short or gtime
#   - OWNEXP: name of the experiment performed
#   - PRC   : total number of processors
#   - REFTAG: reference tag name to be compared to
#   - USE_IOMPUT: true or false (default false)
#   - USE_IOSERVER: true or false (default false)
#   - NB_PROCS_IOSERVER: (default 0)
#
# WORK: This script aims to (for each standard configuration) :
#
#  I. Take into account target's specificities
#     Variables below must be filled for a new target
#     - W_XX   = name of target
#     - LAUN   = name of jobs launcher command
#     - LLJOBS = name of jobs listing command
#     - LSUB   = name of running command
#     - LPERF  = name of timing command for the whole run
#     - LJTIM  = required time for a one year run (/proc)
#     - LJTIMJ = required time for a one year run (job)
#     - CMDGET = name of specific command to retrieve files
#     - SYMBOL = identifier associated with a platform which
#                is used to name all output files
#                Ex: For Brodie, this symbol could be "B_" so
#                    file names look like B_solver.stat ...
#
#  II. Build jobs
#     - based on specific header depending on target, it must
#       be provided for a new target following samples
#       jhd_[target_name]_mon for mono-processors runs
#       jhd_[target_name]_mpi for multi-processors MPI runs
#       jhd_[target_name]_omp for multi-processors OpenMP runs
#
#  III. Launch jobs
#     to test both reproducibility and restartability
#     for each configuration NAME_CONF
#     & for each run type [mon, mpi, omp]
#     3 runs will be launched:
#       - LONG
#       - 1_SHORT
#       - 2_SHORT
#
#  IV. Launch cron job
#     As soon as no jobs (for a given configuration) are left in
#     the batch queue, this cron will launch the assessment.ksh
#     script
#
##########################################################
#set -xv
##########################################################
##### Begin Users modifications
##########################################################
# IODIR  : directory where to get ORCA2_LIM_nemo_v3.1.tar
# TARGET : name of the target platform; it selects the batch settings
#          and the job header used further down
#          (sx8brodie, sx8mercure, aix6 or osxxlf)
##########################################################
IODIR=/u/rech/eee/reee512/NEMO/
TARGET=sx8brodie
##########################################################
##### End Users modifications
##########################################################

# Positional arguments; the last three are optional.
CONF=$1
MTIME=$2
JOBTOL=$3
OWNEXP=$4
PRC=$5
REFTAG=$6
USE_IOMPUT=${7:-false}
USE_IOSERVER=${8:-false}
NB_PROCS_IOSERVER=${9:-0}

# The IO server is only kept when iom_put is used, and IO server
# processes are only kept when the IO server is used.
[ "${USE_IOMPUT}" = "false" ] && USE_IOSERVER=false
[ "${USE_IOSERVER}" = "false" ] && NB_PROCS_IOSERVER=0

# RUN is the name of the current directory; both expansions are quoted so
# that a path containing blanks is still handed to basename as one argument.
RUN=$(basename "$(pwd)")
# A "mon" run always uses a single processor, whatever PRC was given.
[ "${RUN}" = "mon" ] && PRC=1

# On the SX8 targets, NEMO processes + IO server processes must not exceed 8.
case "${TARGET}" in
    sx8brodie | sx8mercure )
        if [ $(( ${PRC:-0} + ${NB_PROCS_IOSERVER} )) -gt 8 ] ; then
            echo NVTK is not made to use more than 1 node on SX8 machines... >&2
            echo please, reduce the number of cpu used by NEMO or/and the ioserver >&2
            exit 1
        fi ;;
esac
# ---------------------------------------------------------------
#  I. Take into account target's specificities
#     Select appropriate target lines in job_${CONF}.ksh
# ---------------------------------------------------------------
# Settings defined for each target:
#   W_XX   : marker prefixing the lines of job_${CONF}.ksh kept for the target
#   LAUN   : jobs launcher command (glued in front of the job file name)
#   LLJOBS : jobs listing command
#   LSUB   : running command
#   LPERF  : timing command for the whole run
#   LJTIM  : required time for a one year run (/proc)
#   LJTIMJ : required time for a one year run (job)
#   CMDGET : command used to retrieve files
#   SYMBOL : platform identifier used to name output files
case "${TARGET}" in
    sx8brodie )
        W_XX='#-T- sx8brodie'
        LAUN='qsub '
        LLJOBS=qstat
        LSUB="mpirun -np ${PRC}"
        LPERF=
        LJTIM='01:20:00'
        LJTIMJ='01:40:00'
        CMDGET=mfget
        SYMBOL=B_ ;;
    sx8mercure )
        W_XX='#-T- sx8mercure'
        LAUN='qsub '
        LLJOBS=qstat
        LSUB="mpirun -np ${PRC}"
        LPERF=
        LJTIM='01:20:00'
        LJTIMJ='01:40:00'
        CMDGET=cp
        SYMBOL=M_ ;;
    aix6 )
        W_XX='#-T- aix6'
        LAUN='llsubmit '
        LLJOBS=Qstat
        LSUB=
        LPERF="poe hpmcount -o perfs_mpi.txt"
        LJTIM='01:10:00'
        LJTIMJ=
        CMDGET=mfget
        SYMBOL=V_ ;;
    osxxlf )
        W_XX='#-T- osxxlf'
        LAUN='./'
        LLJOBS=
        LSUB="mpirun -np ${PRC}"
        LPERF=
        LJTIM=
        LJTIMJ=
        CMDGET=cp
        SYMBOL=D_ ;;
    * )
        # Unknown target: nothing below can work without the settings above,
        # so really stop here ("stop" is not an exit command).
        echo ' #######################################' >&2
        echo ' You must give a target name :' >&2
        echo ' sx8brodie, sx8mercure, aix6, osxxlf or add one' >&2
        echo ' Have a look in lance_batch.ksh script' >&2
        echo ' #######################################' >&2
        exit 1 ;;
esac

# Keep the lines tagged for this target (tag removed) and drop the lines
# tagged for any other target.
sed -e "s%^${W_XX} *%%" "job_${CONF}.ksh" | grep -v '^#-T- ' > "job_${CONF}.$$"

# --------------------------------------------------------------
#  II. Build jobs
#      Adapt header (batch jobs) and concatenate it with core job
# --------------------------------------------------------------
# CONFS: short tag of the configuration. The known configurations get a
# 5-character tag; any other name is used unchanged. The selector is quoted
# so that an empty CONF simply falls through to the default branch.
case "${CONF}" in
    ORCA2_LIM )         CONFS=ORCA2 ;;
    ORCA2_LIM3 )        CONFS=OR2L3 ;;
    GYRE )              CONFS=GYREO ;;
    GYRE_LOBSTER )      CONFS=GYREL ;;
    ORCA2_LIM_PISCES )  CONFS=OR2LP ;;
    ORCA2_OFF_PISCES )  CONFS=OR2OP ;;
    ZAGRIF )            CONFS=ZAGRI ;;
    * )                 CONFS=${CONF} ;;
esac

# Build the batch header jhd_${TARGET}_${RUN}_tmp from the template
# jhd_${TARGET}_${RUN}, then put it in front of the core job.
# Each header line is located and rewritten by a single sed expression
# that replaces the whole line containing the directive; this also works
# when the directive is absent, repeated, or when its line contains
# characters that are special in a regular expression.
jhd_in="jhd_${TARGET}_${RUN}"
jhd_out="${jhd_in}_tmp"

case "${TARGET}" in
    osxxlf )
        # no batch directive to adapt
        cp "${jhd_in}" "${jhd_out}" ;;
    aix6 )
        sed -e "s%^.*# @ job_name.*%# @ job_name = ${RUN}${CONFS}%" \
            -e "s%^.*# @ total_tasks.*%# @ total_tasks = $(( ${PRC} + ${NB_PROCS_IOSERVER} ))%" \
            "${jhd_in}" > "${jhd_out}" ;;
    sx8brodie | sx8mercure )
        # both SX8 targets use the same PBS directives
        pbs_name="s%^.*#PBS -N .*%#PBS -N ${RUN}${CONFS}%"
        pbs_out="s%^.*#PBS -o .*%#PBS -o ${RUN}${CONFS}_std%"
        pbs_err="s%^.*#PBS -e .*%#PBS -e ${RUN}${CONFS}_err%"
        if [ "${RUN}" = 'mpi' ] || [ "${RUN}" = 'omp' ] ; then
            # multi-processor runs also set the number of cpus of the job
            pbs_cpu="s%^.*#PBS -l cpunum_job.*%#PBS -l cpunum_job=$(( ${PRC} + ${NB_PROCS_IOSERVER} ))%"
            sed -e "${pbs_name}" -e "${pbs_out}" -e "${pbs_err}" -e "${pbs_cpu}" \
                "${jhd_in}" > "${jhd_out}"
        else
            sed -e "${pbs_name}" -e "${pbs_out}" -e "${pbs_err}" \
                "${jhd_in}" > "${jhd_out}"
        fi ;;
    * )
        echo "No job header rule for target '${TARGET}'" >&2
        exit 1 ;;
esac

cat "${jhd_out}" "job_${CONF}.$$" > "job_tmp_${RUN}.ksh"
rm -f "job_${CONF}.$$" "${jhd_out}"

# -----------------------------------------------------------------------------
#  III. Launch jobs
#       The following jobs can be created for the run [type]=mon,mpi,omp
#       and the experiment [exp]=${OWNEXP} :
#        - "job_[type]_long_[exp].ksh"    ---> long run
#        - "job_[type]_1_short_[exp].ksh" ---> stream 1 run
#        - "job_[type]_2_short_[exp].ksh" ---> stream 2 run
#        - "job_[type]_gtime_[exp].ksh"   ---> one year run
#       All of them are launched immediately except the "2_short" job, which
#       is not launched by this script: it is launched only when the
#       "1_short" run is finished
# -----------------------------------------------------------------------------
jobnam[0]=long ; jobnam[1]=1_short ; jobnam[2]=2_short ; jobnam[3]=gtime
dirjob[0]=LONG ; dirjob[1]=1_SHORT ; dirjob[2]=2_SHORT ; dirjob[3]=GTIME

# The jobs to build are those of index ind, ind+1, ..., iloop-1 in the two
# tables above.
case "${JOBTOL}" in
    all )
        ind=0
        # the one year run is only part of "all" when timing is requested
        if [ "${MTIME}" = 'timing' ] ; then
            iloop=4
        else
            iloop=3
        fi ;;
    long )
        ind=0
        iloop=1 ;;
    short | 1_short )
        # '1_short' is accepted as a synonym of 'short'
        ind=1
        iloop=3 ;;
    gtime )
        ind=3
        iloop=4 ;;
    * )
        # Unknown request: select an empty range so that no job is built,
        # instead of leaving ind/iloop unset for the loop that uses them.
        echo "JOBTOL must be all, long, short or gtime (got '${JOBTOL}'): no job will be built" >&2
        ind=0
        iloop=0 ;;
esac

#######################################
# chwd: set an assignment of the job template job_tmp_${RUN}.ksh.
# The first line starting (after optional blanks) with $1 is looked up; its
# first blank-delimited word (e.g. RUN=xxx) is then replaced by NAME="VALUE"
# wherever that word appears, NAME being $1 up to its last "=".
# Nothing is changed when no line matches.
# Globals:   RUN (read)
# Arguments: $1 - start of the assignment to look for (NAME=xxx or NAME=)
#            $2 - new value; may be empty and may contain & @ or \
# Returns:   0 when nothing matches, else the status of the update
#######################################
chwd () {
    # typeset keeps these names local in ksh88 and bash; in ksh93 this
    # function style has no local scope, hence the chwd_ prefix.
    typeset chwd_file chwd_word chwd_pat chwd_val
    chwd_file="job_tmp_${RUN}.ksh"

    # Only the first match is kept, and leading blanks are skipped, so an
    # indented or repeated assignment still gives one usable word.
    chwd_word=$( sed -n -e "s@^ *\(${1}[^ ]*\).*@\1@p" "${chwd_file}" | head -n 1 )
    [ -n "${chwd_word}" ] || return 0

    # Protect what is special in a sed pattern / replacement using @ as
    # delimiter, so that the word and the value are both taken literally.
    chwd_pat=$( printf '%s\n' "${chwd_word}" | sed -e 's/[\\.*^$@[]/\\&/g' )
    chwd_val=$( printf '%s\n' "${2}" | sed -e 's/[\\&@]/\\&/g' )

    # The template is only overwritten when sed succeeded.
    sed -e "s@${chwd_pat}@${1%=*}=\"${chwd_val}\"@" "${chwd_file}" > "tmp$$" \
        && mv "tmp$$" "${chwd_file}"
}

# Fill the placeholders of the job template with the settings of this run.
# Every value is quoted so that an empty or blank-containing setting is
# still passed to chwd as exactly one argument.
chwd "RUN=xxx"               "${RUN}"
chwd "NB_PROCS=xxx"          "${PRC}"
chwd "LAUNCH=xxx"            "${LAUN}"
chwd "LEXE=xxx"              "${LSUB}"
chwd "CPERF=xxx"             "${LPERF}"
chwd "D_INPUT=xxx"           "${IODIR}"
chwd "D_GET=xxx"             "${CMDGET}"
chwd "REF_TAG=xxx"           "${REFTAG}"
chwd "MYO_EXP=xxx"           "${OWNEXP}"
chwd "TSYMB=xxx"             "${SYMBOL}"
chwd "USE_IOMPUT=xxx"        "${USE_IOMPUT}"
chwd "USE_IOSERVER=xxx"      "${USE_IOSERVER}"
chwd "NB_PROCS_IOSERVER=xxx" "${NB_PROCS_IOSERVER}"

# Build, install and launch every selected job (indices ind .. iloop-1).
while [ "${ind}" -lt "${iloop}" ]
do
    jname=${jobnam[$ind]}
    jdir=${dirjob[$ind]}/${OWNEXP}
    jfile=job_${RUN}_${jname}_${OWNEXP}.ksh

    # the job is the template with its "type" set to the job name
    chwd "type=" "${jname}"
    cp "job_tmp_${RUN}.ksh" "${jfile}"

    if [ "${jname}" = 'gtime' ] ; then
        # One year run: set the time limits of the batch header to
        # LJTIM / LJTIMJ. The job file is only replaced when sed succeeded;
        # targets without such directives (osxxlf) keep the job unchanged.
        case "${TARGET}" in
            aix6 )
                sed -e "s%^.*# @ wall_clock_limit.*%# @ wall_clock_limit = ${LJTIM}%" \
                    "${jfile}" > "${jfile}_tmp" \
                    && mv "${jfile}_tmp" "${jfile}" ;;
            sx8brodie | sx8mercure )
                sed -e "s%^.*#PBS -l cputim_prc.*%#PBS -l cputim_prc=${LJTIM}%" \
                    -e "s%^.*#PBS -l cputim_job.*%#PBS -l cputim_job=${LJTIMJ}%" \
                    "${jfile}" > "${jfile}_tmp" \
                    && mv "${jfile}_tmp" "${jfile}" ;;
        esac
    fi

    chmod 750 "${jfile}"

    # -p: also creates a missing parent directory, and is silent when the
    # experiment directory already exists
    mkdir -p "${jdir}"
    mv "${jfile}" "${jdir}/."

    # The "2_short" job is built but not launched here.
    if [ "${jname}" != '2_short' ] ; then
        # Launched in the background from the job directory. The subshell
        # leaves the current directory of this script untouched even when
        # the cd fails. LAUN is deliberately unquoted and glued to the file
        # name: 'qsub ' gives "qsub job...", './' gives "./job...".
        ( cd "${jdir}" && ${LAUN}${jfile} ) &
    fi

    ind=$(( ind + 1 ))
done
rm -f "job_tmp_${RUN}.ksh"

# List all jobs submitted (no listing command is defined for osxxlf,
# in which case this line does nothing)
${LLJOBS}

# -----------------------------------------------------------------------------
#  IV. Launch cron to be able to launch the assessment step
# -----------------------------------------------------------------------------
# Only done from the "mon" run directory.
if [ "${RUN}" = 'mon' ] ; then
    cd .. || exit 1

    # First blank-delimited word of the line(s) of cron_jobs.ksh holding
    # "LJOBS="; the unquoted echo is deliberate, it drops leading blanks.
    CAR_LJOB_tmp=$( grep 'LJOBS=' cron_jobs.ksh )
    CAR_LJOB=$( echo $CAR_LJOB_tmp | cut -f 1 -d " " )

    # Set the jobs listing command in cron_jobs.ksh. The script is only
    # overwritten when there is something to replace and sed succeeded, so
    # that it can never be replaced by an empty file.
    if [ -n "${CAR_LJOB}" ] ; then
        if sed -e "s%${CAR_LJOB}%LJOBS=${LLJOBS}%" cron_jobs.ksh > cron_jobs.$$ ; then
            mv cron_jobs.$$ cron_jobs.ksh
        else
            rm -f cron_jobs.$$
        fi
    fi
    # the rewritten file has lost its execution rights
    chmod 744 cron_jobs.ksh

    if [ "${TARGET}" != 'osxxlf' ] ; then
        ./cron_jobs.ksh "${OWNEXP}" "${CONFS}" &
    fi
fi