Changeset 5464 for branches/2015
- Timestamp:
- 2015-06-22T21:04:36+02:00 (9 years ago)
- Location:
- branches/2015/dev_r5092_CNRS_SETTE/NEMOGCM/TRUST
- Files:
-
- 4 edited
Legend:
- Unmodified
- Added
- Removed
-
branches/2015/dev_r5092_CNRS_SETTE/NEMOGCM/TRUST/config/arch_template.cfg
r5355 r5464 4 4 5 5 # Available softwares: 6 CDO='' # CDO (Climate Data Operators) (root directory name in $LOADEDMODULES or $PATH) 7 COMPILER='' # ifort/gfortran/pgfortran/... ( "" " " "" "" "" "" " " ) 8 MPI='' # MPI libraries ( "" " " "" "" "" "" " " ) 9 NETCDF='' # NetCDF " " ( "" " " "" "" "" "" " " ) 6 CDO='' # CDO (Climate Data Operators) 7 # (root directory name in $LOADEDMODULES or $PATH) 8 COMPILER='' # ifort/gfortran/pgfortran/... 9 # (to get compiler release with command `$COMPILER --version`) 10 MPI='' # MPI libraries 11 # ( "" " " "" "" "" "" " " ) 12 NETCDF='' # NetCDF " " 13 # ( "" " " "" "" "" "" " " ) 10 14 11 # Job commands :15 # Job commands (works with job ID): 12 16 JOB_SUBM='' # Submit a job && get his ID in return 13 JOB_LIST='' # List submitted jobs (to know if the job is in progress with his ID)14 JOB_VIEW='' # Get job informations (to increment a log during computing from job ID)15 JOB_DELE='' # Stop a job (not used currently)17 JOB_LIST='' # List all submitted jobs to search for job ID with `grep` 18 JOB_VIEW='' # Get job informations (to fill in a log during computing) 19 JOB_DELE='' # Kill a job (in case of crash or time limit reaching) 16 20 17 21 # Compiling & computing settings 18 [ -z "$NPROC" ] && NPROC=1 # Number of processing units 19 [ -z "${TIME_LIMI}" ] && TIME_LIMI='' # Timeout in s (pending & running) 22 [ -z "$NPROC" ] && NPROC=1 # Processing units for compilation 23 [ -z "${TIME_LIMI}" ] && TIME_LIMI='' # Utmost time in seconds to wait for finished job 24 # (pending & running) 20 25 21 26 # Architecture environment (be careful with compliance with XIOS) -
branches/2015/dev_r5092_CNRS_SETTE/NEMOGCM/TRUST/trusting.env
r5453 r5464 4 4 # Time-stamp 5 5 date_test=$( date ) 6 export DATE _STR=$( LC_ALL=C date -d"${date_test}" +%Y%m%dT%H%M )6 export DATESTR=$( date -ud "${date_test}" +%Y%m%d%Z%H%M ) 7 7 8 8 # Sourcing user configuration … … 14 14 export DIR_XIOS #DIR_OASI 15 15 export NEMO_FORC FORC_TARF 16 export EMAIL 16 17 17 18 # Only interest for essential NEMO directories -
branches/2015/dev_r5092_CNRS_SETTE/NEMOGCM/TRUST/trusting.sh
r5459 r5464 6 6 NEMO_TRUS=$( pwd $( dirname $0 ) ) 7 7 DEBUG=0; PUBLISH=0 8 STATUS='FAILED'; STEP='' # Only at end status will change to OK8 STATUS='FAILED'; STEP='' # Only at end status will change to OK 9 9 10 10 # Get options for replacing some initials settings … … 13 13 case $1 in 14 14 -b|--branch ) NEMO_BRAN=$2 ; shift 2;; -d|--debug ) set -vx; DEBUG=1 ; shift ;; 15 -e|--email ) MAIL=$2; shift 2;; -j|--job ) NPROC=$2 ; shift 2;;15 -e|--email ) EMAIL=$2 ; shift 2;; -j|--job ) NPROC=$2 ; shift 2;; 16 16 -h|--help ) cat trusting_help.txt; exit 1;; -m|--machine) CFG_ARCH=$2 ; shift 2;; 17 17 -n|--newconf) TEST_CONF=$2 ; shift 2;; -r|--refconf) REFE_CONF=$2 ; shift 2;; … … 44 44 mkdir -p ${TEST_DIR} ${REFE_DIR} 45 45 echo ${TEST_DIR} && cd ${TEST_DIR} 46 date str1=$( LC_ALL=C date -d"${DATE_STR}" +%Y-%m-%dT%H:%M)47 printf "Date\n$ datestr1\n" > mesg_01_date_${CFG_USER}_${CFG_ARCH}.txt46 date_str=$( date -ud ${DATESTR} +"%F %R %Z" ) 47 printf "Date\n${date_str}\n" > mesg_01_date_${CFG_USER}_${CFG_ARCH}.txt 48 48 49 49 # SVN action on XIOS & NEMO essentials directories … … 57 57 . ${ARCH_ENV} > /dev/null 58 58 get_soft_rel 59 cat arch_env.log59 cat model.log 60 60 61 61 # XIOS compiling … … 79 79 cd ${TEST_DIR} 80 80 find ${NEMO_CONF}/${TEST_CONF}/EXP00 -regex '.*\(cfg\|opa\|ref\|xml\)' -exec cp {} . \; 81 82 if [ ! -z "${FORC_TAR}" ]; then 83 cmd_iol="tar -tvf ${NEMO_FORC}/${FORC_TAR}"; cmd_iof="tar -vxf ${NEMO_FORC}/${FORC_TAR}" 84 else 85 cmd_iol="ls ${NEMO_FORC}/*" ; cmd_iof="\cp ${NEMO_FORC}/* ." 86 fi 87 88 ${cmd_iol} > inputs_list.txt && ${cmd_iof} > /dev/null 81 get_inputs 89 82 [ $? -ne 0 ] && get_out $STATUS $STEP 90 91 83 [ $( find . -name '*.gz' -print -quit ) ] && gunzip *.gz 92 84 93 85 # Check input files in all cases 94 86 #--------------------------------------------------- 95 print_step 'Difference between input files'87 print_step 'Difference with standard input files list' 96 88 diff_inputs 97 89 … … 101 93 JOB_ID=$( ${JOB_SUBM} | awk '{print $NF}' ) 102 94 [ $? -ne 0 ] && get_out $STATUS $STEP 103 104 print_step 'Pending/Running job' 105 time_elapsed=0; time_increment=30 106 sleep ${time_increment} 107 while [[ $( ${JOB_LIST} | grep ${JOB_ID} ) && ${time_elapsed} -lt ${TIME_LIMI} ]]; do 108 printf "\n####################################################" >> computation.log 109 ${JOB_VIEW} ${JOB_ID} >> computation.log 110 sleep ${time_increment} 111 let time_elapsed+=${time_increment} 112 done 113 sleep ${time_increment} 114 115 if [ ${time_elapsed} -eq ${TIME_LIMI} ]; then 116 print_step 'Killing job' 117 ${JOB_DELE} ${JOB_ID} &> /dev/null 118 get_out $STATUS 'Job timeout' 119 fi 95 print_step 'Holding-Running job' 96 job_pending 120 97 print_step 'Job finished' 121 98 … … 128 105 fi 129 106 130 STATUS='OK'131 107 # Inspect output text files 132 108 #--------------------------------------------------- 133 109 print_step 'Test ASCII output files diff' 134 for file in ocean.output solver.stat tracer.stat; do 135 diff $file ${REFE_DIR}/$file 136 [ $? -ne 0 ] && get_out 'FAILED' 'ASCII output file diff' 137 done 110 diff_textfiles 138 111 139 112 # Inspect output NetCDF files … … 141 114 print_step 'Test last restart NetCDF files diff' 142 115 diff_restart 143 [ $? -ne 0 ] && get_out 'FAILED' "$CDO diff failed"144 116 145 117 # Get computation duration … … 148 120 get_cpu_time 149 121 150 # Get comments (ocean.output & diff arch_env.log)122 # Get comments (ocean.output & diff model.log) 151 123 #--------------------------------------------------- 152 124 comments 'W A R N I N G' -
branches/2015/dev_r5092_CNRS_SETTE/NEMOGCM/TRUST/trusting_func.sh
r5459 r5464 16 16 fi 17 17 18 if [ $( diff -q arch_env.log ${REFE_DIR}/arch_env.log ) ]; then19 line=$( diff arch_env.log ${REFE_DIR}/arch_env.log | tr -d '\n' )18 if [ $( diff -q model.log ${REFE_DIR}/model.log ) ]; then 19 line=$( diff model.log ${REFE_DIR}/model.log | tr -d '\n' ) 20 20 fi 21 21 … … 29 29 } 30 30 31 get_inputs() { 32 if [ ! -z "${FORC_TAR}" ]; then 33 cmd_iol="tar -tvf ${NEMO_FORC}/${FORC_TAR}"; cmd_iof="tar -vxf ${NEMO_FORC}/${FORC_TAR}" 34 else 35 cmd_iol="ls ${NEMO_FORC}/*" ; cmd_iof="\cp ${NEMO_FORC}/* ." 36 fi 37 38 ${cmd_iol} > inputs_list.txt 39 ${cmd_iof} > /dev/null 40 } 41 31 42 diff_inputs() { 32 43 mesg='Same' … … 56 67 fi 57 68 58 cmp ${TEST_DIR}/${base_name}_$comp.nc ${REFE_DIR}/${base_name}_$comp.nc 69 # UNIX `cmp` not suitable (filename & timestamp in .nc file) 70 $CDO diffn ${TEST_DIR}/${base_name}_$comp.nc ${REFE_DIR}/${base_name}_$comp.nc 2> /dev/null 59 71 if [ $? -ne 0 ]; then 60 72 STATUS='FAILED'; STEP='Restart files different' 61 printf "$CDO diffn\n" 62 $CDO diffn ${TEST_DIR}/${base_name}_$comp.nc ${REFE_DIR}/${base_name}_$comp.nc 2> /dev/null 73 #printf "$CDO diffn\n" 63 74 else 64 75 printf "identical\n" 65 76 fi 66 77 67 \rm ${TEST_DIR}/${base_name}_${comp}_[0-9]*.nc78 rm -f ${TEST_DIR}/${base_name}_${comp}_[0-9]*.nc &> /dev/null 68 79 done 69 80 … … 73 84 fi 74 85 75 [ $STATUS == 'FAILED'] && get_out $STATUS $STEP 86 } 87 88 diff_textfiles() { 89 for file in ocean.output $( ls *.stat ); do 90 diff $file ${REFE_DIR}/$file 91 [ $? -ne 0 ] && STATUS='FAILED' && STEP='ASCII output file diff' 92 done 76 93 } 77 94 … … 81 98 [ -z "${arch_rel}" ] && arch_rel=$( echo $PATH | sed "s#.*$rel/\([^/]*\).*#\1#" ) 82 99 [ $rel == $COMPILER ] && arch_rel=$( $rel --version | grep -m1 -o ' [0-9.]* ' ) 83 echo $rel ${arch_rel} >> arch_env.log84 done 85 86 sed -n 3p arch_env.log | sed 'i\Compiler' > mesg_05_compiler_${CFG_USER}_${CFG_ARCH}.txt87 sed -n 4p arch_env.log | sed 'i\MPI libs' > mesg_06_mpi_${CFG_USER}_${CFG_ARCH}.txt88 sed -n 5p arch_env.log | sed 'i\NetCDF libs' > mesg_07_netcdf_${CFG_USER}_${CFG_ARCH}.txt100 echo $rel ${arch_rel} >> model.log 101 done 102 103 sed -n 4p model.log | sed 'i\Compiler' > mesg_05_compiler_${CFG_USER}_${CFG_ARCH}.txt 104 sed -n 5p model.log | sed 'i\MPI libs' > mesg_06_mpi_${CFG_USER}_${CFG_ARCH}.txt 105 sed -n 6p model.log | sed 'i\NetCDF libs' > mesg_07_netcdf_${CFG_USER}_${CFG_ARCH}.txt 89 106 } 90 107 … … 102 119 103 120 rev=0 104 for dir in ${DIR_XIOS} ${NEMO_ARCH} ${NEMO_CONF} ${NEMO_ENGI} \ 105 ${NEMO_EIOI} ${NEMO_EFCM} ${NEMO_TCMP} ${NEMO_TRBD} ; do 121 for dir in ${NEMO_ARCH} ${NEMO_CONF} ${NEMO_ENGI} \ 122 ${NEMO_EIOI} ${NEMO_EFCM} \ 123 ${NEMO_TCMP} ${NEMO_TRBD} \ 124 ${DIR_XIOS} ; do 106 125 107 126 # For time being, just get revision number from XIOS 108 127 if [ $dir == ${DIR_XIOS} ]; then 109 echo 'XIOS '$( svn info $dir | awk '(NR == 5) {print $NF}' ) >> arch_env.log128 echo 'XIOS '$( svn info $dir | awk '(NR == 5) {print $NF}' ) >> model.log 110 129 continue 111 130 fi … … 116 135 done 117 136 118 printf "NEMOGCM rev.\n$rev\n" > mesg_02_nemogcm_${CFG_USER}_${CFG_ARCH}.txt 137 echo "NEMOGCM $rev" >> model.log 138 printf "NEMOGCM rev.\n$rev\n" > mesg_02_nemogcm_${CFG_USER}_${CFG_ARCH}.txt 119 139 } 120 140 121 141 mesg_make() { 122 paste -d ';' mesg_*_${CFG_USER}_${CFG_ARCH}.txt > trusting_${DATE _STR}_${CFG_USER}_${CFG_ARCH}.txt123 cat trusting_${DATE _STR}_${CFG_USER}_${CFG_ARCH}.txt142 paste -d ';' mesg_*_${CFG_USER}_${CFG_ARCH}.txt > trusting_${DATESTR}_${CFG_USER}_${CFG_ARCH}.txt 143 cat trusting_${DATESTR}_${CFG_USER}_${CFG_ARCH}.txt 124 144 } 125 145 … … 130 150 if [ -f ${REFE_DIR}/trusting_${CFG_USER}_${CFG_ARCH}.txt ]; then cmd='sed -n 2p'; else cmd='cat'; fi 131 151 132 $cmd trusting_${DATE _STR}_${CFG_USER}_${CFG_ARCH}.txt >> ${REFE_DIR}/trusting_${CFG_USER}_${CFG_ARCH}.txt152 $cmd trusting_${DATESTR}_${CFG_USER}_${CFG_ARCH}.txt >> ${REFE_DIR}/trusting_${CFG_USER}_${CFG_ARCH}.txt 133 153 134 154 # Send mail only when FAILED 135 if [[ ! -z $ MAIL && $STATUS == 'FAILED' ]]; then155 if [[ ! -z $EMAIL && $STATUS == 'FAILED' ]]; then 136 156 cat << END_MAIL > trusting.mail 137 157 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX … … 139 159 140 160 The trusting sequence for ${CONF_REF} has failed at step: 141 `tail -n 1 mesg_03_step_${CFG_USER}_${CFG_ARCH}.txt`161 `tail -n 1 ${TEST_DIR}/mesg_04_step_${CFG_USER}_${CFG_ARCH}.txt` 142 162 Directory : ${TEST_DIR} 143 163 … … 145 165 146 166 END_MAIL 147 cat mesg_*_${CFG_USER}_${CFG_ARCH}.txt >> trusting.mail 148 mail -s "[trusting ${CONF_REF}] FAILED" $MAIL < trusting.mail 167 if [ -e ${TEST_DIR}/trusting_${DATESTR}_${CFG_USER}_${CFG_ARCH}.txt ]; then 168 cat ${TEST_DIR}/trusting_${DATESTR}_${CFG_USER}_${CFG_ARCH}.txt >> trusting.mail 169 fi 170 mail -s "[trusting ${CONF_REF}] $STATUS $STEP" $EMAIL < trusting.mail 149 171 fi 150 172 … … 152 174 } 153 175 154 print_step() { STEP=$1; export STEP && printf "Step.....\n$TEP\n"; } 176 print_step() { 177 [ ! -z "$1" ] && STEP=$1 178 export STEP && printf "Step.....\n$STEP\n" 179 } 155 180 156 181 get_out() { … … 162 187 touch mesg_08_inputfiles_${CFG_USER}_${CFG_ARCH}.txt mesg_09_realcputime_${CFG_USER}_${CFG_ARCH}.txt \ 163 188 mesg_10_comments_${CFG_USER}_${CFG_ARCH}.txt 164 #tar -czf ${REFE_DIR}/trusting_${DATE _STR}_${CFG_USER}_${CFG_ARCH}.tar.gz *189 #tar -czf ${REFE_DIR}/trusting_${DATESTR}_${CFG_USER}_${CFG_ARCH}.tar.gz * 165 190 fi 166 191 … … 170 195 exit 1 171 196 } 197 198 job_pending() { 199 time_elapsed=0; time_increment=30 200 sleep ${time_increment} 201 202 while [[ $( ${JOB_LIST} | grep ${JOB_ID} ) && ${time_elapsed} -lt ${TIME_LIMI} ]]; do 203 printf "\n####################################################" >> computation.log 204 ${JOB_VIEW} ${JOB_ID} >> computation.log 205 sleep ${time_increment} 206 let time_elapsed+=${time_increment} 207 done 208 209 sleep ${time_increment} 210 211 if [ ${time_elapsed} -eq ${TIME_LIMI} ]; then 212 STEP='Exceeded time limit' 213 214 if [ $( ${JOB_LIST} | grep ${JOB_ID} ) ]; then 215 STEP='Job not finished on time: '$(( ${TIME_LIMI}/3600 ))'h' 216 fi 217 218 print_step 219 ${JOB_DELE} ${JOB_ID} &> /dev/null 220 get_out $STATUS $STEP 221 fi 222 }
Note: See TracChangeset
for help on using the changeset viewer.