source: branches/2015/dev_r5092_CNRS_SETTE/NEMOGCM/TRUST/trusting_func.sh @ 5545

Last change on this file since 5545 was 5545, checked in by nicolasmartin, 5 years ago

dev_r5092_CNRS_SETTE Bugfixe

  • Property eol-style set to native
  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Rev URL
File size: 8.3 KB
RevLine 
[5268]1#!/bin/bash
2
[5383]3
# Build the "Comments" cell of the trusting report.
# Arguments: $1 - marker looked for in ocean.output (e.g. 'E R R O R')
# Globals:   REFE_DIR, CFG_USER, CFG_ARCH (read); state, line (written);
#            LAST_TIME_STEP (written; exported when $1 is 'E R R O R')
# Outputs:   the cell on stdout and in mesg_11_comments_${CFG_USER}_${CFG_ARCH}.txt
comments() {
    # 'line' is reset so that a value left by a previous call is never reported
    state=$1; LAST_TIME_STEP=0; line=''

    if [ "$state" == 'E R R O R' ]; then
        [ -e time.step ] && LAST_TIME_STEP=$( tr -d '[:space:]' < time.step )
        # Export the variable itself, not a variable named after its value
        export LAST_TIME_STEP
    fi

    # First occurrence of the marker plus the 5 following lines, joined on one line
    [ -e ocean.output ] && line=$( grep -m1 -A5 -- "$state" ocean.output | tr -d '\n' )

    # When model.log differs from the reference one, the reference-side lines
    # of the diff ('>') replace whatever was found in ocean.output
    if [[ -e ${REFE_DIR}/model.log && -n $( diff -q model.log "${REFE_DIR}/model.log" ) ]]; then
        line=$( diff model.log "${REFE_DIR}/model.log" | grep '>' )
    fi

    # The text is passed as an argument: a '%' or '\' in it must not be read as a format
    printf 'Comments\n%s\n' "$line" | tee "mesg_11_comments_${CFG_USER}_${CFG_ARCH}.txt"
}
20
[5509]21
# Compare the input files of the current directory with the reference ones.
# Checked files: inputs_list.txt, every namelist_* and every *.xml found here.
# Globals:   REFE_DIR, CFG_USER, CFG_ARCH (read); mesg (written)
# Outputs:   'Same' or 'Different' under an "Input files" header, on stdout
#            and in mesg_09_inputs_${CFG_USER}_${CFG_ARCH}.txt
diff_inputs() {
    local candidate
    local -a inputs=( inputs_list.txt )

    mesg='Same'

    # Globs instead of `ls` output: names containing blanks stay in one piece.
    # An unmatched pattern expands to itself, hence the existence test.
    for candidate in namelist_* *.xml; do
        [ -e "$candidate" ] && inputs+=( "$candidate" )
    done

    # A missing file on either side makes diff fail and is counted as a difference
    for candidate in "${inputs[@]}"; do
        diff -q -- "$candidate" "${REFE_DIR}/$candidate" > /dev/null || mesg='Different'
    done

    printf 'Input files\n%s\n' "$mesg" | tee "mesg_09_inputs_${CFG_USER}_${CFG_ARCH}.txt"
}
31
# Compare ocean.output and every *.stat file with their reference copies.
# Globals:   REFE_DIR (read); ST (exported as 'FAILED' on any mismatch)
# Calls:     get_out 7 when a reference file is missing
# Outputs:   diff's one-line report for each differing file
diff_results() {
    local candidate
    local -a outputs=( ocean.output )

    # Globs instead of `ls` output; an unmatched pattern is skipped
    for candidate in *.stat; do
        [ -e "$candidate" ] && outputs+=( "$candidate" )
    done

    for candidate in "${outputs[@]}"; do
        if [ ! -e "${REFE_DIR}/$candidate" ]; then
            export ST='FAILED'
            get_out 7
        fi

        diff -q -- "$candidate" "${REFE_DIR}/$candidate" || export ST='FAILED'
    done
}
39
# Compare the restart files of the last time step with the reference ones.
# Globals:   REFE_DIR, NEMO_TRBD, NPROC, CDO (read);
#            LAST_TIME_STEP (exported, read from ${REFE_DIR}/time.step);
#            ST (exported as 'FAILED' on any problem)
# Calls:     get_out 9  - reference time.step or reference restart missing
#            get_out 10 - no restart file produced for a component
# Outputs:   one "<file>.nc: <verdict>" line per component on stdout
diff_restart() {
    local first_hit short_name base_name comp file nb_dom nc_diff

    if [ ! -e "${REFE_DIR}/time.step" ]; then
        export ST='FAILED'
        get_out 9
    fi

    LAST_TIME_STEP=$( tr -d '[:space:]' < "${REFE_DIR}/time.step" )
    export LAST_TIME_STEP
    echo "Last time step of standard run: ${LAST_TIME_STEP}"

    # Any restart file carrying the zero-padded last time step in its name
    first_hit=$( find . -regex ".*_0+${LAST_TIME_STEP}_restart.*\.nc" -print -quit )
    if [ -z "${first_hit}" ]; then
        export ST='FAILED'
        return
    fi

    # Keep everything up to and including "<time step>_" (last occurrence in the name)
    short_name=${first_hit##*/}
    base_name=${short_name%"${LAST_TIME_STEP}_restart"*}${LAST_TIME_STEP}_

    for comp in restart restart_ice restart_trc; do
        file=${base_name}${comp}
        printf '%s.nc: ' "$file"

        # Per-domain files are named <file>_<digits>.nc
        nb_dom=$( find . -name "${file}_[0-9]*.nc" | wc -l | awk '{print $1}' )
        if   [ "${nb_dom}" -gt 1 ]; then
            # Merge the domains; the pieces are removed only if the rebuild succeeded
            if "${NEMO_TRBD}/rebuild_nemo" -t "$NPROC" "$file" "${nb_dom}" > /dev/null; then
                rm -f -- "${file}"_[0-9]*.nc
            fi
        elif [ "${nb_dom}" -eq 0 ]; then
            export ST='FAILED'
            get_out 10
        fi

        if [ -e "${REFE_DIR}/$file.nc" ]; then
            # UNIX `cmp` not suitable (filename & timestamp in .nc file)
            # $CDO is left unquoted on purpose so it may carry options;
            # only the first "records" line is used to keep the test numeric
            nc_diff=$( $CDO diffn "$file.nc" "${REFE_DIR}/$file.nc" 2> /dev/null \
                       | awk '/records/ {print $1; exit}' )

            # No "records" line at all is treated as identical
            if [[ -n ${nc_diff} && ${nc_diff} -ne 0 ]]; then
                export ST='FAILED'
                printf '%s %s\n' "$CDO" "${nc_diff}"
            else
                printf 'identical\n'
            fi
        else
            export ST='FAILED'
            get_out 9
        fi
    done
}
84
# Build the "Real CPU time" cell of the trusting report.
# Globals:   JOB_TIME (read), CFG_USER, CFG_ARCH (read); real_cpu_time (written)
# Outputs:   the cell on stdout and in mesg_10_cputime_${CFG_USER}_${CFG_ARCH}.txt
get_cpu_time() {
    # JOB_TIME is evaluated as a shell command line: it must come from trusted settings
    real_cpu_time=$( eval "${JOB_TIME}" )

    # The value is passed as an argument so that a '%' in it is printed verbatim
    printf 'Real CPU time\n%s\n' "${real_cpu_time}" | tee "mesg_10_cputime_${CFG_USER}_${CFG_ARCH}.txt"
}
89
# Bring the forcing files into the current directory and list them.
# Globals:   FORC_TAR, FORC_TARF, NEMO_FORC (read)
# Outputs:   inputs_list.txt (listing); the files themselves in the current directory
get_inputs() {
    # NOTE(review): the test reads FORC_TAR while the archive name comes from
    # FORC_TARF - confirm that both variables are meant to exist.
    if [ -n "${FORC_TAR}" ]; then
        tar -tvf "${NEMO_FORC}/${FORC_TARF}" > inputs_list.txt
        tar -vxf "${NEMO_FORC}/${FORC_TARF}" > /dev/null
    else
        ls "${NEMO_FORC}"/* > inputs_list.txt
        # `command cp` skips any function named cp; a backslash stored inside
        # a variable would be run literally as a command called '\cp'
        command cp "${NEMO_FORC}"/* . > /dev/null
    fi
}
100
# Log the release of each tool named by CDO, COMPILER, MPI and NETCDF.
# Globals:   CDO, COMPILER, MPI, NETCDF, LOADEDMODULES, PATH, CFG_USER, CFG_ARCH (read)
# Outputs:   one "<tool> <release>" line per tool appended to model.log;
#            lines 4, 5 and 6 of model.log are then copied, under a header, to
#            mesg_06_compiler_*, mesg_07_mpi_* and mesg_08_netcdf_* files
get_soft_rel() {
    local rel arch_rel

    for rel in $CDO $COMPILER $MPI $NETCDF; do
        # `sed -n ...p` prints only on a match; a plain substitution would hand
        # back the whole module list and the PATH fallback would never be used
        arch_rel=$( printf '%s\n' "${LOADEDMODULES}" | sed -n "s#.*$rel/\([^:]*\).*#\1#p" )
        [ -z "${arch_rel}" ] && arch_rel=$( printf '%s\n' "${PATH}" | sed -n "s#.*$rel/\([^/]*\).*#\1#p" )

        # The compiler reports its own version
        [ "$rel" == "$COMPILER" ] && arch_rel=$( $rel --version | grep -m1 -o ' [0-9.]* ' )

        # Unquoted on purpose: trims the blanks kept by the grep pattern above
        echo $rel ${arch_rel} >> model.log
    done

    # The header is always written, even when the wanted line is absent from model.log
    { printf 'Fortran compiler\n'; sed -n 4p model.log; } > "mesg_06_compiler_${CFG_USER}_${CFG_ARCH}.txt"
    { printf 'MPI libs\n'        ; sed -n 5p model.log; } > "mesg_07_mpi_${CFG_USER}_${CFG_ARCH}.txt"
    { printf 'NetCDF libs\n'     ; sed -n 6p model.log; } > "mesg_08_netcdf_${CFG_USER}_${CFG_ARCH}.txt"
}
113
# Record the Subversion revision of the NEMO directories and of XIOS.
# Globals:   NEMO_VERS, NEMO_ARCH, NEMO_CONF, NEMO_ENGI, NEMO_EIOI, NEMO_EFCM,
#            NEMO_TCMP, NEMO_TRBD, DIR_XIOS, CFG_USER, CFG_ARCH (read);
#            rev (written: highest revision found among the NEMO directories)
# Outputs:   "XIOS <rev>" and "NEMOGCM <rev>" lines appended to model.log;
#            mesg_05_xios_* and mesg_04_nemogcm_* files holding a changeset link;
#            the output of the svn command for each NEMO directory on stdout
get_nemo_rev() {
    local dir digits rev_loc
    local -a svn_cmd

    # If -v|--version option has been set, modify svn command
    case "${NEMO_VERS}" in
        *HEAD*|*up*)
            svn_cmd=( svn update -r HEAD )
            ;;
        *)
            digits=${NEMO_VERS//[^0-9]/}
            if [ -n "${digits}" ]; then
                svn_cmd=( svn update -r "${digits}" )
            else
                svn_cmd=( svn status )
            fi
            ;;
    esac

    rev=0
    for dir in "${NEMO_ARCH}" "${NEMO_CONF}" "${NEMO_ENGI}" \
               "${NEMO_EIOI}" "${NEMO_EFCM}"                \
               "${NEMO_TCMP}" "${NEMO_TRBD}"                \
               "${DIR_XIOS}"                                ; do

        # Unset directories are skipped
        [ -n "$dir" ] || continue

        # The revision is taken from its label rather than from a fixed line number:
        # the number of lines printed before it depends on the Subversion client.
        # LC_ALL=C keeps the label untranslated.
        rev_loc=$( LC_ALL=C svn info "$dir" | awk '/^Revision:/ {print $NF; exit}' )

        # For time being, just get revision number from XIOS
        if [ "$dir" == "${DIR_XIOS}" ]; then
            echo "XIOS ${rev_loc}" >> model.log
            printf 'XIOS rev.\n' > "mesg_05_xios_${CFG_USER}_${CFG_ARCH}.txt"
            printf '<a href="https://forge.ipsl.jussieu.fr/ioserver/changeset/%s" target="_blank">%s</a>' \
                   "${rev_loc}" "${rev_loc}" >> "mesg_05_xios_${CFG_USER}_${CFG_ARCH}.txt"
            continue
        fi

        # Command output flattened on one line; quoted so that the '?' marks of
        # `svn status` are not expanded as file name patterns
        echo "${svn_cmd[*]} $dir: $( "${svn_cmd[@]}" "$dir" | tr '\n' ' ' )"

        # Anything that is not a plain number (svn failure) is ignored
        if [[ ${rev_loc} =~ ^[0-9]+$ ]] && (( rev_loc >= rev )); then
            rev=${rev_loc}
        fi
    done

    echo "NEMOGCM $rev" >> model.log
    printf 'NEMOGCM rev.\n' > "mesg_04_nemogcm_${CFG_USER}_${CFG_ARCH}.txt"
    printf '<a href="https://forge.ipsl.jussieu.fr/nemo/changeset/%s" target="_blank">%s</a>' \
           "$rev" "$rev" >> "mesg_04_nemogcm_${CFG_USER}_${CFG_ARCH}.txt"
}
152
# Merge every mesg_*_${CFG_USER}_${CFG_ARCH}.txt cell, side by side and
# separated by ';', into trusting_${DATESTR}_${CFG_USER}_${CFG_ARCH}.txt,
# then print that file.
# Globals:   DATESTR, CFG_USER, CFG_ARCH (read)
mesg_make() {
    local report="trusting_${DATESTR}_${CFG_USER}_${CFG_ARCH}.txt"

    # Only the variable part of the pattern is quoted, the '*' must stay a glob
    paste -d ';' mesg_*_"${CFG_USER}_${CFG_ARCH}".txt > "$report"
    cat "$report"
}
157
# Publish the report of the current run when PUBLISH equals 1.
# The report is appended to ${REFE_DIR}/trusting_${CFG_USER}_${CFG_ARCH}.txt and,
# if EMAIL is set and ST is 'FAILED', a mail holding the report is sent.
# Globals:   PUBLISH, REFE_DIR, TEST_DIR, REFE_CONF, DATESTR, CFG_USER, CFG_ARCH,
#            EMAIL, ST, ERR, USER (read)
# Outputs:   trusting.mail in the current directory (mail body, on failure only)
mesg_publish() {
    local report archive_log

    # An unset or non-numeric PUBLISH means "do not publish"
    [ "${PUBLISH:-0}" -eq 1 ] 2> /dev/null || return 0

    report="trusting_${DATESTR}_${CFG_USER}_${CFG_ARCH}.txt"
    archive_log="${REFE_DIR}/trusting_${CFG_USER}_${CFG_ARCH}.txt"

    # First publication copies the whole report; later ones add its last line only
    if [ -f "$archive_log" ]; then
        tail -n 1 "$report" >> "$archive_log"
    else
        cat "$report" >> "$archive_log"
    fi

    # Send mail only when FAILED
    if [[ -n $EMAIL && "$ST" == 'FAILED' ]]; then
        cat << END_MAIL > trusting.mail
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX

Dear $USER,

The trusting sequence for ${REFE_CONF} has failed.
Directory: ${TEST_DIR}
Archive: ${REFE_DIR}/trusting_${DATESTR}_${CFG_USER}_${CFG_ARCH}.tar.gz

XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX

END_MAIL

        if [ -e "${TEST_DIR}/${report}" ]; then
            cat "${TEST_DIR}/${report}" >> trusting.mail
        fi

        # EMAIL is left unquoted on purpose: it may hold several blank-separated addresses
        mail -s "[trusting ${REFE_CONF}] $ST $ERR" $EMAIL < trusting.mail
    fi
}
189
# Print a two-line "Step....." cell: the header, then the text given as $1.
# The text is passed as an argument so that a '%' in it is printed verbatim.
print_step() { printf 'Step.....\n%s\n' "$1"; }
[5438]191
# Close the trusting sequence: write the status and result cells, archive the
# test directory on failure, build and publish the report, then leave the script.
# Arguments: $1 - error code (1-10, translated below when ST is 'FAILED')
# Globals:   TEST_DIR, REFE_DIR, DATESTR, CFG_USER, CFG_ARCH, ST,
#            LAST_TIME_STEP, TIME_LIMI (read); ERR (written, exported on failure)
# Calls:     mesg_make, mesg_publish
# Returns:   never; the script always exits with status 1
get_out() {
    local cell

    ERR=$1

    cd "${TEST_DIR}" || printf 'get_out: cannot enter %s\n' "${TEST_DIR}" >&2
    printf 'Status\n%s\n' "$ST" > "mesg_02_status_${CFG_USER}_${CFG_ARCH}.txt"

    if [ "$ST" == 'FAILED' ]; then
        # Cells not produced before the failure get an empty value
        cell="mesg_09_inputs_${CFG_USER}_${CFG_ARCH}.txt"
        [ -e "$cell" ] || printf 'Input files\n\n'   > "$cell"
        cell="mesg_10_cputime_${CFG_USER}_${CFG_ARCH}.txt"
        [ -e "$cell" ] || printf 'Real CPU time\n\n' > "$cell"
        cell="mesg_11_comments_${CFG_USER}_${CFG_ARCH}.txt"
        [ -e "$cell" ] || printf 'Comments\n\n'      > "$cell"

        # Error identification
        # NOTE(review): code 6 prints TIME_LIMI followed by 'h' while job_pending
        # treats TIME_LIMI as seconds (it divides it by 3600) - confirm the unit.
        case $ERR in
            # Compilation
            1) ERR='XIOS compilation failed'              ;;
            2) ERR='NEMO compilation failed'              ;;
            # Submission
            3) ERR='Missing input files'                  ;;
            4) ERR='Job submission error'                 ;;
            # Running
            5) ERR='nemo.exe crashed at '${LAST_TIME_STEP} ;;
            6) ERR='Exceeded time limit '${TIME_LIMI}'h'  ;;
            # Results
            7) ERR='Missing previous outputs '            ;;
            8) ERR='New outputs  differ/missing'          ;;
            9) ERR='Missing previous restarts'            ;;
            10) ERR='New restarts differ/missing'         ;;
            # Undefined
            *) ERR='Unknown error'                        ;;
        esac

        export ERR
    fi

    printf 'Result.....\n%s\n' "$ERR" > "mesg_03_result_${CFG_USER}_${CFG_ARCH}.txt"

    # Save tested configuration if trusting failed
    [ "$ST" == 'FAILED' ] && tar -czf "${REFE_DIR}/trusting_${DATESTR}_${CFG_USER}_${CFG_ARCH}.tar.gz" *

    mesg_make
    mesg_publish

    exit 1
}
[5464]234
# Wait for the submitted job to leave the queue, logging its state every
# 30 seconds; give up once TIME_LIMI seconds have been spent waiting.
# Globals:   JOB_LIST, JOB_INFO, JOB_DELE (command lines, read), JOB_ID, TIME_LIMI (read);
#            TIME_LIMIT (exported: TIME_LIMI / 3600), ST (exported as 'FAILED') on timeout
# Calls:     get_out 6 on timeout
# Outputs:   job information appended to computation.log
job_pending() {
    local time_elapsed=0 time_increment=30

    sleep "${time_increment}"

    # JOB_LIST, JOB_INFO and JOB_DELE are left unquoted on purpose: each one
    # holds a command together with its options
    while [[ -n $( ${JOB_LIST} | grep -- "${JOB_ID}" ) ]] && (( time_elapsed < TIME_LIMI )); do
        printf '\n####################################################\n' >> computation.log
        ${JOB_INFO} "${JOB_ID}"                                           >> computation.log
        sleep "${time_increment}"
        time_elapsed=$(( time_elapsed + time_increment ))
    done
    sleep "${time_increment}"

    # '>=' and not '==': the counter steps over a limit that is not a multiple of 30
    if (( time_elapsed >= TIME_LIMI )); then
        ${JOB_DELE} "${JOB_ID}" &> /dev/null
        TIME_LIMIT=$(( TIME_LIMI / 3600 ))
        export TIME_LIMIT
        export ST='FAILED'
        get_out 6
    fi
}
Note: See TracBrowser for help on using the repository browser.