New URL for NEMO forge!   http://forge.nemo-ocean.eu

Since March 2022, along with the NEMO 4.2 release, code development has moved to a self-hosted GitLab.
This forge is now archived and remains online for historical reference.
trusting_func.sh in branches/2015/dev_r5092_CNRS_SETTE/NEMOGCM/TRUST – NEMO

source: branches/2015/dev_r5092_CNRS_SETTE/NEMOGCM/TRUST/trusting_func.sh @ 5509

Last change on this file since 5509 was 5509, checked in by nicolasmartin, 9 years ago

dev_r5092_CNRS_SETTE Error management redefinition

  • Property eol-style set to native
  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Rev URL
File size: 8.3 KB
Line 
1#!/bin/bash
2
3
# Build the "Comments" column of the trusting report.
# Arguments: $1 - pattern looked up in ocean.output (e.g. 'E R R O R')
# Globals:   REFE_DIR, CFG_USER, CFG_ARCH (read)
#            state, line (written); LAST_TIME_STEP, ERR (written and exported
#            when $1 is 'E R R O R')
# Outputs:   "Comments" header and the comment line, on stdout and in
#            mesg_11_comments_<user>_<arch>.txt
comments() {
    state=$1
    LAST_TIME_STEP=0
    line=''    # never reuse a comment left over from a previous call

    if [[ $state == 'E R R O R' ]]; then
        if [[ -e time.step ]]; then
            LAST_TIME_STEP=$(tr -d '[:space:]' < time.step)
        fi
        # Export the variable itself, not a name built from its value
        export LAST_TIME_STEP ERR=1
    fi

    # First match of the pattern plus the 5 following lines, joined on one line
    if [[ -e ocean.output ]]; then
        line=$(grep -m1 -A5 -- "$state" ocean.output | tr -d '\n')
    fi

    # A model.log differing from the reference one takes precedence
    if [[ -e "${REFE_DIR}/model.log" && -n $(diff -q model.log "${REFE_DIR}/model.log") ]]; then
        line=$(diff model.log "${REFE_DIR}/model.log" | tr -d '\n')
    fi

    # The comment is data, never a printf format string
    printf 'Comments\n%s\n' "$line" | tee "mesg_11_comments_${CFG_USER}_${CFG_ARCH}.txt"
}
19
20
# Compare the input files of the current run with the reference ones.
# Globals: REFE_DIR, CFG_USER, CFG_ARCH (read); mesg (written)
# Outputs: "Input files" header followed by 'Same' or 'Different', on stdout
#          and in mesg_09_inputfiles_<user>_<arch>.txt (diff -q messages also
#          go to stdout)
diff_inputs() {
    local file
    mesg='Same'

    # Glob directly instead of parsing `ls`, so names with blanks survive
    for file in inputs_list.txt namelist_* *.xml; do
        # An unmatched pattern stays literal: skip it.
        # inputs_list.txt is always compared, even when it is missing.
        [[ $file == inputs_list.txt || -e $file ]] || continue
        diff -q -- "$file" "${REFE_DIR}/$file" || mesg='Different'
    done

    printf 'Input files\n%s\n' "$mesg" | tee "mesg_09_inputfiles_${CFG_USER}_${CFG_ARCH}.txt"
}
30
# Compare ocean.output and every *.stat file with the reference ones.
# Globals: REFE_DIR (read); ST (exported as 'FAILED' on any mismatch)
# Calls:   get_out with ERR=7 when a reference file is missing
# Outputs: diff -q messages on stdout
diff_results() {
    local file

    # Glob directly instead of parsing `ls`, so names with blanks survive
    for file in ocean.output *.stat; do
        # An unmatched pattern stays literal: skip it (ocean.output is always checked)
        [[ $file == ocean.output || -e $file ]] || continue

        if [[ ! -e "${REFE_DIR}/$file" ]]; then
            export ST='FAILED'
            get_out "$ST" ERR=7
        fi

        diff -q -- "$file" "${REFE_DIR}/$file" || export ST='FAILED'
    done
}
38
# Compare the restart files of the current run with the reference ones.
# Globals: REFE_DIR, NEMO_TRBD, NPROC, CDO (read)
#          LAST_TIME_STEP (exported, read from the reference time.step)
#          ST (exported as 'FAILED' on any mismatch or missing file)
# Calls:   get_out with ERR=9 when the reference time.step is missing
# Outputs: one "<file>.nc: <verdict>" line per restart component on stdout
diff_restart() {
    local first_restart restart_name base_name comp file nb_dom nc_diff

    if [[ ! -e "${REFE_DIR}/time.step" ]]; then
        export ST='FAILED'
        get_out "$ST" ERR=9
    fi

    LAST_TIME_STEP=$(tr -d '[:space:]' < "${REFE_DIR}/time.step")
    export LAST_TIME_STEP
    printf '%s\n' "Last time step of standard run: ${LAST_TIME_STEP}"

    first_restart=$(find . -name "*${LAST_TIME_STEP}_restart*.nc" -print -quit)

    # Restarts can only be compared when one exists for the reference last time
    # step AND the run stopped at that very same time step.
    if [[ -z ${first_restart} ]] \
        || ! diff -q time.step "${REFE_DIR}/time.step" > /dev/null; then
        export ST='FAILED'
        return 0
    fi

    # Keep everything up to (and including) "<last time step>_"
    restart_name=${first_restart##*/}
    base_name=${restart_name%"${LAST_TIME_STEP}_restart"*}${LAST_TIME_STEP}_

    for comp in restart restart_ice restart_trc; do
        file=${base_name}${comp}
        printf '%s.nc: ' "$file"

        # Rebuild the global file when it is split over several subdomains
        nb_dom=$(find . -name "${file}_[0-9]*.nc" | wc -l)
        if (( nb_dom > 1 )); then
            if "${NEMO_TRBD}/rebuild_nemo" -t "$NPROC" "$file" "$nb_dom" > /dev/null; then
                rm -f -- "${file}"_[0-9]*.nc
            fi
        fi

        if [[ -e "${REFE_DIR}/${file}.nc" && -e "${file}.nc" ]]; then
            # UNIX `cmp` not suitable (filename & timestamp in .nc file)
            # $CDO is left unquoted so that it may carry options
            nc_diff=$($CDO diffn "${file}.nc" "${REFE_DIR}/${file}.nc" 2> /dev/null | tail -1)

            if [[ -n ${nc_diff} ]]; then
                export ST='FAILED'
                printf '%s %s\n' "$CDO" "${nc_diff}"
            else
                printf 'identical\n'
            fi
        else
            # A restart missing on either side cannot be validated
            export ST='FAILED'
            printf 'missing\n'
        fi
    done
}
81
# Report the CPU time of the job.
# Globals: JOB_INFO, JOB_ID, JOB_TIME, CFG_USER, CFG_ARCH (read)
#          real_cpu_time (written)
# Outputs: "Real CPU time" header and the value, on stdout and in
#          mesg_10_realcputime_<user>_<arch>.txt
get_cpu_time() {
    # JOB_INFO and JOB_TIME hold command strings that are piped together,
    # hence the eval; they must only ever come from trusted configuration.
    real_cpu_time=$(eval "${JOB_INFO} ${JOB_ID} | ${JOB_TIME}")

    # The value is data, never a printf format string
    printf 'Real CPU time\n%s\n' "${real_cpu_time}" \
        | tee "mesg_10_realcputime_${CFG_USER}_${CFG_ARCH}.txt"
}
86
# Fetch the forcing/input files into the current directory and list them.
# Globals: NEMO_FORC (read) - directory holding the inputs
#          FORC_TAR  (read) - archive name inside NEMO_FORC; when empty the
#                             files of NEMO_FORC are copied directly
# Outputs: inputs_list.txt (listing of the inputs) in the current directory
get_inputs() {
    if [[ -n ${FORC_TAR} ]]; then
        tar -tvf "${NEMO_FORC}/${FORC_TAR}" > inputs_list.txt
        tar -vxf "${NEMO_FORC}/${FORC_TAR}" > /dev/null
    else
        ls "${NEMO_FORC}"/* > inputs_list.txt
        # `command` bypasses any cp alias/function; a "\cp" stored in a
        # variable would be looked up literally and never found
        command cp "${NEMO_FORC}"/* . > /dev/null
    fi
}
97
# Record the release of each piece of software used by the run.
# Globals: CDO, COMPILER, MPI, NETCDF, LOADEDMODULES, PATH, CFG_USER, CFG_ARCH (read)
# Outputs: one "<name> <release>" line per software appended to model.log;
#          lines 4, 5 and 6 of model.log are then published with a header in
#          mesg_06_compiler_*, mesg_07_mpi_* and mesg_08_netcdf_*
get_soft_rel() {
    local rel arch_rel

    # Unquoted on purpose: unset entries are simply skipped
    for rel in $CDO $COMPILER $MPI $NETCDF; do
        # `sed -n …p` prints only on a match; without it the whole module list
        # would be returned and the PATH fallback would never be tried
        arch_rel=$(sed -n "s#.*$rel/\([^:]*\).*#\1#p" <<< "${LOADEDMODULES}")
        if [[ -z ${arch_rel} ]]; then
            arch_rel=$(sed -n "s#.*$rel/\([^/]*\).*#\1#p" <<< "${PATH}")
        fi
        # The compiler reports its own version
        if [[ $rel == "$COMPILER" ]]; then
            arch_rel=$($rel --version | grep -m1 -o ' [0-9.]* ')
        fi
        # Unquoted on purpose: word splitting trims the blanks kept by `grep -o`
        echo $rel ${arch_rel} >> model.log
    done

    # Header + requested line; nothing is written when the line does not exist
    awk -v hdr='Fortran compiler' 'NR == 4 { print hdr; print }' model.log \
        > "mesg_06_compiler_${CFG_USER}_${CFG_ARCH}.txt"
    awk -v hdr='MPI libs'         'NR == 5 { print hdr; print }' model.log \
        > "mesg_07_mpi_${CFG_USER}_${CFG_ARCH}.txt"
    awk -v hdr='NetCDF libs'      'NR == 6 { print hdr; print }' model.log \
        > "mesg_08_netcdf_${CFG_USER}_${CFG_ARCH}.txt"
}
110
# Update (or just check) the NEMO working copies and record the revisions used.
# Globals: NEMO_VERS (read) - when it contains 'HEAD', 'up' or a number, the
#                             working copies are updated to that revision
#                             (HEAD when no number is given); otherwise only
#                             `svn status` is run
#          NEMO_ARCH, NEMO_CONF, NEMO_ENGI, NEMO_EIOI, NEMO_EFCM, NEMO_TCMP,
#          NEMO_TRBD, DIR_XIOS, CFG_USER, CFG_ARCH (read)
#          rev (written) - highest revision found among the NEMO directories
# Outputs: "XIOS <rev>" and "NEMOGCM <rev>" appended to model.log,
#          mesg_05_xios_* and mesg_04_nemogcm_* files, svn messages on stdout
get_nemo_rev() {
    local svn_opt='status'
    local dir rev_loc digits svn_out
    local -a svn_cmd

    # If -v|--version option has been set, modify svn command
    digits=$(tr -d '[:alpha:][:punct:][:space:]' <<< "${NEMO_VERS}")
    if [[ ${NEMO_VERS} == *HEAD* || ${NEMO_VERS} == *up* || -n ${digits} ]]; then
        # No explicit revision number means HEAD
        rev=${digits:-HEAD}
        svn_cmd=(svn update -r "$rev")
    else
        svn_cmd=(svn "${svn_opt}")
    fi

    rev=0
    for dir in "${NEMO_ARCH}" "${NEMO_CONF}" "${NEMO_ENGI}" \
               "${NEMO_EIOI}" "${NEMO_EFCM}"                \
               "${NEMO_TCMP}" "${NEMO_TRBD}"                \
               "${DIR_XIOS}"; do
        # Unset directories are skipped
        [[ -n $dir ]] || continue

        # Pick the "Revision:" field by name: its line number depends on the
        # svn release (LC_ALL=C keeps the label untranslated)
        rev_loc=$(LC_ALL=C svn info "$dir" | awk '/^Revision:/ { print $NF; exit }')

        # For time being, just get revision number from XIOS
        if [[ $dir == "${DIR_XIOS}" ]]; then
            echo 'XIOS '"${rev_loc}" >> model.log
            {
                printf 'XIOS rev.\n'
                printf '<a href="https://forge.ipsl.jussieu.fr/ioserver/changeset/%s" target="_blank">%s</a>' \
                    "${rev_loc}" "${rev_loc}"
            } > "mesg_05_xios_${CFG_USER}_${CFG_ARCH}.txt"
            continue
        fi

        # Log the svn command and its output on one line, without letting the
        # shell glob-expand the '?' status flags
        svn_out=$("${svn_cmd[@]}" "$dir")
        svn_out=${svn_out//$'\n'/ }
        echo "${svn_cmd[*]} $dir: ${svn_out}"

        # Keep the highest revision; ignore directories without a usable one
        if [[ ${rev_loc} =~ ^[0-9]+$ ]] && (( rev_loc >= rev )); then
            rev=${rev_loc}
        fi
    done

    echo 'NEMOGCM '"$rev" >> model.log
    {
        printf 'NEMOGCM rev.\n'
        printf '<a href="https://forge.ipsl.jussieu.fr/nemo/changeset/%s" target="_blank">%s</a>' \
            "$rev" "$rev"
    } > "mesg_04_nemogcm_${CFG_USER}_${CFG_ARCH}.txt"
}
149
# Assemble the per-column mesg_* files into the trusting report.
# Globals: DATESTR, CFG_USER, CFG_ARCH (read)
# Outputs: trusting_<date>_<user>_<arch>.txt (columns joined with ';'),
#          also echoed on stdout
mesg_make() {
    local report="trusting_${DATESTR}_${CFG_USER}_${CFG_ARCH}.txt"

    # Only the '*' is left unquoted so that it still globs
    paste -d ';' mesg_*_"${CFG_USER}_${CFG_ARCH}".txt > "$report"
    cat "$report"
}
154
# Publish the trusting report: append it to the history file kept in the
# reference directory and, on failure, mail it.
# Globals: PUBLISH (read) - publication happens only when it equals 1
#          REFE_DIR, TEST_DIR, REFE_CONF, DATESTR, CFG_USER, CFG_ARCH,
#          EMAIL, ST, ERR, USER (read)
# Outputs: ${REFE_DIR}/trusting_<user>_<arch>.txt (appended), trusting.mail
mesg_publish() {
    local report="trusting_${DATESTR}_${CFG_USER}_${CFG_ARCH}.txt"
    local history="${REFE_DIR}/trusting_${CFG_USER}_${CFG_ARCH}.txt"

    # An unset PUBLISH means "do not publish" rather than a test error
    if [[ ${PUBLISH:-0} -eq 1 ]]; then

        # The first publication copies header and result, later ones only
        # append the result line
        if [[ -f $history ]]; then
            tail -1 "$report" >> "$history"
        else
            cat "$report" >> "$history"
        fi

        # Send mail only when FAILED
        if [[ -n $EMAIL && $ST == 'FAILED' ]]; then
            cat << END_MAIL > trusting.mail
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX

Dear $USER,

The trusting sequence for ${REFE_CONF} has failed.
Directory: ${TEST_DIR}
Archive: ${REFE_DIR}/trusting_${DATESTR}_${CFG_USER}_${CFG_ARCH}.tar.gz

XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX

END_MAIL
#`tail -n 1 ${TEST_DIR}/mesg_03_error_${CFG_USER}_${CFG_ARCH}.txt`
            if [[ -e "${TEST_DIR}/${report}" ]]; then
                cat "${TEST_DIR}/${report}" >> trusting.mail
            fi
            # $EMAIL is left unquoted so that it may list several recipients
            mail -s "[trusting ${REFE_CONF}] $ST $ERR" $EMAIL < trusting.mail
        fi

    fi
}
186
# Print a "Step....." header followed by the step description ($1).
# %b keeps backslash escapes in $1 working, while a literal '%' in the
# description is no longer interpreted as a printf conversion.
print_step() { printf 'Step.....\n%b\n' "$1"; }
188
# Finalize the trusting sequence: write status and result columns, build and
# publish the report, then leave the script.
# Arguments: any argument of the form ERR=<code> sets the error code (this is
#            how the callers in this file pass it); other arguments are ignored
# Globals:   ST, TEST_DIR, CFG_USER, CFG_ARCH, LAST_TIME_ERR, TIME_LIMI (read)
#            ERR (read; replaced by its description when ST is 'FAILED')
# Outputs:   mesg_02_status_*, mesg_03_result_* (plus empty mesg_09/10/11
#            placeholders on failure)
# Exits:     always with status 1
get_out() {
    local arg

    # Callers invoke `get_out $ST ERR=<n>`: honour the error code they pass
    for arg in "$@"; do
        case $arg in
            ERR=*) ERR=${arg#ERR=} ;;
        esac
    done

    printf 'Status\n%s\n' "$ST" > "mesg_02_status_${CFG_USER}_${CFG_ARCH}.txt"

    # Save tested configuration if trusting failed
    if [[ $ST == 'FAILED' ]]; then
        cd "${TEST_DIR}" || printf 'get_out: cannot cd to %s\n' "${TEST_DIR}" >&2
        printf 'Input files\n\n'   > "mesg_09_inputfiles_${CFG_USER}_${CFG_ARCH}.txt"
        printf 'Real CPU time\n\n' > "mesg_10_realcputime_${CFG_USER}_${CFG_ARCH}.txt"

        if [[ ! -e "mesg_11_comments_${CFG_USER}_${CFG_ARCH}.txt" ]]; then
            printf 'Comments\n\n'  > "mesg_11_comments_${CFG_USER}_${CFG_ARCH}.txt"
        fi

        # Error identification (on the value of ERR, not on the literal word)
        # NOTE(review): LAST_TIME_ERR is not set anywhere in the visible part of
        # this file (comments() sets LAST_TIME_STEP) - confirm which one is meant.
        # NOTE(review): job_pending() computes TIME_LIMIT in hours while the
        # message below prints TIME_LIMI followed by 'h' - confirm the unit.
        case "$ERR" in
            # Compilation
            1)  ERR='XIOS compilation failed' ;;
            2)  ERR='NEMO compilation failed' ;;
            # Submission
            3)  ERR='Missing input files' ;;
            4)  ERR='Job submission error' ;;
            # Running
            5)  ERR='nemo.exe crashed at '${LAST_TIME_ERR} ;;
            6)  ERR='Exceeded time limit '${TIME_LIMI}'h' ;;
            # Results
            7)  ERR='Missing previous outputs ' ;;
            8)  ERR='New outputs  differ/missing' ;;
            9)  ERR='Missing previous restarts' ;;
            10) ERR='New restarts differ/missing' ;;
            # Undefined
            *)  ERR='Unknown error' ;;
        esac

        #tar -czf ${REFE_DIR}/trusting_${DATESTR}_${CFG_USER}_${CFG_ARCH}.tar.gz *
    fi

    printf 'Result.....\n%s\n' "$ERR" > "mesg_03_result_${CFG_USER}_${CFG_ARCH}.txt"

    mesg_make
    mesg_publish

    exit 1
}
227
228job_pending() {
229    time_elapsed=0; time_increment=30
230
231    sleep ${time_increment}
232    while [[ $( ${JOB_LIST} | grep ${JOB_ID} ) && ${time_elapsed} -lt ${TIME_LIMI} ]]; do
233   printf "\n####################################################" >> computation.log
234   ${JOB_INFO} ${JOB_ID}                                           >> computation.log
235   sleep ${time_increment}
236#  let time_elapsed+=${time_increment}
237   time_elapsed=$(( ${time_elapsed} + ${time_increment} ))
238    done
239    sleep ${time_increment}
240
241    if [ ${time_elapsed} -eq ${TIME_LIMI} ]; then
242   ${JOB_DELE} ${JOB_ID} &> /dev/null
243   TIME_LIMIT=$(( ${TIME_LIMI} / 3600 ))
244   export ${TIME_LIMIT}
245   get_out $ST ERR=6
246}
Note: See TracBrowser for help on using the repository browser.