New URL for NEMO forge!   http://forge.nemo-ocean.eu

Since March 2022 along with NEMO 4.2 release, the code development moved to a self-hosted GitLab.
This forge is now archived and remains online for historical reference.
trusting_func.sh in branches/2015/dev_r5092_CNRS_SETTE/NEMOGCM/TRUST – NEMO

source: branches/2015/dev_r5092_CNRS_SETTE/NEMOGCM/TRUST/trusting_func.sh @ 5689

Last change on this file since 5689 was 5689, checked in by nicolasmartin, 9 years ago

dev_r5092_CNRS_SETTE Correct all arithmetic tests, global readability improvement & format words list in loop

  • Property eol-style set to native
  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Rev URL
File size: 10.2 KB
Line 
#!/bin/bash
##
## Function library for the trusting sequence.
## At load time it creates the test and reference directories and defines the
## names of the messenger, summary and archive files.
##
## Reads from the environment: TEST_DIR, REFE_DIR, PATTERNAME, DATE


## Quoted so that a directory containing whitespace is not split in two
mkdir -p -- "${TEST_DIR}" "${REFE_DIR}"

## Messenger filenames
FILE_DATE="mesg_01_date_${PATTERNAME}.txt"
FILE_STAT="mesg_02_status_${PATTERNAME}.txt"
FILE_RESU="mesg_03_result_${PATTERNAME}.txt"
FILE_NEMO="mesg_04_nemogcm_${PATTERNAME}.txt"
FILE_XIOS="mesg_05_xios_${PATTERNAME}.txt"
FILE_COMP="mesg_06_compiler_${PATTERNAME}.txt"
FILE_MPIN="mesg_07_mpi_${PATTERNAME}.txt"
FILE_NCDF="mesg_08_netcdf_${PATTERNAME}.txt"
FILE_INPT="mesg_09_inputs_${PATTERNAME}.txt"
FILE_CPUT="mesg_10_cputime_${PATTERNAME}.txt"
FILE_COMM="mesg_11_comments_${PATTERNAME}.txt"

## Trusting summary & archive timestamped filenames
FILE_TRUS="trusting_${DATE}_${PATTERNAME}.txt"
FILE_ARCH="trusting_${DATE}_${PATTERNAME}.tar.gz"
16
17
## Print a step banner: an empty line, 'Step.....', then the step title.
##   $1 - step title; it is passed as an argument (never as the printf format),
##        so a '%' in the title is printed verbatim. Backslash escapes in the
##        title are still expanded (%b).
print_step() { printf '\nStep.....\n%b\n' "$1"; }
19
20
## (Re)create every messenger file with its column title on the first line.
## The status and result files also receive a default second line: the current
## value of ${ST} and 'Unknown error'.
##
## Globals read: FILE_DATE FILE_STAT FILE_RESU FILE_NEMO FILE_XIOS FILE_COMP
##               FILE_MPIN FILE_NCDF FILE_INPT FILE_CPUT FILE_COMM ST
init_files() {
    printf '%s\n' 'Date'             > "${FILE_DATE}"
    printf '%s\n' 'Status'           > "${FILE_STAT}"
    printf '%s\n' 'Result'           > "${FILE_RESU}"
    printf '%s\n' 'NEMOGCM rev.'     > "${FILE_NEMO}"
    printf '%s\n' 'XIOS rev.'        > "${FILE_XIOS}"
    printf '%s\n' 'Fortran compiler' > "${FILE_COMP}"
    printf '%s\n' 'MPI libs'         > "${FILE_MPIN}"
    printf '%s\n' 'NetCDF libs'      > "${FILE_NCDF}"
    printf '%s\n' 'Input files'      > "${FILE_INPT}"
    printf '%s\n' 'Real CPU time'    > "${FILE_CPUT}"
    printf '%s\n' 'Comments'         > "${FILE_COMM}"

    ## 'Failed' status with 'Unknown error' by default
    ## (presumably ST holds the failure status when this is called - confirm in caller)
    printf '%s\n' "${ST}"          >> "${FILE_STAT}"
    printf '%s\n' 'Unknown error'  >> "${FILE_RESU}"
}
35
36
## Append ${DATE}, reformatted by `date` as 'YYYY-MM-DD HH:MM ZONE' in UTC,
## to ${FILE_DATE}.
##
## Globals read: DATE, FILE_DATE
get_date() {
    local dat

    ## Declared separately so the status of `date` is not masked by `local`;
    ## DATE is quoted so that a value containing a space stays one operand
    dat=$( date -ud "${DATE}" +'%F %R %Z' )

    printf '%s\n' "${dat}" >> "${FILE_DATE}"
}
43
44
## Run svn on every NEMO directory and record the highest revision found,
## plus the XIOS revision.
##
## Globals read: NEMO_VERS, NEMO_ARCH, NEMO_CONF, NEMO_ENGI, NEMO_EAGR, NEMO_EIOI,
##               NEMO_EFCM, NEMO_TCMP, NEMO_TRBD, DIR_XIOS, FILE_XIOS, FILE_NEMO
## Outputs     : each NEMO directory name on stdout (followed by svn's output);
##               'XIOS <rev>' then 'NEMOGCM <rev>' appended to model.log;
##               an HTML changeset link appended to ${FILE_XIOS} and ${FILE_NEMO}
get_nemo_rev() {
    local dir rev_loc
    local rev=0
    local re_head='HEAD|up|update'
    local re_date='[{][0-9]{4}-[0-9]{2}-[0-9]{2}[}]'
    local re_rev='[0-9]+'
    local -a svn_cmd=( svn status )

    ## If -v|--version option has been set, modify svn command
    if   [[ ${NEMO_VERS} =~ ${re_head} ]]; then
        svn_cmd=( svn update -r HEAD )
    elif [[ ${NEMO_VERS} =~ ${re_date} ]]; then
        svn_cmd=( svn update -r "${BASH_REMATCH[0]}" )
    elif [[ ${NEMO_VERS} =~ ${re_rev} ]]; then
        svn_cmd=( svn update -r "${BASH_REMATCH[0]}" )
    fi

    for dir in "${NEMO_ARCH}" "${NEMO_CONF}" "${NEMO_ENGI}" \
               "${NEMO_EAGR}" "${NEMO_EIOI}" "${NEMO_EFCM}" \
               "${NEMO_TCMP}" "${NEMO_TRBD}"                \
               "${DIR_XIOS}"; do

        ## An unset directory is ignored
        [[ -n ${dir} ]] || continue

        ## For time being, just get revision number from XIOS
        ## NOTE(review): the revision is taken from the 9th line of `svn info`,
        ## whose layout may depend on the svn version - confirm
        if [[ ${dir} == "${DIR_XIOS}" ]]; then
            rev_loc=$( svn info "${dir}" | awk '(NR == 9) {print $NF}' )
            printf '%s\n' "XIOS ${rev_loc}" \
                >> model.log
            printf '%s\n' "<a href=\"https://forge.ipsl.jussieu.fr/ioserver/changeset/${rev_loc}\" target=\"_blank\">${rev_loc}</a>" \
                >> "${FILE_XIOS}"
            continue
        fi

        printf '%s\n' "${dir}"
        "${svn_cmd[@]}" "${dir}"

        ## Read the revision only after the svn command above has run
        rev_loc=$( svn info "${dir}" | awk '(NR == 9) {print $NF}' )

        ## Keep the highest revision; a non-numeric value is ignored instead of
        ## making the integer comparison fail
        if [[ ${rev_loc} =~ ^[0-9]+$ ]] && [ "${rev_loc}" -gt "${rev}" ]; then
            rev=${rev_loc}
        fi
    done

    printf '%s\n' "NEMOGCM ${rev}" \
        >> model.log
    printf '%s\n' "<a href=\"https://forge.ipsl.jussieu.fr/nemo/changeset/${rev}\" target=\"_blank\">${rev}</a>" \
        >> "${FILE_NEMO}"
}
86
87
## Record name and release of the CDO, compiler, MPI and NetCDF software.
##
## One 'name release' line per software is appended to model.log, then lines
## 4, 5 and 6 of model.log are appended to ${FILE_COMP}, ${FILE_MPIN} and
## ${FILE_NCDF} respectively.
##
## Globals read: CDO, COMPILER, MPI, NETCDF (each is used as a sed pattern),
##               IMOD, LOADEDMODULES, PATH, FILE_COMP, FILE_MPIN, FILE_NCDF
get_soft_rel() {
    local soft soft_rel
    local IFS=$' \t\n'
    local -a fields

    for soft in "${CDO}" "${COMPILER}" "${MPI}" "${NETCDF}"; do
        ## An unset software name is ignored
        [[ -n ${soft} ]] || continue
        soft_rel=''

        ## `sed -n ... p` prints only on a match: without it an unmatched name
        ## returned the whole module list / PATH as its "release"
        if [ "${IMOD:-0}" -eq 1 ]; then
            soft_rel=$( sed -n "s/.*${soft}\/\([0-9.a-z_]*\).*/\1/ip" <<< "${LOADEDMODULES}" )
        else
            soft_rel=$( sed -n "s/.*${soft}\([0-9.a-z_]*\).*/\1/ip"   <<< "${PATH}" )
        fi

        ## The compiler reports its own version
        if [[ ${soft} == "${COMPILER}" ]]; then
            soft_rel=$( "${soft}" --version | grep -m1 -oe '\<[0-9. ]*\>' )
        fi

        ## Cleaning characters string to display proper soft name:
        ## drop backslashes, then one trailing '/' or '-'
        soft=${soft//\\/}
        soft=${soft%[/-]}

        ## Collapse any whitespace/newlines so each software stays on one line
        read -r -d '' -a fields <<< "${soft} ${soft_rel}"
        printf '%s\n' "${fields[*]}" \
            >> model.log
    done

    sed -n 4p model.log \
        >> "${FILE_COMP}"
    sed -n 5p model.log \
        >> "${FILE_MPIN}"
    sed -n 6p model.log \
        >> "${FILE_NCDF}"
}
114
115
## List the input files into inputs_list.txt, then bring them into the
## current directory.
##
## With ${NEMO_TARF} set, the archive ${NEMO_FORC}/${NEMO_TARF} is listed and
## extracted; otherwise the files of ${NEMO_FORC} are listed and copied.
## Returns non-zero when the listing or the extraction/copy fails.
##
## Globals read: NEMO_FORC, NEMO_TARF
get_inputs() {
    local archive

    if [[ -z ${NEMO_TARF} ]]; then
        ## `command cp` bypasses any alias or function; a '\cp' kept in a string
        ## variable is run as the literal command name '\cp' and never worked
        ls "${NEMO_FORC}"/* > inputs_list.txt \
            && command cp "${NEMO_FORC}"/* . > /dev/null
    else
        archive="${NEMO_FORC}/${NEMO_TARF}"
        tar -tvf "${archive}" > inputs_list.txt \
            && tar -vxf "${archive}" > /dev/null
    fi
}
122
123
## Compare the input files of the current directory with their counterpart
## in ${REFE_DIR}.
##
## Files checked: inputs_list.txt, *namelist_*, *.xml, cpp_*
## Outputs      : `diff -q` message of each differing file, or 'Same', on stdout;
##                'Same' or 'Different' appended to ${FILE_INPT};
##                the list of differing files appended to temp_${FILE_COMM}
## Globals read : REFE_DIR, FILE_INPT, FILE_COMM
diff_inputs() {
    local dif file
    local files_list='' mesg='Same'

    for file in 'inputs_list.txt' *namelist_* *.xml cpp_*; do
        ## A pattern without match stays literal: there is nothing to compare,
        ## and it must not flag the inputs as different
        [[ -e ${file} ]] || continue

        ## No reference for this file: flagged as different, but not listed
        if [[ ! -e ${REFE_DIR}/${file} ]]; then
            mesg='Different'
            continue
        fi

        dif=$( diff -q "${file}" "${REFE_DIR}/${file}" )
        if [[ -n ${dif} ]]; then
            mesg='Different'
            printf '%s\n' "${dif}"
            files_list+="${file} "
        fi
    done

    if [[ ${mesg} == 'Same' ]]; then
        printf '%s\n' "${mesg}"
    fi
    printf '%s\n' "${mesg}" \
        >> "${FILE_INPT}"

    if [[ -n ${files_list} ]]; then
        printf '%s\n' "Inputs  : ${files_list}differ<br>" \
            >> "temp_${FILE_COMM}"
    fi
}
140
141
## Wait for job ${JOB_ID} to leave the scheduler listing, polling every 30 s.
##
## While the job is listed, a '#' rule and the output of ${JOB_INFO} are
## appended to computation.log. Once ${TIME_LIMI} seconds have elapsed the job
## is deleted with ${JOB_DELE}, TIME_LIMI is converted to whole hours and
## exported, and `get_out 6` is called.
##
## Globals read   : JOB_LIST, JOB_INFO, JOB_DELE, JOB_ID, TIME_LIMI
## Globals written: TIME_LIMI (on timeout only)
job_pending() {
    local outline time_elapsed=0 time_increment=30

    ## 100-character rule of '#'
    printf -v outline '%100s' ''
    outline=${outline// /#}

    sleep "${time_increment}"

    ## JOB_LIST, JOB_INFO and JOB_DELE hold a command possibly followed by
    ## options: they are left unquoted on purpose
    while [[ -n ${JOB_ID} ]]                                 \
          && ${JOB_LIST} | grep -- "${JOB_ID}" > /dev/null   \
          && [[ ${time_elapsed} -lt ${TIME_LIMI} ]]; do
        printf '\n%s\n' "${outline}" \
            >> computation.log
        ${JOB_INFO} "${JOB_ID}"      \
            >> computation.log
        sleep "${time_increment}"
        time_elapsed=$(( time_elapsed + time_increment ))
    done

    sleep "${time_increment}"

    ## '-ge' rather than '-eq': the counter moves in 30 s steps and would
    ## otherwise jump over a limit that is not a multiple of 30
    if [[ -n ${TIME_LIMI} && ${time_elapsed} -ge ${TIME_LIMI} ]]; then
        ${JOB_DELE} "${JOB_ID}" &> /dev/null
        ## Seconds to whole hours, as get_out prints the limit followed by 'h'
        export TIME_LIMI=$(( TIME_LIMI / 3600 ))
        get_out 6
    fi
}
164
165
## Compare ocean.output and every *.stat file with the reference in ${REFE_DIR}.
##
## A missing reference file sets ST='FAILED' and calls `get_out 7`.
## A differing file sets ST='FAILED' and is listed in temp_${FILE_COMM}.
##
## Globals read   : REFE_DIR, FILE_COMM
## Globals written: ST (exported, on failure only)
diff_results() {
    local file
    local files_list=''

    for file in 'ocean.output' *.stat; do
        if [[ ! -e ${REFE_DIR}/${file} ]]; then
            export ST='FAILED'
            get_out 7
        fi

        ## `diff -q` prints its own one-line message when the files differ
        if ! diff -q "${file}" "${REFE_DIR}/${file}"; then
            export ST='FAILED'
            files_list+="${file} "
        fi
    done

    if [[ -n ${files_list} ]]; then
        printf '%s\n' "Results : ${files_list}differ<br>" \
            >> "temp_${FILE_COMM}"
    fi
}
178
179
## Rebuild the per-domain restart files and compare those of the benchmark
## time step with the reference ones.
##
## - ${REFE_DIR}/time.step gives the time step to compare (exported as TIME_STEP);
##   without it ST='FAILED' and `get_out 7` is called.
## - Restart files are looked up below the current directory; when none is
##   found ST is set to 'FAILED' and nothing else is done.
## - For each time step and component, several per-domain files are merged with
##   ${NEMO_TRBD}/rebuild_nemo; no file at all calls `get_out 8`.
## - At the benchmark time step the file is compared with `${CDO} diffn`; a
##   missing reference calls `get_out 7`, differing records set ST='FAILED'.
##
## Globals read   : REFE_DIR, NEMO_TRBD, NPROC, CDO, FILE_COMM
## Globals written: TIME_STEP, ST (both exported)
diff_restart() {
    local base_name comp dif file found list_comp list_tmsp nb_dom tmsp
    local files_list='' dift=0
    local restart_regex='.*_restart.*[0-9]\.nc'
    local IFS=$' \t\n'
    local -a words

    if [[ ! -e ${REFE_DIR}/time.step ]]; then
        export ST='FAILED'
        get_out 7
    fi
    ## The character class is quoted so the shell cannot expand it as a glob
    TIME_STEP=$( tr -d '[:space:]' < "${REFE_DIR}/time.step" )
    export TIME_STEP
    printf '%s\n' "Last time step of benchmark run: ${TIME_STEP}"

    ## Find all restart files to rebuild (single lookup, reused below)
    found=$( find . -regex "${restart_regex}" )
    if [[ -z ${found} ]]; then
        export ST='FAILED'
        return
    fi

    base_name=$( sed 's#^\./\(.*\)_[0-9]*_restart.*#\1#'       <<< "${found}" | sort -u )
    list_comp=$( sed 's#^.*\(restart[a-z_]*\)_[0-9].*\.nc#\1#' <<< "${found}" | sort -u )
    list_tmsp=$( sed 's#^.*\([0-9]\{8\}\)_restart.*#\1#'       <<< "${found}" | sort -u )

    ## Both lists hold one item per line: word splitting is intended here
    for tmsp in ${list_tmsp}; do
        for comp in ${list_comp}; do
            file=${base_name}_${tmsp}_${comp}
            nb_dom=$( find . -name "${file}_[0-9]*.nc" | wc -l )
            nb_dom=$(( nb_dom ))

            if   (( nb_dom > 1 )); then
                ## Per-domain files are removed only after a successful rebuild
                if "${NEMO_TRBD}/rebuild_nemo" -t "${NPROC}" "${file}" "${nb_dom}" > /dev/null; then
                    rm -f -- "${file}"_[0-9]*.nc
                fi
            elif (( nb_dom == 0 )); then
                export ST='FAILED'
                get_out 8
            fi

            ## Compare restart files at same time step
            ## ('[ -eq ]' reads the zero-padded time step as a decimal number)
            [ "${tmsp}" -eq "${TIME_STEP}" ] || continue

            if [[ ! -e ${REFE_DIR}/${file}.nc ]]; then
                export ST='FAILED'
                get_out 7
            fi

            ## UNIX `cmp` not suitable (timestamp in .nc file)
            ## CDO is left unquoted in case it carries options
            dif=$( ${CDO} diffn "${file}.nc" "${REFE_DIR}/${file}.nc" 2> /dev/null \
                   | awk '/records/ {print $0}' | sed '2 s/^/,/' | tr -d '\n'       )

            ## First word of the report is the number of differing records
            words=()
            read -r -a words <<< "${dif}"

            if [[ -n ${dif} && ${words[0]} -ne 0 ]]; then
                export ST='FAILED'
                files_list+="${file} "
                dift=$(( dift + words[0] ))
                printf '%s\n' "${file}.nc: ${words[*]}"
            fi
        done
    done

    if (( dift != 0 )); then
        printf '%s\n' "Restarts: ${files_list}${dift} record(s) differ<br>" \
            | tee -a "temp_${FILE_COMM}"
    else
        printf '%s\n' 'No restart differ'
    fi
}
244
245
## Run the command held in ${JOB_TIME} and report its output, folded onto a
## single line, on stdout and at the end of ${FILE_CPUT}.
##
## Globals read: JOB_TIME, FILE_CPUT
get_cpu_time() {
    local real_cpu_time
    local IFS=$' \t\n'
    local -a fields

    ## NOTE(review): JOB_TIME is evaluated as shell code - it must only come
    ## from trusted configuration. It is quoted so the command is evaluated
    ## exactly as written (no globbing or re-splitting beforehand).
    real_cpu_time=$( eval "${JOB_TIME}" )

    ## Fold whitespace and newlines so the value fits on one line
    read -r -d '' -a fields <<< "${real_cpu_time}"
    printf '%s\n' "${fields[*]}" | tee -a "${FILE_CPUT}"
}
251
252
## Extract the first message of a given kind from ocean.output.
##
##   $1 - message marker, e.g. 'E R R O R' or 'W A R N I N G'
##
## For 'E R R O R' the marker may be anywhere in the line and 4 following lines
## are kept; time.step, when present, is also exported as TIME_STEP.
## For any other marker the line must start with ' <marker>' and 2 following
## lines are kept.
## The message is printed on stdout with whitespace folded, and appended as is,
## followed by '<br>', to temp_${FILE_COMM}.
##
## Globals read   : FILE_COMM
## Globals written: TIME_STEP (exported, 'E R R O R' only)
comments() {
    local state=$1
    local line=''
    local IFS=$' \t\n'
    local -a words=()

    if [[ ${state} == 'E R R O R' ]]; then
        if [[ -e time.step ]]; then
            TIME_STEP=$( tr -d ' ' < time.step )
            export TIME_STEP
        fi
        if [[ -e ocean.output ]]; then
            line=$( grep -m1 -A4 "${state}"   ocean.output | tr -d '\n' )
        fi
    else
        if [[ -e ocean.output ]]; then
            line=$( grep -m1 -A2 "^ ${state}" ocean.output | tr -d '\n' )
        fi
    fi

    ## Fold whitespace without letting the shell expand '*' and friends
    read -r -d '' -a words <<< "${line}"
    printf '%s\n' "${words[*]}"

    ## The message is data, never a printf format ('%' or '\' may appear in it)
    printf '%s<br>' "${line}" \
        >> "temp_${FILE_COMM}"
}
267
268
## Build the trusting summary.
##
## The pending comments of temp_${FILE_COMM}, joined on one line and without
## their final '<br>', are appended to ${FILE_COMM}; then all mesg_*.txt files
## are pasted side by side with ';' as separator, on stdout and into ${FILE_TRUS}.
##
## Globals read: FILE_COMM, FILE_TRUS
mesg_make() {
    ## Format comments for web
    if [[ -e temp_${FILE_COMM} ]]; then
        tr -d '\n' < "temp_${FILE_COMM}" | sed 's/<br>$//' \
            >> "${FILE_COMM}"
    fi

    paste -d ';' mesg_*.txt | tee "${FILE_TRUS}"
}
276
277
## Publish the trusting summary when ${PUBLISH} equals 1.
##
## ${FILE_TRUS} is appended to ${REFE_DIR}/trusting_${PATTERNAME}.txt: the whole
## file when that history does not exist yet, its last line only otherwise.
## When ${EMAIL} is set and ${ST} is 'FAILED', a report is written to
## trusting.mail and sent with `mail`.
##
## Globals read: PUBLISH, REFE_DIR, PATTERNAME, FILE_TRUS, EMAIL, ST, ERR, USER,
##               REFE_CONF, TEST_DIR, FILE_ARCH, NEMO_HPCC, NEMO_BRAN
mesg_publish() {
    local history="${REFE_DIR}/trusting_${PATTERNAME}.txt"

    ## An unset PUBLISH counts as 0
    [ "${PUBLISH:-0}" -eq 1 ] || return 0

    if [[ -f ${history} ]]; then
        tail -n 1 "${FILE_TRUS}" >> "${history}"
    else
        cat "${FILE_TRUS}"       >> "${history}"
    fi

    ## Send mail only when FAILED
    if [[ -n ${EMAIL} && ${ST} == 'FAILED' ]]; then
        cat <<END_MAIL \
            > trusting.mail
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX

Dear $USER,

The trusting sequence for ${REFE_CONF} has failed.
Directory: ${TEST_DIR}
Archive created: ${FILE_ARCH} in ${REFE_DIR}

XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX

$( cat "${TEST_DIR}/${FILE_TRUS}" )
END_MAIL
        ## EMAIL is left unquoted in case it lists several recipients
        mail -s "[NEMO Trusting ${NEMO_HPCC} ${REFE_CONF} ${NEMO_BRAN}] ${ST} ${ERR}" ${EMAIL} \
            <  trusting.mail
    fi
}
310
311
## Close the trusting sequence: record status and result, archive the tested
## configuration on failure, collect comments, publish the summary and exit.
##
##   $1 - outcome code; when ${ST} is 'FAILED' it is translated into a message:
##        1 XIOS compilation, 2 NEMO compilation, 3 missing inputs,
##        4 job submission, 5 crash, 6 time limit, 7 missing previous outputs,
##        8 new outputs differ, anything else 'Unknown error'
##
## Globals read   : TEST_DIR, ST, TIME_STEP, TIME_LIMI, FILE_STAT, FILE_RESU,
##                  PUBLISH, DEBUG, FILE_ARCH, REFE_DIR, NEMO_HOME, TEST_CONF
## Globals written: ERR (exported when ${ST} is 'FAILED')
## Always terminates the shell with status 0.
get_out() {
    ## Keep the raw code: ERR is replaced below by its message, and the choice
    ## between error and warning comments still needs the number
    local code=$1
    local esc_st esc_err
    ERR=$1

    cd "${TEST_DIR}" || printf '%s\n' "get_out: cannot change to '${TEST_DIR}'" >&2

    if [[ ${ST} == 'FAILED' ]]; then

        ## Error identification
        case "${code}" in
            ## Compilation
            1) ERR='XIOS compilation failed'              ;;
            2) ERR='NEMO compilation failed'              ;;
            ## Submission
            3) ERR='Missing input files'                  ;;
            4) ERR='Job submission error'                 ;;
            ## Running
            5) ERR="Crashed at time step ${TIME_STEP}"    ;;
            6) ERR="Exceeded time limit ${TIME_LIMI}h"    ;;
            ## Results
            7) ERR='Missing previous outputs'             ;;
            8) ERR='New outputs differ'                   ;;
            ## Undefined
            *) ERR='Unknown error'                        ;;
        esac

        export ERR
    fi

    ## Second line of both files is replaced; '/', '&' and '\' are escaped so
    ## the text is taken literally by sed
    esc_st=$(  printf '%s' "${ST}"  | sed 's|[/&\\]|\\&|g' )
    esc_err=$( printf '%s' "${ERR}" | sed 's|[/&\\]|\\&|g' )
    sed -i "2 s/.*/${esc_st}/" "${FILE_STAT}" && sed -i "2 s/.*/${esc_err}/" "${FILE_RESU}"

    ## Save tested configuration if trusting failed
    if [[ ${ST} == 'FAILED' && ${PUBLISH} -eq 1 && ${DEBUG} -eq 0 ]]; then
        printf '%s\n' "Creating archive ${FILE_ARCH} under ${REFE_DIR}"
        tar -czf "${REFE_DIR}/${FILE_ARCH}" * "${NEMO_HOME}/CONFIG/${TEST_CONF}/MY_SRC" \
                                              "${NEMO_HOME}/CONFIG/${TEST_CONF}/WORK"
    fi

    print_step 'Comments'
    if [[ ${code} == 5 ]]; then
        comments 'E R R O R'
    else
        comments 'W A R N I N G'
    fi

    print_step 'Trusting outcome'; mesg_make; mesg_publish

    exit 0
}
Note: See TracBrowser for help on using the repository browser.