New URL for NEMO forge!   http://forge.nemo-ocean.eu

Since March 2022, along with the NEMO 4.2 release, code development has moved to a self-hosted GitLab.
This forge is now archived and remains online for historical reference.
trusting_func.sh in branches/2015/dev_r5092_CNRS18_TRUST/NEMOGCM/TRUST/inc – NEMO

source: branches/2015/dev_r5092_CNRS18_TRUST/NEMOGCM/TRUST/inc/trusting_func.sh @ 8808

Last change on this file since 8808 was 8808, checked in by nicolasmartin, 6 years ago

Continuation of the global refactoring of the tool, in particular an intelligible namelist of variables

  • Property eol-style set to native
  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Rev URL
File size: 13.0 KB
Line 
1#!/bin/bash
2
3
4## Messenger filenames
5file_date=mesg_01_date.txt  ; file_rslt=mesg_02_result.txt
6file_stat=mesg_03_status.txt; file_nemo=mesg_04_nemo.txt
7file_xios=mesg_05_xios.txt  ; file_cmpf=mesg_06_compiler.txt
8file_lmpi=mesg_07_mpi.txt   ; file_ncdf=mesg_08_netcdf.txt
9file_inpt=mesg_09_inputs.txt; file_time=mesg_10_time.txt
10file_memy=mesg_11_memory.txt; file_note=mesg_12_comments.txt
11
12
13## Functions in order of use
14print_step() {
15    local char_nb=$( echo "$1" | wc -c )
16    local outline=$( printf "%${char_nb}s" )
17
18    printf "\nStep.....\n%s\n%s\n" "$1" ${outline// /-}
19}
20
21init_files() {
22    echo 'Date'                > ${file_date}
23    echo 'Result'              > ${file_rslt}
24    echo 'Status'              > ${file_stat}
25    echo 'NEMOGCM rev.'        > ${file_nemo}
26    echo 'XIOS rev.'           > ${file_xios}
27    echo 'Fortran compiler'    > ${file_cmpf}
28    echo 'MPI libs'            > ${file_lmpi}
29    echo 'NetCDF libs'         > ${file_ncdf}
30    echo 'Input files'         > ${file_inpt}
31    echo 'Elapsed time'        > ${file_time}
32    echo 'Memory (Phy./Virt.)' > ${file_memy}
33    echo 'Comments'            > ${file_note}
34
35    ## 'Failed' status with 'Unknown error' by default
36    echo ${TRUST_FLAG_RESULT} \
37   >> ${file_rslt}
38    echo 'Unknown error' \
39   >> ${file_stat}
40}
41
42get_date() {
43    ## UTC time zone for timestamping
44    local dat=$( date -ud "${TRUST_TEST_DATE}" +"%F %R %Z" )
45
46    echo $dat           \
47   >> ${file_date}
48}
49
50get_nemo_rev() {
51    local dir rev_loc
52    local rev=0
53
54    ## Loop on essential NEMO directories
55    for dir in ${TRUST_SVN_CO} ${TRUST_DIR_XIOS}; do
56
57   ## For time being, just get revision from XIOS with no action on directory
58   if [ $dir == ${TRUST_DIR_XIOS} ]; then
59       rev_loc=$( svn info $dir | awk '/Last Changed Rev/ {print $NF}' )
60       echo 'XIOS '${rev_loc} \
61      >> model.log
62       echo "<a href=\"https://forge.ipsl.jussieu.fr/ioserver/changeset/${rev_loc}\" target=\"_blank\"> \
63        ${rev_loc}                                                                                 \
64        </a>"                                                                                      \
65      >> ${file_xios}
66       continue
67   fi
68
69   echo $dir && ${TRUST_SVN_ACTION} ${TRUST_DIR_NEMOGCM}/$dir
70   rev_loc=$( svn info ${TRUST_DIR_NEMOGCM}/$dir  \
71              | awk '/Last Changed Rev/ {print $NF}'   )
72
73   ## Keep last rev. nb
74   [ ${rev_loc} -gt $rev ] && rev=${rev_loc}
75    done
76
77    echo 'NEMOGCM '$rev \
78   >> model.log
79    echo "<a href=\"https://forge.ipsl.jussieu.fr/nemo/changeset/$rev\" target=\"_blank\"> \
80     $rev                                                                             \
81     </a> "                                                                           \
82   >> ${file_nemo}
83}
84
85get_soft_rel() {
86    local ver str
87
88    ## Sourcing environment
89    . ${TRUST_JOB_ENV}
90
91    for str in ${TRUST_COMPILE_FORTRAN}                         \
92          ${TRUST_COMPILE_MPI}     ${TRUST_COMPILE_NETCDF} \
93          ${TRUST_CDO}                                      ; do
94   [ -z "$str" ] && continue
95   ver=''
96
97   ## Extract version number after searching pattern in PATH env. variable
98   ver=$( echo $PATH | sed "s|.*\($str[0-9.]*\).*|\1|" )
99
100   ## option --version would work for main Fortran compilers and CDO
101   if [[ $str =~ ${TRUST_COMPILE_FORTRAN}|${TRUST_CDO} ]]; then
102       ver=$( $str --version 2>&1 | grep -m1 -oe '\<[0-9. ]*\>' \
103         | xargs echo $str                                   )
104   fi
105
106   ## Cleaning characters string to display proper soft name
107   #str=$( echo $str | sed 's|[/-]||g'  )
108   ver=$( echo $ver | sed 's|[/-]| |g' )
109
110   echo $ver \
111       >> model.log
112    done
113
114    sed -n 3p model.log \
115   >> ${file_cmpf}
116    sed -n 4p model.log \
117   >> ${file_lmpi}
118    sed -n 5p model.log \
119   >> ${file_ncdf}
120}
121
122get_inputs() {
123    # List archive content & extract it by default
124    local inputs_list=$( eval "
125   for archive in ${TRUST_CFG_FORC}; do
126       tar -tvf ${TRUST_DIR_FORC}/\$archive >> inputs_list.txt;
127   done
128   " )
129    local inputs_get=$( eval "
130   for archive in ${TRUST_CFG_FORC}; do
131       tar -vxf ${TRUST_DIR_FORC}/\$archive  >       /dev/null;
132   done
133   " )
134
135    ## List & copy files without archive
136    if [ -z "${TRUST_CFG_FORC}" ]; then
137   inputs_list=" ls -lh ${TRUST_DIR_FORC}/* >> inputs_list.txt"
138   inputs_get=" \cp     ${TRUST_DIR_FORC}/* .                 "
139    fi
140
141    ${inputs_list}; ${inputs_get}
142
143    if [ $( find -name '*.gz' -print -quit ) ]; then
144   find . -name '*.gz' -exec gzip -d {} \;
145    fi
146}
147
148diff_inputs() {
149    local dif file
150    local files_list='' mesg='Same' 
151
152    ## Simple diff
153    for file in 'inputs_list.txt' *namelist_* *.xml cpp_*; do
154   dif=''
155
156   ## Continue even if input file is not in here (see after)
157   if [ -e ${TRUST_DIR_STORE}/$file ]; then
158       dif=$( diff -q $file ${TRUST_DIR_STORE}/$file )
159   else
160       dif=0
161   fi
162
163   ## Pass over useless file omission in benckmark directory
164   if [[ -n "$dif" && "$dif" != '0' ]]; then
165       mesg='Different'
166       echo $dif
167       files_list+=$file' '
168   fi
169
170    done
171
172    [ $mesg == 'Same' ] && echo $mesg
173    echo $mesg          \
174   >> ${file_inpt}
175
176    ## List different files for web comment
177    [ -n "${files_list}" ] && echo 'Inputs  : '${files_list}'differ<br>' \
178   >> temp_${file_note}
179}
180
181job_pending() {
182    local outline=$( printf "%100s" ) time_elapsed=0 time_increment=30
183
184    sleep ${time_increment}
185
186    ## Append a log file while pending
187    while [[ $( eval ${TRUST_JOB_STATE} )                \
188        && ${time_elapsed} -lt ${TRUST_JOB_TIMEOUT}   ]]; do
189   printf "\n%s\n" ${outline// /#} \
190       >> computation.log
191   [ -n "${TRUST_JOB_INFO}" ] && eval ${TRUST_JOB_INFO} \
192       >> computation.log
193   sleep ${time_increment}
194   time_elapsed=$(( ${time_elapsed} + ${time_increment} ))
195    done
196
197    sleep ${time_increment}
198
199    ## Kill remaining job & stop the test if it's too long
200    if [ ${time_elapsed} -eq ${TRUST_JOB_TIMEOUT} ]; then
201   eval ${TRUST_JOB_KILL} &> /dev/null
202   get_out 6
203    fi
204
205}
206
207diff_results() {
208    local file
209    local files_list='' mesg='Same'
210
211    ## Simple diff
212    for file in 'ocean.output' *.stat; do
213   ## Stop if no minimal benchmark files (ocean.output, eventual stat files)
214   if [ ! -e ${TRUST_DIR_STORE}/$file ]; then
215       TRUST_FLAG_RESULT='FAILED'
216       get_out 7
217   fi
218
219   diff -q $file ${TRUST_DIR_STORE}/$file
220
221   ## Continue even if it differs
222   if [ $? -ne 0 ]; then
223       TRUST_FLAG_RESULT='FAILED'
224       mesg='Different'
225       files_list+=$file' '
226   fi
227
228    done
229
230    [ $mesg == 'Same' ] && echo $mesg
231
232    ## List different files for web comment
233    [ -n "${files_list}" ] && echo 'Results : '${files_list}'differ<br>' \
234   >> temp_${file_note}
235}
236
237diff_restarts() {
238    local dif filebase filebases ndomain out
239    local files_list='' dif_sum='0' #bcmk='false'
240
241    ## Find all restart files to rebuild
242    if [ $( find -regex ".*_restart.*[0-9]\.nc" -print -quit ) ]; then
243        ################################################################
244   ## Think to set the confgiguration name in the 'namelist_cfg' ##
245   ################################################################
246   filebases=$( find -regextype sed -regex ".*${TRUST_CFG_NEW}.*_[0-9]\{4\}\.nc" \
247                | sed 's/\(.*\)_.*/\1/' | sort -u                                  )
248
249   for filebase in $filebases; do
250
251       ndomain=$( find -regex ".*${filebase}_[0-9]*.nc" \
252             | wc -l | awk '{print $1}'              )
253
254       [ $ndomain -eq 0 ] && TRUST_FLAG_RESULT='FAILED' && get_out 8
255
256       ${TRUST_DIR_NEMOGCM}/TOOLS/REBUILD_NEMO/rebuild_nemo \
257      -t ${TRUST_COMPILE_NPROC} $filebase $ndomain       \
258      > /dev/null
259
260       ## Possibility of remaining decomposed restarts (even after rebuild)
261       [ $? -eq 0 ] && rm -f ${filebase}_[0-9]*.nc \
262                > /dev/null
263
264            ## Stop if no benchmark files (restart file)
265       if [ -e ${TRUST_DIR_STORE}/$filebase.nc ]; then
266
267      #bcmk='true'
268      cdo diffn $filebase.nc ${TRUST_DIR_STORE}/$filebase.nc \
269          > cdo_diff.out 2> /dev/null
270
271      ## Identical if cdo_diff.out is zero size
272      [ ! -s cdo_diff.out ] && continue
273
274      dif=$( grep -om1 '[0-9]* of [0-9]* records' cdo_diff.out )
275
276      if [ -n "$dif" ]; then
277          export TRUST_FLAG_RESULT='FAILED'
278          files_list+=$filebase' ' && echo $filebase'.nc: '$dif
279          let dif_sum+=$( echo $dif | sed '|^\([0-9]*\).*|\1|' )
280      fi
281
282       fi
283
284   done
285
286        ## No benchmark files
287   #if   [ $bcmk == 'false' ]; then
288   #    TRUST_FLAG_RESULT='FAILED'
289   #    get_out 7
290   #fi
291
292        ## List modified restart(s) for web comment with sum of differences
293   if [ ${dif_sum} -ne 0 ]; then
294       echo 'Restarts: '${files_list}${dif_sum}' record(s) differ<br>' \
295      >> temp_${file_note}
296   else
297       echo 'Same'
298   fi
299
300    fi
301
302}
303
304get_time() {
305    [ -z "${TRUST_JOB_TIME}" ] && return
306
307    ## Interest for checking unusual time computation
308    local time_cpu=$( eval ${TRUST_JOB_TIME} )
309
310    printf "Elapsed time: "
311    echo ${time_cpu} | tee -a ${file_time}
312}
313
314get_memy() {
315    [[ -z "${TRUST_JOB_RAM_P}" && -z "${TRUST_JOB_RAM_V}" ]] && return
316
317    ## Interest for checking unusual memory usage
318    local memory_pmax=$( eval ${TRUST_JOB_RAM_P} )
319    local memory_vmax=$( eval ${TRUST_JOB_RAM_V} )
320
321    printf "Memory max usage (physical/virtual): "
322    echo ${memory_pmax}' / '${memory_vmax} | tee -a ${file_memy}
323}
324
325comments() {
326    local opat
327    local line='' state=$1
328
329    if [ -e ocean.output ]; then
330        ## 'W A R N I N G' pattern by default
331   opat="-A2 \"^ $state\""
332   [ "$state" == 'E R R O R' ] && opat="-A4 \"$state\""
333
334        ## Select first occurence for web comment
335   line=$( eval grep -m1 $opat ocean.output | tr -d '\n' )
336    fi
337
338    [ -n "$line" ] && ( echo $line; printf "$line<br>" \
339   >> temp_${file_note} )
340}
341
342log_make() {
343    ## Format comments for web
344    if [ -e temp_${file_note} ]; then
345   cat temp_${file_note} | tr -d '\n' | sed 's/<br>$//' \
346       >> ${file_note}
347    fi
348
349    ## Construct txt file with all messenger files
350    paste -d ';' mesg_*.txt | tee ${TRUST_TEST_SUMMARY}
351}
352
353prod_publish() {
354    local cmd
355    local rev=$( awk '/NEMOGCM/ {print $NF}' model.log )
356
357    ## Production mode (-p|--prod)
358    if [ ${TRUST_FLAG_PROD} -eq 1 ]; then
359
360   ## Create or append trusting logfile
361   if [ -f ${TRUST_TEST_LOG} ]; then cmd='tail -1'; else cmd='cat'; fi
362
363   $cmd ${TRUST_TEST_SUMMARY}      \
364       >> ${TRUST_TEST_LOG}
365
366        ## Send mail only when FAILED
367   if [[ ! -z "${TRUST_TEST_MAILING}" \
368         && ${TRUST_FLAG_RESULT} == 'FAILED'   ]]; then
369
370       ## Content
371       cat <<END_MAIL      \
372      > trusting.mail
373Dear all,
374
375
376The following trusting sequence has not completed successfully:
377
378Testing configuration ${TRUST_CFG_NEW} based on ${TRUST_CFG_REF}.
379User installation ${TRUST_MAIN_USER}
380HPC environment ${TRUST_MAIN_HPCC}
381
382Here is the running environment summary:
383`cat model.log`
384
385For more details, look into the testing folder at:
386${TRUST_DIR_SCRATCH}
387An archive is also available to share the questionable configuration:
388${TRUST_DIR_STORE}/${TRUST_TEST_BACKUP}
389
390END_MAIL
391
392       ## Send with detailed subject
393       mail -s "[NEMO Trusting][${TRUST_CFG_REF}][${TRUST_SVN_BRANCH}] \
394           ${TRUST_FLAG_RESULT} ${TRUST_FLAG_ERROR}"               \
395       ${TRUST_TEST_MAILING}                                \
396      <  trusting.mail
397   fi
398
399    fi
400}
401
402get_out() {
403    local time_step=0
404
405    TRUST_FLAG_ERROR=$1
406
407    printf "\n\nEnd of test\n"
408
409    ## In case of compilation error
410    cd ${TRUST_DIR_SCRATCH}
411
412    if [ ${TRUST_FLAG_RESULT} == 'FAILED' ]; then
413   echo 'Failure'
414
415        ## Error identification
416   case ${TRUST_FLAG_ERROR} in
417            ## Compilation
418       '1') TRUST_FLAG_ERROR='XIOS compilation failed' ;;
419       '2') TRUST_FLAG_ERROR='NEMO compilation failed' ;;
420       ## Submission
421       '3') TRUST_FLAG_ERROR='Missing input files'     ;;
422       '4') TRUST_FLAG_ERROR='Job submission error'    ;;
423       ## Computing
424       '5') TRUST_FLAG_ERROR='Crashed at time step'    ;;
425       '6') TRUST_FLAG_ERROR='Exceeded time limit'     ;;
426       ## Results
427       '7') TRUST_FLAG_ERROR='Missing previous outputs';;
428       '8') TRUST_FLAG_ERROR='New outputs differ'      ;;
429       ## Other
430       '*') TRUST_FLAG_ERROR='Unknown error'           ;;
431   esac
432
433    else
434   echo 'Success' && TRUST_FLAG_ERROR='Code is reliable'
435    fi
436
437    ## Eventual comments from ocean.output
438    if [ "${TRUST_FLAG_ERROR}" == 'Crashed at time step' ]; then
439   comments 'E R R O R'
440   [ -e time.step ] && time_step=$( cat time.step )
441   TRUST_FLAG_ERROR+=' '$time_step
442    else
443   comments 'W A R N I N G'
444
445   if [ "${TRUST_FLAG_ERROR}" == 'Exceeded time limit' ]; then
446       TRUST_FLAG_ERROR+=' '$(( ${TRUST_JOB_TIMEOUT}/3600 ))'h'
447   fi
448
449    fi
450
451    ## Last messenger files
452    sed -i "2 s/.*/$TRUST_RESULT/"     ${file_rslt}
453    sed -i "2 s/.*/$TRUST_FLAG_ERROR/" ${file_stat}
454
455    ## Save tested configuration if trusting failed in production mode (-p|--prod)
456    if [[ ${TRUST_FLAG_RESULT} == 'FAILED' && ${TRUST_FLAG_PROD} -eq 1 ]]; then
457   echo 'Creating archive '${TRUST_TEST_BACKUP}' under '${TRUST_DIR_STORE}
458   tar -czf ${TRUST_DIR_STORE}/${TRUST_TEST_BACKUP}                 * \
459       -C   ${TRUST_DIR_NEMOGCM}/CONFIG/${TRUST_CFG_NEW}/MY_SRC . \
460       -C   ${TRUST_DIR_NEMOGCM}/CONFIG/${TRUST_CFG_NEW}          \
461            cpp_${TRUST_CFG_NEW}.fcm
462    fi
463
464    ## Logfile construct & eventual sending of notification email
465    printf "\nTrusting digest:\n----------------\n"
466    log_make
467    prod_publish
468
469    exit 0
470}
Note: See TracBrowser for help on using the repository browser.