New URL for NEMO forge!   http://forge.nemo-ocean.eu

Since March 2022, along with the NEMO 4.2 release, code development has moved to a self-hosted GitLab.
This forge is now archived and remains online for historical reference.
trusting_func.sh in branches/2015/dev_r5092_CNRS18_TRUST/NEMOGCM/TRUST/inc – NEMO

source: branches/2015/dev_r5092_CNRS18_TRUST/NEMOGCM/TRUST/inc/trusting_func.sh @ 8818

Last change on this file since 8818 was 8818, checked in by nicolasmartin, 6 years ago

Continuation of global refactoring of the tool, set cfg files mime property and modify trusting.env trying to handle default settings

  • Property eol-style set to native
  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Rev URL
File size: 13.4 KB
Line 
#!/bin/bash


## Messenger filenames
## Each file holds a header line followed by one value; log_make() pastes all
## 'mesg_*.txt' files side by side (';' separated) to build the test summary.
file_date=mesg_01_date.txt
file_rslt=mesg_02_result.txt
file_stat=mesg_03_status.txt
file_nemo=mesg_04_nemo.txt
file_xios=mesg_05_xios.txt
file_cmpf=mesg_06_compiler.txt
file_lmpi=mesg_07_mpi.txt
file_ncdf=mesg_08_netcdf.txt
file_inpt=mesg_09_inputs.txt
file_time=mesg_10_time.txt
file_memy=mesg_11_memory.txt
file_note=mesg_12_comments.txt
11
12
13## Functions in order of use
#######################################
# Print a step banner: the title underlined with dashes.
# Arguments: $1 - step title
# Outputs:   banner on stdout
#######################################
print_step() {
    local title=$1 outline

    ## One dash per character of the title ('wc -c' used to count the
    ## trailing newline too, making the underline one dash too long)
    printf -v outline '%*s' "${#title}" ''

    printf "\nStep.....\n%s\n%s\n" "${title}" "${outline// /-}"
}
20
#######################################
# (Re)create every messenger file with its header line, then preset the
# result and status entries.
# Globals:   file_* (read), TRUST_FLAG_RESULT (read)
# Outputs:   overwrites the twelve messenger files in the current directory
#######################################
init_files() {
    printf '%s\n' 'Date'                > "${file_date}"
    printf '%s\n' 'Result'              > "${file_rslt}"
    printf '%s\n' 'Status'              > "${file_stat}"
    printf '%s\n' 'NEMOGCM rev.'        > "${file_nemo}"
    printf '%s\n' 'XIOS rev.'           > "${file_xios}"
    printf '%s\n' 'Fortran compiler'    > "${file_cmpf}"
    printf '%s\n' 'MPI libs'            > "${file_lmpi}"
    printf '%s\n' 'NetCDF libs'         > "${file_ncdf}"
    printf '%s\n' 'Input files'         > "${file_inpt}"
    printf '%s\n' 'Elapsed time'        > "${file_time}"
    printf '%s\n' 'Memory (Phy./Virt.)' > "${file_memy}"
    printf '%s\n' 'Comments'            > "${file_note}"

    ## Current result flag with 'Unknown error' status by default;
    ## get_out() rewrites these second lines at the end of the test
    printf '%s\n' "${TRUST_FLAG_RESULT}" >> "${file_rslt}"
    printf '%s\n' 'Unknown error'        >> "${file_stat}"
}
41
#######################################
# Append the test date, converted to UTC, to the date messenger file.
# Globals:   TRUST_TEST_DATE (read), file_date (read)
# Outputs:   one line "YYYY-MM-DD HH:MM UTC" appended to ${file_date}
#######################################
get_date() {
    local dat

    ## UTC time zone for timestamping
    dat=$( date -u -d "${TRUST_TEST_DATE}" +"%F %R %Z" )

    ## A line is always appended (empty if 'date' failed) so that the
    ## messenger files keep the same number of rows
    printf '%s\n' "${dat}" >> "${file_date}"
}
49
#######################################
# Run the configured SVN action on each NEMO directory and record the
# highest 'Last Changed Rev' found, plus the XIOS revision.
# Globals:   TRUST_SVN_CO, TRUST_IO_XIOS, TRUST_SVN_ACTION,
#            TRUST_DIR_NEMOGCM, file_xios, file_nemo (all read)
# Outputs:   directory names on stdout; 'XIOS <rev>' and 'NEMOGCM <rev>'
#            appended to model.log; changeset links appended to
#            ${file_xios} and ${file_nemo}
#######################################
get_nemo_rev() {
    local dir rev_loc
    local rev=0

    ## Loop on essential NEMO directories (lists are split on whitespace)
    for dir in ${TRUST_SVN_CO} ${TRUST_IO_XIOS}; do

        ## For time being, just get revision from XIOS with no action on directory
        if [ "${dir}" == "${TRUST_IO_XIOS}" ]; then
            rev_loc=$( svn info "${dir}" | awk '/Last Changed Rev/ {print $NF}' )
            echo "XIOS ${rev_loc}" \
                >> model.log
            ## The redirection must be part of the echo command: it used to
            ## sit on its own line, so the link never reached the file
            echo "<a href=\"https://forge.ipsl.jussieu.fr/ioserver/changeset/${rev_loc}\" target=\"_blank\">${rev_loc}</a>" \
                >> "${file_xios}"
            continue
        fi

        ## TRUST_SVN_ACTION is a command line: left unquoted to be split
        echo "${dir}" && ${TRUST_SVN_ACTION} "${TRUST_DIR_NEMOGCM}/${dir}"
        rev_loc=$( svn info "${TRUST_DIR_NEMOGCM}/${dir}" \
                   | awk '/Last Changed Rev/ {print $NF}' )

        ## Keep last rev. nb (ignore directories with no usable revision)
        if [[ "${rev_loc}" =~ ^[0-9]+$ ]] && [ "${rev_loc}" -gt "${rev}" ]; then
            rev=${rev_loc}
        fi
    done

    echo "NEMOGCM ${rev}" \
        >> model.log
    echo "<a href=\"https://forge.ipsl.jussieu.fr/nemo/changeset/${rev}\" target=\"_blank\">${rev}</a>" \
        >> "${file_nemo}"
}
80
#######################################
# Record the version of the compiler, MPI, NetCDF and CDO software.
# Globals:   TRUST_JOB_ENV (sourced), TRUST_COMPILE_FORTRAN,
#            TRUST_COMPILE_MPI, TRUST_COMPILE_NETCDF, TRUST_IO_CDO,
#            PATH, file_cmpf, file_lmpi, file_ncdf (all read)
# Outputs:   one line per software appended to model.log; lines 3, 4 and 5
#            of model.log appended to ${file_cmpf}, ${file_lmpi} and
#            ${file_ncdf} respectively
#######################################
get_soft_rel() {
    local ver str
    local version_opt_re=${TRUST_COMPILE_FORTRAN}

    ## Sourcing environment
    . "${TRUST_JOB_ENV}"

    ## Softwares answering to '--version' (Fortran compiler and CDO).
    ## The regex is built from the non-empty names only: an empty
    ## alternative ('name|') would match every entry of the loop.
    if [ -n "${TRUST_IO_CDO}" ]; then
        version_opt_re=${version_opt_re:+${version_opt_re}|}${TRUST_IO_CDO}
    fi

    ## Unset/empty settings vanish from the list (unquoted on purpose)
    for str in ${TRUST_COMPILE_FORTRAN}                      \
               ${TRUST_COMPILE_MPI} ${TRUST_COMPILE_NETCDF}  \
               ${TRUST_IO_CDO}; do
        [ -z "${str}" ] && continue

        if [[ -n "${version_opt_re}" && "${str}" =~ ${version_opt_re} ]]; then
            ## option --version would work for main Fortran compilers and CDO
            ver=$( "${str}" --version 2>&1 | grep -m1 -oe '\<[0-9. ]*\>' \
                   | xargs echo "${str}" )
        else
            ## Extract version number after searching pattern in PATH env. variable
            ver=$( echo "${PATH}" | sed "s|.*\(${str}[0-9.]*\).*|\1|" )
        fi

        ## Cleaning characters string to display proper soft name
        ## (echo arguments left unquoted to squeeze repeated blanks)
        ver=$( echo ${ver} | sed 's|[/-]| |g' )

        echo ${ver} \
            >> model.log
    done

    sed -n 3p model.log \
        >> "${file_cmpf}"
    sed -n 4p model.log \
        >> "${file_lmpi}"
    sed -n 5p model.log \
        >> "${file_ncdf}"
}
117
#######################################
# Fetch the input files into the current directory and list them.
# With TRUST_IO_FORC_TAR set, each archive found under TRUST_IO_FORC_PATH
# is listed then extracted; otherwise the files of TRUST_IO_FORC_PATH are
# listed then copied. Gzipped files are finally decompressed.
# Globals:   TRUST_IO_FORC_TAR, TRUST_IO_FORC_PATH (read)
# Outputs:   listing appended to inputs_list.txt; input files in '.'
#######################################
get_inputs() {
    local archive

    ## The commands are run directly: when stored in a string and expanded,
    ## '>>' was passed as a plain argument and '\cp' was not a valid command
    if [ -n "${TRUST_IO_FORC_TAR}" ]; then

        ## List archive content & extract it by default
        for archive in ${TRUST_IO_FORC_TAR}; do
            tar -tvf "${TRUST_IO_FORC_PATH}/${archive}" >> inputs_list.txt
        done

        for archive in ${TRUST_IO_FORC_TAR}; do
            tar -xf  "${TRUST_IO_FORC_PATH}/${archive}"
        done

    else

        ## List & copy files without archive
        ls -lh     "${TRUST_IO_FORC_PATH}"/* >> inputs_list.txt
        command cp "${TRUST_IO_FORC_PATH}"/* .

    fi

    ## TODO: accept a list of paths in TRUST_IO_FORC_PATH, each entry being
    ##       either an archive or a directory

    ## Decompress any gzipped input (no-op when there is none)
    find . -name '*.gz' -exec gzip -d {} \;
}
157
#######################################
# Compare the input files of the current directory with their copy in the
# benchmark directory.
# Globals:   TRUST_TEST_BENCHMARK, file_inpt, file_note (read)
# Outputs:   'diff -q' messages, or 'Same', on stdout; 'Same'/'Different'
#            appended to ${file_inpt}; list of differing files appended to
#            temp_${file_note}
# Returns:   0
#######################################
diff_inputs() {
    local dif file
    local files_list='' mesg='Same'

    ###################################
    ## Think of copying initial test ##
    ###################################

    ## Simple diff
    for file in cpp_* 'inputs_list.txt' *namelist_* *.xml; do

        ## Pass over files (or unmatched patterns) missing in benchmark directory
        [ -e "${TRUST_TEST_BENCHMARK}/${file}" ] || continue

        ## 'diff -q' prints a message only when the files differ
        dif=$( diff -q "${file}" "${TRUST_TEST_BENCHMARK}/${file}" )

        if [ -n "${dif}" ]; then
            mesg='Different'
            echo "${dif}"
            files_list+="${file} "
        fi

    done

    if [ "${mesg}" == 'Same' ]; then echo "${mesg}"; fi
    echo "${mesg}" \
        >> "${file_inpt}"

    ## List different files for web comment
    if [ -n "${files_list}" ]; then
        echo "Inputs  : ${files_list}differ<br>" \
            >> "temp_${file_note}"
    fi
}
194
#######################################
# Wait for the job to end, polling its state every 30 s, and abort the test
# when the timeout is reached.
# Globals:   TRUST_JOB_STATE (command; any output means "still there"),
#            TRUST_JOB_INFO (optional command), TRUST_JOB_TIMEOUT (seconds),
#            TRUST_JOB_KILL (command) (all read)
# Outputs:   job information appended to computation.log while pending
# Returns:   calls 'get_out 6' on timeout
#######################################
job_pending() {
    local outline time_elapsed=0 time_increment=30

    ## Separator line of 100 '#'
    printf -v outline '%100s' ''
    outline=${outline// /#}

    sleep "${time_increment}"

    ## Append a log file while pending
    while [[ -n "$( eval "${TRUST_JOB_STATE}" )"           \
             && ${time_elapsed} -lt ${TRUST_JOB_TIMEOUT} ]]; do
        printf "\n%s\n" "${outline}" \
            >> computation.log
        [ -n "${TRUST_JOB_INFO}" ] && eval "${TRUST_JOB_INFO}" \
            >> computation.log
        sleep "${time_increment}"
        time_elapsed=$(( time_elapsed + time_increment ))
    done

    sleep "${time_increment}"

    ## Kill remaining job & stop the test if it's too long.
    ## '-ge' and not '-eq': the counter steps over a timeout that is not a
    ## multiple of the polling period
    if [ "${time_elapsed}" -ge "${TRUST_JOB_TIMEOUT}" ]; then
        eval "${TRUST_JOB_KILL}" &> /dev/null
        get_out 6
    fi

}
220
#######################################
# Compare 'ocean.output' and the '*.stat' files with the benchmark ones.
# Globals:   TRUST_TEST_BENCHMARK, file_note (read)
# Outputs:   'diff -q' messages, or 'Same', on stdout; list of differing
#            files appended to temp_${file_note}
# Returns:   0; calls 'get_out 7' when a benchmark file is missing
#######################################
diff_results() {
    local file
    local files_list='' mesg='Same'

    ###################################
    ## Think of copying initial test ##
    ###################################

    ## Simple diff
    for file in 'ocean.output' *.stat; do

        ## An unmatched pattern stays as it is: no stat file in this run.
        ## Nothing to compare, unless the benchmark has some.
        if [[ "${file}" == '*.stat' && ! -e "${file}" ]]; then
            if compgen -G "${TRUST_TEST_BENCHMARK}/*.stat" > /dev/null; then
                mesg='Different'
                files_list+="${file} "
            fi
            continue
        fi

        ## Stop if no minimal benchmark files (ocean.output, eventual stat files)
        [ -e "${TRUST_TEST_BENCHMARK}/${file}" ] || get_out 7

        ## Continue even if it differs
        if ! diff -q "${file}" "${TRUST_TEST_BENCHMARK}/${file}"; then
            mesg='Different'
            files_list+="${file} "
        fi

    done

    if [ "${mesg}" == 'Same' ]; then echo "${mesg}"; fi

    ## List different files for web comment
    if [ -n "${files_list}" ]; then
        echo "Results : ${files_list}differ<br>" \
            >> "temp_${file_note}"
    fi
}
247
#######################################
# Rebuild the decomposed restart files with 'rebuild_nemo' and compare each
# rebuilt file with the benchmark one using 'cdo diffn'.
# Globals:   TRUST_CFG_NEW, TRUST_DIR_NEMOGCM, TRUST_COMPILE_NPROC,
#            TRUST_TEST_BENCHMARK, file_note (read)
# Outputs:   one line per differing restart, or 'Same', on stdout;
#            cdo_diff.out; summary appended to temp_${file_note}
# Returns:   calls 'get_out X' when the rebuild is not possible
#######################################
diff_restarts() {
    local dif dif_nb filebase filebases ndomain
    local files_list='' dif_sum=0

    ## Nothing to do without restart files to rebuild
    [ -n "$( find . -regex ".*_restart.*[0-9]\.nc" -print -quit )" ] || return 0

    ###############################################################
    ## Think to set the configuration name in the 'namelist_cfg' ##
    ###############################################################
    filebases=$( find . -regextype sed -regex ".*${TRUST_CFG_NEW}.*_[0-9]\{4\}\.nc" \
                 | sed 's/\(.*\)_.*/\1/' | sort -u                                    )

    for filebase in ${filebases}; do

        ndomain=$( find . -regex ".*${filebase}_[0-9]*.nc" \
                   | wc -l | awk '{print $1}'              )

        [ "${ndomain}" -eq 0 ] && get_out X

        #####################################################
        ## Handle 2 possibilities of 'rebuild_nemo' origin ##
        #####################################################

        "${TRUST_DIR_NEMOGCM}/TOOLS/REBUILD_NEMO/rebuild_nemo" \
            -t ${TRUST_COMPILE_NPROC} "${filebase}" "${ndomain}"   \
            > /dev/null                                            \
            || get_out X

        ## Possibility of remaining decomposed restarts (even after rebuild)
        rm -f "${filebase}"_[0-9]*.nc \
            > /dev/null

        ## Skip comparison if no benchmark files (restart file)
        [ -e "${TRUST_TEST_BENCHMARK}/${filebase}.nc" ] || continue

        cdo diffn "${filebase}.nc" "${TRUST_TEST_BENCHMARK}/${filebase}.nc" \
            > cdo_diff.out 2> /dev/null

        ## Identical if cdo_diff.out is zero size
        [ -s cdo_diff.out ] || continue

        dif=$( grep -om1 '[0-9]* of [0-9]* records' cdo_diff.out )

        if [ -n "${dif}" ]; then
            files_list+="${filebase} "
            echo "${filebase}.nc: ${dif}"

            ## Sum the leading number of differing records (the former sed
            ## script missed its 's' command, so the sum stayed at zero)
            dif_nb=${dif%% *}
            if [[ "${dif_nb}" =~ ^[0-9]+$ ]]; then
                dif_sum=$(( dif_sum + 10#${dif_nb} ))
            fi
        fi

    done

    ## List modified restart(s) for web comment with sum of differences
    if [ "${dif_sum}" -ne 0 ]; then
        echo "Restarts: ${files_list}${dif_sum} record(s) differ<br>" \
            >> "temp_${file_note}"
    else
        echo 'Same'
    fi

}
315
#######################################
# Report the elapsed time of the job.
# Globals:   TRUST_JOB_TIME (command, read), file_time (read)
# Outputs:   'Elapsed time: <value>' on stdout; <value> appended to
#            ${file_time}. Nothing is done when TRUST_JOB_TIME is empty.
#######################################
get_time() {
    local time_cpu

    [ -z "${TRUST_JOB_TIME}" ] && return 0

    ## Interest for checking unusual time computation
    time_cpu=$( eval "${TRUST_JOB_TIME}" )

    printf "Elapsed time: "
    ## Unquoted on purpose: the value is squeezed onto a single line
    echo ${time_cpu} | tee -a "${file_time}"
}
325
#######################################
# Report the maximum physical and virtual memory used by the job.
# Globals:   TRUST_JOB_RAM_P, TRUST_JOB_RAM_V (commands, read),
#            file_memy (read)
# Outputs:   'Memory max usage (physical/virtual): <p> / <v>' on stdout;
#            '<p> / <v>' appended to ${file_memy}. Nothing is done when
#            both commands are empty.
#######################################
get_memy() {
    local memory_pmax memory_vmax

    [[ -z "${TRUST_JOB_RAM_P}" && -z "${TRUST_JOB_RAM_V}" ]] && return 0

    ## Interest for checking unusual memory usage
    memory_pmax=$( eval "${TRUST_JOB_RAM_P}" )
    memory_vmax=$( eval "${TRUST_JOB_RAM_V}" )

    printf "Memory max usage (physical/virtual): "
    ## Values unquoted on purpose: squeezed onto a single line
    echo ${memory_pmax}' / '${memory_vmax} | tee -a "${file_memy}"
}
336
#######################################
# Pick the first message of the given kind in 'ocean.output'.
# Globals:   file_note (read)
# Arguments: $1 - message banner to look for ('E R R O R', 'W A R N I N G')
# Outputs:   the message, joined on one line, on stdout and appended
#            (followed by '<br>') to temp_${file_note}
# Returns:   0
#######################################
comments() {
    local state=$1 line=''
    ## 'W A R N I N G' pattern by default: banner at line start + 2 lines
    local -a grep_opts=( -A2 "^ ${state}" )

    [ "${state}" == 'E R R O R' ] && grep_opts=( -A4 "${state}" )

    if [ -e ocean.output ]; then
        ## Select first occurence for web comment
        line=$( grep -m1 "${grep_opts[@]}" ocean.output | tr -d '\n' )
    fi

    if [ -n "${line}" ]; then
        printf '%s\n' "${line}"
        ## The message is data, never a printf format: it may hold '%' or '\'
        printf '%s<br>' "${line}" \
            >> "temp_${file_note}"
    fi
}
353
#######################################
# Build the test summary from the messenger files.
# Globals:   file_note, TRUST_TEST_SUMMARY (read)
# Outputs:   comments collected in temp_${file_note} appended on one line
#            to ${file_note}; all 'mesg_*.txt' files pasted side by side
#            (';' separated) on stdout and into ${TRUST_TEST_SUMMARY}
#######################################
log_make() {
    ## Format comments for web: single line, without the last '<br>'
    if [ -e "temp_${file_note}" ]; then
        tr -d '\n' < "temp_${file_note}" | sed 's/<br>$//' \
            >> "${file_note}"
    fi

    ## Construct txt file with all messenger files
    paste -d ';' mesg_*.txt | tee "${TRUST_TEST_SUMMARY}"
}
364
#######################################
# In production mode (-p|--prod), store the summary in the trusting logfile
# and send a notification e-mail when the test has failed.
# Globals:   TRUST_FLAG_PROD, TRUST_TEST_LOG, TRUST_TEST_SUMMARY,
#            TRUST_TEST_MAILING, TRUST_FLAG_RESULT, TRUST_FLAG_ERROR,
#            TRUST_CFG_NEW, TRUST_CFG_REF, TRUST_MAIN_USER, TRUST_MAIN_HPCC,
#            TRUST_TEST_DIR, TRUST_TEST_BENCHMARK, TRUST_TEST_BACKUP,
#            TRUST_SVN_BRANCH (all read)
# Outputs:   ${TRUST_TEST_LOG} created or appended; trusting.mail written
#            and mailed to ${TRUST_TEST_MAILING} on failure
#######################################
prod_publish() {
    local cmd

    ## Production mode (-p|--prod) only
    [ "${TRUST_FLAG_PROD:-0}" -eq 1 ] || return 0

    ## Create the trusting logfile with the whole summary (header included),
    ## or append only its last line to an existing one
    if [ -f "${TRUST_TEST_LOG}" ]; then cmd='tail -1'; else cmd='cat'; fi

    ${cmd} "${TRUST_TEST_SUMMARY}" \
        >> "${TRUST_TEST_LOG}"

    ## Send mail only when FAILED
    if [[ -n "${TRUST_TEST_MAILING}" \
          && "${TRUST_FLAG_RESULT}" == 'FAILED' ]]; then

        ## Content
        cat <<END_MAIL \
            > trusting.mail
Dear all,


The following trusting sequence has not completed successfully:

Testing configuration ${TRUST_CFG_NEW} based on ${TRUST_CFG_REF}.
User installation ${TRUST_MAIN_USER}
HPC environment ${TRUST_MAIN_HPCC}

Here is the running environment summary:
$(cat model.log)

For more details, look into the testing folder at:
${TRUST_TEST_DIR}
An archive is also available to share the questionable configuration:
${TRUST_TEST_BENCHMARK}/${TRUST_TEST_BACKUP}

END_MAIL

        ## Send with detailed subject (the list of recipients is left
        ## unquoted to be split)
        mail -s "[NEMO Trusting][${TRUST_CFG_REF}][${TRUST_SVN_BRANCH}] ${TRUST_FLAG_RESULT} ${TRUST_FLAG_ERROR}" \
            ${TRUST_TEST_MAILING} \
            < trusting.mail
    fi
}
413
#######################################
# End the test: translate the error code into a status message, update the
# result and status messenger files, archive the configuration on failure
# in production mode, build the summary and leave the script.
# Globals:   TRUST_FLAG_ERROR (written), TRUST_FLAG_RESULT, TRUST_RESULT,
#            TRUST_TEST_DIR, TRUST_JOB_TIMEOUT, TRUST_FLAG_PROD,
#            TRUST_TEST_BACKUP, TRUST_TEST_BENCHMARK, TRUST_DIR_NEMOGCM,
#            TRUST_CFG_NEW, file_rslt, file_stat (read)
# Arguments: $1 - error code ('1' to '8'; anything else: 'Unknown error')
# Returns:   never; the shell exits with status 0
#######################################
get_out() {
    local time_step=0
    local rslt_txt stat_txt

    TRUST_FLAG_ERROR=$1

    printf "\n\nEnd of test\n"

    ## In case of compilation error
    cd "${TRUST_TEST_DIR}" \
        || echo "Cannot change directory to '${TRUST_TEST_DIR}'" >&2

    if [ "${TRUST_FLAG_RESULT}" == 'FAILED' ]; then
        echo 'Failure'

        ## Error identification
        case "${TRUST_FLAG_ERROR}" in
            ## Compilation
            '1') TRUST_FLAG_ERROR='XIOS compilation failed' ;;
            '2') TRUST_FLAG_ERROR='NEMO compilation failed' ;;
            ## Submission
            '3') TRUST_FLAG_ERROR='Missing input files'     ;;
            '4') TRUST_FLAG_ERROR='Job submission error'    ;;
            ## Computing
            '5') TRUST_FLAG_ERROR='Crashed at time step'    ;;
            '6') TRUST_FLAG_ERROR='Exceeded time limit'     ;;
            ## Results
            '7') TRUST_FLAG_ERROR='Missing previous outputs';;
            '8') TRUST_FLAG_ERROR='New outputs differ'      ;;
            ## Other (pattern left unquoted to match any other code)
            *)   TRUST_FLAG_ERROR='Unknown error'           ;;
        esac

    else
        echo 'Success' && TRUST_FLAG_ERROR='Code is reliable'
    fi

    ## Eventual comments from ocean.output
    if [ "${TRUST_FLAG_ERROR}" == 'Crashed at time step' ]; then
        comments 'E R R O R'
        [ -e time.step ] && time_step=$( cat time.step )
        TRUST_FLAG_ERROR+=' '${time_step}
    else
        comments 'W A R N I N G'

        if [ "${TRUST_FLAG_ERROR}" == 'Exceeded time limit' ]; then
            TRUST_FLAG_ERROR+=' '$(( ${TRUST_JOB_TIMEOUT}/3600 ))'h'
        fi

    fi

    ## Last messenger files: rewrite their second line.
    ## NOTE(review): TRUST_RESULT is not set anywhere in this file; the
    ## result flag is used when it is empty - confirm the intended variable.
    ## Characters special to the sed replacement ('\', '/', '&') are escaped.
    rslt_txt=$( printf '%s' "${TRUST_RESULT:-${TRUST_FLAG_RESULT}}" \
                | sed 's/[\/&]/\\&/g' )
    stat_txt=$( printf '%s' "${TRUST_FLAG_ERROR}" \
                | sed 's/[\/&]/\\&/g' )
    sed -i "2 s/.*/${rslt_txt}/" "${file_rslt}"
    sed -i "2 s/.*/${stat_txt}/" "${file_stat}"

    ## Save tested configuration if trusting failed in production mode (-p|--prod)
    if [[ "${TRUST_FLAG_RESULT}" == 'FAILED' && "${TRUST_FLAG_PROD:-0}" -eq 1 ]]; then
        echo "Creating archive ${TRUST_TEST_BACKUP} under ${TRUST_TEST_BENCHMARK}"
        tar -czf "${TRUST_TEST_BENCHMARK}/${TRUST_TEST_BACKUP}"           * \
            -C   "${TRUST_DIR_NEMOGCM}/CONFIG/${TRUST_CFG_NEW}/MY_SRC"    . \
            -C   "${TRUST_DIR_NEMOGCM}/CONFIG/${TRUST_CFG_NEW}"             \
                 "cpp_${TRUST_CFG_NEW}.fcm"
    fi

    ## Logfile construct & eventual sending of notification email
    printf "\nTrusting digest:\n----------------\n"
    log_make
    prod_publish

    exit 0
}
Note: See TracBrowser for help on using the repository browser.