New URL for NEMO forge!   http://forge.nemo-ocean.eu

Since March 2022, along with the NEMO 4.2 release, code development has moved to a self-hosted GitLab.
This forge is now archived and remains online for historical reference.
trusting_func.sh in branches/2015/dev_r5092_CNRS18_TRUST/NEMOGCM/TRUST/inc – NEMO

source: branches/2015/dev_r5092_CNRS18_TRUST/NEMOGCM/TRUST/inc/trusting_func.sh @ 8859

Last change on this file since 8859 was 8859, checked in by nicolasmartin, 6 years ago

Continuation of global refactoring of Trusting tool

  • Introduce a new 'dev' mode between the 'debug' and 'prod' modes, to skip XIOS compilation from scratch and to work in a single testing directory (thanks Andrew for the suggestion)
  • Switch cfg files from 'svn:mime-type=text/x-shellscript' to 'svn:executable' trying to get syntax highlighting under Trac
  • Bugfixes: correct XIOS branch 'XIOS_DEV_CMIP6' for ORCA1 and remove remaining 'TRUST_DIR_BENCHMARK' (replace by TRUST_TEST_BENCHMARK)
  • Improve the workflow displayed in the terminal
  • Property svn:executable set to *
File size: 16.9 KB
Line 
#!/bin/bash


##--------------------------------------------------------------------------------
## Messenger filenames
##--------------------------------------------------------------------------------

## One small text file per piece of information about the test. They are all
## merged column-wise by 'log_make' with 'paste mesg_*.txt', so the numeric
## part of each name fixes the column order.
file_date=mesg_01_date.txt
file_rslt=mesg_02_result.txt
file_stat=mesg_03_status.txt
file_nemo=mesg_04_nemo.txt
file_xios=mesg_05_xios.txt
file_cmpf=mesg_06_compiler.txt
file_lmpi=mesg_07_mpi.txt
file_ncdf=mesg_08_netcdf.txt
file_inpt=mesg_09_inputs.txt
file_time=mesg_10_time.txt
file_memy=mesg_11_memory.txt
file_note=mesg_12_comments.txt
14
15
16##--------------------------------------------------------------------------------
17## Functions in order of use
18##--------------------------------------------------------------------------------
19
## Print a step title underlined with dashes, surrounded by blank lines.
## Arguments: $1 - title to display
## Outputs:   blank line, title, underline of the same length, blank line
step() {
    local title=$1 outline

    ## Build a run of blanks as long as the title, then turn it into dashes
    ## (the length is taken from the string itself: counting the output of
    ## 'echo' would include its trailing newline and give one dash too many)
    printf -v outline '%*s' "${#title}" ''

    printf '\n%s\n%s\n\n' "$title" "${outline// /-}"
}
26
27
28##
29##--------------------------------------------------------------------------------
30
## Create the testing environment and initialise the messenger files.
## Globals:   TRUST_TEST_DIR, TRUST_TEST_BENCHMARK, TRUST_FLAG_RESULT,
##            TRUST_TEST_DATE and the 'file_*' messenger filenames (read)
## Outputs:   the testing directory path on stdout; every messenger file gets
##            its column title, then result, status and date get a first value
## Exits:     through 'get_out B' if the directories cannot be created/entered
init() {
    local dat

    mkdir -p -- "${TRUST_TEST_DIR}" "${TRUST_TEST_BENCHMARK}" || get_out B
    ## Without this check the messenger files would be written in whatever
    ## directory the caller happened to be in
    cd       -- "${TRUST_TEST_DIR}"                           || get_out B
    echo        "${TRUST_TEST_DIR}"

    ## Column titles (first line of each messenger file)
    echo 'Date'             > "${file_date}"
    echo 'Result'           > "${file_rslt}"
    echo 'Status'           > "${file_stat}"
    echo 'NEMOGCM'          > "${file_nemo}"
    echo 'XIOS'             > "${file_xios}"
    echo 'Fortran'          > "${file_cmpf}"
    echo 'MPI'              > "${file_lmpi}"
    echo 'NetCDF'           > "${file_ncdf}"
    echo 'Inputs'           > "${file_inpt}"
    echo 'Time'             > "${file_time}"
    echo 'RAM (Phy./Virt.)' > "${file_memy}"
    echo 'Comments'         > "${file_note}"

    ## 'FAILED' status with 'Unknown error' by default
    echo "${TRUST_FLAG_RESULT}" >> "${file_rslt}"
    echo 'Unknown error'        >> "${file_stat}"

    ## UTC time zone for timestamping
    dat=$( date -ud "${TRUST_TEST_DATE}" +"%F %R %Z" )

    echo "$dat" >> "${file_date}"
}
55
56
57##
58##--------------------------------------------------------------------------------
59
## Run the SVN action on the essential NEMO directories and record the
## highest 'Last Changed Rev' found among them.
## Globals:   TRUST_SVN_UP (list of directories), TRUST_SVN_ACTION (command),
##            TRUST_SVN_NEMOGCM, TRUST_SVN_REPO, file_nemo (read)
## Outputs:   each directory name on stdout; 'NEMOGCM <rev>' appended to
##            model.log; an HTML link to the changeset appended to ${file_nemo}
## Exits:     through 'get_out C' if the SVN action fails on a directory
get_nemo_rev() {
    local dir rev_loc
    local rev=0

    ## Loop on essential NEMO directories
    ## (TRUST_SVN_UP is a blank-separated list and TRUST_SVN_ACTION a command
    ##  with its options: both are split on purpose)
    for dir in ${TRUST_SVN_UP}; do

        echo "$dir"
        ${TRUST_SVN_ACTION} "${TRUST_SVN_NEMOGCM}/$dir" || get_out C

        rev_loc=$( svn info "${TRUST_SVN_NEMOGCM}/$dir" \
                       | awk '/Last Changed Rev/ {print $NF}' )

        ## No usable revision for this directory: nothing to compare
        [[ ${rev_loc} =~ ^[0-9]+$ ]] || continue

        ## Keep last rev. nb
        if [ "${rev_loc}" -gt "$rev" ]; then rev=${rev_loc}; fi
    done

    echo "NEMOGCM $rev" \
        >> model.log
    echo "<a href=\"${TRUST_SVN_REPO}/nemo/changeset/$rev\" target=\"_blank\">$rev</a>" \
        >> "${file_nemo}"
}
80
81
82##
83##--------------------------------------------------------------------------------
84
## Record the release of the compiler, MPI, NetCDF and CDO used for the test.
## Globals:   TRUST_JOB_ENV (file sourced to set the environment),
##            TRUST_COMPILE_FORTRAN, TRUST_COMPILE_MPI, TRUST_COMPILE_NETCDF,
##            TRUST_IO_CDO, file_cmpf, file_lmpi, file_ncdf (read)
## Outputs:   one line per software appended to model.log; lines 2, 3 and 4 of
##            model.log copied to the compiler/MPI/NetCDF messenger files;
##            model.log displayed in columns; sorted environment in env.log
get_soft_rel() {
    local ver str

    ## Sourcing environment
    [[ -n ${TRUST_JOB_ENV} ]] && . "${TRUST_JOB_ENV}" >& /dev/null

    for str in ${TRUST_COMPILE_FORTRAN}                         \
               ${TRUST_COMPILE_MPI}     ${TRUST_COMPILE_NETCDF} \
               ${TRUST_IO_CDO}                                    ; do

        ## option --version would work for main Fortran compilers and CDO
        ## (exact comparison: a regex alternation with an empty member would
        ##  match every name and run '--version' on the MPI/NetCDF patterns)
        if [[ $str == "${TRUST_COMPILE_FORTRAN}" || $str == "${TRUST_IO_CDO}" ]]; then
            ver=$( "$str" --version 2>&1 | grep -m1 -oe '\<[0-9. ]*\>' \
                       | xargs echo "$str"                              )
        else
            ## Extract version number after searching pattern in PATH env.
            ## variable; '-n ... p' prints nothing when the pattern is absent
            ## instead of returning the whole PATH
            ver=$( printf '%s\n' "$PATH" | sed -n "s|.*\($str[0-9.]*\).*|\1|p" )
        fi

        ## Keep at least the name so that each software has its own line
        ver=${ver:-$str}

        ## Cleaning characters string to display proper soft name
        ver=$( printf '%s\n' "$ver" | sed 's|[/-]| |g' )

        ## Left unquoted on purpose: word splitting squeezes repeated blanks
        echo $ver \
            >> model.log
    done

    sed -n 2p model.log \
        >> "${file_cmpf}"
    sed -n 3p model.log \
        >> "${file_lmpi}"
    sed -n 4p model.log \
        >> "${file_ncdf}"

    awk '{printf "%-20s %s %s\n", $1, $2, $3}' model.log
    env | sort > env.log
}
123
124
125##
126##--------------------------------------------------------------------------------
127
## Compile XIOS and record its branch and revision.
## Globals:   TRUST_IO_XIOS (XIOS working copy), TRUST_IO_XIOS_MODE,
##            TRUST_MAIN_HPCC, TRUST_COMPILE_NPROC, TRUST_MAIN_STDOUT,
##            TRUST_TEST_DIR, TRUST_SVN_REPO, file_xios (read)
## Outputs:   branch/revision line on stdout and appended to model.log; HTML
##            link to the changeset appended to the XIOS messenger file;
##            'Success' if ./lib/libxios.a exists after the build
## Exits:     through 'get_out D' otherwise
compile_xios() {
    local rev

    cd -- "${TRUST_IO_XIOS}" || get_out D

    rev=$( svn info | awk '/Last Changed Rev/ {print $NF}' )
    printf 'XIOS                 branch %s rev. %s\n' \
        "$( basename "${TRUST_IO_XIOS}" )" "$rev"     \
        | tee -a "${TRUST_TEST_DIR}/model.log"
    ## The href value must be closed before the 'target' attribute
    echo "<a href=\"${TRUST_SVN_REPO}/ioserver/changeset/$rev\" target=\"_blank\">$rev</a>" \
        >> "${TRUST_TEST_DIR}/${file_xios}"

    ## 'eval' is needed: TRUST_MAIN_STDOUT is appended as shell syntax
    ## (NOTE(review): presumably a redirection - confirm against the caller)
    eval "
    ./make_xios ${TRUST_IO_XIOS_MODE} --arch ${TRUST_MAIN_HPCC}     \
                                      --job  ${TRUST_COMPILE_NPROC} \
        ${TRUST_MAIN_STDOUT}
    "

    ## The library is the only proof of a successful build
    if [[ -e ./lib/libxios.a ]]; then
        echo 'Success'
    else
        get_out D
    fi
}
146
147
148##
149##--------------------------------------------------------------------------------
150
## Compile the tested NEMO configuration.
## Globals:   TRUST_SVN_NEMOGCM, TRUST_CFG_NEW, TRUST_CFG_REF, TRUST_MAIN_HPCC,
##            TRUST_COMPILE_NPROC, TRUST_CFG_KEY_ADD, TRUST_CFG_KEY_DEL,
##            TRUST_MAIN_STDOUT, TRUST_FLAG_DEBUG, TRUST_FLAG_DEV (read)
## Outputs:   'Success' if <TRUST_CFG_NEW>/BLD/bin/nemo.exe exists afterwards
## Exits:     through 'get_out E' otherwise
compile_nemo() {
    cd -- "${TRUST_SVN_NEMOGCM}/CONFIG" || get_out E

    ## Recompiling from scratch if not in debug or dev mode
    if [[ ${TRUST_FLAG_DEBUG} == 'false' && ${TRUST_FLAG_DEV} == 'false' \
          && -d ${TRUST_CFG_NEW} ]]; then
        ## 'y' is fed on stdin to 'makenemo ... clean_config'
        ./makenemo -n "${TRUST_CFG_NEW}" clean_config \
            > /dev/null <<< 'y'
    fi

    ## 'eval' is needed: TRUST_MAIN_STDOUT is appended as shell syntax
    ## (NOTE(review): presumably a redirection - confirm against the caller)
    eval "
    ./makenemo -n ${TRUST_CFG_NEW}   -r ${TRUST_CFG_REF}       \
               -m ${TRUST_MAIN_HPCC} -j ${TRUST_COMPILE_NPROC} \
               ${TRUST_CFG_KEY_ADD} ${TRUST_CFG_KEY_DEL}       \
        ${TRUST_MAIN_STDOUT}
    "

    ## The executable is the only proof of a successful build
    if [[ -e ./${TRUST_CFG_NEW}/BLD/bin/nemo.exe ]]; then
        echo 'Success'
    else
        get_out E
    fi
}
175
176
177##
178##--------------------------------------------------------------------------------
179
## Gather in the testing directory every input file needed by the run.
## Globals:   TRUST_TEST_DIR, TRUST_IO_FORC_PATH (forcing directory),
##            TRUST_IO_FORC_TAR (optional list of archives inside it),
##            TRUST_SVN_NEMOGCM, TRUST_CFG_NEW (read)
## Outputs:   a message about the forcing source on stdout; the listing of
##            forcing files appended to inputs_list.txt; forcing files,
##            'cpp_*' and selected EXP00 files copied into the current
##            directory; '*.gz' files decompressed
## Exits:     through 'get_out B' if the testing directory cannot be entered
get_inputs() {
    local archive

    cd -- "${TRUST_TEST_DIR}" || get_out B

    ## Test forcing directory
    ## (unset and empty are handled alike: with an empty path the branches
    ##  below would list and copy '/*')
    if [[ -z ${TRUST_IO_FORC_PATH} ]]; then

        echo 'No forcing files needed'

    ## Test forcing archive
    elif [[ -n ${TRUST_IO_FORC_TAR} ]]; then

        echo "Forcing archive(s): ${TRUST_IO_FORC_TAR}"

        ## List archive content & extract it
        ## (TRUST_IO_FORC_TAR is a blank-separated list: split on purpose)
        for archive in ${TRUST_IO_FORC_TAR}; do
            tar -tvf "${TRUST_IO_FORC_PATH}/$archive" >> inputs_list.txt
            tar -vxf "${TRUST_IO_FORC_PATH}/$archive" >  /dev/null
        done

    else

        echo "Forcing directory: ${TRUST_IO_FORC_PATH}"

        ## List & copy files without archive
        ls -lh     "${TRUST_IO_FORC_PATH}"/* >> inputs_list.txt
        command cp "${TRUST_IO_FORC_PATH}"/* .

    fi

    ## Decompress what came gzipped (no-op when nothing matches)
    find . -name '*.gz' -exec gzip -d {} +

    cp   "${TRUST_SVN_NEMOGCM}/CONFIG/${TRUST_CFG_NEW}"/cpp_* .
    ## NOTE(review): the dots of '.in' and '.xml' are not escaped, so they
    ## match any character - kept as is to select the same files as before
    find "${TRUST_SVN_NEMOGCM}/CONFIG/${TRUST_CFG_NEW}/EXP00" \
         -regex '.*\(_cfg\|.in\|opa\|_ref\|.xml\)'            \
         -exec  cp {} . \;
}
230
231
232##
233##--------------------------------------------------------------------------------
234
## Compare the input files of this test with those of the benchmark.
## Globals:   TRUST_TEST_BENCHMARK, file_inpt, file_note (read)
## Outputs:   'diff -q' message of each differing file, or 'Same', on stdout;
##            'Same' or 'Different' appended to the inputs messenger file;
##            list of differing files appended to temp_${file_note}
## Returns:   0
diff_inputs() {
    local dif file
    local files_list='' mesg='Same'

    ###################################
    ## Think of copying initial test ##
    ###################################

    ## Simple diff
    for file in cpp_* 'inputs_list.txt' *namelist_* *.xml; do

        ## Pass over useless file omission in benchmark directory
        ## (this also skips a glob pattern that matched nothing)
        [[ -e ${TRUST_TEST_BENCHMARK}/$file ]] || continue

        ## 'diff -q' is silent on stdout unless the files differ
        dif=$( diff -q -- "$file" "${TRUST_TEST_BENCHMARK}/$file" )

        if [[ -n $dif ]]; then
            mesg='Different'
            echo "$dif"
            files_list+="$file "
        fi

    done

    [[ $mesg == 'Same' ]] && echo "$mesg"
    echo "$mesg" \
        >> "${file_inpt}"

    ## List different files for web comment
    if [[ -n ${files_list} ]]; then
        echo "Inputs  : ${files_list}differ<br>" \
            >> "temp_${file_note}"
    fi
}
271
272
273##
274##--------------------------------------------------------------------------------
275
## Submit the job and keep its identifier.
## Globals:   TRUST_JOB_SCRIPT, TRUST_TEST_DIR, TRUST_JOB_SUBMIT (read);
##            TRUST_JOB_ID (written with the output of the submit command)
## Outputs:   'Success (job ID <id>)' on stdout
## Exits:     through 'get_out G' if the submit command fails
job_submit() {
    ## Copy the submitting script to testing folder
    cp -- "${TRUST_JOB_SCRIPT}" "${TRUST_TEST_DIR}"

    ## TRUST_JOB_SUBMIT is a command line from the configuration: it is left
    ## unquoted so that it is evaluated exactly as before
    if TRUST_JOB_ID=$( eval ${TRUST_JOB_SUBMIT} ); then
        printf 'Success (job ID %s)\n' "${TRUST_JOB_ID}"
    else
        get_out G
    fi
}
283
284
285##
286##--------------------------------------------------------------------------------
287
## Wait for the end of the job, logging its state meanwhile.
## Globals:   TRUST_JOB_STATE (command printing something while the job is
##            alive), TRUST_JOB_INFO (optional command whose output is
##            logged), TRUST_JOB_TIMEOUT (seconds), TRUST_JOB_KILL (read)
## Outputs:   a separator line and the job info appended to computation.log
##            at each poll
## Exits:     through 'get_out I' after killing the job, if it is still alive
##            once the timeout has been reached
job_pending() {
    local outline time_elapsed=0 time_increment=30

    ## Separator line of 100 '#'
    printf -v outline '%100s' ''
    outline=${outline// /#}

    sleep ${time_increment}

    ## Append a log file while pending
    ## (the job commands come from the configuration: they are left unquoted
    ##  so that they are evaluated exactly as before)
    while [[ -n $( eval ${TRUST_JOB_STATE} )           \
             && ${time_elapsed} -lt ${TRUST_JOB_TIMEOUT} ]]; do
        printf '\n%s\n' "$outline" \
            >> computation.log
        [[ -n ${TRUST_JOB_INFO} ]] && eval ${TRUST_JOB_INFO} \
            >> computation.log
        sleep ${time_increment}
        time_elapsed=$(( time_elapsed + time_increment ))
    done

    sleep ${time_increment}

    ## Kill remaining job & stop the test if it's too long
    ## ('-ge': the elapsed time steps by the poll interval and may jump over
    ##  the timeout; the state is checked again so that a job which ended on
    ##  the very last poll is not reported as too long)
    if [[ ${time_elapsed} -ge ${TRUST_JOB_TIMEOUT} \
          && -n $( eval ${TRUST_JOB_STATE} )       ]]; then
        eval ${TRUST_JOB_KILL} &> /dev/null
        get_out I
    fi
}
313
314
315##
316##--------------------------------------------------------------------------------
317
## Record the computing time and the memory usage of the job, when the
## commands to get them are defined.
## Globals:   TRUST_JOB_TIME, TRUST_JOB_RAM_P, TRUST_JOB_RAM_V (commands),
##            file_time, file_memy (read)
## Outputs:   the values on stdout, also appended to the time and memory
##            messenger files
job_perfs() {
    local time_cpu memory_pmax memory_vmax

    ## The commands come from the configuration: they are left unquoted so
    ## that they are evaluated exactly as before. Their results are echoed
    ## unquoted on purpose, which keeps each value on a single line.

    if [[ -n ${TRUST_JOB_TIME} ]]; then

        ## Interest for checking unusual time computation
        time_cpu=$( eval ${TRUST_JOB_TIME} )

        printf 'Time: '
        echo ${time_cpu} | tee -a "${file_time}"

    fi

    if [[ -n ${TRUST_JOB_RAM_P} && -n ${TRUST_JOB_RAM_V} ]]; then

        ## Interest for checking unusual memory usage
        memory_pmax=$( eval ${TRUST_JOB_RAM_P} )
        memory_vmax=$( eval ${TRUST_JOB_RAM_V} )

        printf 'Max memory usage (physical/virtual): '
        echo ${memory_pmax}' / '${memory_vmax} | tee -a "${file_memy}"

    fi
}
339
340
341##
342##--------------------------------------------------------------------------------
343
## Check that the run went through: 'time.step' must exist and
## 'ocean.output' must not contain the 'E R R O R' pattern.
## Outputs:   'Success' on stdout
## Exits:     through 'get_out H' otherwise
job_state() {
    ## 'grep -q' only answers whether the pattern is there
    if [[ ! -e time.step ]] || grep -q 'E R R O R' ocean.output; then
        get_out H
    else
        echo 'Success' ## Must be reviewed
    fi
}
351
352
353##
354##--------------------------------------------------------------------------------
355
## Compare the text outputs of the run (ocean.output and the eventual
## '*.stat' files) with those of the benchmark.
## Globals:   TRUST_TEST_BENCHMARK, file_note (read);
##            TRUST_FLAG_RESULT (written: 'OK', or 'FAILED' as soon as a file
##            differs or has no benchmark counterpart)
## Outputs:   'diff -q' messages, or 'Same', on stdout; list of differing
##            files appended to temp_${file_note}
## Exits:     through 'get_out J' if a benchmark file is missing
diff_results() {
    local file
    local files_list='' mesg='Same'

    ###################################
    ## Think of copying initial test ##
    ###################################

    ## Now test is good by default ('OK')
    TRUST_FLAG_RESULT='OK'

    ## Simple diff
    for file in 'ocean.output' *.stat; do

        ## Stat files are optional: skip the pattern when it matched nothing
        [[ $file == '*.stat' && ! -e $file ]] && continue

        ## Stop if no minimal benchmark files (ocean.output, eventual stat files)
        ## ('get_out' only reports an error when the result is 'FAILED')
        if [[ ! -e ${TRUST_TEST_BENCHMARK}/$file ]]; then
            TRUST_FLAG_RESULT='FAILED'
            get_out J
        fi

        ## Continue even if it differs, but the test is not good anymore
        if ! diff -q -- "$file" "${TRUST_TEST_BENCHMARK}/$file"; then
            TRUST_FLAG_RESULT='FAILED'
            mesg='Different'
            files_list+="$file "
        fi

    done

    [[ $mesg == 'Same' ]] && echo "$mesg"

    ## List different files for web comment
    if [[ -n ${files_list} ]]; then
        echo "Results : ${files_list}differ<br>" \
            >> "temp_${file_note}"
    fi
}
385
386
387##
388##--------------------------------------------------------------------------------
389
## Rebuild the decomposed restart files and compare them with the benchmark.
## Globals:   TRUST_CFG_NEW, TRUST_SVN_NEMOGCM, TRUST_COMPILE_NPROC,
##            TRUST_TEST_BENCHMARK, file_note (read);
##            TRUST_FLAG_RESULT (read; set to 'FAILED' if records differ)
## Outputs:   '<file>.nc: <n> of <m> records' for each differing restart, or
##            'Same', on stdout; summary appended to temp_${file_note};
##            'cdo diffn' output left in cdo_diff.out
## Exits:     through 'get_out K' if a rebuild fails, through 'get_out L' if
##            TRUST_FLAG_RESULT is 'FAILED' at the end
diff_restarts() {
    local dif filebase filebases ndomain nrec
    local files_list='' dif_sum=0

    ## Find all restart files to rebuild
    if [[ -n $( find . -regex ".*_restart.*[0-9]\.nc" -print -quit ) ]]; then
        ###############################################################
        ## Think to set the configuration name in the 'namelist_cfg' ##
        ###############################################################
        ## Base name = decomposed file name without its '_<4 digits>.nc' end
        filebases=$( find . -regextype sed -regex ".*${TRUST_CFG_NEW}.*_[0-9]\{4\}\.nc" \
                         | sed 's/\(.*\)_.*/\1/' | sort -u                              )

        ## One word per base name: split on purpose
        for filebase in $filebases; do

            ## Number of decomposed files for this base name
            ndomain=$( find . -regex ".*${filebase}_[0-9]*.nc" \
                           | wc -l | awk '{print $1}'          )

            #####################################################
            ## Handle 2 possibilities of 'rebuild_nemo' origin ##
            #####################################################

            ## Possibility of remaining decomposed restarts (even after rebuild)
            if "${TRUST_SVN_NEMOGCM}/TOOLS/REBUILD_NEMO/rebuild_nemo" \
                   -t "${TRUST_COMPILE_NPROC}" "$filebase" "$ndomain" \
                   > /dev/null; then
                rm -f -- "${filebase}"_[0-9]*.nc \
                    > /dev/null
            else
                get_out K
            fi

            ## Nothing to compare without benchmark file (restart file)
            [[ -e ${TRUST_TEST_BENCHMARK}/$filebase.nc ]] || continue

            cdo diffn "$filebase.nc" "${TRUST_TEST_BENCHMARK}/$filebase.nc" \
                > cdo_diff.out 2> /dev/null

            ## Identical if cdo_diff.out is zero size
            [[ -s cdo_diff.out ]] || continue

            dif=$( grep -om1 '[0-9]* of [0-9]* records' cdo_diff.out )

            if [[ -n $dif ]]; then
                files_list+="$filebase "
                echo "$filebase.nc: $dif"

                ## Leading number of the match = differing records
                nrec=${dif%% *}
                if [[ $nrec =~ ^[0-9]+$ ]]; then
                    dif_sum=$(( dif_sum + 10#$nrec ))
                fi
            fi

        done

        ## List modified restart(s) for web comment with sum of differences
        if [[ ${dif_sum} -ne 0 ]]; then
            ## Differing restarts make the test fail ('get_out L' below)
            TRUST_FLAG_RESULT='FAILED'
            echo "Restarts: ${files_list}${dif_sum} record(s) differ<br>" \
                >> "temp_${file_note}"
        else
            echo 'Same'
        fi

    fi

    if [[ ${TRUST_FLAG_RESULT} == 'FAILED' ]]; then
        get_out L
    fi
}
455
456
457##
458##--------------------------------------------------------------------------------
459
## Pick in ocean.output the first message of the given kind, for display and
## for the web comment.
## Arguments: $1 - pattern to look for ('W A R N I N G' or 'E R R O R')
## Globals:   file_note (read)
## Outputs:   the message and its following lines, joined on one line, on
##            stdout and appended (followed by '<br>') to temp_${file_note}
## Returns:   0, whether a message was found or not
comments() {
    local line='' state=$1
    ## 'W A R N I N G' pattern by default: at line start after one blank,
    ## with the 2 following lines
    local context=2 pattern="^ $state"

    ## 'E R R O R' is searched anywhere in the line, with 4 following lines
    if [[ $state == 'E R R O R' ]]; then
        context=4
        pattern=$state
    fi

    if [[ -e ocean.output ]]; then
        ## Select first occurence for web comment
        line=$( grep -m1 -A"${context}" -- "$pattern" ocean.output | tr -d '\n' )
    fi

    if [[ -n $line ]]; then
        ## The message is data, never a format: a '%' inside it must be
        ## written as is
        printf '%s\n'   "$line"
        printf '%s<br>' "$line" \
            >> "temp_${file_note}"
    fi
}
476
477
478##
479##--------------------------------------------------------------------------------
480
## Build the summary of the test from the messenger files.
## Globals:   file_note, TRUST_TEST_SUMMARY (read)
## Outputs:   the comments gathered in temp_${file_note} appended on one line
##            to the comments messenger file; all 'mesg_*.txt' files merged
##            column-wise with ';' on stdout and in ${TRUST_TEST_SUMMARY}
log_make() {
    ## Format comments for web: one single line, no trailing '<br>'
    if [[ -e temp_${file_note} ]]; then
        tr -d '\n' < "temp_${file_note}" | sed 's/<br>$//' \
            >> "${file_note}"
    fi

    ## Construct txt file with all messenger files
    paste -d ';' mesg_*.txt | tee "${TRUST_TEST_SUMMARY}"
}
491
492
493##
494##--------------------------------------------------------------------------------
495
## Publish the result of the test when running in production mode.
## Globals:   TRUST_FLAG_PROD, TRUST_TEST_LOG, TRUST_TEST_SUMMARY,
##            TRUST_TEST_MAILING (blank-separated recipients),
##            TRUST_FLAG_RESULT, TRUST_FLAG_ERROR, TRUST_CFG_NEW,
##            TRUST_CFG_REF, TRUST_MAIN_USER, TRUST_MAIN_HPCC,
##            TRUST_SVN_BRANCH, TRUST_TEST_DIR, TRUST_TEST_BENCHMARK,
##            TRUST_TEST_BACKUP (read)
## Outputs:   summary appended to ${TRUST_TEST_LOG}; when the test has FAILED
##            and recipients are defined, a mail written in trusting.mail and
##            sent with 'mail'
prod_publish() {
    local cmd subject

    ## Production mode (-p|--prod)
    [[ ${TRUST_FLAG_PROD} == 'true' ]] || return 0

    ## Create or append trusting logfile: the whole summary (with its title
    ## line) for a new logfile, only its last line otherwise
    if [[ -f ${TRUST_TEST_LOG} ]]; then cmd='tail -n 1'; else cmd='cat'; fi

    $cmd "${TRUST_TEST_SUMMARY}" \
        >> "${TRUST_TEST_LOG}"

    ## Send mail only when FAILED
    [[ -n ${TRUST_TEST_MAILING} && ${TRUST_FLAG_RESULT} == 'FAILED' ]] || return 0

    ## Content
    cat > trusting.mail <<END_MAIL
Dear all,


The following trusting sequence has not completed successfully:

Testing configuration ${TRUST_CFG_NEW} based on ${TRUST_CFG_REF}.
User installation ${TRUST_MAIN_USER}
HPC environment ${TRUST_MAIN_HPCC}

Here is the running environment summary:
$( cat model.log )

For more details, look into the testing folder at:
${TRUST_TEST_DIR}
An archive is also available to share the questionable configuration:
${TRUST_TEST_BENCHMARK}/${TRUST_TEST_BACKUP}

END_MAIL

    ## Send with detailed subject (built in two steps: a line continuation
    ## inside the quotes would embed the indentation in the subject)
    subject="[NEMO Trusting][${TRUST_CFG_REF}][${TRUST_SVN_BRANCH}]"
    subject+=" ${TRUST_FLAG_RESULT} ${TRUST_FLAG_ERROR}"

    ## The recipients list is split on purpose
    mail -s "$subject" ${TRUST_TEST_MAILING} \
        < trusting.mail
}
544
545
546##
547##--------------------------------------------------------------------------------
548
## End the test: translate the error code, finalise the messenger files,
## archive the configuration if needed, publish and leave.
## Arguments: $1 - error code (one letter, see the list below); anything
##                 else stands for an unknown error
## Globals:   TRUST_TEST_DIR, TRUST_FLAG_RESULT, TRUST_FLAG_PROD,
##            TRUST_JOB_TIMEOUT, TRUST_TEST_BENCHMARK, TRUST_TEST_BACKUP,
##            TRUST_SVN_NEMOGCM, TRUST_CFG_NEW, file_rslt, file_stat (read);
##            TRUST_FLAG_ERROR (written with the status message)
## Outputs:   end-of-test messages and the trusting digest on stdout; second
##            line of the result/status messenger files replaced
## Exits:     always, with status 0
get_out() {
    local time_step=0

    TRUST_FLAG_ERROR=$1

    printf "\n\nEnd of test\n"

    ## In case of compilation error
    cd -- "${TRUST_TEST_DIR}" \
        || echo "Cannot enter testing directory '${TRUST_TEST_DIR}'" >&2

    if [[ ${TRUST_FLAG_RESULT} == 'FAILED' ]]; then
        echo 'Failure'

        ## Error identification
        case "${TRUST_FLAG_ERROR}" in

            ## Initialisation
            'A') TRUST_FLAG_ERROR='Missing environment variable'      ;;
            'B') TRUST_FLAG_ERROR='Unable to create testing directory';;
            'C') TRUST_FLAG_ERROR='SVN issue on local working copy'   ;;

            ## Compilation
            'D') TRUST_FLAG_ERROR='XIOS compilation failed'           ;;
            'E') TRUST_FLAG_ERROR='NEMO compilation failed'           ;;

            ## Submission
            'F') TRUST_FLAG_ERROR='Missing input files'               ;;
            'G') TRUST_FLAG_ERROR='Job submission error'              ;;

            ## Computing
            'H') TRUST_FLAG_ERROR='Crashed at time step '
                 comments 'E R R O R'
                 [[ -e time.step ]] && time_step=$( cat time.step )
                 TRUST_FLAG_ERROR+=${time_step:=0}                     ;;
            'I') TRUST_FLAG_ERROR='Exceeded time limit of '
                 TRUST_FLAG_ERROR+=$(( ${TRUST_JOB_TIMEOUT}/3600 ))'h' ;;

            ## Results
            'J') TRUST_FLAG_ERROR='Missing previous outputs'          ;;
            'K') TRUST_FLAG_ERROR='Restart rebuild error'             ;;
            'L') TRUST_FLAG_ERROR='New outputs differ'                ;;

            ## Other (the pattern must stay unquoted to match everything)
            *)   TRUST_FLAG_ERROR='Unknown error'                     ;;

        esac

    else
        echo 'Success'
        TRUST_FLAG_ERROR='Code is reliable'
    fi

    ## Eventual comments from ocean.output
    ## (after a crash the 'E R R O R' message has already been recorded)
    if [[ ${TRUST_FLAG_ERROR} != *'Crashed at time step'* ]]; then
        comments 'W A R N I N G'
    fi

    ## Last messenger files: replace the default values set on line 2
    sed -i "2 s/.*/${TRUST_FLAG_RESULT}/" "${file_rslt}"
    sed -i "2 s/.*/${TRUST_FLAG_ERROR}/"  "${file_stat}"

    ## Save tested configuration if trusting failed in production mode ('-p')
    if [[ ${TRUST_FLAG_RESULT} == 'FAILED' && ${TRUST_FLAG_PROD} == 'true' ]]; then
        echo "Creating archive ${TRUST_TEST_BACKUP} under ${TRUST_TEST_BENCHMARK}"
        ## Testing directory content, then MY_SRC content and the cpp file
        tar -czf "${TRUST_TEST_BENCHMARK}/${TRUST_TEST_BACKUP}"        * \
            -C   "${TRUST_SVN_NEMOGCM}/CONFIG/${TRUST_CFG_NEW}/MY_SRC" . \
            -C   "${TRUST_SVN_NEMOGCM}/CONFIG/${TRUST_CFG_NEW}"          \
                 "cpp_${TRUST_CFG_NEW}.fcm"
    fi

    ## Logfile construct & eventual sending of notification email
    printf "\nTrusting digest:\n----------------\n"
    log_make
    prod_publish

    exit 0
}
Note: See TracBrowser for help on using the repository browser.