New URL for NEMO forge!   http://forge.nemo-ocean.eu

In March 2022, along with the NEMO 4.2 release, code development moved to a self-hosted GitLab.
This forge is now archived and remains online for historical reference.
trusting_func.sh in branches/2015/dev_r5092_CNRS18_TRUST/NEMOGCM/TRUST/inc – NEMO

source: branches/2015/dev_r5092_CNRS18_TRUST/NEMOGCM/TRUST/inc/trusting_func.sh @ 8843

Last change on this file since 8843 was 8843, checked in by nicolasmartin, 6 years ago

Review forcing inputs management + cosmetic changes

  • Property eol-style set to native
  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Rev URL
File size: 16.9 KB
Line 
1#!/bin/bash
2
3
4##--------------------------------------------------------------------------------
5## Messenger filenames
6##--------------------------------------------------------------------------------
7
## One messenger file per column of the trusting summary; the numeric part of
## the name fixes the column order when 'log_make' pastes 'mesg_*.txt' together
file_date=mesg_01_date.txt
file_rslt=mesg_02_result.txt
file_stat=mesg_03_status.txt
file_nemo=mesg_04_nemo.txt
file_xios=mesg_05_xios.txt
file_cmpf=mesg_06_compiler.txt
file_lmpi=mesg_07_mpi.txt
file_ncdf=mesg_08_netcdf.txt
file_inpt=mesg_09_inputs.txt
file_time=mesg_10_time.txt
file_memy=mesg_11_memory.txt
file_note=mesg_12_comments.txt
14
15
16##--------------------------------------------------------------------------------
17## Functions in order of use
18##--------------------------------------------------------------------------------
19
##
## Print a step banner: a 'Step.....' header, the title and a dashed underline
## of exactly the same length as the title (counted in characters).
## Arguments: $1 - step title
## Outputs  : banner on stdout
##
print_step() {
    local title=$1
    local outline

    ## Blank padding as wide as the title, turned into dashes below
    printf -v outline '%*s' "${#title}" ''

    printf '\n\nStep.....\n%s\n%s\n\n' "$title" "${outline// /-}"
}
26
27
28##
29##--------------------------------------------------------------------------------
30
##
## Create the testing and benchmark directories, move into the testing one and
## start every messenger file with its column title.
## Globals  : TRUST_TEST_DIR, TRUST_TEST_BENCHMARK, TRUST_TEST_DATE,
##            TRUST_FLAG_RESULT and the file_* messenger names (read)
## Outputs  : testing directory path on stdout
## Exits    : calls 'get_out B' when the directories cannot be created/entered
##
init() {
    local dat

    mkdir -p -- "${TRUST_TEST_DIR}" "${TRUST_TEST_BENCHMARK}" || get_out B
    ## Messenger files below are relative: never write them in a wrong place
    cd       -- "${TRUST_TEST_DIR}"                           || get_out B
    printf '%s\n' "${TRUST_TEST_DIR}"

    echo 'Date'             > "${file_date}"; echo 'Result'   > "${file_rslt}"
    echo 'Status'           > "${file_stat}"; echo 'NEMOGCM'  > "${file_nemo}"
    echo 'XIOS'             > "${file_xios}"; echo 'Fortran'  > "${file_cmpf}"
    echo 'MPI'              > "${file_lmpi}"; echo 'NetCDF'   > "${file_ncdf}"
    echo 'Inputs'           > "${file_inpt}"; echo 'Time'     > "${file_time}"
    echo 'RAM (Phy./Virt.)' > "${file_memy}"; echo 'Comments' > "${file_note}"

    ## 'FAILED' status with 'Unknown error' by default
    printf '%s\n' "${TRUST_FLAG_RESULT}" >> "${file_rslt}"
    echo 'Unknown error'                 >> "${file_stat}"

    ## UTC time zone for timestamping
    dat=$( date -ud "${TRUST_TEST_DATE}" +"%F %R %Z" )

    printf '%s\n' "$dat" >> "${file_date}"
}
55
56
57##
58##--------------------------------------------------------------------------------
59
##
## Run the SVN action on each essential NEMO directory and record the highest
## 'Last Changed Rev' found, in model.log and in the NEMO messenger file.
## Globals  : TRUST_SVN_UP (space-separated directories), TRUST_SVN_ACTION
##            (command line), TRUST_SVN_NEMOGCM, TRUST_SVN_REPO, file_nemo (read)
## Outputs  : each processed directory on stdout
## Exits    : calls 'get_out C' when the SVN action fails
##
get_nemo_rev() {
    local dir rev_loc
    local rev=0

    ## Loop on essential NEMO directories (list split on blanks on purpose)
    for dir in ${TRUST_SVN_UP}; do

        echo "$dir"
        ## TRUST_SVN_ACTION is a command with its options: deliberate splitting
        ${TRUST_SVN_ACTION} "${TRUST_SVN_NEMOGCM}/${dir}" || get_out C

        rev_loc=$( svn info "${TRUST_SVN_NEMOGCM}/${dir}" \
                   | awk '/Last Changed Rev/ {print $NF}' )

        ## Keep last rev. nb, ignoring anything that is not a plain number
        ## (empty answer when 'svn info' gives no 'Last Changed Rev' line)
        if [[ ${rev_loc} =~ ^[0-9]+$ ]] && [ "${rev_loc}" -gt "$rev" ]; then
            rev=${rev_loc}
        fi

    done

    echo "NEMOGCM $rev" >> model.log
    echo "<a href=\"${TRUST_SVN_REPO}/nemo/changeset/$rev\" target=\"_blank\">$rev</a>" \
        >> "${file_nemo}"
}
80
81
82##
83##--------------------------------------------------------------------------------
84
##
## Record the release of each piece of software used (Fortran compiler, MPI,
## NetCDF, CDO): one line per software appended to model.log, then lines 2 to 4
## of model.log are copied to the compiler, MPI and NetCDF messenger files.
## Globals  : TRUST_JOB_ENV (sourced), TRUST_COMPILE_FORTRAN, TRUST_COMPILE_MPI,
##            TRUST_COMPILE_NETCDF, TRUST_IO_CDO, file_cmpf, file_lmpi,
##            file_ncdf (read)
## Outputs  : formatted model.log on stdout; sorted environment in env.log
##
get_soft_rel() {
    local ver str
    local -a words

    ## Sourcing environment
    . "${TRUST_JOB_ENV}" >& /dev/null

    ## Names are split on blanks on purpose: empty settings simply vanish
    for str in ${TRUST_COMPILE_FORTRAN} ${TRUST_COMPILE_MPI} \
               ${TRUST_COMPILE_NETCDF}  ${TRUST_IO_CDO}; do

        ## Extract version number after searching pattern in PATH env. variable;
        ## fall back on the bare name instead of logging the whole PATH
        ver=$( printf '%s\n' "$PATH" | sed -n "s|.*\($str[0-9.]*\).*|\1|p" )
        [[ -z $ver ]] && ver=$str

        ## option --version would work for main Fortran compilers and CDO.
        ## Empty settings are skipped so that they cannot match every name.
        if [[ -n ${TRUST_COMPILE_FORTRAN} && $str == *"${TRUST_COMPILE_FORTRAN}"* ]] \
        || [[ -n ${TRUST_IO_CDO}          && $str == *"${TRUST_IO_CDO}"*          ]]; then
            ver=$( "$str" --version 2>&1 | grep -m1 -oe '\<[0-9. ]*\>' \
                   | xargs echo "$str"                                )
        fi

        ## Cleaning characters string to display proper soft name
        ver=$( printf '%s\n' "$ver" | sed 's|[/-]| |g' )

        ## Squeeze blanks without any pathname expansion; an empty value still
        ## gives one line so that line numbers in model.log stay meaningful
        read -r -a words <<< "$ver"
        printf '%s\n' "${words[*]}" >> model.log

    done

    sed -n 2p model.log >> "${file_cmpf}"
    sed -n 3p model.log >> "${file_lmpi}"
    sed -n 4p model.log >> "${file_ncdf}"

    awk '{printf "%-20s %s %s\n", $1, $2, $3}' model.log
    env | sort > env.log
}
123
124
125##
126##--------------------------------------------------------------------------------
127
##
## Record the XIOS revision (model.log and XIOS messenger file) then build
## XIOS with 'make_xios' from its own directory.
## Globals  : TRUST_IO_XIOS, TRUST_IO_XIOS_MODE, TRUST_MAIN_HPCC,
##            TRUST_COMPILE_NPROC, TRUST_MAIN_STDOUT, TRUST_TEST_DIR,
##            TRUST_SVN_REPO, file_xios (read)
## Outputs  : XIOS branch/revision line then 'Success' on stdout
## Exits    : calls 'get_out D' when the directory cannot be entered or when
##            lib/libxios.a is missing after the build
##
compile_xios() {
    local rev

    cd "${TRUST_IO_XIOS}" || get_out D

    rev=$( svn info | awk '/Last Changed Rev/ {print $NF}' )
    printf 'XIOS                 branch %s rev. %s\n'     \
        "$( basename "${TRUST_IO_XIOS}" )" "$rev"         \
        | tee -a "${TRUST_TEST_DIR}/model.log"
    ## Web link to the changeset (href value properly closed before 'target')
    echo "<a href=\"${TRUST_SVN_REPO}/ioserver/changeset/$rev\" target=\"_blank\">$rev</a>" \
        >> "${TRUST_TEST_DIR}/${file_xios}"

    ## NOTE(review): eval kept because TRUST_MAIN_STDOUT is appended as shell
    ## syntax (presumably an output redirection) - settings must be trusted
    eval "
    ./make_xios ${TRUST_IO_XIOS_MODE} --arch ${TRUST_MAIN_HPCC}     \
                                      --job  ${TRUST_COMPILE_NPROC} \
                ${TRUST_MAIN_STDOUT}
    "

    if [[ -e ./lib/libxios.a ]]; then
        echo 'Success'
    else
        get_out D
    fi
}
146
147
148##
149##--------------------------------------------------------------------------------
150
##
## Build the tested NEMO configuration with 'makenemo' from the CONFIG
## directory, cleaning a previous build first unless in debug mode.
## Globals  : TRUST_SVN_NEMOGCM, TRUST_CFG_NEW, TRUST_CFG_REF, TRUST_FLAG_DEBUG,
##            TRUST_MAIN_HPCC, TRUST_COMPILE_NPROC, TRUST_CFG_KEY_ADD,
##            TRUST_CFG_KEY_DEL, TRUST_MAIN_STDOUT (read)
## Outputs  : 'Success' on stdout
## Exits    : calls 'get_out E' when CONFIG cannot be entered or when
##            BLD/bin/nemo.exe is missing after the build
##
compile_nemo() {
    cd "${TRUST_SVN_NEMOGCM}/CONFIG" || get_out E

    ## Recompiling from scratch if not in debug mode
    if [[ -d ${TRUST_CFG_NEW} && ${TRUST_FLAG_DEBUG} == 'false' ]]; then
        ## 'y' is fed on standard input (presumably a confirmation prompt)
        ./makenemo -n "${TRUST_CFG_NEW}" clean_config > /dev/null <<< 'y'
    fi

    ## NOTE(review): eval kept because TRUST_MAIN_STDOUT is appended as shell
    ## syntax (presumably an output redirection) - settings must be trusted
    eval "
    ./makenemo -n ${TRUST_CFG_NEW}   -r ${TRUST_CFG_REF}       \
               -m ${TRUST_MAIN_HPCC} -j ${TRUST_COMPILE_NPROC} \
               ${TRUST_CFG_KEY_ADD} ${TRUST_CFG_KEY_DEL}       \
               ${TRUST_MAIN_STDOUT}
    "

    if [[ -e "./${TRUST_CFG_NEW}/BLD/bin/nemo.exe" ]]; then
        echo 'Success'
    else
        get_out E
    fi
}
171
172
173##
174##--------------------------------------------------------------------------------
175
##
## Bring the input files into the testing directory: forcing files (extracted
## from archives or copied from a directory, then gunzipped), the 'cpp_*' key
## files and the EXP00 files matching the namelist/XML patterns.
## Globals  : TRUST_TEST_DIR, TRUST_IO_FORC_PATH, TRUST_IO_FORC_TAR
##            (space-separated archives), TRUST_SVN_NEMOGCM, TRUST_CFG_NEW (read)
## Outputs  : origin of the forcing files on stdout; listing appended to
##            inputs_list.txt
## Exits    : calls 'get_out B' when the testing directory cannot be entered
##
get_inputs() {
    local archive

    cd "${TRUST_TEST_DIR}" || get_out B

    ## Test forcing directory: only an unset variable means "no forcing"
    if [[ -z ${TRUST_IO_FORC_PATH+set} ]]; then

        echo 'No forcing files needed'

    ## Test forcing archive
    elif [[ -n ${TRUST_IO_FORC_TAR} ]]; then

        echo "Forcing archive(s): ${TRUST_IO_FORC_TAR}"

        ## List every archive content first, then extract them
        for archive in ${TRUST_IO_FORC_TAR}; do
            tar -tvf "${TRUST_IO_FORC_PATH}/${archive}" >> inputs_list.txt
        done
        for archive in ${TRUST_IO_FORC_TAR}; do
            tar -vxf "${TRUST_IO_FORC_PATH}/${archive}" > /dev/null
        done

    else

        echo "Forcing directory: ${TRUST_IO_FORC_PATH}"

        ## List & copy files without archive ('command' bypasses any cp alias)
        ls -lh     "${TRUST_IO_FORC_PATH}"/* >> inputs_list.txt
        command cp "${TRUST_IO_FORC_PATH}"/* .

    fi

    ## Uncompress gzipped inputs, if any
    find . -name '*.gz' -exec gzip -d {} \;

    cp   "${TRUST_SVN_NEMOGCM}/CONFIG/${TRUST_CFG_NEW}"/cpp_* .
    find "${TRUST_SVN_NEMOGCM}/CONFIG/${TRUST_CFG_NEW}/EXP00" \
         -regex '.*\(_cfg\|.in\|opa\|_ref\|.xml\)'            \
         -exec  cp {} . \;
}
226
227
228##
229##--------------------------------------------------------------------------------
230
##
## Compare the input files of the test (cpp keys, inputs listing, namelists,
## XML files) with their benchmark counterparts.
## Globals  : TRUST_TEST_BENCHMARK, file_inpt, file_note (read)
## Outputs  : diff messages or 'Same' on stdout; 'Same'/'Different' appended to
##            the inputs messenger file; list of differing files appended to
##            the temporary comments file
## Returns  : 0
##
diff_inputs() {
    local dif file
    local files_list='' mesg='Same'

    ###################################
    ## Think of copying initial test ##
    ###################################

    ## Simple diff
    for file in cpp_* 'inputs_list.txt' *namelist_* *.xml; do

        ## Pass over file (or unmatched pattern) absent from benchmark directory
        [[ -e "${TRUST_TEST_BENCHMARK}/${file}" ]] || continue

        ## Rely on diff status: anything but "identical" counts as different
        if ! dif=$( diff -q -- "$file" "${TRUST_TEST_BENCHMARK}/${file}" ); then
            mesg='Different'
            [[ -n $dif ]] && printf '%s\n' "$dif"
            files_list+="${file} "
        fi

    done

    [[ $mesg == 'Same' ]] && echo "$mesg"
    echo "$mesg" >> "${file_inpt}"

    ## List different files for web comment
    if [[ -n ${files_list} ]]; then
        echo "Inputs  : ${files_list}differ<br>" >> "temp_${file_note}"
    fi

    return 0
}
267
268
269##
270##--------------------------------------------------------------------------------
271
##
## Copy the job script into the testing directory and submit the job.
## Globals  : TRUST_JOB_SCRIPT, TRUST_TEST_DIR, TRUST_JOB_SUBMIT (read),
##            TRUST_JOB_ID (written: output of the submission command)
## Outputs  : 'Success (job ID ...)' on stdout
## Exits    : calls 'get_out G' when the submission command fails
##
job_submit() {
    ## Copy the submitting script to testing folder
    cp -- "${TRUST_JOB_SCRIPT}" "${TRUST_TEST_DIR}"

    ## TRUST_JOB_SUBMIT holds a whole command line, evaluated as it is
    if TRUST_JOB_ID=$( eval "${TRUST_JOB_SUBMIT}" ); then
        ## Quoted so that a multi-word answer is printed once, on one line
        printf 'Success (job ID %s)\n' "${TRUST_JOB_ID}"
    else
        get_out G
    fi
}
279
280
281##
282##--------------------------------------------------------------------------------
283
##
## Wait for the end of the job, polling its state every 30 seconds and logging
## job information meanwhile; kill it when the time limit is reached.
## Globals  : TRUST_JOB_STATE (command printing something while the job is
##            alive), TRUST_JOB_INFO, TRUST_JOB_KILL (commands),
##            TRUST_JOB_TIMEOUT (seconds) (read)
## Outputs  : job information appended to computation.log
## Exits    : calls 'get_out I' when the time limit is reached
##
job_pending() {
    local outline time_elapsed=0 time_increment=30

    printf -v outline '%100s' ''

    sleep "${time_increment}"

    ## Append a log file while pending
    while [[ -n $( eval "${TRUST_JOB_STATE}" ) ]] \
          && (( time_elapsed < TRUST_JOB_TIMEOUT )); do
        printf '\n%s\n' "${outline// /#}" >> computation.log
        [[ -n ${TRUST_JOB_INFO} ]] && eval "${TRUST_JOB_INFO}" >> computation.log
        sleep "${time_increment}"
        time_elapsed=$(( time_elapsed + time_increment ))
    done

    sleep "${time_increment}"

    ## Kill remaining job & stop the test if it's too long. The elapsed time
    ## grows by steps of 30s, so it can jump over the limit: test '>=', not '=='
    if (( time_elapsed >= TRUST_JOB_TIMEOUT )); then
        eval "${TRUST_JOB_KILL}" &> /dev/null
        get_out I
    fi
}
309
310
311##
312##--------------------------------------------------------------------------------
313
##
## Report the computing time and the maximum memory usage of the job, when the
## matching commands are set.
## Globals  : TRUST_JOB_TIME, TRUST_JOB_RAM_P, TRUST_JOB_RAM_V (commands),
##            file_time, file_memy (read)
## Outputs  : measures on stdout, also appended to the time and memory
##            messenger files (always on a single line)
##
job_perfs() {
    local time_cpu memory_pmax memory_vmax
    local -a words

    if [[ -n ${TRUST_JOB_TIME} ]]; then

        ## Interest for checking unusual time computation
        time_cpu=$( eval "${TRUST_JOB_TIME}" )

        ## Squeeze blanks and newlines without any pathname expansion
        read -r -d '' -a words <<< "${time_cpu}"

        printf 'Time: '
        printf '%s\n' "${words[*]}" | tee -a "${file_time}"

    fi

    if [[ -n ${TRUST_JOB_RAM_P} && -n ${TRUST_JOB_RAM_V} ]]; then

        ## Interest for checking unusual memory usage
        memory_pmax=$( eval "${TRUST_JOB_RAM_P}" )
        memory_vmax=$( eval "${TRUST_JOB_RAM_V}" )

        ## Squeeze blanks and newlines without any pathname expansion
        read -r -d '' -a words <<< "${memory_pmax} / ${memory_vmax}"

        printf 'Max memory usage (physical/virtual): '
        printf '%s\n' "${words[*]}" | tee -a "${file_memy}"

    fi
}
335
336
337##
338##--------------------------------------------------------------------------------
339
##
## Check the outcome of the run from the files of the current directory: it is
## a success when 'time.step' exists and 'ocean.output' holds no 'E R R O R'.
## Outputs  : 'Success' on stdout
## Exits    : calls 'get_out H' otherwise
##
job_state() {
    ## grep is only run when time.step exists and stops at the first match
    if [[ -e time.step ]] && ! grep -q 'E R R O R' ocean.output; then
        echo 'Success' ## Must be reviewed
    else
        get_out H
    fi
}
347
348
349##
350##--------------------------------------------------------------------------------
351
##
## Compare 'ocean.output' and the '*.stat' files of the run with the benchmark.
## Globals  : TRUST_TEST_BENCHMARK, file_note (read), TRUST_FLAG_RESULT
##            (written: 'OK', or 'FAILED' as soon as something is missing or
##            differs)
## Outputs  : diff messages or 'Same' on stdout; list of differing files
##            appended to the temporary comments file
## Exits    : calls 'get_out J' when a benchmark file is missing
## Returns  : 0
##
diff_results() {
    local file
    local files_list='' mesg='Same'

    ###################################
    ## Think of copying initial test ##
    ###################################

    ## Now test is good by default ('OK')
    TRUST_FLAG_RESULT='OK'

    ## Simple diff
    for file in 'ocean.output' *.stat; do

        ## Unmatched '*.stat' pattern: the run produced no stat file
        [[ $file == '*.stat' && ! -e $file ]] && continue

        ## Stop if no minimal benchmark files (ocean.output, eventual stat
        ## files); flagged as a failure so that 'get_out' reports the error
        if [[ ! -e "${TRUST_TEST_BENCHMARK}/${file}" ]]; then
            TRUST_FLAG_RESULT='FAILED'
            get_out J
        fi

        ## Continue even if it differs, but the test is no longer reliable
        if ! diff -q -- "$file" "${TRUST_TEST_BENCHMARK}/${file}"; then
            TRUST_FLAG_RESULT='FAILED'
            mesg='Different'
            files_list+="${file} "
        fi

    done

    [[ $mesg == 'Same' ]] && echo "$mesg"

    ## List different files for web comment
    if [[ -n ${files_list} ]]; then
        echo "Results : ${files_list}differ<br>" >> "temp_${file_note}"
    fi

    return 0
}
381
382
383##
384##--------------------------------------------------------------------------------
385
##
## Rebuild the decomposed restart files with 'rebuild_nemo', compare each
## rebuilt file with its benchmark counterpart ('cdo diffn') and sum the number
## of differing records.
## Globals  : TRUST_CFG_NEW, TRUST_SVN_NEMOGCM, TRUST_COMPILE_NPROC,
##            TRUST_TEST_BENCHMARK, TRUST_FLAG_RESULT, file_note (read)
## Outputs  : differing restarts or 'Same' on stdout; summary appended to the
##            temporary comments file; cdo output left in cdo_diff.out
## Exits    : calls 'get_out K' when a rebuild fails, 'get_out L' when
##            TRUST_FLAG_RESULT is 'FAILED' once restarts have been compared
## Returns  : 0
## NOTE(review): differing restarts alone do not set TRUST_FLAG_RESULT
##
diff_restarts() {
    local dif filebase filebases ndomain nb
    local files_list='' dif_sum=0

    ## Find all restart files to rebuild
    if [[ -n $( find . -regex ".*_restart.*[0-9]\.nc" -print -quit ) ]]; then
        ###############################################################
        ## Think to set the configuration name in the 'namelist_cfg' ##
        ###############################################################
        filebases=$( find . -regextype sed -regex ".*${TRUST_CFG_NEW}.*_[0-9]\{4\}\.nc" \
                     | sed 's/\(.*\)_.*/\1/' | sort -u                                   )

        ## One word per file base (names are expected without blanks)
        for filebase in ${filebases}; do

            ndomain=$( find . -regex ".*${filebase}_[0-9]*.nc" \
                       | wc -l | awk '{print $1}'              )

            #####################################################
            ## Handle 2 possibilities of 'rebuild_nemo' origin ##
            #####################################################

            ## Possibility of remaining decomposed restarts (even after rebuild)
            if "${TRUST_SVN_NEMOGCM}/TOOLS/REBUILD_NEMO/rebuild_nemo"   \
                   -t "${TRUST_COMPILE_NPROC}" "$filebase" "$ndomain"   \
                   > /dev/null; then
                rm -f "${filebase}"_[0-9]*.nc
            else
                get_out K
            fi

            ## Nothing to compare without benchmark file (restart file)
            [[ -e "${TRUST_TEST_BENCHMARK}/${filebase}.nc" ]] || continue

            cdo diffn "${filebase}.nc" "${TRUST_TEST_BENCHMARK}/${filebase}.nc" \
                > cdo_diff.out 2> /dev/null

            ## Identical if cdo_diff.out is zero size
            [[ -s cdo_diff.out ]] || continue

            dif=$( grep -om1 '[0-9]* of [0-9]* records' cdo_diff.out )

            if [[ -n $dif ]]; then
                files_list+="${filebase} "
                echo "${filebase}.nc: ${dif}"
                ## Leading number of 'N of M records' = differing records
                nb=${dif%% *}
                dif_sum=$(( dif_sum + 10#${nb:-0} ))
            fi

        done

        ## List modified restart(s) for web comment with sum of differences
        if (( dif_sum != 0 )); then
            echo "Restarts: ${files_list}${dif_sum} record(s) differ<br>" \
                >> "temp_${file_note}"
        else
            echo 'Same'
        fi

    fi

    if [[ ${TRUST_FLAG_RESULT} == 'FAILED' ]]; then
        get_out L
    fi

    return 0
}
452
453
454##
455##--------------------------------------------------------------------------------
456
##
## Pick the first message of the given kind in 'ocean.output', with the lines
## that follow it, for the web comment.
## Arguments: $1 - searched pattern ('E R R O R' or 'W A R N I N G')
## Globals  : file_note (read)
## Outputs  : message on stdout (blanks squeezed); raw message followed by
##            '<br>' appended to the temporary comments file
## Returns  : 0
##
comments() {
    local line='' state=$1
    local -a opat words

    if [[ -e ocean.output ]]; then
        ## 'W A R N I N G' pattern by default
        opat=( -A2 "^ ${state}" )
        [[ $state == 'E R R O R' ]] && opat=( -A4 "${state}" )

        ## Select first occurence for web comment
        line=$( grep -m1 "${opat[@]}" ocean.output | tr -d '\n' )
    fi

    if [[ -n $line ]]; then
        ## Squeeze blanks for the screen without any pathname expansion
        read -r -a words <<< "$line"
        printf '%s\n' "${words[*]}"

        ## The message is data, never a format: '%' and '\' are kept as they are
        printf '%s<br>' "$line" >> "temp_${file_note}"
    fi

    return 0
}
473
474
475##
476##--------------------------------------------------------------------------------
477
##
## Build the summary of the test: gather the temporary comments on one line of
## the comments messenger file, then paste all messenger files side by side.
## Globals  : file_note, TRUST_TEST_SUMMARY (read)
## Outputs  : summary (';' separated columns) on stdout and in the summary file
##
log_make() {
    ## Format comments for web: one single line without the last '<br>'
    if [[ -e "temp_${file_note}" ]]; then
        tr -d '\n' < "temp_${file_note}" | sed 's/<br>$//' >> "${file_note}"
    fi

    ## Construct txt file with all messenger files
    paste -d ';' mesg_*.txt | tee "${TRUST_TEST_SUMMARY}"
}
488
489
490##
491##--------------------------------------------------------------------------------
492
##
## Production mode only: append the summary of the test to the trusting
## logfile and send a notification e-mail when the test has failed.
## Globals  : TRUST_FLAG_PROD, TRUST_FLAG_RESULT, TRUST_FLAG_ERROR,
##            TRUST_TEST_LOG, TRUST_TEST_SUMMARY, TRUST_TEST_MAILING
##            (space-separated addresses), TRUST_CFG_NEW, TRUST_CFG_REF,
##            TRUST_MAIN_USER, TRUST_MAIN_HPCC, TRUST_SVN_BRANCH,
##            TRUST_TEST_DIR, TRUST_TEST_BENCHMARK, TRUST_TEST_BACKUP (read)
## Outputs  : mail body written in trusting.mail
##
prod_publish() {
    local subject

    ## Production mode (-p|--prod)
    [[ ${TRUST_FLAG_PROD} == 'true' ]] || return 0

    ## Create or append trusting logfile: whole summary (with titles) for a new
    ## logfile, last line only otherwise
    if [[ -f ${TRUST_TEST_LOG} ]]; then
        tail -n 1 "${TRUST_TEST_SUMMARY}" >> "${TRUST_TEST_LOG}"
    else
        cat       "${TRUST_TEST_SUMMARY}" >> "${TRUST_TEST_LOG}"
    fi

    ## Send mail only when FAILED
    if [[ -n ${TRUST_TEST_MAILING} && ${TRUST_FLAG_RESULT} == 'FAILED' ]]; then

        ## Content
        cat > trusting.mail <<END_MAIL
Dear all,


The following trusting sequence has not completed successfully:

Testing configuration ${TRUST_CFG_NEW} based on ${TRUST_CFG_REF}.
User installation ${TRUST_MAIN_USER}
HPC environment ${TRUST_MAIN_HPCC}

Here is the running environment summary:
$( cat model.log )

For more details, look into the testing folder at:
${TRUST_TEST_DIR}
An archive is also available to share the questionable configuration:
${TRUST_TEST_BENCHMARK}/${TRUST_TEST_BACKUP}

END_MAIL

        ## Send with detailed subject (kept on one line, single blanks only);
        ## the list of addresses is split on blanks on purpose
        subject="[NEMO Trusting][${TRUST_CFG_REF}][${TRUST_SVN_BRANCH}]"
        subject+=" ${TRUST_FLAG_RESULT} ${TRUST_FLAG_ERROR}"
        mail -s "${subject}" ${TRUST_TEST_MAILING} < trusting.mail

    fi
}
541
542
543##
544##--------------------------------------------------------------------------------
545
##
## End the test: turn the error code into a status message, complete the
## result and status messenger files, save the tested configuration when it
## failed in production mode, then build and publish the summary.
## Arguments: $1 - error code (A to L), any other value is 'Unknown error'
## Globals  : TRUST_TEST_DIR, TRUST_FLAG_RESULT, TRUST_FLAG_PROD,
##            TRUST_JOB_TIMEOUT, TRUST_TEST_BACKUP, TRUST_TEST_BENCHMARK,
##            TRUST_SVN_NEMOGCM, TRUST_CFG_NEW, file_rslt, file_stat (read),
##            TRUST_FLAG_ERROR (written: status message)
## Outputs  : end of test report and trusting digest on stdout
## Exits    : always ends the shell with status 0
##
get_out() {
    local time_step=0

    TRUST_FLAG_ERROR=$1

    printf '\n\nEnd of test\n'

    ## In case of compilation error
    cd "${TRUST_TEST_DIR}" \
        || printf 'Unable to enter testing directory %s\n' "${TRUST_TEST_DIR}" >&2

    if [[ ${TRUST_FLAG_RESULT} == 'FAILED' ]]; then
        echo 'Failure'

        ## Error identification
        case "${TRUST_FLAG_ERROR}" in

            ## Initialisation
            'A') TRUST_FLAG_ERROR='Missing environment variable'      ;;
            'B') TRUST_FLAG_ERROR='Unable to create testing directory';;
            'C') TRUST_FLAG_ERROR='SVN issue on local working copy'   ;;

            ## Compilation
            'D') TRUST_FLAG_ERROR='XIOS compilation failed'           ;;
            'E') TRUST_FLAG_ERROR='NEMO compilation failed'           ;;

            ## Submission
            'F') TRUST_FLAG_ERROR='Missing input files'               ;;
            'G') TRUST_FLAG_ERROR='Job submission error'              ;;

            ## Computing
            'H') TRUST_FLAG_ERROR='Crashed at time step'              ;;
            'I') TRUST_FLAG_ERROR='Exceeded time limit'               ;;

            ## Results
            'J') TRUST_FLAG_ERROR='Missing previous outputs'          ;;
            'K') TRUST_FLAG_ERROR='Restart rebuild error'             ;;
            'L') TRUST_FLAG_ERROR='New outputs differ'                ;;

            ## Other (unquoted pattern: matches any remaining code)
            *)   TRUST_FLAG_ERROR='Unknown error'                     ;;

        esac

    else
        echo 'Success'
        TRUST_FLAG_ERROR='Code is reliable'
    fi

    ## Possible comments from ocean.output (messages hold blanks: quoted tests)
    if [[ ${TRUST_FLAG_ERROR} == 'Crashed at time step' ]]; then
        comments 'E R R O R'
        ## 'read' strips the blanks around the value of time.step
        [[ -e time.step ]] && read -r time_step < time.step
        TRUST_FLAG_ERROR+=" ${time_step:-0}"
    else
        comments 'W A R N I N G'

        if [[ ${TRUST_FLAG_ERROR} == 'Exceeded time limit' ]]; then
            TRUST_FLAG_ERROR+=" $(( TRUST_JOB_TIMEOUT / 3600 ))h"
        fi

    fi

    ## Last messenger files: replace the default values set on 2nd line
    sed -i "2 s/.*/${TRUST_FLAG_RESULT}/" "${file_rslt}"
    sed -i "2 s/.*/${TRUST_FLAG_ERROR}/"  "${file_stat}"

    ## Save tested configuration if trusting failed in production mode (-p|--prod)
    if [[ ${TRUST_FLAG_RESULT} == 'FAILED' && ${TRUST_FLAG_PROD} == 'true' ]]; then
        echo "Creating archive ${TRUST_TEST_BACKUP} under ${TRUST_TEST_BENCHMARK}"
        tar -czf "${TRUST_TEST_BENCHMARK}/${TRUST_TEST_BACKUP}"              * \
            -C   "${TRUST_SVN_NEMOGCM}/CONFIG/${TRUST_CFG_NEW}/MY_SRC"       . \
            -C   "${TRUST_SVN_NEMOGCM}/CONFIG/${TRUST_CFG_NEW}"                \
                 "cpp_${TRUST_CFG_NEW}.fcm"
    fi

    ## Logfile construct & possible sending of notification email
    printf '\nTrusting digest:\n----------------\n'
    log_make
    prod_publish

    exit 0
}
Note: See TracBrowser for help on using the repository browser.