New URL for NEMO forge!   http://forge.nemo-ocean.eu

In March 2022, along with the NEMO 4.2 release, code development moved to a self-hosted GitLab.
This forge is now archived and remains online for historical reference.
trusting_func.sh in branches/2015/dev_r5092_CNRS18_TRUST/NEMOGCM/TRUST/inc – NEMO

source: branches/2015/dev_r5092_CNRS18_TRUST/NEMOGCM/TRUST/inc/trusting_func.sh @ 8818

Last change on this file since 8818 was 8818, checked in by nicolasmartin, 6 years ago

Continuation of global refactoring of the tool, set cfg files mime property and modify trusting.env trying to handle default settings

  • Property eol-style set to native
  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Rev URL
File size: 13.4 KB
RevLine 
#!/bin/bash


## Messenger filenames
## One small text file per column of the summary that log_make assembles
## with `paste -d ';' mesg_*.txt`; the numeric prefix fixes the column order.
file_date=mesg_01_date.txt
file_rslt=mesg_02_result.txt
file_stat=mesg_03_status.txt
file_nemo=mesg_04_nemo.txt
file_xios=mesg_05_xios.txt
file_cmpf=mesg_06_compiler.txt
file_lmpi=mesg_07_mpi.txt
file_ncdf=mesg_08_netcdf.txt
file_inpt=mesg_09_inputs.txt
file_time=mesg_10_time.txt
file_memy=mesg_11_memory.txt
file_note=mesg_12_comments.txt


## Functions in order of use

#######################################
## Print a step banner: blank line, 'Step.....', the title, then a row of
## dashes exactly as long as the title.
## Arguments: $1 - step title
## Outputs:   the banner on stdout
#######################################
print_step() {
    local title=${1-} underline

    ## ${#title} counts characters and, unlike `echo | wc -c`, neither the
    ## trailing newline nor the extra bytes of multibyte characters
    printf -v underline '%*s' "${#title}" ''

    printf '\nStep.....\n%s\n%s\n' "${title}" "${underline// /-}"
}

#######################################
## (Re)create every messenger file with its column title as first line and
## pre-fill result/status with default values.
## Globals:   file_* (read), TRUST_FLAG_RESULT (read)
## Outputs:   the twelve messenger files, truncated then written
#######################################
init_files() {
    ## Column titles; '>' deliberately discards any previous content
    echo 'Date'                > "${file_date}"
    echo 'Result'              > "${file_rslt}"
    echo 'Status'              > "${file_stat}"
    echo 'NEMOGCM rev.'        > "${file_nemo}"
    echo 'XIOS rev.'           > "${file_xios}"
    echo 'Fortran compiler'    > "${file_cmpf}"
    echo 'MPI libs'            > "${file_lmpi}"
    echo 'NetCDF libs'         > "${file_ncdf}"
    echo 'Input files'         > "${file_inpt}"
    echo 'Elapsed time'        > "${file_time}"
    echo 'Memory (Phy./Virt.)' > "${file_memy}"
    echo 'Comments'            > "${file_note}"

    ## Current result flag with 'Unknown error' status by default
    ## (both second lines are overwritten by get_out at the end of the test)
    printf '%s\n' "${TRUST_FLAG_RESULT}" >> "${file_rslt}"
    echo 'Unknown error'                 >> "${file_stat}"
}

#######################################
## Append the test date, converted to UTC, to the date messenger file.
## Globals:   TRUST_TEST_DATE (read, any string accepted by `date -d`),
##            file_date (read)
## Outputs:   one 'YYYY-MM-DD HH:MM UTC' line appended to ${file_date}
##            (an empty line if `date` rejects the input)
#######################################
get_date() {
    local stamp

    ## UTC time zone for timestamping
    stamp=$( date -ud "${TRUST_TEST_DATE}" +'%F %R %Z' )

    printf '%s\n' "${stamp}" >> "${file_date}"
}

#######################################
## Record the svn revision of XIOS and the highest 'Last Changed Rev' found
## among the NEMO directories, in model.log and in the messenger files.
## Globals:   TRUST_SVN_CO (read, blank-separated directories relative to
##            TRUST_DIR_NEMOGCM), TRUST_IO_XIOS (read), TRUST_SVN_ACTION
##            (read, command run on each NEMO directory), TRUST_DIR_NEMOGCM,
##            file_xios, file_nemo (read)
## Outputs:   directory names on stdout; 'XIOS <rev>' and 'NEMOGCM <rev>'
##            appended to model.log; HTML changeset links appended to
##            ${file_xios} and ${file_nemo}
#######################################
get_nemo_rev() {
    local dir rev_loc
    local rev=0

    ## Loop on essential NEMO directories
    ## (both lists are left unquoted on purpose: one word per directory)
    for dir in ${TRUST_SVN_CO} ${TRUST_IO_XIOS}; do

        ## For time being, just get revision from XIOS with no action on directory
        if [[ "${dir}" == "${TRUST_IO_XIOS}" ]]; then
            rev_loc=$( svn info "${dir}" | awk '/Last Changed Rev/ {print $NF}' )
            echo "XIOS ${rev_loc}" \
                >> model.log
            ## The redirection must stay on the same logical line as the echo,
            ## otherwise the link goes to stdout and the file stays empty
            echo "<a href=\"https://forge.ipsl.jussieu.fr/ioserver/changeset/${rev_loc}\" target=\"_blank\">${rev_loc}</a>" \
                >> "${file_xios}"
            continue
        fi

        ## TRUST_SVN_ACTION is a command line, hence unquoted
        echo "${dir}" && ${TRUST_SVN_ACTION} "${TRUST_DIR_NEMOGCM}/${dir}"
        rev_loc=$( svn info "${TRUST_DIR_NEMOGCM}/${dir}" \
                   | awk '/Last Changed Rev/ {print $NF}'   )

        ## Keep last rev. nb (ignore directories for which svn gave no number)
        if [[ "${rev_loc}" =~ ^[0-9]+$ ]] && [ "${rev_loc}" -gt "${rev}" ]; then
            rev=${rev_loc}
        fi
    done

    echo "NEMOGCM ${rev}" \
        >> model.log
    echo "<a href=\"https://forge.ipsl.jussieu.fr/nemo/changeset/${rev}\" target=\"_blank\">${rev}</a>" \
        >> "${file_nemo}"
}

#######################################
## Log name and version of the Fortran compiler, MPI, NetCDF and CDO, then
## copy lines 3 to 5 of model.log into the matching messenger files.
## Globals:   TRUST_JOB_ENV (read, file sourced into the current shell),
##            TRUST_COMPILE_FORTRAN, TRUST_COMPILE_MPI, TRUST_COMPILE_NETCDF,
##            TRUST_IO_CDO (read), PATH (read),
##            file_cmpf, file_lmpi, file_ncdf (read)
## Outputs:   one line per software appended to model.log; lines 3, 4 and 5
##            of model.log appended to ${file_cmpf}, ${file_lmpi}, ${file_ncdf}
#######################################
get_soft_rel() {
    local ver str
    local -a words

    ## Sourcing environment
    . "${TRUST_JOB_ENV}"

    ## Unquoted on purpose: unset/empty settings simply vanish from the list
    for str in ${TRUST_COMPILE_FORTRAN}                         \
               ${TRUST_COMPILE_MPI}     ${TRUST_COMPILE_NETCDF} \
               ${TRUST_IO_CDO}                                  ; do

        ## Extract version number after searching pattern in PATH env. variable;
        ## with -n/p 'ver' stays empty when the pattern is absent, instead of
        ## becoming the whole PATH
        ver=$( printf '%s\n' "${PATH}" | sed -n "s|.*\(${str}[0-9.]*\).*|\1|p" )

        ## option --version would work for main Fortran compilers and CDO.
        ## Exact comparison: a regex built from an empty variable ('ifort|')
        ## would match every name and run '--version' on library names
        if [[ "${str}" == "${TRUST_COMPILE_FORTRAN}" \
           || "${str}" == "${TRUST_IO_CDO}"          ]]; then
            ver=$( "${str}" --version 2>&1 | grep -m1 -oe '\<[0-9. ]*\>' \
                   | xargs echo "${str}"                                 )
        fi

        ## Cleaning characters string to display proper soft name:
        ## '/' and '-' become blanks, then blanks are squeezed
        ver=${ver//[\/-]/ }
        read -r -d '' -a words <<< "${ver}"

        printf '%s\n' "${words[*]}" \
            >> model.log
    done

    sed -n 3p model.log \
        >> "${file_cmpf}"
    sed -n 4p model.log \
        >> "${file_lmpi}"
    sed -n 5p model.log \
        >> "${file_ncdf}"
}

#######################################
## Bring the input files into the current directory and list them in
## inputs_list.txt, then decompress any '*.gz' found below '.'.
## Globals:   TRUST_IO_FORC_PATH (read, directory holding the inputs),
##            TRUST_IO_FORC_TAR (read, blank-separated archive names inside
##            that directory; empty means plain files)
## Outputs:   inputs_list.txt (appended), extracted or copied files in '.'
#######################################
get_inputs() {
    local archive

    if [[ -n "${TRUST_IO_FORC_TAR}" ]]; then
        ## List archive content & extract it by default
        ## (unquoted on purpose: one word per archive)
        for archive in ${TRUST_IO_FORC_TAR}; do
            tar -tvf "${TRUST_IO_FORC_PATH}/${archive}" >> inputs_list.txt
        done
        for archive in ${TRUST_IO_FORC_TAR}; do
            tar -vxf "${TRUST_IO_FORC_PATH}/${archive}"  > /dev/null
        done
    else
        ## List & copy files without archive.
        ## Run directly: stored in a string, '>>' would be passed to ls as
        ## an argument and '\cp' would be looked up as a command name
        ls -lh     "${TRUST_IO_FORC_PATH}"/* >> inputs_list.txt
        command cp "${TRUST_IO_FORC_PATH}"/* .
    fi

    ## TODO: accept a list of entries in TRUST_IO_FORC_PATH, each one being
    ## either an archive (listed and extracted) or a directory (listed and copied)

    ## No-op when there is no compressed file
    find . -name '*.gz' -exec gzip -d {} \;
}

#######################################
## Compare the input files of the current directory (cpp_*, inputs_list.txt,
## *namelist_*, *.xml) with their copy in the benchmark directory.
## Globals:   TRUST_TEST_BENCHMARK (read), file_inpt, file_note (read)
## Outputs:   `diff -q` messages, or 'Same', on stdout; 'Same'/'Different'
##            appended to ${file_inpt}; list of differing files appended to
##            temp_${file_note}
## Returns:   0
#######################################
diff_inputs() {
    local dif file
    local files_list='' mesg='Same'

    ###################################
    ## Think of copying initial test ##
    ###################################

    ## Simple diff
    for file in cpp_* 'inputs_list.txt' *namelist_* *.xml; do

        ## Pattern without any match (left as is by the shell) or missing file
        [[ -e "${file}" ]] || continue

        ## Pass over useless file omission in benchmark directory
        [[ -e "${TRUST_TEST_BENCHMARK}/${file}" ]] || continue

        dif=$( diff -q "${file}" "${TRUST_TEST_BENCHMARK}/${file}" )

        if [[ -n "${dif}" ]]; then
            mesg='Different'
            echo "${dif}"
            files_list+="${file} "
        fi

    done

    if [[ "${mesg}" == 'Same' ]]; then echo "${mesg}"; fi
    echo "${mesg}" \
        >> "${file_inpt}"

    ## List different files for web comment
    if [[ -n "${files_list}" ]]; then
        echo "Inputs  : ${files_list}differ<br>" \
            >> "temp_${file_note}"
    fi
}

#######################################
## Wait for the submitted job, polling every 30 s, and abort the test through
## `get_out 6` when the job is still there once the timeout is reached.
## Globals:   TRUST_JOB_STATE (read, command whose output is non-empty while
##            the job is pending), TRUST_JOB_INFO (read, optional command
##            whose output is logged), TRUST_JOB_TIMEOUT (read, seconds),
##            TRUST_JOB_KILL (read, command removing the job)
## Outputs:   separator lines and job information appended to computation.log
#######################################
job_pending() {
    local outline time_elapsed=0 time_increment=30 timed_out=0

    printf -v outline '%100s' ''

    sleep "${time_increment}"

    ## Append a log file while pending
    while [[ -n "$( eval "${TRUST_JOB_STATE}" )" ]]; do

        ## '>=' rather than '==': the counter steps by 30 s and would jump
        ## over a timeout that is not a multiple of it. Checked only while
        ## the job is still pending, so a job ending on the limit is not killed
        if (( time_elapsed >= TRUST_JOB_TIMEOUT )); then
            timed_out=1
            break
        fi

        printf '\n%s\n' "${outline// /#}" \
            >> computation.log
        if [[ -n "${TRUST_JOB_INFO}" ]]; then
            eval "${TRUST_JOB_INFO}" \
                >> computation.log
        fi
        sleep "${time_increment}"
        time_elapsed=$(( time_elapsed + time_increment ))
    done

    sleep "${time_increment}"

    ## Kill remaining job & stop the test if it's too long
    if (( timed_out )); then
        eval "${TRUST_JOB_KILL}" &> /dev/null
        get_out 6
    fi

}

#######################################
## Compare ocean.output and the stat files of the current directory with the
## benchmark ones; stop the test (get_out 7) when a benchmark file is missing.
## Globals:   TRUST_TEST_BENCHMARK (read), file_note (read)
## Outputs:   `diff -q` messages, or 'Same', on stdout; list of differing
##            files appended to temp_${file_note}
## Returns:   0
#######################################
diff_results() {
    local file
    local files_list='' mesg='Same'

    ###################################
    ## Think of copying initial test ##
    ###################################

    ## Simple diff
    for file in 'ocean.output' *.stat; do

        ## Stat files are optional: without any, the shell hands over the
        ## pattern itself, which must not be looked for in the benchmark
        if [[ "${file}" == '*.stat' && ! -e "${file}" ]]; then continue; fi

        ## Stop if no minimal benchmark files (ocean.output, eventual stat files)
        [[ -e "${TRUST_TEST_BENCHMARK}/${file}" ]] || get_out 7

        ## Continue even if it differs
        if ! diff -q "${file}" "${TRUST_TEST_BENCHMARK}/${file}"; then
            mesg='Different'
            files_list+="${file} "
        fi

    done

    if [[ "${mesg}" == 'Same' ]]; then echo "${mesg}"; fi

    ## List different files for web comment
    if [[ -n "${files_list}" ]]; then
        echo "Results : ${files_list}differ<br>" \
            >> "temp_${file_note}"
    fi
}

#######################################
## Rebuild the decomposed restart files found under '.', then compare each
## rebuilt file with the benchmark one through `cdo diffn`.
## Globals:   TRUST_CFG_NEW (read, used in the file name pattern),
##            TRUST_DIR_NEMOGCM (read, location of TOOLS/REBUILD_NEMO),
##            TRUST_COMPILE_NPROC (read, passed to rebuild_nemo -t),
##            TRUST_TEST_BENCHMARK (read), file_note (read)
## Outputs:   '<file>.nc: <n> of <m> records' per differing restart, or
##            'Same', on stdout; cdo_diff.out (overwritten for each file);
##            summary line appended to temp_${file_note}
## Notes:     calls `get_out X` when nothing can be rebuilt
#######################################
diff_restarts() {
    local dif filebase filebases ndomain
    local files_list='' dif_sum=0

    ## Find all restart files to rebuild
    if [[ -n "$( find . -regex ".*_restart.*[0-9]\.nc" -print -quit )" ]]; then
        ###############################################################
        ## Think to set the configuration name in the 'namelist_cfg' ##
        ###############################################################
        filebases=$( find . -regextype sed -regex ".*${TRUST_CFG_NEW}.*_[0-9]\{4\}\.nc" \
                     | sed 's/\(.*\)_.*/\1/' | sort -u                                  )

        ## Unquoted on purpose: one word per file base name
        for filebase in ${filebases}; do

            ndomain=$( find . -regex ".*${filebase}_[0-9]*.nc" \
                       | wc -l | awk '{print $1}'              )

            [ "${ndomain}" -eq 0 ] && get_out X

            #####################################################
            ## Handle 2 possibilities of 'rebuild_nemo' origin ##
            #####################################################

            ## Possibility of remaining decomposed restarts (even after rebuild)
            if "${TRUST_DIR_NEMOGCM}/TOOLS/REBUILD_NEMO/rebuild_nemo"       \
                   -t "${TRUST_COMPILE_NPROC}" "${filebase}" "${ndomain}"   \
                   > /dev/null; then
                rm -f "${filebase}"_[0-9]*.nc \
                    > /dev/null
            else
                get_out X
            fi

            ## Compare only when the benchmark holds the same restart file
            if [[ -e "${TRUST_TEST_BENCHMARK}/${filebase}.nc" ]]; then

                cdo diffn "${filebase}.nc" "${TRUST_TEST_BENCHMARK}/${filebase}.nc" \
                    > cdo_diff.out 2> /dev/null

                ## Identical if cdo_diff.out is zero size
                [[ -s cdo_diff.out ]] || continue

                dif=$( grep -om1 '[0-9]* of [0-9]* records' cdo_diff.out )

                if [[ -n "${dif}" ]]; then
                    files_list+="${filebase} "
                    echo "${filebase}.nc: ${dif}"
                    ## Leading number of '<n> of <m> records'
                    ## (10# keeps a zero-padded count decimal)
                    if [[ "${dif}" =~ ^([0-9]+) ]]; then
                        dif_sum=$(( dif_sum + 10#${BASH_REMATCH[1]} ))
                    fi
                fi

            fi

        done

        ## List modified restart(s) for web comment with sum of differences
        if [[ "${dif_sum}" -ne 0 ]]; then
            echo "Restarts: ${files_list}${dif_sum} record(s) differ<br>" \
                >> "temp_${file_note}"
        else
            echo 'Same'
        fi

    fi

}

#######################################
## Print the elapsed time of the job and record it in its messenger file.
## Does nothing when TRUST_JOB_TIME is empty.
## Globals:   TRUST_JOB_TIME (read, command printing the elapsed time),
##            file_time (read)
## Outputs:   'Elapsed time: <value>' on stdout; <value> appended to
##            ${file_time}
#######################################
get_time() {
    [[ -z "${TRUST_JOB_TIME}" ]] && return

    local time_cpu
    local -a words

    ## Interest for checking unusual time computation
    ## (quoted eval: the command line reaches the shell untouched)
    time_cpu=$( eval "${TRUST_JOB_TIME}" )

    ## Fold the output on a single line with single blanks, without letting
    ## the shell expand wildcards it may contain
    read -r -d '' -a words <<< "${time_cpu}"

    printf 'Elapsed time: '
    printf '%s\n' "${words[*]}" | tee -a "${file_time}"
}

#######################################
## Print the maximum physical/virtual memory of the job and record it in its
## messenger file. Does nothing when both commands are empty.
## Globals:   TRUST_JOB_RAM_P, TRUST_JOB_RAM_V (read, commands printing the
##            physical and the virtual memory), file_memy (read)
## Outputs:   'Memory max usage (physical/virtual): <phy> / <virt>' on
##            stdout; '<phy> / <virt>' appended to ${file_memy}
#######################################
get_memy() {
    [[ -z "${TRUST_JOB_RAM_P}" && -z "${TRUST_JOB_RAM_V}" ]] && return

    local memory_pmax memory_vmax
    local -a words

    ## Interest for checking unusual memory usage
    ## (quoted eval: the command lines reach the shell untouched)
    memory_pmax=$( eval "${TRUST_JOB_RAM_P}" )
    memory_vmax=$( eval "${TRUST_JOB_RAM_V}" )

    ## Fold each output on a single line with single blanks, without letting
    ## the shell expand wildcards it may contain
    read -r -d '' -a words <<< "${memory_pmax}"; memory_pmax="${words[*]}"
    read -r -d '' -a words <<< "${memory_vmax}"; memory_vmax="${words[*]}"

    printf 'Memory max usage (physical/virtual): '
    printf '%s\n' "${memory_pmax} / ${memory_vmax}" | tee -a "${file_memy}"
}

#######################################
## Pick the first message of the given kind in ocean.output, print it and
## queue it for the web comment.
## Arguments: $1 - 'E R R O R' (pattern searched anywhere, 4 following lines
##                 kept) or any other string such as 'W A R N I N G'
##                 (searched after one leading blank at line start, 2
##                 following lines kept)
## Globals:   file_note (read)
## Outputs:   the message on one line, blanks squeezed, on stdout; the raw
##            one-line message followed by '<br>' appended to
##            temp_${file_note}
## Returns:   0
#######################################
comments() {
    local line='' state=$1
    local -a grep_opts words

    if [[ -e ocean.output ]]; then
        ## 'W A R N I N G' pattern by default
        grep_opts=( -A2 "^ ${state}" )
        if [[ "${state}" == 'E R R O R' ]]; then grep_opts=( -A4 "${state}" ); fi

        ## Select first occurrence for web comment
        line=$( grep -m1 "${grep_opts[@]}" ocean.output | tr -d '\n' )
    fi

    if [[ -n "${line}" ]]; then
        ## Squeeze blanks for the terminal without expanding '*' and the like
        read -r -d '' -a words <<< "${line}"
        printf '%s\n' "${words[*]}"

        ## Data goes in an argument, never in the format ('%' in the message)
        printf '%s<br>' "${line}" \
            >> "temp_${file_note}"
    fi
}

#######################################
## Finalise the comments messenger file and build the summary of the test.
## Globals:   file_note (read), TRUST_TEST_SUMMARY (read, file overwritten)
## Outputs:   content of temp_${file_note}, joined on one line and without
##            its final '<br>', appended to ${file_note}; all 'mesg_*.txt'
##            files pasted side by side with ';' on stdout and in
##            ${TRUST_TEST_SUMMARY}
#######################################
log_make() {
    ## Format comments for web
    if [[ -e "temp_${file_note}" ]]; then
        tr -d '\n' < "temp_${file_note}" | sed 's/<br>$//' \
            >> "${file_note}"
    fi

    ## Construct txt file with all messenger files
    paste -d ';' mesg_*.txt | tee "${TRUST_TEST_SUMMARY}"
}

#######################################
## In production mode (TRUST_FLAG_PROD equal to 1), append the summary of
## the test to the trusting logfile and mail a report when the test failed.
## Globals:   TRUST_FLAG_PROD, TRUST_TEST_LOG, TRUST_TEST_SUMMARY,
##            TRUST_TEST_MAILING (blank-separated recipients),
##            TRUST_FLAG_RESULT, TRUST_FLAG_ERROR, TRUST_CFG_NEW,
##            TRUST_CFG_REF, TRUST_SVN_BRANCH, TRUST_MAIN_USER,
##            TRUST_MAIN_HPCC, TRUST_TEST_DIR, TRUST_TEST_BENCHMARK,
##            TRUST_TEST_BACKUP (all read)
## Outputs:   ${TRUST_TEST_LOG} (appended); trusting.mail (overwritten) and
##            one message sent with `mail` on failure
#######################################
prod_publish() {
    local -a cmd
    local subject

    ## Production mode (-p|--prod)
    if [ "${TRUST_FLAG_PROD:-0}" -eq 1 ]; then

        ## Create or append trusting logfile: the whole summary (titles
        ## included) the first time, only its last line afterwards
        if [[ -f "${TRUST_TEST_LOG}" ]]; then cmd=( tail -n 1 ); else cmd=( cat ); fi

        "${cmd[@]}" "${TRUST_TEST_SUMMARY}" \
            >> "${TRUST_TEST_LOG}"

        ## Send mail only when FAILED
        if [[ -n "${TRUST_TEST_MAILING}"            \
           && "${TRUST_FLAG_RESULT}" == 'FAILED'    ]]; then

            ## Content
            cat <<END_MAIL \
                > trusting.mail
Dear all,


The following trusting sequence has not completed successfully:

Testing configuration ${TRUST_CFG_NEW} based on ${TRUST_CFG_REF}.
User installation ${TRUST_MAIN_USER}
HPC environment ${TRUST_MAIN_HPCC}

Here is the running environment summary:
$(cat model.log)

For more details, look into the testing folder at:
${TRUST_TEST_DIR}
An archive is also available to share the questionable configuration:
${TRUST_TEST_BENCHMARK}/${TRUST_TEST_BACKUP}

END_MAIL

            ## Send with detailed subject, built on one line so that no
            ## source indentation ends up inside it
            subject="[NEMO Trusting][${TRUST_CFG_REF}][${TRUST_SVN_BRANCH}]"
            subject+=" ${TRUST_FLAG_RESULT} ${TRUST_FLAG_ERROR}"

            ## Recipients unquoted on purpose: one word per address
            mail -s "${subject}" ${TRUST_TEST_MAILING} \
                < trusting.mail
        fi

    fi
}

#######################################
## End the test: turn the error code into a status message, complete the
## messenger files, archive the configuration after a failure in production
## mode, publish the summary and exit.
## Arguments: $1 - error code (1-8, see the case below; anything else is
##                 reported as 'Unknown error')
## Globals:   TRUST_FLAG_ERROR (written), TRUST_TEST_DIR, TRUST_FLAG_RESULT,
##            TRUST_FLAG_PROD, TRUST_JOB_TIMEOUT (seconds), TRUST_TEST_BACKUP,
##            TRUST_TEST_BENCHMARK, TRUST_DIR_NEMOGCM, TRUST_CFG_NEW,
##            file_rslt, file_stat (read)
## Outputs:   progress messages on stdout; second line of ${file_rslt} and
##            ${file_stat} rewritten
## Returns:   never returns, the shell exits with status 0
#######################################
get_out() {
    local time_step=0

    TRUST_FLAG_ERROR=$1

    printf '\n\nEnd of test\n'

    ## In case of compilation error
    cd "${TRUST_TEST_DIR}" \
        || echo "get_out: cannot change directory to '${TRUST_TEST_DIR}'" >&2

    if [[ "${TRUST_FLAG_RESULT}" == 'FAILED' ]]; then
        echo 'Failure'

        ## Error identification
        case "${TRUST_FLAG_ERROR}" in
            ## Compilation
            1) TRUST_FLAG_ERROR='XIOS compilation failed' ;;
            2) TRUST_FLAG_ERROR='NEMO compilation failed' ;;
            ## Submission
            3) TRUST_FLAG_ERROR='Missing input files'     ;;
            4) TRUST_FLAG_ERROR='Job submission error'    ;;
            ## Computing
            5) TRUST_FLAG_ERROR='Crashed at time step'    ;;
            6) TRUST_FLAG_ERROR='Exceeded time limit'     ;;
            ## Results
            7) TRUST_FLAG_ERROR='Missing previous outputs';;
            8) TRUST_FLAG_ERROR='New outputs differ'      ;;
            ## Other (the pattern must stay unquoted to act as a wildcard)
            *) TRUST_FLAG_ERROR='Unknown error'           ;;
        esac

    else
        echo 'Success'
        TRUST_FLAG_ERROR='Code is reliable'
    fi

    ## Eventual comments from ocean.output
    if [[ "${TRUST_FLAG_ERROR}" == 'Crashed at time step' ]]; then
        comments 'E R R O R'
        if [[ -e time.step ]]; then time_step=$( cat time.step ); fi
        TRUST_FLAG_ERROR+=" ${time_step}"
    else
        comments 'W A R N I N G'

        if [[ "${TRUST_FLAG_ERROR}" == 'Exceeded time limit' ]]; then
            TRUST_FLAG_ERROR+=" $(( TRUST_JOB_TIMEOUT / 3600 ))h"
        fi

    fi

    ## Last messenger files: replace the default second line
    sed -i "2 s/.*/${TRUST_FLAG_RESULT}/" "${file_rslt}"
    sed -i "2 s/.*/${TRUST_FLAG_ERROR}/"  "${file_stat}"

    ## Save tested configuration if trusting failed in production mode (-p|--prod)
    if [[ "${TRUST_FLAG_RESULT}" == 'FAILED' && "${TRUST_FLAG_PROD}" -eq 1 ]]; then
        echo "Creating archive ${TRUST_TEST_BACKUP} under ${TRUST_TEST_BENCHMARK}"
        tar -czf "${TRUST_TEST_BENCHMARK}/${TRUST_TEST_BACKUP}"            * \
            -C   "${TRUST_DIR_NEMOGCM}/CONFIG/${TRUST_CFG_NEW}/MY_SRC"      . \
            -C   "${TRUST_DIR_NEMOGCM}/CONFIG/${TRUST_CFG_NEW}"              \
                 "cpp_${TRUST_CFG_NEW}.fcm"
    fi

    ## Logfile construct & eventual sending of notification email
    printf '\nTrusting digest:\n----------------\n'
    log_make
    prod_publish

    exit 0
}
Note: See TracBrowser for help on using the repository browser.