New URL for NEMO forge!   http://forge.nemo-ocean.eu

Since March 2022, along with the NEMO 4.2 release, code development has moved to a self-hosted GitLab.
This forge is now archived and remains online for historical reference.
trusting_func.sh in branches/2015/dev_r5092_CNRS_SETTE/NEMOGCM/TRUST – NEMO

source: branches/2015/dev_r5092_CNRS_SETTE/NEMOGCM/TRUST/trusting_func.sh @ 5685

Last change on this file since 5685 was 5685, checked in by nicolasmartin, 9 years ago

dev_r5092_CNRS_SETTE Modification to avoid void 'del_key' string in cpp file

  • Property eol-style set to native
  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Rev URL
File size: 10.0 KB
RevLine 
[5268]1#!/bin/bash
2
[5383]3
## Messenger filenames (the numeric infix fixes their order when globbed as mesg_*.txt)
FILE_DATE="mesg_01_date_${PATTERNAME}.txt"
FILE_STAT="mesg_02_status_${PATTERNAME}.txt"
FILE_RESU="mesg_03_result_${PATTERNAME}.txt"
FILE_NEMO="mesg_04_nemogcm_${PATTERNAME}.txt"
FILE_XIOS="mesg_05_xios_${PATTERNAME}.txt"
FILE_COMP="mesg_06_compiler_${PATTERNAME}.txt"
FILE_MPIN="mesg_07_mpi_${PATTERNAME}.txt"
FILE_NCDF="mesg_08_netcdf_${PATTERNAME}.txt"
FILE_INPT="mesg_09_inputs_${PATTERNAME}.txt"
FILE_CPUT="mesg_10_cputime_${PATTERNAME}.txt"
FILE_COMM="mesg_11_comments_${PATTERNAME}.txt"

## Trusting summary & archive timestamped filenames
FILE_TRUS="trusting_${DATE}_${PATTERNAME}.txt"
FILE_ARCH="trusting_${DATE}_${PATTERNAME}.tar.gz"
14
[5681]15
## Print a step banner followed by the message given as $1.
## The message is passed as data (%b) instead of being used as the printf format:
## backslash escapes in it are still expanded, but a '%' is printed literally.
print_step() { printf '\nStep.....\n%b\n' "$1"; }
17
[5681]18
## (Re)create every messenger file with its column header on the first line.
## Globals: FILE_DATE, FILE_STAT, FILE_RESU, FILE_NEMO, FILE_XIOS, FILE_COMP,
##          FILE_MPIN, FILE_NCDF, FILE_INPT, FILE_CPUT, FILE_COMM (written), ST (read)
## All targets are quoted so that file names containing blanks do not trigger
## an "ambiguous redirect".
init_files() {
    printf '%s\n' 'Date'             > "${FILE_DATE}"
    printf '%s\n' 'Status'           > "${FILE_STAT}"
    printf '%s\n' 'Result'           > "${FILE_RESU}"
    printf '%s\n' 'NEMOGCM rev.'     > "${FILE_NEMO}"
    printf '%s\n' 'XIOS rev.'        > "${FILE_XIOS}"
    printf '%s\n' 'Fortran compiler' > "${FILE_COMP}"
    printf '%s\n' 'MPI libs'         > "${FILE_MPIN}"
    printf '%s\n' 'NetCDF libs'      > "${FILE_NCDF}"
    printf '%s\n' 'Input files'      > "${FILE_INPT}"
    printf '%s\n' 'Real CPU time'    > "${FILE_CPUT}"
    printf '%s\n' 'Comments'         > "${FILE_COMM}"

    ## Default values on the second line: current status ($ST, presumably the
    ## 'failed' one at this stage -- TODO confirm against caller) and 'Unknown error'
    printf '%s\n' "${ST}"            >> "${FILE_STAT}"
    printf '%s\n' 'Unknown error'    >> "${FILE_RESU}"
}
30
[5681]31
## Append the trusting date, reformatted as "YYYY-MM-DD HH:MM TZ" in UTC, to ${FILE_DATE}.
## Globals: DATE (read), FILE_DATE (appended)
## ${DATE} is quoted so a date string containing a blank reaches `date -d` as one argument.
## A line is always appended, even when `date` fails (it is then empty).
get_date() {
    local dat

    dat=$( date -ud "${DATE}" +"%F %R %Z" )
    printf '%s\n' "${dat}" >> "${FILE_DATE}"
}
37
[5681]38
## Optionally update the NEMO working copies, then record the NEMOGCM and XIOS revisions.
## Globals: NEMO_VERS (read; selects the svn command, see below),
##          NEMO_ARCH NEMO_CONF NEMO_ENGI NEMO_EAGR NEMO_EIOI NEMO_EFCM NEMO_TCMP NEMO_TRBD
##          DIR_XIOS (read; directories to inspect, empty ones are skipped),
##          FILE_XIOS FILE_NEMO (appended), model.log in the current directory (appended)
## Outputs: each NEMO directory name and the svn command output on stdout
## The revision is taken from the 'Last Changed Rev:' field of `svn info` rather than
## from a fixed line number, because the number of lines printed before that field
## depends on the svn version. LC_ALL=C keeps the field name untranslated.
get_nemo_rev() {
    local dir rev_loc
    local rev=0
    local re_head='HEAD|up' re_date='\{[0-9]{4}-[0-9]{2}-[0-9]{2}\}' re_num='[0-9]+'
    local -a svn_cmd=( svn status )

    ## If -v|--version option has been set, modify svn command
    if   [[ ${NEMO_VERS} =~ ${re_head} ]]; then
        svn_cmd=( svn update -r HEAD )
    elif [[ ${NEMO_VERS} =~ ${re_date} ]]; then
        svn_cmd=( svn update -r "${BASH_REMATCH[0]}" )
    elif [[ ${NEMO_VERS} =~ ${re_num}  ]]; then
        svn_cmd=( svn update -r "${BASH_REMATCH[0]}" )
    fi

    for dir in "${NEMO_ARCH}" "${NEMO_CONF}" "${NEMO_ENGI}" \
               "${NEMO_EAGR}" "${NEMO_EIOI}" "${NEMO_EFCM}" \
               "${NEMO_TCMP}" "${NEMO_TRBD}"                \
               "${DIR_XIOS}"                                ; do

        ## Unset directories are ignored
        [[ -n $dir ]] || continue

        rev_loc=$( LC_ALL=C svn info "$dir" | awk '/^Last Changed Rev:/ {print $NF; exit}' )

        ## For time being, just get revision number from XIOS
        if [[ $dir == "${DIR_XIOS}" ]]; then
            echo "XIOS ${rev_loc}" >> model.log
            echo "<a href=\"https://forge.ipsl.jussieu.fr/ioserver/changeset/${rev_loc}\" target=\"_blank\">${rev_loc}</a>" \
                >> "${FILE_XIOS}"
            continue
        fi

        echo "$dir"
        "${svn_cmd[@]}" "$dir"

        ## Revision may have changed after an update; keep the highest one found
        rev_loc=$( LC_ALL=C svn info "$dir" | awk '/^Last Changed Rev:/ {print $NF; exit}' )
        if [[ ${rev_loc} =~ ^[0-9]+$ ]] && (( rev_loc >= rev )); then
            rev=${rev_loc}
        fi
    done

    echo "NEMOGCM $rev" >> model.log
    echo "<a href=\"https://forge.ipsl.jussieu.fr/nemo/changeset/$rev\" target=\"_blank\">$rev</a>" \
        >> "${FILE_NEMO}"
}
78
[5681]79
## Record the release of CDO, Fortran compiler, MPI and NetCDF in model.log (one line
## each, in that order), then copy lines 4, 5 and 6 of model.log to the compiler, MPI
## and NetCDF messenger files.
## Globals: CDO COMPILER MPI NETCDF (read; used as sed pattern fragments),
##          I_MODULE LOADEDMODULES PATH (read), FILE_COMP FILE_MPIN FILE_NCDF (appended)
## Exactly one line is written per package, even for an empty name or an undetected
## release, so the line positions read back below stay valid. When the pattern does not
## match, the release is left empty instead of echoing the whole searched string.
get_soft_rel() {
    local soft soft_rel
    local -a words

    for soft in "$CDO" "$COMPILER" "$MPI" "$NETCDF"; do
        soft_rel=''

        if [[ -n $soft ]]; then

            ## Release is looked for in loaded modules, otherwise in the PATH
            if (( I_MODULE == 1 )); then
                soft_rel=$( printf '%s\n' "$LOADEDMODULES" | sed -n "s/.*$soft\/\([0-9.a-z_]*\).*/\1/ip" )
            else
                soft_rel=$( printf '%s\n' "$PATH"          | sed -n "s/.*$soft\([0-9.a-z_]*\).*/\1/ip"   )
            fi

            ## The compiler reports its own release
            [[ $soft == "$COMPILER" ]] && soft_rel=$( "$soft" --version | grep -m1 -oe '\<[0-9. ]*\>' )

            ## Cleaning characters string to display proper soft name
            soft=$( printf '%s\n' "$soft" | sed 's#\\##g; s#[/-]$##' )
        fi

        ## Collapse blanks and newlines to single spaces without pathname expansion
        words=()
        read -r -d '' -a words <<< "$soft ${soft_rel}"
        printf '%s\n' "${words[*]}" >> model.log
    done

    sed -n 4p model.log >> "${FILE_COMP}"
    sed -n 5p model.log >> "${FILE_MPIN}"
    sed -n 6p model.log >> "${FILE_NCDF}"
}
101
[5681]102
## List the input files into inputs_list.txt, then bring them into the current directory.
## Globals: NEMO_FORC (read; directory holding the inputs),
##          NEMO_TARF (read; tar file inside NEMO_FORC, may be empty)
## Returns: status of the listing, or of the extraction/copy when the listing succeeded
## With NEMO_TARF set the archive is listed and extracted; otherwise the content of
## NEMO_FORC is listed and copied. The commands are run directly instead of being stored
## in strings: a stored "\cp" is looked up as a command literally named '\cp'.
## `command cp` still bypasses any alias or function named cp.
get_inputs() {
    if [[ -z ${NEMO_TARF} ]]; then
        ls "${NEMO_FORC}"/* > inputs_list.txt \
            && command cp "${NEMO_FORC}"/* . > /dev/null
    else
        tar -tvf "${NEMO_FORC}/${NEMO_TARF}" > inputs_list.txt \
            && tar -vxf "${NEMO_FORC}/${NEMO_TARF}" > /dev/null
    fi
}
109
[5681]110
## Compare the input list, namelists, XML files and cpp keys of the current directory
## with those stored in ${REFE_DIR}.
## Globals: REFE_DIR (read), FILE_INPT (appended), temp_${FILE_COMM} (appended), FILE_COMM (read)
## Outputs: diff messages, then 'Same' or 'Different' on stdout
## A file without counterpart in REFE_DIR makes the result 'Different' but is not listed;
## a file whose content differs is listed in the comments.
## A glob pattern that matches nothing is skipped instead of being handled as a file
## named after the pattern (which always ended in 'Different').
diff_inputs() {
    local dif file
    local files_list='' mesg='Same'

    for file in inputs_list.txt namelist_* *.xml cpp_*; do

        ## Unmatched pattern left as is by the shell: nothing to compare
        if [[ $file == *[*]* && ! -e $file ]]; then
            continue
        fi

        if [[ -e ${REFE_DIR}/$file ]]; then
            dif=$( diff -q "$file" "${REFE_DIR}/$file" )
        else
            dif=0
        fi

        if [[ -n $dif ]]; then
            mesg='Different'

            if [[ $dif != '0' ]]; then
                printf '%s\n' "$dif"
                files_list+="$file "
            fi
        fi

    done

    printf '%s\n' "$mesg" | tee -a "${FILE_INPT}"

    if [[ -n ${files_list} ]]; then
        printf '%s\n' "Inputs  : ${files_list}differ<br>" >> "temp_${FILE_COMM}"
    fi
}
133
[5681]134
## Wait for the submitted job to leave the scheduler queue, logging its state in
## computation.log every 30 seconds; give up once ${TIME_LIMI} seconds have elapsed.
## Globals: JOB_LIST JOB_INFO JOB_DELE (read; command strings, deliberately word-split),
##          JOB_ID (read), TIME_LIMI (read in seconds; converted to whole hours and
##          exported before calling get_out on timeout)
## On timeout the job is deleted and `get_out 6` is called.
## Fixes: the hour conversion was applied to a misspelled variable (TIME_LIMIT), so
## get_out reported the limit in seconds with an 'h' suffix; the timeout test used '=='
## and was never true when the limit is not a multiple of the polling interval.
job_pending() {
    local outline time_elapsed=0 time_increment=30

    outline=$( printf "%100s" )

    sleep "${time_increment}"

    while [[ -n ${JOB_ID} && -n $( ${JOB_LIST} | grep -e "${JOB_ID}" ) ]] \
          && (( time_elapsed < TIME_LIMI )); do
        printf "\n%s\n" "${outline// /#}" >> computation.log
        ${JOB_INFO} "${JOB_ID}"           >> computation.log
        sleep "${time_increment}"
        time_elapsed=$(( time_elapsed + time_increment ))
    done

    sleep "${time_increment}"

    if (( time_elapsed >= TIME_LIMI )); then
        ${JOB_DELE} "${JOB_ID}" &> /dev/null
        TIME_LIMI=$(( TIME_LIMI / 3600 ))
        export TIME_LIMI
        get_out 6
    fi
}
156
[5681]157
## Compare ocean.output and the *.stat files of the current directory with the ones
## stored in ${REFE_DIR}.
## Globals: REFE_DIR (read), ST (set to 'FAILED' and exported on any difference),
##          temp_${FILE_COMM} (appended with the list of differing files)
## Outputs: `diff -q` messages on stdout
## A file missing from REFE_DIR ends the sequence through `get_out 7`.
diff_results() {
    local file
    local files_list=''

    for file in ocean.output *.stat; do

        if [[ ! -e ${REFE_DIR}/$file ]]; then
            export ST='FAILED'
            get_out 7
        fi

        if ! diff -q "$file" "${REFE_DIR}/$file"; then
            export ST='FAILED'
            files_list+="$file "
        fi

    done

    if [[ -n ${files_list} ]]; then
        printf '%s\n' "Results : ${files_list}differ<br>" >> "temp_${FILE_COMM}"
    fi
}
170
[5681]171
## Rebuild the restart files found under the current directory and compare the ones
## written at the benchmark last time step with those stored in ${REFE_DIR}.
## Globals: REFE_DIR NEMO_TRBD NPROC CDO (read), TIME_STEP (set from
##          ${REFE_DIR}/time.step and exported), ST (set to 'FAILED' and exported on
##          failure), temp_${FILE_COMM} (appended)
## Outputs: progress and per-file difference messages on stdout
## Calls `get_out 7` when a benchmark file is missing and `get_out 8` when no file
## matches an expected restart name.
## Fix: the 8-digit zero-padded time stamp taken from the file names is compared in
## base 10 (10#...). As a bare number in arithmetic context it is read as octal, so
## '00000080' was an error and '00000075' never equalled 75.
diff_restart() {
    local base_name comp dif file list_comp list_tmsp nb_dif nb_dom tmsp
    local files_list='' dift=0
    local re_restart='.*_restart.*[0-9]\.nc' re_int='^[0-9]+$'

    if [[ ! -e ${REFE_DIR}/time.step ]]; then
        export ST='FAILED'
        get_out 7
    fi

    export TIME_STEP
    TIME_STEP=$( tr -d '[:space:]' < "${REFE_DIR}/time.step" )
    echo "Last time step of benchmark run: ${TIME_STEP}"

    ## Nothing to do without any restart file
    if [[ -z $( find . -regex "${re_restart}" -print -quit ) ]]; then
        export ST='FAILED'
        return
    fi

    ## Find all restart files to rebuild
    base_name=$( find . -regex "${re_restart}"                              \
                 | sed "s#^\./\(.*\)_[0-9]*_restart.*#\1#"       | sort -u )
    list_comp=$( find . -regex "${re_restart}"                              \
                 | sed "s#^.*\(restart[a-z_]*\)_[0-9].*\.nc#\1#" | sort -u )
    list_tmsp=$( find . -regex "${re_restart}"                              \
                 | sed "s#^.*\([0-9]\{8\}\)_restart.*#\1#"       | sort -u )

    ## Both lists are newline separated: word splitting is intended here
    for tmsp in ${list_tmsp}; do
        for comp in ${list_comp}; do
            file=${base_name}_${tmsp}_${comp}
            nb_dom=$( find . -name "${file}_[0-9]*.nc" | wc -l | awk '{ print $1 }' )

            if   (( nb_dom >  1 )); then
                ## Sub-domain files are removed only after a successful rebuild
                "${NEMO_TRBD}/rebuild_nemo" -t "$NPROC" "$file" "${nb_dom}" > /dev/null \
                    && rm -f "${file}"_[0-9]*.nc                            > /dev/null
            elif (( nb_dom == 0 )); then
                export ST='FAILED'
                get_out 8
            fi

            ## Compare restart files at same time step
            [[ $tmsp =~ ${re_int} && ${TIME_STEP} =~ ${re_int} ]] || continue
            (( 10#$tmsp == 10#${TIME_STEP} ))                      || continue

            if [[ ! -e ${REFE_DIR}/$file.nc ]]; then
                export ST='FAILED'
                get_out 7
            fi

            ## UNIX `cmp` not suitable (timestamp in .nc file)
            ## '$1 = $1' squeezes blanks so the message is printed on one tidy line
            dif=$( "$CDO" diffn "$file.nc" "${REFE_DIR}/$file.nc" 2> /dev/null     \
                   | awk '/records/ {$1 = $1; print}' | sed '2 s/^/,/' | tr -d '\n' )
            nb_dif=$( printf '%s\n' "$dif" | awk '{print $1; exit}' )

            if [[ -n $dif && $nb_dif =~ ${re_int} ]] && (( 10#$nb_dif != 0 )); then
                export ST='FAILED'
                files_list+="$file "
                dift=$(( dift + 10#$nb_dif ))
                echo "$file.nc: $dif"
            fi

        done
    done

    if (( dift != 0 )); then
        echo "Restarts: ${files_list}${dift} record(s) differ<br>" \
            >> "temp_${FILE_COMM}"
    else
        echo 'No restart differ'
    fi
}
229
[5681]230
## Evaluate the ${JOB_TIME} command string and record its output, squeezed onto a
## single line, on stdout and in ${FILE_CPUT}.
## Globals: JOB_TIME (read; evaluated with eval), FILE_CPUT (appended)
get_cpu_time() {
    local real_cpu_time
    local -a words=()

    real_cpu_time=$( eval "${JOB_TIME}" )

    ## Collapse blanks and newlines to single spaces without pathname expansion
    read -r -d '' -a words <<< "${real_cpu_time}"
    printf '%s\n' "${words[*]}" | tee -a "${FILE_CPUT}"
}
236
[5681]237
## Extract from ocean.output the first block of lines matching the state given as $1,
## print it on stdout and append it, followed by '<br>', to temp_${FILE_COMM}.
## Arguments: $1 - text searched in ocean.output; 'E R R O R' also refreshes TIME_STEP
## Globals: TIME_STEP (set from ./time.step and exported for 'E R R O R'),
##          FILE_COMM (read)
## The extracted text is passed to printf as data, not as the format string, so '%'
## or '\' in ocean.output are written verbatim. It is kept in a local variable and is
## therefore empty when ocean.output is missing.
comments() {
    local state=$1
    local line=''
    local -a words=()

    if [[ $state == 'E R R O R' ]]; then
        if [[ -e time.step ]]; then
            export TIME_STEP
            TIME_STEP=$( tr -d ' ' < time.step )
        fi
        [[ -e ocean.output ]] && line=$( grep -m1 -A4 -e "$state"   ocean.output | tr -d '\n' )
    else
        [[ -e ocean.output ]] && line=$( grep -m1 -A2 -e "^ $state" ocean.output | tr -d '\n' )
    fi

    ## Screen output with squeezed blanks, without pathname expansion
    read -r -d '' -a words <<< "$line"
    printf '%s\n' "${words[*]}"

    printf '%s<br>' "$line" >> "temp_${FILE_COMM}"
}
251
[5681]252
## Build the trusting summary: gather the comments on one line, then join all the
## messenger files side by side with ';' as separator.
## Globals: FILE_COMM (appended), temp_${FILE_COMM} (read), FILE_TRUS (overwritten)
## Outputs: the summary on stdout
mesg_make() {
    ## Format comments for web: single line, without the trailing '<br>'
    if [[ -e temp_${FILE_COMM} ]]; then
        tr -d '\n' < "temp_${FILE_COMM}" | sed 's/<br>$//' >> "${FILE_COMM}"
    fi

    paste -d ';' mesg_*.txt | tee "${FILE_TRUS}"
}
260
[5681]261
## Publish the trusting summary in ${REFE_DIR} and, on failure, send it by mail.
## Globals: PUBLISH (read; nothing is done unless it equals 1), REFE_DIR PATTERNAME
##          FILE_TRUS FILE_ARCH TEST_DIR REFE_CONF NEMO_HPCC NEMO_BRAN USER (read),
##          EMAIL (read; blank separated recipients), ST ERR (read)
## Side effects: appends to ${REFE_DIR}/trusting_${PATTERNAME}.txt, writes
##               trusting.mail in the current directory, calls `mail`
mesg_publish() {
    local history
    local -a recipients=()

    [[ ${PUBLISH:-0} -eq 1 ]] || return 0

    history="${REFE_DIR}/trusting_${PATTERNAME}.txt"

    ## Header line is kept only when the history file is created
    if [[ -f $history ]]; then
        tail -n 1 "${FILE_TRUS}" >> "$history"
    else
        cat "${FILE_TRUS}"       >> "$history"
    fi

    ## Send mail only when FAILED
    if [[ -n ${EMAIL} && ${ST} == 'FAILED' ]]; then
        cat > trusting.mail <<END_MAIL
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX

Dear $USER,

The trusting sequence for ${REFE_CONF} has failed.
Directory: ${TEST_DIR}
Archive created: ${FILE_ARCH} in ${REFE_DIR}

XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX

$(cat "${TEST_DIR}/${FILE_TRUS}")
END_MAIL
        read -r -a recipients <<< "${EMAIL}"
        mail -s "[NEMO Trusting ${NEMO_HPCC} ${REFE_CONF} ${NEMO_BRAN}] $ST $ERR" "${recipients[@]}" \
            < trusting.mail
    fi
}
295
[5681]296
## Replace the second line of file $1 with the text $2, taken literally.
## The text goes through the environment, so '/', '&' or '\' in it need no escaping.
## A file with fewer than two lines is left unchanged.
_trusting_set_value() {
    local file=$1

    if VALUE=$2 awk 'NR == 2 { print ENVIRON["VALUE"]; next } { print }' "$file" > "$file.tmp"; then
        mv -f "$file.tmp" "$file"
    else
        rm -f "$file.tmp"
        return 1
    fi
}

## Final step of the trusting sequence: record status and result, archive the tested
## configuration if needed, build and publish the messages, then leave the script.
## Arguments: $1 - error code (1 to 8, translated to a message when ST is 'FAILED')
##                 or any text stored as is
## Globals: ERR (set; exported when ST is 'FAILED'), ST TEST_DIR TIME_STEP TIME_LIMI
##          PUBLISH DEBUG REFE_DIR FILE_ARCH NEMO_HOME TEST_CONF (read),
##          FILE_STAT FILE_RESU (second line rewritten)
## Never returns: always ends with `exit 0`.
get_out() {
    ERR=$1

    cd "${TEST_DIR}" || echo "get_out: cannot change directory to '${TEST_DIR}'" >&2

    if [[ ${ST} == 'FAILED' ]]; then

        ## Error identification
        case "${ERR}" in
            ## Compilation
            1) ERR='XIOS compilation failed'             ;;
            2) ERR='NEMO compilation failed'             ;;
            ## Submission
            3) ERR='Missing input files'                 ;;
            4) ERR='Job submission error'                ;;
            ## Running
            5) ERR="Crashed at time step ${TIME_STEP}"   ;;
            6) ERR="Exceeded time limit ${TIME_LIMI}h"   ;;
            ## Results
            7) ERR='Missing previous outputs'            ;;
            8) ERR='New outputs differ'                  ;;
            ## Undefined
            *) ERR='Unknown error'                       ;;
        esac

        export ERR
    fi

    ## Result is updated only if status has been
    _trusting_set_value "${FILE_STAT}" "${ST}" && _trusting_set_value "${FILE_RESU}" "${ERR}"

    ## Save tested configuration if trusting failed
    if [[ ${ST} == 'FAILED' && ${PUBLISH} -eq 1 && ${DEBUG} -eq 0 ]]; then
        echo "Creating archive ${FILE_ARCH} under ${REFE_DIR}"
        tar -czf "${REFE_DIR}/${FILE_ARCH}" * "${NEMO_HOME}/CONFIG/${TEST_CONF}/MY_SRC" \
                                              "${NEMO_HOME}/CONFIG/${TEST_CONF}/WORK"
    fi

    mesg_make
    mesg_publish

    exit 0
}
Note: See TracBrowser for help on using the repository browser.