New URL for NEMO forge!   http://forge.nemo-ocean.eu

Since March 2022, along with the NEMO 4.2 release, code development has moved to a self-hosted GitLab.
This forge is now archived and remains online for historical reference.
Changeset 5799 for branches/2015/dev_r5092_CNRS18_TRUST/NEMOGCM/TRUST/trusting_func.sh – NEMO

Ignore:
Timestamp:
2015-10-16T16:38:37+02:00 (9 years ago)
Author:
nicolasmartin
Message:

dev_r5092_CNRS18_TRUST Establishing common environment for installing & running trusting, consolidation of scripts & continuation of templates & help section improvements

File:
1 edited

Legend:

Unmodified
Added
Removed
  • branches/2015/dev_r5092_CNRS18_TRUST/NEMOGCM/TRUST/trusting_func.sh

    r5788 r5799  
    33 
    44## Messenger filenames 
    5 FILE_DATE=mesg_01_date_$PATTERNAME.txt  ; FILE_TRUS_RSLT=mesg_02_result_$PATTERNAME.txt 
     5FILE_DATE=mesg_01_date_$PATTERNAME.txt  ; FILE_RSLT=mesg_02_result_$PATTERNAME.txt 
    66FILE_STAT=mesg_03_state_$PATTERNAME.txt ; FILE_NEMO=mesg_04_nemo_$PATTERNAME.txt 
    77FILE_XIOS=mesg_05_xios_$PATTERNAME.txt  ; FILE_CMPF=mesg_06_compiler_$PATTERNAME.txt 
     
    1010FILE_MEMY=mesg_11_memory_$PATTERNAME.txt; FILE_NOTE=mesg_12_comments_$PATTERNAME.txt 
    1111 
    12 ## Timestamped logfile & archive filenames 
    13 FILE_TRUS=trusting_${DATE}_$PATTERNAME.txt; FILE_ARCH=trusting_${DATE}_$PATTERNAME.tgz 
     12## Trusting timestamped logfile & archive 
     13TRUS_FILE=trusting_${DATE}_$PATTERNAME.txt; TRUS_ARCH=trusting_${DATE}_$PATTERNAME.tgz 
    1414 
    1515 
     
    2323 
    2424init_files() { 
    25     echo 'Date'               > ${FILE_DATE}; echo 'Result'           > ${FILE_TRUS_RSLT} 
     25    echo 'Date'               > ${FILE_DATE}; echo 'Result'           > ${FILE_RSLT} 
    2626    echo 'State'              > ${FILE_STAT}; echo 'NEMOGCM rev.'     > ${FILE_NEMO} 
    2727    echo 'XIOS rev.'          > ${FILE_XIOS}; echo 'Fortran compiler' > ${FILE_CMPF} 
     
    3131 
    3232    ## 'Failed' status with 'Unknown error' by default 
    33     echo $TRUS_RSLT           \ 
    34    >> ${FILE_TRUS_RSLT} 
     33    echo ${TRUS_RSLT}      \ 
     34   >> ${FILE_RSLT} 
    3535    echo 'Unknown error' \ 
    3636   >> ${FILE_STAT} 
     
    4747get_nemo_rev() { 
    4848    local dir rev_loc 
    49     local rev=0 list=( 'ARCH CONFIG NEMO EXTERNAL/AGRIF EXTERNAL/IOIPSL EXTERNAL/fcm TOOLS/COMPILE TOOLS/REBUILD_NEMO' ) 
     49    local rev=0 
    5050 
    5151    ## Loop on essential NEMO directories 
    52     for dir in $list ${DIR_XIOS}; do 
     52    for dir in ${TRUS_CKOT} ${TRUS_XIOS}; do 
    5353 
    5454   ## For time being, just get revision from XIOS with no action on directory 
    55    if [ $dir == ${DIR_XIOS} ]; then 
    56        rev_loc=$( svn info $dir | awk '(NR == 9) {print $NF}' ) 
     55   if [ $dir == ${TRUS_XIOS} ]; then 
     56       rev_loc=$( svn info $dir | awk '/Last Changed Rev/ {print $NF}' ) 
    5757       echo 'XIOS '${rev_loc} \ 
    5858      >> model.log 
     
    6262   fi 
    6363 
    64    echo $dir && ${SVN_CMD} ${TRUS_WKCY}/$dir 
    65    rev_loc=$( svn info ${TRUS_WKCY}/$dir | awk '(NR == 9) {print $NF}' ) 
     64   echo $dir && ${TRUS_SVNA} ${TRUS_NGCM}/$dir 
     65   rev_loc=$( svn info ${TRUS_NGCM}/$dir | awk '/Last Changed Rev/ {print $NF}' ) 
    6666 
    6767   ## Keep last rev. nb 
     
    7676 
    7777get_soft_rel() { 
    78     local soft soft_rel 
    79  
    80     ## Sourcing environment modulefile only if module function is set 
    81     [[ -e ${ARCH_ENV} && $( declare -F | grep ' module' ) ]] && . ${ARCH_ENV} 
    82  
    83     for soft in $CMPF ${STR_CDOD} ${STR_LMPI} ${STR_NCDF}; do 
     78    local soft_rel str 
     79 
     80    if [ -n "${TRUS_ENVI}" ]; then 
     81        ## Sourcing environment modulefile (.env) only if module function is set 
     82   if [[  -e ${TRUS_ENVI}.env && $( declare -F | grep ' module' ) ]]; then 
     83       . ${TRUS_ENVI}.env 
     84   else 
     85       [ -e ${TRUS_ENVI}.path ] && . ${TRUS_ENVI}.path || . ${TRUS_ENVI} 
     86   fi 
     87    fi 
     88 
     89    ## Problem with `prepend-path` of modulefile that use ':' instead of ' ' as delimiter on LDFLAGS variables 
     90    [ $TRUS_HPCC == 'X64_ADA' ] && WRAPPER_LDFLAGS='-L/smplocal/pub/IdrMemMPI/1.4/lib -lidrmem '${WRAPPER_LDFLAGS} 
     91 
     92    for str in ${TRUS_CMPF} ${TRUS_MPIR} ${TRUS_CDFR} ${TRUS_CDOR}; do 
     93   [ -z "$str" ] && continue 
    8494   soft_rel='' 
    8595 
    8696   ## Software release: next word after "$soft" in $PATH (case-insensitive) 
    87    soft_rel=$( echo $PATH | sed "s#.*$soft\([0-9.a-z_]*\).*#\1#i" ) 
     97   soft_rel=$( echo $PATH | sed "s#.*$str\([0-9.a-z_]*\).*#\1#i" ) 
    8898 
    8999   ## option --version would work for main compilers (gfortran, intel, pgfortran, ...) 
    90    [ $soft == $COMPILER ] && soft_rel=$( $soft --version | grep -m1 -oe '\<[0-9. ]*\>' ) 
     100   [ $str == ${TRUS_CMPF} ] && soft_rel=$( $str --version | grep -m1 -oe '\<[0-9. ]*\>' ) 
    91101 
    92102   ## Cleaning characters string to display proper soft name 
    93    soft=$( echo $soft | sed 's#\\##g; s#[/-]$##' ) 
    94  
    95    echo $soft ${soft_rel} \ 
     103   str=$( echo $str | sed 's#\\##g; s#[/-]$##' ) 
     104 
     105   echo $str ${soft_rel} \ 
    96106       >> model.log 
    97107    done 
    98108 
     109    sed -n 3p model.log \ 
     110   >> ${FILE_CMPF} 
    99111    sed -n 4p model.log \ 
    100    >> ${FILE_CMPF} 
     112   >> ${FILE_LMPI} 
    101113    sed -n 5p model.log \ 
    102    >> ${FILE_LMPI} 
    103     sed -n 6p model.log \ 
    104114   >> ${FILE_NCDF} 
    105115} 
     
    110120 
    111121    ## List & copy files in case of personal inputs 
    112     [ -z "${TRUS_TARF}" ] && { cmd_iol="ls ${TRUS_FORC}/*"; cmd_iof="\cp ${TRUS_FORC}/* ."; } 
     122    if [ -z "${TRUS_TARF}" ]; then 
     123     cmd_iol="ls ${TRUS_FORC}/*"                ; cmd_iof="\cp ${TRUS_FORC}/* ." 
     124    fi 
    113125 
    114126    ${cmd_iol} > inputs_list.txt 
     
    128140 
    129141   ## Pass over useless file omission in benckmark directory 
    130    [[ -n "$dif" && "$dif" != '0' ]] && ( mesg='Different'; echo $dif; files_list+=$file' ' ) 
     142   [[ -n "$dif" && "$dif" != '0' ]] && { mesg='Different'; echo $dif; files_list+=$file' '; } 
    131143    done 
    132144 
     
    146158 
    147159    ## Append a log file while pending 
    148     while [[ $( eval ${JOB_STAT} ) && ${time_elapsed} -lt $TIMEOUT ]]; do 
    149    printf "\n%s\n" ${outline// /#} \ 
     160    while [[ $( eval ${TRUS_JSTA} ) && ${time_elapsed} -lt ${TRUS_TOUT} ]]; do 
     161   printf "\n%s\n" ${outline// /#}          \ 
    150162       >> computation.log 
    151    eval ${JOB_INFO}                \ 
     163   [ -n "${TRUS_JINF}" ] && eval ${JOB_INFO} \ 
    152164       >> computation.log 
    153165   sleep ${time_increment} 
     
    158170 
    159171    ## Kill remaining job & stop the test if it's too long 
    160     [ ${time_elapsed} -eq $TIMEOUT ] && { eval ${JOB_DELE} &> /dev/null; get_out 6; } 
     172    [ ${time_elapsed} -eq ${TRUS_TOUT} ] && { eval ${JOB_DELE} &> /dev/null; get_out 6; } 
    161173} 
    162174 
     
    168180    for file in 'ocean.output' *.stat; do 
    169181   ## Stop if no benchmark files (ocean.output, eventual stat files) 
    170    [ ! -e ${TRUS_BHMK}/$file ] && { export TRUS_RSLT='FAILED'; get_out 7; } 
     182   [ ! -e ${TRUS_BHMK}/$file ] && { TRUS_RSLT='FAILED'; get_out 7; } 
    171183 
    172184   diff -q $file ${TRUS_BHMK}/$file 
    173185 
    174186   ## Continue even if it differs 
    175    [ $? -ne 0 ] && { export TRUS_RSLT='FAILED'; mesg='Different'; files_list+=$file' '; } 
     187   [ $? -ne 0 ] && { TRUS_RSLT='FAILED'; mesg='Different'; files_list+=$file' '; } 
    176188    done 
    177189 
     
    188200 
    189201    ## Stop if no benchmark files (ie time.step) 
    190     [ ! -e ${TRUS_BHMK}/time.step ] && { export TRUS_RSLT='FAILED'; get_out 7; } 
     202    [ ! -e ${TRUS_BHMK}/time.step ] && { TRUS_RSLT='FAILED'; get_out 7; } 
    191203    time_step=$( cat ${TRUS_BHMK}/time.step | tr -d [:space:] ) 
    192204 
     
    208220 
    209221      if   [ ${nb_dom} -gt 1 ]; then 
    210           ${TRUS_WKCY}/TOOLS/REBUILD_NEMO/rebuild_nemo -t ${TRUS_NPRO} $file ${nb_dom} > /dev/null 
    211           [ $? -eq 0 ] && rm -f ${file}_[0-9]*.nc                                > /dev/null 
     222          ${TRUS_NGCM}/TOOLS/REBUILD_NEMO/rebuild_nemo -t ${TRUS_NPRO} $file ${nb_dom} \ 
     223         > /dev/null 
     224          [ $? -eq 0 ] && rm -f ${file}_[0-9]*.nc \ 
     225                        > /dev/null 
    212226      elif [ ${nb_dom} -eq 0 ]; then 
    213           export TRUS_RSLT='FAILED' && get_out 8 
     227          TRUS_RSLT='FAILED' && get_out 8 
    214228      fi 
    215229 
     
    221235 
    222236                   ## UNIX `cmp` not suitable (timestamp in .nc file) 
    223          dif=$( $CDOD $file.nc ${TRUS_BHMK}/$file.nc 2> /dev/null          \ 
     237         dif=$( $TRUS_CDOD $file.nc ${TRUS_BHMK}/$file.nc 2> /dev/null          \ 
    224238                | awk '/records/ {print $0}' | sed '2 s/^/,/' | tr -d '\n' ) 
    225239 
    226240         ## CDO can return void stdout with no difference 
    227241         if [[ -n "$dif" && $( echo $dif | awk '{print $1}' ) -ne 0 ]]; then 
    228              export TRUS_RSLT='FAILED' 
     242             TRUS_RSLT='FAILED' 
    229243             files_list+=$comp' ' && let dif_sum+=$( echo $dif | awk '{print $1}' ) 
    230244             echo $file.nc': '$dif 
     
    232246 
    233247          else 
    234          export TRUS_RSLT='FAILED' && get_out 7 
     248         TRUS_RSLT='FAILED' && get_out 7 
    235249          fi 
    236250 
     
    252266 
    253267    else 
    254    export TRUS_RSLT='FAILED' 
     268   TRUS_RSLT='FAILED' 
    255269    fi 
    256270 
     
    258272 
    259273get_time() { 
     274    [ -z "${TRUS_JTIM}" ] && return 
     275 
    260276    ## Interest for checking unusual time computation 
    261     local time_cpu=$( eval ${JOB_TIME} ) 
     277    local time_cpu=$( eval ${TRUS_JTIM} ) 
    262278 
    263279    printf "Elapsed time: " 
     
    266282 
    267283get_memy() { 
     284    [[ -z "${TRUS_JPME}" && -z "${TRUS_JVME}" ]] && return 
     285 
    268286    ## Interest for checking unusual memory usage 
    269     local memory_pmax=$( eval ${JOB_PMEM} ) memory_vmax=$( eval ${JOB_VMEM} ) 
     287    local memory_pmax=$( eval ${TRUS_JPME} ) memory_vmax=$( eval ${TRUS_JVME} ) 
    270288 
    271289    printf "Memory max usage (physical/virtual): " 
     
    296314 
    297315    ## Construct txt file with all messenger files 
    298     paste -d ';' mesg_*.txt | tee ${FILE_TRUS} 
     316    paste -d ';' mesg_*.txt | tee ${TRUS_FILE} 
    299317} 
    300318 
     
    304322 
    305323    ## Production mode (-p|--prod) 
    306     if [ $PROD -eq 1 ]; then 
     324    if [ ${TRUS_PROD} -eq 1 ]; then 
    307325 
    308326   ## Create or append trusting logfile 
    309327   if [ -f ${TRUS_BHMK}/trusting_$PATTERNAME.txt ]; then cmd='tail -1'; else cmd='cat'; fi 
    310328 
    311    $cmd ${FILE_TRUS}                           \ 
     329   $cmd ${TRUS_FILE}                           \ 
    312330       >> ${TRUS_BHMK}/trusting_$PATTERNAME.txt 
    313331 
    314332        ## Send mail only when FAILED 
    315    if [[ ! -z "$TRUS_MAIL" && $TRUS_RSLT == 'FAILED' ]]; then 
     333   if [[ ! -z "${TRUS_MAIL}" && ${TRUS_RSLT} == 'FAILED' ]]; then 
    316334 
    317335       ## Content 
     
    321339 
    322340 
    323 The trusting sequence has not completed successfully on new configuration ${TRUS_TEST} based on ${TRUS_REFE}. 
     341The trusting sequence has not completed successfully on new configuration ${TRUS_CONF} based on ${TRUS_REFE}. 
    324342 
    325343Here is the model summary: 
     
    330348 
    331349For more details, look into the testing directory at: 
    332 ${TEST_DIR} 
     350${TRUS_TEST} 
    333351 
    334352An archive has been created to share the questionable configuration for further studies: 
    335 ${TRUS_BHMK}/${FILE_ARCH} 
     353${TRUS_BHMK}/${TRUS_ARCH} 
    336354 
    337355END_MAIL 
    338356 
    339357       ## Send with detailed subject 
    340        mail -s "[NEMO Trusting][$rev][${TRUS_WKCY}][${TRUS_REFE}] $TRUS_RSLT $ERR" $TRUS_MAIL \ 
     358       mail -s "[NEMO Trusting][$rev][${TRUS_BRAN}][${TRUS_REFE}] ${TRUS_RSLT} ${TRUS_RORR}" ${TRUS_MAIL} \ 
    341359      <  trusting.mail 
    342360   fi 
     
    348366    local time_step=0 
    349367 
    350     ERR=$1 
     368    TRUS_RORR=$1 
    351369 
    352370    printf "\n\nEnd of test\n" 
    353371 
    354372    ## In case of compilation error 
    355     cd ${TEST_DIR} 
    356  
    357     if [ $TRUS_RSLT == 'FAILED' ]; then 
     373    cd ${TRUS_TEST} 
     374 
     375    if [ ${TRUS_RSLT} == 'FAILED' ]; then 
    358376   echo 'Failure' 
    359377 
    360378        ## Error identification 
    361    case $ERR in 
     379   case ${TRUS_RORR} in 
    362380            ## Compilation 
    363        '1') ERR='XIOS compilation failed' ;; '2') ERR='NEMO compilation failed';; 
     381       '1') TRUS_RORR='XIOS compilation failed' ;; '2') TRUS_RORR='NEMO compilation failed';; 
    364382       ## Submission 
    365        '3') ERR='Missing input files'     ;; '4') ERR='Job submission error'   ;; 
     383       '3') TRUS_RORR='Missing input files'     ;; '4') TRUS_RORR='Job submission error'   ;; 
    366384       ## Computation 
    367        '5') ERR='Crashed at time step'    ;; '6') ERR='Exceeded time limit'    ;; 
     385       '5') TRUS_RORR='Crashed at time step'    ;; '6') TRUS_RORR='Exceeded time limit'    ;; 
    368386       ## Results 
    369        '7') ERR='Missing previous outputs';; '8') ERR='New outputs differ'     ;; 
     387       '7') TRUS_RORR='Missing previous outputs';; '8') TRUS_RORR='New outputs differ'     ;; 
     388       ## Other 
     389       '*') TRUS_RORR='Unknown error'           ;; 
    370390   esac 
    371391 
    372392    else 
    373    echo 'Success' && ERR='Code is reliable' 
     393   echo 'Success' && TRUS_RORR='Code is reliable' 
    374394    fi 
    375395 
    376396    ## Eventual comments from ocean.output 
    377     if [ "$ERR" == 'Crashed at time step' ]; then 
     397    if [ "${TRUS_RORR}" == 'Crashed at time step' ]; then 
    378398   comments 'E R R O R' 
    379399   [ -e time.step ] && time_step=$( grep -o [0-9]* time.step ) 
    380    ERR+=' '$time_step 
     400   TRUS_RORR+=' '$time_step 
    381401    else 
    382402   comments 'W A R N I N G' 
    383    [ "$ERR" == 'Exceeded time limit' ] && ERR+=' '$(( ${TIMEOUTT}/3600 ))'h' 
     403   [ "${TRUS_RORR}" == 'Exceeded time limit' ] && TRUS_RORR+=' '$(( ${TRUS_TOUT}/3600 ))'h' 
    384404    fi 
    385405 
    386406    ## Last messenger files 
    387     export ERR 
    388     sed -i "2 s/.*/$TRUS_RSLT/" ${FILE_TRUS_RSLT}; sed -i "2 s/.*/$ERR/" ${FILE_STAT} 
     407    #export TRUS_RORR 
     408    sed -i "2 s/.*/$TRUS_RSLT/" ${FILE_RSLT}; sed -i "2 s/.*/$TRUS_RORR/" ${FILE_STAT} 
    389409 
    390410    ## Save tested configuration if trusting failed in production mode (-p|--prod) 
    391     if [[ $TRUS_RSLT == 'FAILED' && $PROD -eq 1 ]]; then 
    392    echo 'Creating archive '${FILE_ARCH}' under '${TRUS_BHMK} 
    393    tar -czf ${TRUS_BHMK}/${FILE_ARCH}              *                    \ 
    394        -C ${TRUS_WKCY}/CONFIG/${TRUS_TEST}/MY_SRC .                    \ 
    395        -C ${TRUS_WKCY}/CONFIG/${TRUS_TEST}        cpp_${TRUS_TEST}.fcm 
     411    if [[ ${TRUS_RSLT} == 'FAILED' && ${TRUS_PROD} -eq 1 ]]; then 
     412   echo 'Creating archive '${TRUS_ARCH}' under '${TRUS_BHMK} 
     413   tar -czf ${TRUS_BHMK}/${TRUS_ARCH}               *                    \ 
     414       -C   ${TRUS_NGCM}/CONFIG/${TRUS_CONF}/MY_SRC .                    \ 
     415       -C   ${TRUS_NGCM}/CONFIG/${TRUS_CONF}        cpp_${TRUS_CONF}.fcm 
    396416    fi 
    397417 
Note: See TracChangeset for help on using the changeset viewer.