Changeset 5464


Ignore:
Timestamp:
2015-06-22T21:04:36+02:00 (5 years ago)
Author:
nicolasmartin
Message:

dev_r5092_CNRS_SETTE: end of the size reduction of the main script trusting.sh

Location:
branches/2015/dev_r5092_CNRS_SETTE/NEMOGCM/TRUST
Files:
4 edited

Legend:

Unmodified
Added
Removed
  • branches/2015/dev_r5092_CNRS_SETTE/NEMOGCM/TRUST/config/arch_template.cfg

    r5355 r5464  
    44 
    55# Available softwares: 
    6 CDO=''                    # CDO (Climate Data Operators) (root directory name in $LOADEDMODULES or $PATH) 
    7 COMPILER=''               # ifort/gfortran/pgfortran/... ( ""     " "     ""  ""       ""       ""  " " ) 
    8 MPI=''                    # MPI    libraries             ( ""     " "     ""  ""       ""       ""  " " ) 
    9 NETCDF=''                 # NetCDF    " "                ( ""     " "     ""  ""       ""       ""  " " ) 
     6CDO=''                    # CDO (Climate Data Operators) 
     7                                           # (root directory name in $LOADEDMODULES or $PATH) 
     8COMPILER=''               # ifort/gfortran/pgfortran/... 
     9                                           # (to get compiler release with command `$COMPILER --version`) 
     10MPI=''                    # MPI    libraries 
     11                                           # ( ""     " "     ""  ""       ""       ""  " " ) 
     12NETCDF=''                 # NetCDF    " "    
     13                                           # ( ""     " "     ""  ""       ""       ""  " " ) 
    1014 
    11 # Job commands: 
     15# Job commands (they work with the job ID): 
    1216JOB_SUBM=''               # Submit a job && get his ID in return 
    13 JOB_LIST=''                  # List submitted jobs  (to know if the job is in progress with his ID) 
    14 JOB_VIEW=''               # Get job informations (to increment a log during computing from job ID) 
    15 JOB_DELE=''               # Stop a job           (not used currently) 
     17JOB_LIST=''                  # List all submitted jobs to search for job ID with `grep` 
     18JOB_VIEW=''               # Get job informations (to fill in a log during computing) 
     19JOB_DELE=''               # Kill a job (in case of crash or time limit reaching) 
    1620 
    1721# Compiling & computing settings 
    18 [ -z "$NPROC"       ] && NPROC=1          # Number of processing units 
    19 [ -z "${TIME_LIMI}" ] && TIME_LIMI=''     # Timeout in s (pending & running) 
     22[ -z "$NPROC"       ] && NPROC=1          # Processing units for compilation 
     23[ -z "${TIME_LIMI}" ] && TIME_LIMI=''     # Utmost time in seconds to wait for finished job 
     24                                           # (pending & running) 
    2025 
    2126# Architecture environment (be careful with compliance with XIOS) 
  • branches/2015/dev_r5092_CNRS_SETTE/NEMOGCM/TRUST/trusting.env

    r5453 r5464  
    44# Time-stamp 
    55date_test=$( date ) 
    6 export DATE_STR=$( LC_ALL=C date -d"${date_test}" +%Y%m%dT%H%M ) 
     6export DATESTR=$( date -ud "${date_test}" +%Y%m%d%Z%H%M ) 
    77 
    88# Sourcing user configuration 
     
    1414export DIR_XIOS  #DIR_OASI 
    1515export NEMO_FORC FORC_TARF 
     16export EMAIL 
    1617 
    1718# Only interest for essential NEMO directories 
  • branches/2015/dev_r5092_CNRS_SETTE/NEMOGCM/TRUST/trusting.sh

    r5459 r5464  
    66NEMO_TRUS=$( pwd $( dirname $0 ) ) 
    77DEBUG=0; PUBLISH=0 
    8 STATUS='FAILED'; STEP=''# Only at end status will change to OK 
     8STATUS='FAILED'; STEP='' # Only at end status will change to OK 
    99 
    1010# Get options for replacing some initials settings 
     
    1313    case $1 in 
    1414        -b|--branch ) NEMO_BRAN=$2         ; shift 2;; -d|--debug  ) set -vx; DEBUG=1     ; shift  ;; 
    15    -e|--email  ) MAIL=$2              ; shift 2;; -j|--job    ) NPROC=$2             ; shift 2;; 
     15   -e|--email  ) EMAIL=$2             ; shift 2;; -j|--job    ) NPROC=$2             ; shift 2;; 
    1616   -h|--help   ) cat trusting_help.txt; exit  1;; -m|--machine) CFG_ARCH=$2          ; shift 2;; 
    1717   -n|--newconf) TEST_CONF=$2         ; shift 2;; -r|--refconf) REFE_CONF=$2         ; shift 2;; 
     
    4444mkdir -p ${TEST_DIR} ${REFE_DIR} 
    4545echo ${TEST_DIR} && cd ${TEST_DIR} 
    46 datestr1=$( LC_ALL=C date -d"${DATE_STR}" +%Y-%m-%dT%H:%M ) 
    47 printf "Date\n$datestr1\n" > mesg_01_date_${CFG_USER}_${CFG_ARCH}.txt 
     46date_str=$( date -ud ${DATESTR} +"%F %R %Z" ) 
     47printf "Date\n${date_str}\n" > mesg_01_date_${CFG_USER}_${CFG_ARCH}.txt 
    4848 
    4949# SVN action on XIOS & NEMO essentials directories 
     
    5757. ${ARCH_ENV} > /dev/null 
    5858get_soft_rel 
    59 cat arch_env.log 
     59cat model.log 
    6060 
    6161# XIOS compiling 
     
    7979cd ${TEST_DIR} 
    8080find ${NEMO_CONF}/${TEST_CONF}/EXP00 -regex '.*\(cfg\|opa\|ref\|xml\)' -exec cp {} . \; 
    81  
    82 if [ ! -z "${FORC_TAR}" ]; then 
    83     cmd_iol="tar -tvf ${NEMO_FORC}/${FORC_TAR}"; cmd_iof="tar -vxf ${NEMO_FORC}/${FORC_TAR}" 
    84 else 
    85     cmd_iol="ls ${NEMO_FORC}/*"                ; cmd_iof="\cp ${NEMO_FORC}/* ." 
    86 fi 
    87  
    88 ${cmd_iol} > inputs_list.txt && ${cmd_iof} > /dev/null 
     81get_inputs 
    8982[ $? -ne 0 ] && get_out $STATUS $STEP 
    90  
    9183[ $( find . -name '*.gz' -print -quit ) ] && gunzip *.gz 
    9284 
    9385# Check input files in all cases 
    9486#--------------------------------------------------- 
    95 print_step 'Difference between input files' 
     87print_step 'Difference with standard input files list' 
    9688diff_inputs 
    9789 
     
    10193JOB_ID=$( ${JOB_SUBM} | awk '{print $NF}' ) 
    10294[ $? -ne 0 ] && get_out $STATUS $STEP 
    103  
    104 print_step 'Pending/Running job' 
    105 time_elapsed=0; time_increment=30 
    106 sleep ${time_increment} 
    107 while [[ $( ${JOB_LIST} | grep ${JOB_ID} ) && ${time_elapsed} -lt ${TIME_LIMI} ]]; do 
    108     printf "\n####################################################" >> computation.log 
    109     ${JOB_VIEW} ${JOB_ID}                                           >> computation.log 
    110     sleep ${time_increment} 
    111     let time_elapsed+=${time_increment} 
    112 done 
    113 sleep ${time_increment} 
    114  
    115 if [ ${time_elapsed} -eq ${TIME_LIMI} ]; then 
    116     print_step 'Killing job' 
    117     ${JOB_DELE} ${JOB_ID} &> /dev/null 
    118     get_out $STATUS 'Job timeout' 
    119 fi 
     95print_step 'Holding-Running job' 
     96job_pending 
    12097print_step 'Job finished' 
    12198 
     
    128105fi 
    129106 
    130 STATUS='OK' 
    131107# Inspect output text files 
    132108#--------------------------------------------------- 
    133109print_step 'Test ASCII output files diff' 
    134 for file in ocean.output solver.stat tracer.stat; do 
    135     diff $file ${REFE_DIR}/$file 
    136     [ $? -ne 0 ] && get_out 'FAILED' 'ASCII output file diff' 
    137 done 
     110diff_textfiles 
    138111 
    139112# Inspect output NetCDF files 
     
    141114print_step 'Test last restart NetCDF files diff' 
    142115diff_restart 
    143 [ $? -ne 0 ] && get_out 'FAILED' "$CDO diff failed" 
    144116 
    145117# Get computation duration 
     
    148120get_cpu_time 
    149121 
    150 # Get comments (ocean.output & diff arch_env.log) 
     122# Get comments (ocean.output & diff model.log) 
    151123#--------------------------------------------------- 
    152124comments 'W A R N I N G' 
  • branches/2015/dev_r5092_CNRS_SETTE/NEMOGCM/TRUST/trusting_func.sh

    r5459 r5464  
    1616    fi 
    1717 
    18     if [ $( diff -q arch_env.log ${REFE_DIR}/arch_env.log ) ]; then 
    19    line=$( diff arch_env.log ${REFE_DIR}/arch_env.log  | tr -d '\n' ) 
     18    if [ $( diff -q model.log ${REFE_DIR}/model.log ) ]; then 
     19   line=$( diff model.log ${REFE_DIR}/model.log  | tr -d '\n' ) 
    2020    fi 
    2121 
     
    2929} 
    3030 
     31get_inputs() { 
     32    if [ ! -z "${FORC_TAR}" ]; then 
     33   cmd_iol="tar -tvf ${NEMO_FORC}/${FORC_TAR}"; cmd_iof="tar -vxf ${NEMO_FORC}/${FORC_TAR}" 
     34    else 
     35   cmd_iol="ls ${NEMO_FORC}/*"                ; cmd_iof="\cp ${NEMO_FORC}/* ." 
     36    fi 
     37 
     38    ${cmd_iol} > inputs_list.txt 
     39    ${cmd_iof} > /dev/null 
     40} 
     41 
    3142diff_inputs() { 
    3243    mesg='Same' 
     
    5667       fi 
    5768 
    58        cmp ${TEST_DIR}/${base_name}_$comp.nc ${REFE_DIR}/${base_name}_$comp.nc 
     69       # UNIX `cmp` not suitable (filename & timestamp in .nc file) 
     70       $CDO diffn ${TEST_DIR}/${base_name}_$comp.nc ${REFE_DIR}/${base_name}_$comp.nc 2> /dev/null 
    5971       if [ $? -ne 0 ]; then 
    6072      STATUS='FAILED'; STEP='Restart files different' 
    61       printf "$CDO diffn\n" 
    62       $CDO diffn ${TEST_DIR}/${base_name}_$comp.nc ${REFE_DIR}/${base_name}_$comp.nc 2> /dev/null 
     73      #printf "$CDO diffn\n" 
    6374       else 
    6475      printf "identical\n" 
    6576       fi 
    6677 
    67        \rm  ${TEST_DIR}/${base_name}_${comp}_[0-9]*.nc 
     78       rm -f ${TEST_DIR}/${base_name}_${comp}_[0-9]*.nc &> /dev/null 
    6879   done 
    6980 
     
    7384    fi 
    7485 
    75     [ $STATUS == 'FAILED'] && get_out $STATUS $STEP 
     86} 
     87 
     88diff_textfiles() { 
     89    for file in ocean.output $( ls *.stat ); do 
     90   diff $file ${REFE_DIR}/$file 
     91   [ $? -ne 0 ] && STATUS='FAILED' && STEP='ASCII output file diff' 
     92    done 
    7693} 
    7794 
     
    8198   [ -z "${arch_rel}"  ] && arch_rel=$( echo $PATH          | sed  "s#.*$rel/\([^/]*\).*#\1#" ) 
    8299   [ $rel == $COMPILER ] && arch_rel=$( $rel --version      | grep -m1 -o ' [0-9.]* '         ) 
    83    echo $rel ${arch_rel} >> arch_env.log 
    84     done 
    85  
    86     sed -n 3p arch_env.log | sed 'i\Compiler'    > mesg_05_compiler_${CFG_USER}_${CFG_ARCH}.txt 
    87     sed -n 4p arch_env.log | sed 'i\MPI libs'    > mesg_06_mpi_${CFG_USER}_${CFG_ARCH}.txt 
    88     sed -n 5p arch_env.log | sed 'i\NetCDF libs' > mesg_07_netcdf_${CFG_USER}_${CFG_ARCH}.txt 
     100   echo $rel ${arch_rel} >> model.log 
     101    done 
     102 
     103    sed -n 4p model.log | sed 'i\Compiler'    > mesg_05_compiler_${CFG_USER}_${CFG_ARCH}.txt 
     104    sed -n 5p model.log | sed 'i\MPI libs'    > mesg_06_mpi_${CFG_USER}_${CFG_ARCH}.txt 
     105    sed -n 6p model.log | sed 'i\NetCDF libs' > mesg_07_netcdf_${CFG_USER}_${CFG_ARCH}.txt 
    89106} 
    90107 
     
    102119 
    103120    rev=0 
    104     for dir in ${DIR_XIOS}  ${NEMO_ARCH} ${NEMO_CONF} ${NEMO_ENGI} \ 
    105           ${NEMO_EIOI} ${NEMO_EFCM} ${NEMO_TCMP} ${NEMO_TRBD}  ; do 
     121    for dir in ${NEMO_ARCH} ${NEMO_CONF} ${NEMO_ENGI} \ 
     122               ${NEMO_EIOI} ${NEMO_EFCM}              \ 
     123          ${NEMO_TCMP} ${NEMO_TRBD}              \ 
     124          ${DIR_XIOS}                             ; do 
    106125 
    107126   # For time being, just get revision number from XIOS 
    108127   if [ $dir == ${DIR_XIOS} ]; then 
    109        echo 'XIOS '$( svn info $dir | awk '(NR == 5) {print $NF}' ) >> arch_env.log 
     128       echo 'XIOS '$( svn info $dir | awk '(NR == 5) {print $NF}' ) >> model.log 
    110129       continue 
    111130   fi 
     
    116135    done 
    117136 
    118     printf "NEMOGCM rev.\n$rev\n" > mesg_02_nemogcm_${CFG_USER}_${CFG_ARCH}.txt 
     137    echo   "NEMOGCM $rev"         >> model.log 
     138    printf "NEMOGCM rev.\n$rev\n" >  mesg_02_nemogcm_${CFG_USER}_${CFG_ARCH}.txt 
    119139} 
    120140 
    121141mesg_make() { 
    122     paste -d ';' mesg_*_${CFG_USER}_${CFG_ARCH}.txt > trusting_${DATE_STR}_${CFG_USER}_${CFG_ARCH}.txt 
    123     cat                                               trusting_${DATE_STR}_${CFG_USER}_${CFG_ARCH}.txt 
     142    paste -d ';' mesg_*_${CFG_USER}_${CFG_ARCH}.txt > trusting_${DATESTR}_${CFG_USER}_${CFG_ARCH}.txt 
     143    cat                                               trusting_${DATESTR}_${CFG_USER}_${CFG_ARCH}.txt 
    124144} 
    125145 
     
    130150  if [ -f ${REFE_DIR}/trusting_${CFG_USER}_${CFG_ARCH}.txt ]; then cmd='sed -n 2p'; else cmd='cat'; fi 
    131151 
    132   $cmd trusting_${DATE_STR}_${CFG_USER}_${CFG_ARCH}.txt >> ${REFE_DIR}/trusting_${CFG_USER}_${CFG_ARCH}.txt 
     152  $cmd trusting_${DATESTR}_${CFG_USER}_${CFG_ARCH}.txt >> ${REFE_DIR}/trusting_${CFG_USER}_${CFG_ARCH}.txt 
    133153 
    134154  # Send mail only when FAILED 
    135   if [[ ! -z $MAIL && $STATUS == 'FAILED' ]]; then 
     155  if [[ ! -z $EMAIL && $STATUS == 'FAILED' ]]; then 
    136156      cat << END_MAIL > trusting.mail 
    137157XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX 
     
    139159 
    140160The trusting sequence for ${CONF_REF} has failed at step: 
    141 `tail -n 1 mesg_03_step_${CFG_USER}_${CFG_ARCH}.txt` 
     161`tail -n 1 ${TEST_DIR}/mesg_04_step_${CFG_USER}_${CFG_ARCH}.txt` 
    142162Directory : ${TEST_DIR} 
    143163 
     
    145165 
    146166END_MAIL 
    147       cat mesg_*_${CFG_USER}_${CFG_ARCH}.txt        >> trusting.mail 
    148       mail -s "[trusting ${CONF_REF}] FAILED" $MAIL  < trusting.mail 
     167      if [ -e ${TEST_DIR}/trusting_${DATESTR}_${CFG_USER}_${CFG_ARCH}.txt ]; then 
     168     cat ${TEST_DIR}/trusting_${DATESTR}_${CFG_USER}_${CFG_ARCH}.txt  >> trusting.mail 
     169      fi 
     170      mail -s "[trusting ${CONF_REF}] $STATUS $STEP" $EMAIL  <  trusting.mail 
    149171  fi 
    150172 
     
    152174} 
    153175 
    154 print_step() { STEP=$1; export STEP && printf "Step.....\n$TEP\n"; } 
     176print_step() { 
     177    [ ! -z "$1" ] && STEP=$1 
     178    export STEP && printf "Step.....\n$STEP\n" 
     179} 
    155180 
    156181get_out() { 
     
    162187   touch mesg_08_inputfiles_${CFG_USER}_${CFG_ARCH}.txt mesg_09_realcputime_${CFG_USER}_${CFG_ARCH}.txt \ 
    163188         mesg_10_comments_${CFG_USER}_${CFG_ARCH}.txt 
    164    #tar -czf ${REFE_DIR}/trusting_${DATE_STR}_${CFG_USER}_${CFG_ARCH}.tar.gz * 
     189   #tar -czf ${REFE_DIR}/trusting_${DATESTR}_${CFG_USER}_${CFG_ARCH}.tar.gz * 
    165190    fi 
    166191 
     
    170195    exit 1 
    171196} 
     197 
     198job_pending() { 
     199    time_elapsed=0; time_increment=30 
     200    sleep ${time_increment} 
     201 
     202    while [[ $( ${JOB_LIST} | grep ${JOB_ID} ) && ${time_elapsed} -lt ${TIME_LIMI} ]]; do 
     203   printf "\n####################################################" >> computation.log 
     204   ${JOB_VIEW} ${JOB_ID}                                           >> computation.log 
     205   sleep ${time_increment} 
     206   let time_elapsed+=${time_increment} 
     207    done 
     208 
     209    sleep ${time_increment} 
     210 
     211    if [ ${time_elapsed} -eq ${TIME_LIMI} ]; then 
     212   STEP='Exceeded time limit' 
     213 
     214   if [ $( ${JOB_LIST} | grep ${JOB_ID} ) ]; then 
     215       STEP='Job not finished on time: '$(( ${TIME_LIMI}/3600 ))'h' 
     216   fi 
     217 
     218   print_step 
     219   ${JOB_DELE} ${JOB_ID} &> /dev/null 
     220   get_out $STATUS $STEP 
     221    fi 
     222} 
Note: See TracChangeset for help on using the changeset viewer.