Changeset 5472


Ignore:
Timestamp:
2015-06-24T15:12:16+02:00 (5 years ago)
Author:
nicolasmartin
Message:

dev_r5092_CNRS_SETTE Ping pong updates between Curie & Ada to finalize

Location:
branches/2015/dev_r5092_CNRS_SETTE/NEMOGCM/TRUST
Files:
6 edited

Legend:

Unmodified
Added
Removed
  • branches/2015/dev_r5092_CNRS_SETTE/NEMOGCM/TRUST/config/X64_CURIE.cfg

    r5402 r5472  
    88 
    99# Job commands: ccc_* 
    10 JOB_SUBM=ccc_msub 
     10JOB_SUBM="ccc_msub ${NEMO_TRUS}/batch/${CFG_ARCH}.sh | awk '{print \$NF}'" 
    1111JOB_LIST=ccc_mpp 
    1212JOB_VIEW=ccc_macct 
     13JOB_RCPU="${JOB_VIEW} ${JOB_ID} | awk 'END{print \$9}'" 
    1314JOB_DELE=ccc_mdel                           
    1415 
  • branches/2015/dev_r5092_CNRS_SETTE/NEMOGCM/TRUST/config/arch_template.cfg

    r5464 r5472  
    1 # Copy to ./$ARCH.cfg, fill in according to your installation 
     1# Copy to ./${CFG_ARCH}.cfg, fill in according to your installation 
    22 
    33# Super-computer configuration 
    44 
    55# Available softwares: 
    6 CDO=''                    # CDO (Climate Data Operators) 
    7                                            # (root directory name in $LOADEDMODULES or $PATH) 
    8 COMPILER=''               # ifort/gfortran/pgfortran/... 
    9                                            # (to get compiler release with command `$COMPILER --version`) 
    10 MPI=''                    # MPI    libraries 
    11                                            # ( ""     " "     ""  ""       ""       ""  " " ) 
    12 NETCDF=''                 # NetCDF    " "    
    13                                            # ( ""     " "     ""  ""       ""       ""  " " ) 
     6CDO=''                    # CDO (Climate Data Operators) (directory name in $LOADEDMODULES or $PATH) 
     7                                             # ex: 'cdo'/'nco'/... 
     8COMPILER=''               # Fortran compiler (to get compiler release with `$COMPILER --version`) 
     9                                             # ex: 'ifort'/'gfortran'/'pgfortran'/... 
     10MPI=''                    # MPI    libraries             (directory name in $LOADEDMODULES or $PATH) 
     11                                             # ex: 'openmpi'/'mpich'/... 
     12NETCDF=''                 # NetCDF    " "                (   " "     ""  ""       ""       ""  " " ) 
     13                                             # ex: 'netcdf42'/'netcdf-parallel'/... 
    1414 
    1515# Job commands (works with job ID): 
    1616JOB_SUBM=''               # Submit a job && get its ID in return 
    17 JOB_LIST=''                  # List all submitted jobs to search for job ID with `grep` 
    18 JOB_VIEW=''               # Get job informations (to fill in a log during computing) 
    19 JOB_DELE=''               # Kill a job (in case of crash or time limit reaching) 
     17                                             # ex: "cmd ${NEMO_TRUS}/batch/${CFG_ARCH}.sh | awk '{print \$NF}'" 
     18JOB_LIST=''                  # List all submitted jobs (to determine by `grep` if a job completed or not) 
     19JOB_INFO=''               # Get job information           with job ID as argument (filling a computation log) 
     20JOB_TIME=''                               # Get real CPU time computation  ""   "  "" ""    ""    
     21                                             # ex: "${JOB_VIEW} ${JOB_ID} | awk 'END{print \$9}'" 
     22JOB_DELE=''               # Kill submitted job             ""   "  "" ""    ""    
    2023 
    2124# Compiling & computing settings 
    2225[ -z "$NPROC"       ] && NPROC=1          # Processing units for compilation 
    23 [ -z "${TIME_LIMI}" ] && TIME_LIMI=''     # Utmost time in seconds to wait for finished job 
    24                                            # (pending & running) 
     26[ -z "${TIME_LIMI}" ] && TIME_LIMI=''     # Utmost time in seconds to wait for completed job 
    2527 
    2628# Architecture environment (be careful with compliance with XIOS) 
  • branches/2015/dev_r5092_CNRS_SETTE/NEMOGCM/TRUST/config/user_template.cfg

    r5402 r5472  
    1 # Copy to ./user.cfg, fill in according to your installation 
     1# Copy to ./${CFG_USER}.cfg, fill in according to your installation 
    22 
    33# User configuration 
    44 
    5 # Global Directories: 
    6 DIR_WORK=''               # Directory for compilation 
    7 DIR_SCRA=''                  #    " "    " " computation to increase perfs on some super-computers 
    8                           # (will copy into it all requested forcing input & ".*(cfg|opa|ref|xml)" files) 
    9 DIR_STOR=''                  #    " "     for comparison (ASCII output & restart files of reference config) 
    105 
    116# NEMO overall configuration: 
    12 [ -z "${NEMO_BRAN}" ] && NEMO_BRAN=''     # trunk, dev_v3_4_STABLE_2012, ... 
    13 [ -z "${CONF_REFE}" ] && REFE_CONF=''     # ORCA2_LIM_PISCES, GYRE, ... 
    14 [ -z "${CONF_TEST}" ] && TEST_CONF=''     # ${CONF_REFE}_trust, ... 
    15 NEMO_HOME=''                              # NEMOGCM equivalent root directory to find ./ARCH,./CONFIG, ... 
    16                                           # (${DIR_WORK}/NEMO/${NEMO_BRAN}/NEMOGCM, ...) 
     7[ -z "${NEMO_BRAN}" ] && NEMO_BRAN=''        # ex: 'trunk', 'dev_v3_4_STABLE_2012', ... 
     8NEMO_HOME=''                              # NEMOGCM directory for computation (./ARCH, ./CONFIG, ...) 
     9                                             # ex: ".../NEMO/${NEMO_BRAN}/NEMOGCM", ... 
     10[ -z "${CONF_REFE}" ] && REFE_CONF=''        # ex: 'ORCA2_LIM_PISCES', 'GYRE', ... 
     11[ -z "${CONF_TEST}" ] && TEST_CONF=''        # ex: "${CONF_REFE}_trust", ... 
    1712 
    1813# Comparatives directories localization: 
    19 REFE_DIR=''                  # ${DIR_STOR}/${CONF_REFE}/${NEMO_BRAN} 
    20 TEST_DIR=''                  # ${DIR_SCRA}/${CONF_REFE}/${NEMO_BRAN}/trusting_${DATE_STR} 
     14REFE_DIR=''                  # "Standard" folder where previous result files are stored for comparison 
     15                                             # ex: ".../${CONF_REFE}/${NEMO_BRAN}" 
     16TEST_DIR=''                  # Test       folder for computation 
     17                                             # ex: ".../${CONF_REFE}/${NEMO_BRAN}/trusting_${DATE_STR}" 
    2118 
    2219# External softs directories: 
    23 DIR_XIOS=''                  # ${DIR_WORK}/XIOS/xios-1.0 
    24 #DIR_OASI=''                    # ${DIR_WORK}/OASIS         
     20DIR_XIOS=''                     # ex: '.../XIOS/xios-1.0' 
     21#DIR_OASI=''                       # ex: '.../OASIS' 
    2522 
    26 # NEMO inputs (fill in if necessary) 
    27 NEMO_FORC=''                              # Directory with forcing archive to extract from ESGF server 
    28                                           #        or  ""  all inputs files to copy 
    29 FORC_TARF=''                              # Archive name (ORCA2_LIM_nemo_v3.6.tar, ...) 
    30                                           # Leave blank if not requested 
     23# NEMO inputs 
     24NEMO_FORC=''                              #     Directory with forcing archive from DODS/ESGF server to extract 
     25                                           # or    " "     ""  all inputs files to copy into ${TEST_DIR} 
     26FORC_TARF=''                              # Archive name (ex: 'ORCA2_LIM_nemo_v3.6.tar', ...), leave blank if not requested 
    3127 
    32 # Mailing list to inform of a failure, -p|--publish option required 
    33 [ -z "$MAIL"] && MAIL='' 
     28# Mailing list to inform of a failure (-p|--publish option required) 
     29[ -z "$EMAIL"] && EMAIL='' 
  • branches/2015/dev_r5092_CNRS_SETTE/NEMOGCM/TRUST/trusting.env

    r5464 r5472  
    99#-------------------------------------- 
    1010. ${NEMO_TRUS}/config/${CFG_USER}.cfg 
    11 export DIR_WORK  DIR_SCRA  DIR_STOR 
    1211export NEMO_HOME NEMO_BRAN 
    1312export REFE_CONF TEST_CONF REFE_DIR TEST_DIR 
     
    2726#-------------------------------------- 
    2827. ${NEMO_TRUS}/config/${CFG_ARCH}.cfg 
    29 export CDO      COMPILER  MPI      NETCDF 
    30 export JOB_SUBM JOB_LIST  JOB_VIEW JOB_DELE 
     28export CDO      COMPILER MPI      NETCDF 
     29export JOB_SUBM JOB_LIST JOB_INFO JOB_TIME JOB_DELE 
    3130export NPROC    TIME_LIMI 
    3231export ARCH_ENV 
  • branches/2015/dev_r5092_CNRS_SETTE/NEMOGCM/TRUST/trusting.sh

    r5465 r5472  
    5555#--------------------------------------------------- 
    5656print_step 'Get softwares release' 
    57 . ${ARCH_ENV} > /dev/null 
     57[ -e ${ARCH_ENV} ] && . ${ARCH_ENV} > /dev/null 
    5858get_soft_rel 
    5959cat model.log 
     
    9191#--------------------------------------------------- 
    9292print_step 'Submitting job' 
    93 JOB_ID=$( ${JOB_SUBM} | awk '{print $NF}' ) 
     93JOB_ID=$( eval ${JOB_SUBM} ) 
    9494[ $? -ne 0 ] && get_out $STATUS $STEP 
    9595print_step 'Holding-Running job' 
     
    9797print_step 'Job finished' 
    9898 
    99 # Eventual crash during computing 
     99# Job state 
    100100#--------------------------------------------------- 
    101 print_step 'Test if job crashed' 
     101print_step 'Test job state' 
    102102if   [[ ! -e ocean.output || $( grep 'E R R O R' ocean.output ) ]]; then 
    103103    comments 'E R R O R' 
    104104    get_out $STATUS $STEP 
     105else 
     106    # Get time computation 
     107    #--------------------------------------------------- 
     108    print_step 'Get real CPU time' 
     109    get_cpu_time 
    105110fi 
    106111 
     
    110115print_step 'Test ASCII output files diff' 
    111116diff_textfiles 
    112 if [ "$STATUS" == 'FAILED' ] && get_out $STATUS $STEP 
     117[ "$STATUS" == 'FAILED' ] && get_out $STATUS $STEP 
    113118 
    114119# Inspect output NetCDF files 
     
    116121print_step 'Test last restart NetCDF files diff' 
    117122diff_restart 
    118 if [ "$STATUS" == 'FAILED' ] && get_out $STATUS $STEP 
    119  
    120 # Get computation duration 
    121 #--------------------------------------------------- 
    122 print_step 'Get real CPU time' 
    123 get_cpu_time 
     123[ "$STATUS" == 'FAILED' ] && get_out $STATUS $STEP 
    124124 
    125125# Get comments (ocean.output & diff model.log) 
  • branches/2015/dev_r5092_CNRS_SETTE/NEMOGCM/TRUST/trusting_func.sh

    r5465 r5472  
    2323} 
    2424 
    25 get_cpu_time() { 
    26     # Portability ! 
    27     real_cpu_time=$( ${JOB_VIEW} ${JOB_ID} | awk 'END{print $9}' ) 
    28     printf "Real CPU time\n${real_cpu_time}\n" | tee mesg_10_realcputime_${CFG_USER}_${CFG_ARCH}.txt 
    29 } 
    30  
    31 get_inputs() { 
    32     if [ ! -z "${FORC_TAR}" ]; then 
    33    cmd_iol="tar -tvf ${NEMO_FORC}/${FORC_TAR}"; cmd_iof="tar -vxf ${NEMO_FORC}/${FORC_TAR}" 
    34     else 
    35    cmd_iol="ls ${NEMO_FORC}/*"                ; cmd_iof="\cp ${NEMO_FORC}/* ." 
    36     fi 
    37  
    38     ${cmd_iol} > inputs_list.txt 
    39     ${cmd_iof} > /dev/null 
    40 } 
    41  
    4225diff_inputs() { 
    4326    mesg='Same' 
     
    5639 
    5740    if [ $( find ${TEST_DIR} -name "*${last_time_step}_restart*.nc" -print -quit ) ]; then 
    58    base_name=$( find ${TEST_DIR} -name "*${last_time_step}_restart*.nc" -print -quit           \ 
    59                 | awk -F/ '{print $NF}' | sed "s/^\(.*\)$last_time_step\_.*/\1$last_time_step/" ) 
     41   base_name=$( find ${TEST_DIR} -name "*${last_time_step}_restart*.nc" -print -quit         \ 
     42                | awk -F/ '{print $NF}' | sed "s/^\(.*\)$last_time_step.*/\1$last_time_step/" ) 
    6043 
    6144   for comp in restart restart_ice restart_trc; do 
     
    6851 
    6952       # UNIX `cmp` not suitable (filename & timestamp in .nc file) 
    70        nc_diff=$( $CDO diffn ${TEST_DIR}/$file'.nc' ${REFE_DIR}/$file'.nc' 2> /dev/null | sed -n '$p' ) 
     53       nc_diff=$( $CDO diffn ${TEST_DIR}/$file'.nc' ${REFE_DIR}/$file'.nc' 2> /dev/null | tail -1 ) 
    7154       if [ ! -z "${nc_diff}" ]; then 
    7255      export STATUS='FAILED' STEP='Restart files different' 
     
    8669} 
    8770 
     71get_cpu_time() { 
     72    real_cpu_time=$( eval ${JOB_TIME} ) 
     73    printf "Real CPU time\n${real_cpu_time}\n" | tee mesg_10_realcputime_${CFG_USER}_${CFG_ARCH}.txt 
     74} 
     75 
     76get_inputs() { 
     77    if [ ! -z "${FORC_TAR}" ]; then 
     78   cmd_iol="tar -tvf ${NEMO_FORC}/${FORC_TAR}"; cmd_iof="tar -vxf ${NEMO_FORC}/${FORC_TAR}" 
     79    else 
     80   cmd_iol="ls ${NEMO_FORC}/*"                ; cmd_iof="\cp ${NEMO_FORC}/* ." 
     81    fi 
     82 
     83    ${cmd_iol} > inputs_list.txt 
     84    ${cmd_iof} > /dev/null 
     85} 
     86 
    8887diff_textfiles() { 
    8988    for file in ocean.output $( ls *.stat ); do 
     
    101100    done 
    102101 
    103     sed -n 4p model.log | sed 'i\Compiler'    > mesg_06_compiler_${CFG_USER}_${CFG_ARCH}.txt 
    104     sed -n 5p model.log | sed 'i\MPI libs'    > mesg_07_mpi_${CFG_USER}_${CFG_ARCH}.txt 
    105     sed -n 6p model.log | sed 'i\NetCDF libs' > mesg_08_netcdf_${CFG_USER}_${CFG_ARCH}.txt 
     102    sed -n 4p model.log | sed 'i\Fortran compiler' > mesg_06_compiler_${CFG_USER}_${CFG_ARCH}.txt 
     103    sed -n 5p model.log | sed 'i\MPI libs'         > mesg_07_mpi_${CFG_USER}_${CFG_ARCH}.txt 
     104    sed -n 6p model.log | sed 'i\NetCDF libs'      > mesg_08_netcdf_${CFG_USER}_${CFG_ARCH}.txt 
    106105} 
    107106 
     
    127126   if [ "$dir" == "${DIR_XIOS}" ]; then 
    128127       rev_loc=$( svn info $dir | awk '(NR == 5) {print $NF}' ) 
    129        echo   'XIOS '${rev_loc}         >> model.log 
    130        printf "XIOS rev.\n${rev_loc}\n" >  mesg_05_xios__${CFG_USER}_${CFG_ARCH}.txt 
     128       echo   'XIOS '${rev_loc} >> model.log 
     129       printf "XIOS rev.\n"     >  mesg_05_xios_${CFG_USER}_${CFG_ARCH}.txt 
     130       printf "<a href=\"https://forge.ipsl.jussieu.fr/ioserver/changeset/${rev_loc}\" target=\"_blank\">${rev_loc}</a>" \ 
     131                                >> mesg_05_xios_${CFG_USER}_${CFG_ARCH}.txt 
    131132       continue 
    132133   fi 
     
    137138    done 
    138139 
    139     echo   'NEMOGCM '$rev         >> model.log 
     140    echo   'NEMOGCM '$rev   >> model.log 
    140141#   printf "NEMOGCM rev.\n$rev\n" >  mesg_04_nemogcm_${CFG_USER}_${CFG_ARCH}.txt 
    141     printf "NEMOGCM rev.\n\ 
    142             <a href=\"https://forge.ipsl.jussieu.fr/nemo/changeset/$rev\" target=\"_blank\">$rev</a>" \ 
    143     > mesg_04_nemogcm_${CFG_USER}_${CFG_ARCH}.txt 
     142    printf "NEMOGCM rev.\n" >  mesg_04_nemogcm_${CFG_USER}_${CFG_ARCH}.txt 
     143    printf "<a href=\"https://forge.ipsl.jussieu.fr/nemo/changeset/$rev\" target=\"_blank\">$rev</a>" \ 
     144                       >> mesg_04_nemogcm_${CFG_USER}_${CFG_ARCH}.txt 
    144145} 
    145146 
     
    153154if [ $PUBLISH -eq 1 ]; then 
    154155 
    155   if [ -f ${REFE_DIR}/trusting_${CFG_USER}_${CFG_ARCH}.txt ]; then cmd='sed -n 2p'; else cmd='cat'; fi 
     156  if [ -f ${REFE_DIR}/trusting_${CFG_USER}_${CFG_ARCH}.txt ]; then cmd='tail -1'; else cmd='cat'; fi 
    156157 
    157158  $cmd trusting_${DATESTR}_${CFG_USER}_${CFG_ARCH}.txt >> ${REFE_DIR}/trusting_${CFG_USER}_${CFG_ARCH}.txt 
     
    161162      cat << END_MAIL > trusting.mail 
    162163XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX 
     164 
    163165Dear $USER, 
    164166 
    165 The trusting sequence for ${CONF_REF} has failed at step: 
    166 `tail -n 1 ${TEST_DIR}/mesg_03_step_${CFG_USER}_${CFG_ARCH}.txt` 
    167 Directory : ${TEST_DIR} 
     167The trusting sequence for ${REFE_CONF} has failed. 
     168Directory: ${TEST_DIR} 
     169Archive: ${REFE_DIR}/trusting_${DATESTR}_${CFG_USER}_${CFG_ARCH}.tar.gz 
    168170 
    169171XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX 
    170172 
    171173END_MAIL 
     174#`tail -n 1 ${TEST_DIR}/mesg_03_step_${CFG_USER}_${CFG_ARCH}.txt` 
    172175      if [ -e ${TEST_DIR}/trusting_${DATESTR}_${CFG_USER}_${CFG_ARCH}.txt ]; then 
    173176     cat ${TEST_DIR}/trusting_${DATESTR}_${CFG_USER}_${CFG_ARCH}.txt  >> trusting.mail 
    174177      fi 
    175       mail -s "[trusting ${CONF_REF}] $STATUS $STEP" $EMAIL  <  trusting.mail 
     178      mail -s "[trusting ${REFE_CONF}] $STATUS $STEP" $EMAIL  <  trusting.mail 
    176179  fi 
    177180 
     
    188191    if [ "$STATUS" == 'FAILED' ]; then 
    189192   cd ${TEST_DIR} 
    190    touch mesg_09_inputfiles_${CFG_USER}_${CFG_ARCH}.txt mesg_10_realcputime_${CFG_USER}_${CFG_ARCH}.txt \ 
    191          mesg_11_comments_${CFG_USER}_${CFG_ARCH}.txt 
     193   printf "Input files\n\n"   > mesg_09_inputfiles_${CFG_USER}_${CFG_ARCH}.txt 
     194   printf "Real CPU time\n\n" > mesg_10_realcputime_${CFG_USER}_${CFG_ARCH}.txt 
     195   printf "Comments\n\n"      > mesg_11_comments_${CFG_USER}_${CFG_ARCH}.txt 
    192196   #tar -czf ${REFE_DIR}/trusting_${DATESTR}_${CFG_USER}_${CFG_ARCH}.tar.gz * 
    193197    fi 
     
    207211    while [[ $( ${JOB_LIST} | grep ${JOB_ID} ) && ${time_elapsed} -lt ${TIME_LIMI} ]]; do 
    208212   printf "\n####################################################" >> computation.log 
    209    ${JOB_VIEW} ${JOB_ID}                                           >> computation.log 
     213   ${JOB_INFO} ${JOB_ID}                                           >> computation.log 
    210214   sleep ${time_increment} 
    211215   let time_elapsed+=${time_increment} 
Note: See TracChangeset for help on using the changeset viewer.