#!/bin/bash
##
## Helper functions for the NEMO "trusting" test sequence.
##
## This file only defines variables and functions; nothing runs at load time.
## All TRUST_* variables are read from the calling environment.
##
## NOTE(review): this file was recovered from a copy whose line breaks and
## HTML-like tokens had been stripped. The '<br>' separators and the
## '<<END_MAIL' here-document below were restored from context -- confirm
## against a pristine copy if one is available.

## Messenger filenames
## One small text file per summary column: line 1 is the column title,
## line 2 the value. log_make() pastes them into one ';'-separated record.
file_date=mesg_01_date.txt   ; file_rslt=mesg_02_result.txt
file_stat=mesg_03_status.txt ; file_nemo=mesg_04_nemo.txt
file_xios=mesg_05_xios.txt   ; file_cmpf=mesg_06_compiler.txt
file_lmpi=mesg_07_mpi.txt    ; file_ncdf=mesg_08_netcdf.txt
file_inpt=mesg_09_inputs.txt ; file_time=mesg_10_time.txt
file_memy=mesg_11_memory.txt ; file_note=mesg_12_comments.txt


## Private helpers

## Print the arguments as one line, words separated by single blanks.
## Same effect as an unquoted `echo $var`, but without pathname expansion.
_one_line() {
    local IFS=$' \t\n'
    local -a words=()

    ## read returns non-zero at end of input: expected here
    read -r -d '' -a words <<< "$*"
    printf '%s\n' "${words[*]}"
}

## Replace line 2 of messenger file $1 with the text $2.
## The value goes through the environment so that '/', '&' or '\' in it
## cannot be misread as sed/awk syntax.
_set_mesg_value() {
    local target=$1

    MESG_VALUE=$2 awk 'NR == 2 { print ENVIRON["MESG_VALUE"]; next } { print }' \
        "$target" > "${target}.tmp" \
        && mv -- "${target}.tmp" "$target"
}


## Functions in order of use

## Print a step title ($1) underlined with dashes.
print_step() {
    local char_nb outline

    ## wc -c also counts the newline added by echo
    char_nb=$( echo "$1" | wc -c )
    outline=$( printf "%${char_nb}s" )

    printf "\nStep.....\n%s\n%s\n" "$1" "${outline// /-}"
}

## (Re)create every messenger file with its column title.
## Reads: TRUST_FLAG_RESULT.
init_files() {
    echo 'Date'                > "${file_date}"
    echo 'Result'              > "${file_rslt}"
    echo 'Status'              > "${file_stat}"
    echo 'NEMOGCM rev.'        > "${file_nemo}"
    echo 'XIOS rev.'           > "${file_xios}"
    echo 'Fortran compiler'    > "${file_cmpf}"
    echo 'MPI libs'            > "${file_lmpi}"
    echo 'NetCDF libs'         > "${file_ncdf}"
    echo 'Input files'         > "${file_inpt}"
    echo 'Elapsed time'        > "${file_time}"
    echo 'Memory (Phy./Virt.)' > "${file_memy}"
    echo 'Comments'            > "${file_note}"

    ## 'Failed' status with 'Unknown error' by default
    echo "${TRUST_FLAG_RESULT}" >> "${file_rslt}"
    echo 'Unknown error'        >> "${file_stat}"
}

## Append the test date to the date messenger file.
## Reads: TRUST_TEST_DATE (any string accepted by `date -d`).
get_date() {
    local dat

    ## UTC time zone for timestamping
    dat=$( date -ud "${TRUST_TEST_DATE}" +"%F %R %Z" )

    echo "$dat" >> "${file_date}"
}

## Record the XIOS revision and the highest NEMOGCM revision.
## Reads: TRUST_SVN_CO (blank-separated directory list), TRUST_IO_XIOS,
##        TRUST_SVN_ACTION (command line run on each NEMO directory),
##        TRUST_DIR_NEMOGCM.
## Writes: model.log, ${file_xios}, ${file_nemo}.
get_nemo_rev() {
    local dir rev_loc
    local rev=0

    ## Loop on essential NEMO directories (word splitting is intended)
    # shellcheck disable=SC2086
    for dir in ${TRUST_SVN_CO} ${TRUST_IO_XIOS}; do

        ## For time being, just get revision from XIOS with no action on directory
        if [[ "$dir" == "${TRUST_IO_XIOS}" ]]; then
            rev_loc=$( svn info "$dir" | awk '/Last Changed Rev/ {print $NF}' )
            echo "XIOS ${rev_loc}" >> model.log
            echo "${rev_loc}"      >> "${file_xios}"
            continue
        fi

        echo "$dir"

        ## TRUST_SVN_ACTION is a command line: it must be word-split.
        ## Skipped when empty, else the directory itself would be "executed".
        if [[ -n "${TRUST_SVN_ACTION}" ]]; then
            # shellcheck disable=SC2086
            ${TRUST_SVN_ACTION} "${TRUST_DIR_NEMOGCM}/$dir"
        fi

        rev_loc=$( svn info "${TRUST_DIR_NEMOGCM}/$dir" \
                   | awk '/Last Changed Rev/ {print $NF}' )

        ## Keep last rev. nb (ignore empty or non-numeric svn answers)
        if [[ "${rev_loc}" =~ ^[0-9]+$ ]] && (( 10#${rev_loc} > 10#${rev} )); then
            rev=${rev_loc}
        fi
    done

    echo "NEMOGCM ${rev}" >> model.log
    echo "$rev"           >> "${file_nemo}"
}

## Record compiler / MPI / NetCDF (and CDO) versions.
## Reads: TRUST_JOB_ENV (environment file to source), TRUST_COMPILE_FORTRAN,
##        TRUST_COMPILE_MPI, TRUST_COMPILE_NETCDF, TRUST_IO_CDO, PATH.
## Writes: model.log, ${file_cmpf}, ${file_lmpi}, ${file_ncdf}.
get_soft_rel() {
    local ver str tool_regex

    ## Sourcing environment
    ## NOTE(review): left unquoted as in the original, in case the value
    ## carries arguments -- confirm.
    if [[ -n "${TRUST_JOB_ENV}" ]]; then
        # shellcheck disable=SC1090,SC2086
        . ${TRUST_JOB_ENV}
    fi

    ## Tools that understand '--version'. Built without empty alternatives:
    ## a regex such as 'ifort|' can match every name.
    tool_regex=${TRUST_COMPILE_FORTRAN}
    if [[ -n "${TRUST_IO_CDO}" ]]; then
        tool_regex+="${tool_regex:+|}${TRUST_IO_CDO}"
    fi

    # shellcheck disable=SC2086
    for str in ${TRUST_COMPILE_FORTRAN} ${TRUST_COMPILE_MPI} \
               ${TRUST_COMPILE_NETCDF}  ${TRUST_IO_CDO}; do

        ## Extract version number after searching pattern in PATH env. variable
        ## (-n ... p: print nothing, rather than the whole PATH, on no match)
        ver=$( echo "$PATH" | sed -n "s|.*\($str[0-9.]*\).*|\1|p" )
        [[ -z "$ver" ]] && ver=$str

        ## option --version would work for main Fortran compilers and CDO
        if [[ -n "${tool_regex}" && "$str" =~ ${tool_regex} ]]; then
            ver=$( "$str" --version 2>&1 | grep -m1 -oe '\<[0-9. ]*\>' \
                   | xargs echo "$str" )
        fi

        ## Cleaning characters string to display proper soft name
        ver=$( _one_line "$ver" | sed 's|[/-]| |g' )

        _one_line "$ver" >> model.log
    done

    ## model.log lines 1-2 are expected to be the XIOS and NEMOGCM revisions
    ## written by get_nemo_rev(); compiler, MPI and NetCDF follow.
    sed -n 3p model.log >> "${file_cmpf}"
    sed -n 4p model.log >> "${file_lmpi}"
    sed -n 5p model.log >> "${file_ncdf}"
}

## Bring the input files into the current directory and list them.
## Reads: TRUST_IO_FORC_PATH (directory), TRUST_IO_FORC_TAR (optional
##        blank-separated list of archives inside that directory).
## Writes: inputs_list.txt plus the extracted / copied files.
get_inputs() {
    local archive

    if [[ -n "${TRUST_IO_FORC_TAR}" ]]; then
        ## List archive content & extract it by default
        # shellcheck disable=SC2086
        for archive in ${TRUST_IO_FORC_TAR}; do
            tar -tvf "${TRUST_IO_FORC_PATH}/${archive}" >> inputs_list.txt
        done
        # shellcheck disable=SC2086
        for archive in ${TRUST_IO_FORC_TAR}; do
            tar -vxf "${TRUST_IO_FORC_PATH}/${archive}" > /dev/null
        done
    else
        ## List & copy files without archive
        ls -lh "${TRUST_IO_FORC_PATH}"/* >> inputs_list.txt
        \cp "${TRUST_IO_FORC_PATH}"/* .
    fi

#    for entry in ${TRUST_IO_FORC_PATH}; do
#        # If path to file (assuming it is an archive)
#        if   [ -e $entry ]; then
#            tar -tvf $entry >> inputs_list.txt;
#            tar -vxf $entry  > /dev/null;
#        # If path to directory
#        elif [ -d $entry ]; then
#            inputs_list=" ls -lh ${TRUST_IO_FORC_PATH}/* >> inputs_list.txt"
#            inputs_get=" \cp     ${TRUST_IO_FORC_PATH}/* .                 "
#        fi
#    done

    ## Uncompress any gzipped input (find does nothing when none is present)
    find . -name '*.gz' -exec gzip -d {} \;
}

## Compare the input files with those of the benchmark directory.
## Reads: TRUST_TEST_BENCHMARK.
## Outputs: diff messages, or 'Same'.
## Writes: ${file_inpt}, temp_${file_note} (list of differing files).
diff_inputs() {
    local dif file
    local files_list='' mesg='Same'

    ###################################
    ## Think of copying initial test ##
    ###################################

    ## Simple diff
    for file in cpp_* 'inputs_list.txt' *namelist_* *.xml; do

        ## Pass over useless file omission in benchmark directory
        ## (this also skips glob patterns that matched nothing)
        [[ -e "${TRUST_TEST_BENCHMARK}/${file}" ]] || continue

        ## Rely on the exit status: diff prints nothing on stdout when one
        ## of the two files cannot be read.
        if ! dif=$( diff -q "$file" "${TRUST_TEST_BENCHMARK}/${file}" ); then
            mesg='Different'
            echo "$dif"
            files_list+="${file} "
        fi
    done

    [[ "$mesg" == 'Same' ]] && echo "$mesg"
    echo "$mesg" >> "${file_inpt}"

    ## List different files for web comment
    [[ -n "${files_list}" ]] && echo "Inputs : ${files_list}differ<br>" \
        >> "temp_${file_note}"
}

## Wait for the submitted job to finish, logging its state while pending.
## Reads: TRUST_JOB_STATE (command, prints something while the job is alive),
##        TRUST_JOB_INFO (optional command), TRUST_JOB_KILL (command),
##        TRUST_JOB_TIMEOUT (seconds).
## Sets the globals time_elapsed and time_increment.
## Calls `get_out 6` (which exits) when the time limit is reached.
job_pending() {
    local outline
    outline=$( printf "%100s" )
    time_elapsed=0
    time_increment=30

    sleep "${time_increment}"

    ## Append a log file while pending
    while [[ -n "$( eval "${TRUST_JOB_STATE}" )" \
             && "${time_elapsed}" -lt "${TRUST_JOB_TIMEOUT}" ]]; do
        printf "\n%s\n" "${outline// /#}" >> computation.log
        [[ -n "${TRUST_JOB_INFO}" ]] && eval "${TRUST_JOB_INFO}" >> computation.log
        sleep "${time_increment}"
        time_elapsed=$(( time_elapsed + time_increment ))
    done

    sleep "${time_increment}"

    ## Kill remaining job & stop the test if it's too long
    ## (-ge: the limit need not be a multiple of the polling period)
    if [[ "${time_elapsed}" -ge "${TRUST_JOB_TIMEOUT}" ]]; then
        eval "${TRUST_JOB_KILL}" &> /dev/null
        get_out 6
    fi
}

## Compare ocean.output and the *.stat files with the benchmark ones.
## Reads: TRUST_TEST_BENCHMARK.
## Outputs: diff messages, or 'Same'.
## Writes: temp_${file_note} (list of differing files).
## Calls `get_out 7` (which exits) when a benchmark file is missing.
diff_results() {
    local file
    local files_list='' mesg='Same'

    ###################################
    ## Think of copying initial test ##
    ###################################

    ## Simple diff
    for file in 'ocean.output' *.stat; do

        ## No stat file produced: the glob pattern is left unexpanded
        [[ "$file" == '*.stat' && ! -e "$file" ]] && continue

        ## Stop if no minimal benchmark files (ocean.output, eventual stat files)
        [[ ! -e "${TRUST_TEST_BENCHMARK}/${file}" ]] && get_out 7

        ## Continue even if it differs
        if ! diff -q "$file" "${TRUST_TEST_BENCHMARK}/${file}"; then
            mesg='Different'
            files_list+="${file} "
        fi
    done

    [[ "$mesg" == 'Same' ]] && echo "$mesg"

    ## List different files for web comment
    [[ -n "${files_list}" ]] && echo "Results : ${files_list}differ<br>" \
        >> "temp_${file_note}"
}

## Rebuild the decomposed restart files and compare them with the benchmark.
## Reads: TRUST_CFG_NEW, TRUST_DIR_NEMOGCM, TRUST_COMPILE_NPROC,
##        TRUST_TEST_BENCHMARK.
## Outputs: one line per differing restart, or 'Same'.
## Writes: cdo_diff.out, temp_${file_note}.
## Calls `get_out X` (which exits) when a rebuild is impossible or fails.
## Relies on GNU find (-regextype).
diff_restarts() {
    local dif filebase filebases ndomain count
    local files_list='' dif_sum=0

    ## Nothing to do without restart files
    [[ -n "$( find . -regex ".*_restart.*[0-9]\.nc" -print -quit )" ]] || return 0

    ###############################################################
    ## Think to set the configuration name in the 'namelist_cfg' ##
    ###############################################################

    ## Find all restart files to rebuild
    filebases=$( find . -regextype sed \
                        -regex ".*${TRUST_CFG_NEW}.*_[0-9]\{4\}\.nc" \
                 | sed 's/\(.*\)_.*/\1/' | sort -u )

    ## One base name per word (word splitting is intended)
    # shellcheck disable=SC2086
    for filebase in $filebases; do
        ndomain=$( find . -regex ".*${filebase}_[0-9]*.nc" \
                   | wc -l | awk '{print $1}' )
        [[ "$ndomain" -eq 0 ]] && get_out X

        #####################################################
        ## Handle 2 possibilities of 'rebuild_nemo' origin ##
        #####################################################

        ## Possibility of remaining decomposed restarts (even after rebuild)
        if "${TRUST_DIR_NEMOGCM}/TOOLS/REBUILD_NEMO/rebuild_nemo" \
               -t "${TRUST_COMPILE_NPROC}" "$filebase" "$ndomain" \
               > /dev/null; then
            rm -f -- "${filebase}"_[0-9]*.nc > /dev/null
        else
            get_out X
        fi

        ## Skip the comparison if no benchmark files (restart file)
        [[ -e "${TRUST_TEST_BENCHMARK}/${filebase}.nc" ]] || continue

        cdo diffn "${filebase}.nc" "${TRUST_TEST_BENCHMARK}/${filebase}.nc" \
            > cdo_diff.out 2> /dev/null

        ## Identical if cdo_diff.out is zero size
        [[ ! -s cdo_diff.out ]] && continue

        dif=$( grep -om1 '[0-9]* of [0-9]* records' cdo_diff.out )

        if [[ -n "$dif" ]]; then
            files_list+="${filebase} "
            echo "${filebase}.nc: ${dif}"

            ## Leading number of "N of M records"; forced to base 10 so
            ## that a leading zero is not read as octal
            count=${dif%% *}
            [[ "$count" =~ ^[0-9]+$ ]] && dif_sum=$(( dif_sum + 10#${count} ))
        fi
    done

    ## List modified restart(s) for web comment with sum of differences
    if [[ "${dif_sum}" -ne 0 ]]; then
        echo "Restarts: ${files_list}${dif_sum} record(s) differ<br>" \
            >> "temp_${file_note}"
    else
        echo 'Same'
    fi
}

## Print the job elapsed time and append it to its messenger file.
## Reads: TRUST_JOB_TIME (command printing the time; nothing done if empty).
get_time() {
    local time_cpu

    [[ -z "${TRUST_JOB_TIME}" ]] && return

    ## Interest for checking unusual time computation
    time_cpu=$( eval "${TRUST_JOB_TIME}" )

    printf "Elapsed time: "
    _one_line "${time_cpu}" | tee -a "${file_time}"
}

## Print the job peak memory usage and append it to its messenger file.
## Reads: TRUST_JOB_RAM_P, TRUST_JOB_RAM_V (commands printing the physical
##        and virtual peaks; nothing done if both are empty).
get_memy() {
    local memory_pmax memory_vmax

    [[ -z "${TRUST_JOB_RAM_P}" && -z "${TRUST_JOB_RAM_V}" ]] && return

    ## Interest for checking unusual memory usage
    memory_pmax=$( eval "${TRUST_JOB_RAM_P}" )
    memory_vmax=$( eval "${TRUST_JOB_RAM_V}" )

    printf "Memory max usage (physical/virtual): "
    printf '%s / %s\n' "$( _one_line "${memory_pmax}" )" \
                       "$( _one_line "${memory_vmax}" )" \
        | tee -a "${file_memy}"
}

## Extract the first message of kind $1 from ocean.output.
## $1 is 'E R R O R' or a pattern expected at the start of a line after one
## blank (such as 'W A R N I N G').
## Outputs: the message on one line.
## Writes: temp_${file_note}.
comments() {
    local line='' state=$1
    local -a opat

    if [[ -e ocean.output ]]; then

        ## 'W A R N I N G' pattern by default
        opat=( -A2 "^ ${state}" )
        [[ "$state" == 'E R R O R' ]] && opat=( -A4 "${state}" )

        ## Select first occurrence for web comment
        line=$( grep -m1 "${opat[@]}" ocean.output | tr -d '\n' )
    fi

    ## The text comes from a file: never use it as a printf format
    [[ -n "$line" ]] && {
        _one_line "$line"
        printf '%s<br>' "$line" >> "temp_${file_note}"
    }
}

## Build the one-line summary of the test from all messenger files.
## Reads: TRUST_TEST_SUMMARY (output file).
## Outputs: the ';'-separated titles and values.
log_make() {

    ## Format comments for web: one line, no trailing separator
    if [[ -e "temp_${file_note}" ]]; then
        tr -d '\n' < "temp_${file_note}" | sed 's/<br>$//' >> "${file_note}"
    fi

    ## Construct txt file with all messenger files
    paste -d ';' mesg_*.txt | tee "${TRUST_TEST_SUMMARY}"
}

## In production mode, append the summary to the trusting logfile and send
## a notification email when the test failed.
## Reads: TRUST_FLAG_PROD, TRUST_TEST_LOG, TRUST_TEST_SUMMARY,
##        TRUST_TEST_MAILING (recipients), TRUST_FLAG_RESULT,
##        TRUST_FLAG_ERROR, TRUST_CFG_NEW, TRUST_CFG_REF, TRUST_SVN_BRANCH,
##        TRUST_MAIN_USER, TRUST_MAIN_HPCC, TRUST_TEST_DIR,
##        TRUST_TEST_BENCHMARK, TRUST_TEST_BACKUP.
## Writes: ${TRUST_TEST_LOG}, trusting.mail.
prod_publish() {
    local subject

    ## Production mode (-p|--prod)
    [[ "${TRUST_FLAG_PROD:-0}" -eq 1 ]] || return 0

    ## Create or append trusting logfile: titles + values the first time,
    ## values only afterwards
    if [[ -f "${TRUST_TEST_LOG}" ]]; then
        tail -n 1 "${TRUST_TEST_SUMMARY}" >> "${TRUST_TEST_LOG}"
    else
        cat "${TRUST_TEST_SUMMARY}" >> "${TRUST_TEST_LOG}"
    fi

    ## Send mail only when FAILED
    if [[ -n "${TRUST_TEST_MAILING}" && "${TRUST_FLAG_RESULT}" == 'FAILED' ]]; then

        ## Content
        cat <<END_MAIL > trusting.mail
Dear all,

The following trusting sequence has not completed successfully:

Testing configuration ${TRUST_CFG_NEW} based on ${TRUST_CFG_REF}.
User installation     ${TRUST_MAIN_USER}
HPC environment       ${TRUST_MAIN_HPCC}

Here is the running environment summary:
$( cat model.log )

For more details, look into the testing folder at:
${TRUST_TEST_DIR}

An archive is also available to share the questionable configuration:
${TRUST_TEST_BENCHMARK}/${TRUST_TEST_BACKUP}

END_MAIL

        ## Send with detailed subject (recipient list is word-split on purpose)
        subject="[NEMO Trusting][${TRUST_CFG_REF}][${TRUST_SVN_BRANCH}]"
        subject+=" ${TRUST_FLAG_RESULT} ${TRUST_FLAG_ERROR}"
        # shellcheck disable=SC2086
        mail -s "${subject}" ${TRUST_TEST_MAILING} < trusting.mail
    fi
}

## End the test: set the final status, build the summary, publish, exit.
## $1 is the error code (1-8, anything else means 'Unknown error'); it is
## ignored unless TRUST_FLAG_RESULT is 'FAILED'.
## Reads: TRUST_TEST_DIR, TRUST_FLAG_RESULT, TRUST_FLAG_PROD,
##        TRUST_JOB_TIMEOUT, TRUST_TEST_BACKUP, TRUST_TEST_BENCHMARK,
##        TRUST_DIR_NEMOGCM, TRUST_CFG_NEW, TRUST_RESULT (optional).
## Sets the global TRUST_FLAG_ERROR. Always exits the shell with status 0.
get_out() {
    local time_step=0

    TRUST_FLAG_ERROR=$1

    printf "\n\nEnd of test\n"

    ## In case of compilation error (we may still be in a build directory)
    cd "${TRUST_TEST_DIR}" \
        || printf 'Warning: cannot enter test directory %s\n' "${TRUST_TEST_DIR}" >&2

    if [[ "${TRUST_FLAG_RESULT}" == 'FAILED' ]]; then
        echo 'Failure'

        ## Error identification
        case "${TRUST_FLAG_ERROR}" in
            ## Compilation
            1) TRUST_FLAG_ERROR='XIOS compilation failed' ;;
            2) TRUST_FLAG_ERROR='NEMO compilation failed' ;;
            ## Submission
            3) TRUST_FLAG_ERROR='Missing input files'     ;;
            4) TRUST_FLAG_ERROR='Job submission error'    ;;
            ## Computing
            5) TRUST_FLAG_ERROR='Crashed at time step'    ;;
            6) TRUST_FLAG_ERROR='Exceeded time limit'     ;;
            ## Results
            7) TRUST_FLAG_ERROR='Missing previous outputs';;
            8) TRUST_FLAG_ERROR='New outputs differ'      ;;
            ## Other (unquoted: a quoted '*' only matches a literal star)
            *) TRUST_FLAG_ERROR='Unknown error'           ;;
        esac

    else
        echo 'Success'
        TRUST_FLAG_ERROR='Code is reliable'
    fi

    ## Eventual comments from ocean.output
    if [[ "${TRUST_FLAG_ERROR}" == 'Crashed at time step' ]]; then
        comments 'E R R O R'
        [[ -e time.step ]] && time_step=$( cat time.step )
        TRUST_FLAG_ERROR+=" ${time_step}"
    else
        comments 'W A R N I N G'

        if [[ "${TRUST_FLAG_ERROR}" == 'Exceeded time limit' ]]; then
            TRUST_FLAG_ERROR+=" $(( ${TRUST_JOB_TIMEOUT:-0} / 3600 ))h"
        fi
    fi

    ## Last messenger files
    ## NOTE(review): the original wrote ${TRUST_RESULT}, which nothing in this
    ## file sets; fall back to TRUST_FLAG_RESULT (the value init_files() wrote)
    ## so the result is not blanked -- confirm the intended variable.
    _set_mesg_value "${file_rslt}" "${TRUST_RESULT:-${TRUST_FLAG_RESULT}}"
    _set_mesg_value "${file_stat}" "${TRUST_FLAG_ERROR}"

    ## Save tested configuration if trusting failed in production mode (-p|--prod)
    if [[ "${TRUST_FLAG_RESULT}" == 'FAILED' && "${TRUST_FLAG_PROD:-0}" -eq 1 ]]; then
        echo "Creating archive ${TRUST_TEST_BACKUP} under ${TRUST_TEST_BENCHMARK}"
        tar -czf "${TRUST_TEST_BENCHMARK}/${TRUST_TEST_BACKUP}" * \
            -C "${TRUST_DIR_NEMOGCM}/CONFIG/${TRUST_CFG_NEW}/MY_SRC" . \
            -C "${TRUST_DIR_NEMOGCM}/CONFIG/${TRUST_CFG_NEW}" \
            "cpp_${TRUST_CFG_NEW}.fcm"
    fi

    ## Logfile construct & eventual sending of notification email
    printf "\nTrusting digest:\n----------------\n"
    log_make
    prod_publish

    exit 0
}