#!/bin/bash
## Messenger filenames
## One messenger file per summary column; the numeric prefix fixes the column
## order when the files are pasted together with the 'mesg_*.txt' glob
file_date=mesg_01_date.txt
file_rslt=mesg_02_result.txt
file_stat=mesg_03_status.txt
file_nemo=mesg_04_nemo.txt
file_xios=mesg_05_xios.txt
file_cmpf=mesg_06_compiler.txt
file_lmpi=mesg_07_mpi.txt
file_ncdf=mesg_08_netcdf.txt
file_inpt=mesg_09_inputs.txt
file_time=mesg_10_time.txt
file_memy=mesg_11_memory.txt
file_note=mesg_12_comments.txt
## Functions in order of use
## Print a step banner on stdout: a blank line, 'Step.....', the title and a
## dashed underline of exactly the same length as the title.
## Arguments: $1 - step title
print_step() {
  local title=$1 underline
  ## ${#title} is the character count of the title; the former 'echo | wc -c'
  ## also counted the newline added by echo, giving one dash too many
  printf -v underline '%*s' "${#title}" ''
  printf '\nStep.....\n%s\n%s\n' "${title}" "${underline// /-}"
}
## (Re)create every messenger file with its column title on the first line,
## then seed the result and status files with their default second line.
## Globals read: file_* messenger filenames, TRUST_FLAG_RESULT
init_files() {
  local idx
  ## Titles and targets are paired by position
  local -a titles=(
    'Date' 'Result' 'Status' 'NEMOGCM rev.' 'XIOS rev.' 'Fortran compiler'
    'MPI libs' 'NetCDF libs' 'Input files' 'Elapsed time'
    'Memory (Phy./Virt.)' 'Comments'
  )
  local -a targets=(
    "${file_date}" "${file_rslt}" "${file_stat}" "${file_nemo}" "${file_xios}"
    "${file_cmpf}" "${file_lmpi}" "${file_ncdf}" "${file_inpt}" "${file_time}"
    "${file_memy}" "${file_note}"
  )
  for idx in "${!targets[@]}"; do
    ## '>' truncates: any content from a previous run is discarded
    echo "${titles[idx]}" > "${targets[idx]}"
  done
  ## Defaults: current result flag and 'Unknown error' status
  echo ${TRUST_FLAG_RESULT} >> "${file_rslt}"
  echo 'Unknown error' >> "${file_stat}"
}
## Append the test date to the date messenger file.
## Globals read: TRUST_TEST_DATE (any string 'date -d' accepts), file_date
get_date() {
  ## Timestamps are always rendered in UTC as 'YYYY-MM-DD HH:MM ZONE'
  local stamp=$( date -u -d "${TRUST_TEST_DATE}" '+%F %R %Z' )
  echo $stamp >> ${file_date}
}
## Record the SVN revisions of the model sources.
## - XIOS: 'Last Changed Rev' of TRUST_IO_XIOS, no action on the directory
## - NEMO: run TRUST_SVN_ACTION on each directory of TRUST_SVN_CO (relative to
##   TRUST_DIR_NEMOGCM) and keep the highest 'Last Changed Rev'
## Both revisions are logged in model.log, echoed on stdout and appended to
## their messenger file.
## Globals read: TRUST_SVN_CO, TRUST_IO_XIOS, TRUST_SVN_ACTION,
##               TRUST_DIR_NEMOGCM, file_xios, file_nemo
get_nemo_rev() {
  local dir rev_loc
  local rev=0
  ## Word splitting intended: the variables hold a list of directories
  for dir in ${TRUST_SVN_CO} ${TRUST_IO_XIOS}; do
    ## For time being, just get revision from XIOS with no action on directory
    if [ "${dir}" == "${TRUST_IO_XIOS}" ]; then
      rev_loc=$( svn info "${dir}" | awk '/Last Changed Rev/ {print $NF}' )
      echo "XIOS ${rev_loc}" >> model.log
      ## The redirection used to sit on its own line and appended nothing;
      ## tee keeps the former stdout output and fills the messenger file
      echo "${rev_loc}" | tee -a "${file_xios}"
      continue
    fi
    ## TRUST_SVN_ACTION is a command line (command + options): left unquoted
    echo "${dir}" && ${TRUST_SVN_ACTION} "${TRUST_DIR_NEMOGCM}/${dir}"
    rev_loc=$( svn info "${TRUST_DIR_NEMOGCM}/${dir}" \
      | awk '/Last Changed Rev/ {print $NF}' )
    ## Keep last rev. nb; ignore directories for which svn gave no number
    if [[ "${rev_loc}" =~ ^[0-9]+$ ]] && [ "${rev_loc}" -gt "${rev}" ]; then
      rev=${rev_loc}
    fi
  done
  echo "NEMOGCM ${rev}" >> model.log
  echo "${rev}" | tee -a "${file_nemo}"
}
## Append one version line per tool named in TRUST_COMPILE_FORTRAN,
## TRUST_COMPILE_MPI, TRUST_COMPILE_NETCDF and TRUST_IO_CDO to model.log, then
## copy lines 3, 4 and 5 of model.log to the compiler, MPI and NetCDF
## messenger files.
## NOTE(review): the fixed line numbers presumably assume model.log already
## holds exactly two lines (XIOS and NEMOGCM revisions) on entry -- confirm
## against the caller.
get_soft_rel() {
local ver str
## Source the job environment file into the current shell
. ${TRUST_JOB_ENV}
## Unquoted on purpose: each variable is word-split into loop items
for str in ${TRUST_COMPILE_FORTRAN} \
${TRUST_COMPILE_MPI} ${TRUST_COMPILE_NETCDF} \
${TRUST_IO_CDO} ; do
## Guard against an empty item (unset variables simply vanish from the list)
[ -z "$str" ] && continue
ver=''
## Extract "<name><digits and dots>" from PATH; the leading greedy '.*' picks
## the last occurrence. When the name is not in PATH, sed leaves the whole
## PATH string unchanged in 'ver'
ver=$( echo $PATH | sed "s|.*\($str[0-9.]*\).*|\1|" )
## For the Fortran compiler and CDO, prefer the output of '--version':
## first run of digits/dots/spaces found, prefixed with the tool name.
## NOTE(review): if TRUST_IO_CDO is empty the regex ends with an empty
## alternative; how that matches depends on the regex library -- confirm
if [[ $str =~ ${TRUST_COMPILE_FORTRAN}|${TRUST_IO_CDO} ]]; then
ver=$( $str --version 2>&1 | grep -m1 -oe '\<[0-9. ]*\>' \
| xargs echo $str )
fi
## Replace slashes and hyphens with spaces for a readable software name
#str=$( echo $str | sed 's|[/-]||g' )
ver=$( echo $ver | sed 's|[/-]| |g' )
echo $ver \
>> model.log
done
## Dispatch model.log lines to messenger files by position
sed -n 3p model.log \
>> ${file_cmpf}
sed -n 4p model.log \
>> ${file_lmpi}
sed -n 5p model.log \
>> ${file_ncdf}
}
## Fetch the input files into the current directory and log what was fetched
## in inputs_list.txt.
## - TRUST_IO_FORC_TAR set  : each archive of the list (looked up under
##   TRUST_IO_FORC_PATH) is listed with 'tar -tvf' then extracted
## - TRUST_IO_FORC_TAR empty: every entry of TRUST_IO_FORC_PATH is listed with
##   'ls -lh' then copied
## Any '*.gz' file found below the current directory is finally decompressed.
## Globals read: TRUST_IO_FORC_TAR, TRUST_IO_FORC_PATH
get_inputs() {
  local archive
  if [ -n "${TRUST_IO_FORC_TAR}" ]; then
    ## Word splitting intended: the variable holds a list of archive names
    for archive in ${TRUST_IO_FORC_TAR}; do
      tar -tvf "${TRUST_IO_FORC_PATH}/${archive}" >> inputs_list.txt
      tar -vxf "${TRUST_IO_FORC_PATH}/${archive}" > /dev/null
    done
  else
    ## Run the commands directly: when stored in a string and expanded, '>>'
    ## was passed to ls as a file name and '\cp' was not a valid command
    ls -lh "${TRUST_IO_FORC_PATH}"/* >> inputs_list.txt
    cp "${TRUST_IO_FORC_PATH}"/* .
  fi
  ## find runs gzip only on matches, no preliminary existence test needed
  find . -name '*.gz' -exec gzip -d {} \;
}
## Compare the input files of the current directory (cpp_*, inputs_list.txt,
## *namelist_*, *.xml) with their copy in the benchmark directory.
## Prints the 'diff -q' message of each differing file, or 'Same' if none;
## appends 'Same'/'Different' to the inputs messenger file and, when files
## differ, their list to the temporary comments file.
## Globals read: TRUST_TEST_BENCHMARK, file_inpt, file_note
diff_inputs() {
  local dif file
  local files_list='' mesg='Same'
  ###################################
  ## Think of copying initial test ##
  ###################################
  ## Simple diff
  for file in cpp_* 'inputs_list.txt' *namelist_* *.xml; do
    ## Pass over files absent from the benchmark directory (this also covers
    ## patterns matching nothing here); quoting keeps the test valid when the
    ## path contains whitespace
    [ -e "${TRUST_TEST_BENCHMARK}/${file}" ] || continue
    dif=$( diff -q "${file}" "${TRUST_TEST_BENCHMARK}/${file}" )
    if [ -n "${dif}" ]; then
      mesg='Different'
      echo "${dif}"
      files_list+="${file} "
    fi
  done
  [ "${mesg}" == 'Same' ] && echo "${mesg}"
  echo "${mesg}" >> "${file_inpt}"
  ## List different files for web comment (entry ends with an extra newline)
  if [ -n "${files_list}" ]; then
    echo "Inputs : ${files_list}differ"$'\n' >> "temp_${file_note}"
  fi
}
## Wait for the submitted job to finish, polling every 30 seconds.
## While the TRUST_JOB_STATE command prints something and the timeout is not
## reached, a separator line and the output of TRUST_JOB_INFO (if set) are
## appended to computation.log. Once the timeout is reached or exceeded, the
## job is killed with TRUST_JOB_KILL and the test stops through 'get_out 6'.
## Globals read: TRUST_JOB_STATE, TRUST_JOB_INFO, TRUST_JOB_KILL,
##               TRUST_JOB_TIMEOUT (seconds)
job_pending() {
  local outline time_elapsed=0 time_increment=30
  printf -v outline '%100s' ''
  sleep "${time_increment}"
  ## Append a log file while pending
  while [[ -n $( eval "${TRUST_JOB_STATE}" ) ]] \
    && [ "${time_elapsed}" -lt "${TRUST_JOB_TIMEOUT}" ]; do
    printf '\n%s\n' "${outline// /#}" >> computation.log
    if [ -n "${TRUST_JOB_INFO}" ]; then
      eval "${TRUST_JOB_INFO}" >> computation.log
    fi
    sleep "${time_increment}"
    time_elapsed=$(( time_elapsed + time_increment ))
  done
  sleep "${time_increment}"
  ## Kill remaining job & stop the test if it's too long.
  ## '-ge' and not '-eq': the counter moves in 30 s steps and overshoots any
  ## timeout which is not a multiple of the increment
  if [ "${time_elapsed}" -ge "${TRUST_JOB_TIMEOUT}" ]; then
    eval "${TRUST_JOB_KILL}" &> /dev/null
    get_out 6
  fi
}
## Compare ocean.output and every *.stat file with their benchmark copy.
## Stops the test through 'get_out 7' when a benchmark file is missing.
## Prints the 'diff -q' message of each differing file, or 'Same' if none,
## and appends the list of differing files to the temporary comments file.
## Globals read: TRUST_TEST_BENCHMARK, file_note
diff_results() {
  local file
  local files_list='' mesg='Same'
  ###################################
  ## Think of copying initial test ##
  ###################################
  ## Simple diff
  for file in 'ocean.output' *.stat; do
    ## Stop if no minimal benchmark files (ocean.output, eventual stat files)
    [ -e "${TRUST_TEST_BENCHMARK}/${file}" ] || get_out 7
    ## Continue even if it differs; paths are quoted so that whitespace in
    ## the benchmark path no longer makes every comparison fail
    if ! diff -q "${file}" "${TRUST_TEST_BENCHMARK}/${file}"; then
      mesg='Different'
      files_list+="${file} "
    fi
  done
  [ "${mesg}" == 'Same' ] && echo "${mesg}"
  ## List different files for web comment (entry ends with an extra newline)
  if [ -n "${files_list}" ]; then
    echo "Results : ${files_list}differ"$'\n' >> "temp_${file_note}"
  fi
}
## Rebuild the decomposed restart files and compare them with the benchmark.
## For each restart base name matching TRUST_CFG_NEW: count the per-domain
## files, merge them with rebuild_nemo, delete the per-domain files, then run
## 'cdo diffn' against the benchmark copy when there is one.
## Prints '<file>.nc: N of M records' per differing restart, or 'Same' when
## the total is zero; appends the summary to the temporary comments file.
## Stops the test through 'get_out X' when no domain file is found or when
## the rebuild fails.
## Globals read: TRUST_CFG_NEW, TRUST_DIR_NEMOGCM, TRUST_COMPILE_NPROC,
##               TRUST_TEST_BENCHMARK, file_note
diff_restarts() {
  local dif filebase filebases ndomain
  local files_list='' dif_sum=0
  ## Nothing to do without restart files
  if [ -z "$( find . -regex ".*_restart.*[0-9]\.nc" -print -quit )" ]; then
    return
  fi
  ###############################################################
  ## Think to set the configuration name in the 'namelist_cfg' ##
  ###############################################################
  ## Base names: per-domain file names stripped of their '_NNNN.nc' suffix
  filebases=$( find . -regextype sed \
      -regex ".*${TRUST_CFG_NEW}.*_[0-9]\{4\}\.nc" \
    | sed 's/\(.*\)_.*/\1/' | sort -u )
  for filebase in ${filebases}; do
    ndomain=$( find . -regex ".*${filebase}_[0-9]*.nc" \
      | wc -l | awk '{print $1}' )
    [ "${ndomain}" -eq 0 ] && get_out X
    #####################################################
    ## Handle 2 possibilities of 'rebuild_nemo' origin ##
    #####################################################
    if "${TRUST_DIR_NEMOGCM}/TOOLS/REBUILD_NEMO/rebuild_nemo" \
      -t "${TRUST_COMPILE_NPROC}" "${filebase}" "${ndomain}" \
      > /dev/null; then
      ## Possibility of remaining decomposed restarts (even after rebuild)
      rm -f "${filebase}"_[0-9]*.nc > /dev/null
    else
      get_out X
    fi
    ## Compare only when the benchmark holds this restart file
    if [ -e "${TRUST_TEST_BENCHMARK}/${filebase}.nc" ]; then
      cdo diffn "${filebase}.nc" "${TRUST_TEST_BENCHMARK}/${filebase}.nc" \
        > cdo_diff.out 2> /dev/null
      ## Identical if cdo_diff.out is zero size
      [ -s cdo_diff.out ] || continue
      dif=$( grep -om1 '[0-9]* of [0-9]* records' cdo_diff.out )
      if [ -n "${dif}" ]; then
        files_list+="${filebase} "
        echo "${filebase}.nc: ${dif}"
        ## Add the leading count of 'N of M records'; the former sed script
        ## had no 's' command, failed, and left the sum at zero
        if [[ "${dif}" =~ ^([0-9]+) ]]; then
          dif_sum=$(( dif_sum + 10#${BASH_REMATCH[1]} ))
        fi
      fi
    fi
  done
  ## List modified restart(s) for web comment with sum of differences
  if [ "${dif_sum}" -ne 0 ]; then
    echo "Restarts: ${files_list}${dif_sum} record(s) differ"$'\n' \
      >> "temp_${file_note}"
  else
    echo 'Same'
  fi
}
## Report the job elapsed time on stdout and in the time messenger file.
## Does nothing when no TRUST_JOB_TIME command is configured.
## Globals read: TRUST_JOB_TIME (command printing the time), file_time
get_time() {
  if [ -n "${TRUST_JOB_TIME}" ]; then
    ## Interest for checking unusual time computation
    local elapsed=$( eval ${TRUST_JOB_TIME} )
    printf '%s' 'Elapsed time: '
    echo ${elapsed} | tee -a ${file_time}
  fi
}
## Report the job peak memory (physical / virtual) on stdout and in the
## memory messenger file. Does nothing when neither command is configured.
## Globals read: TRUST_JOB_RAM_P, TRUST_JOB_RAM_V (commands printing the
##               values), file_memy
get_memy() {
  if [[ -n "${TRUST_JOB_RAM_P}" || -n "${TRUST_JOB_RAM_V}" ]]; then
    ## Interest for checking unusual memory usage
    local peak_phys=$( eval ${TRUST_JOB_RAM_P} )
    local peak_virt=$( eval ${TRUST_JOB_RAM_V} )
    printf '%s' 'Memory max usage (physical/virtual): '
    echo ${peak_phys}' / '${peak_virt} | tee -a ${file_memy}
  fi
}
## Extract the first message of the given kind from ocean.output.
## The matching line and its following lines (2 for a warning, 4 for an
## error) are joined without separator, printed on stdout with whitespace
## runs collapsed, and appended verbatim to the temporary comments file.
## Arguments: $1 - pattern to look for ('W A R N I N G' or 'E R R O R')
## Globals read: file_note
comments() {
  local state=$1 line=''
  local -a grep_opts words
  if [ -e ocean.output ]; then
    ## 'W A R N I N G' pattern by default (anchored after one leading blank)
    grep_opts=( -A2 "^ ${state}" )
    [ "${state}" == 'E R R O R' ] && grep_opts=( -A4 "${state}" )
    ## Select first occurrence for web comment
    line=$( grep -m1 "${grep_opts[@]}" ocean.output | tr -d '\n' )
  fi
  if [ -n "${line}" ]; then
    ## Collapse whitespace for display without glob-expanding the text
    read -r -a words <<< "${line}"
    printf '%s\n' "${words[*]}"
    ## Model output is data, never a printf format ('%' and '\' stay literal)
    printf '%s\n' "${line}" >> "temp_${file_note}"
  fi
}
## Build the test summary: append the collected comments as a single line to
## the comments messenger file, then paste all 'mesg_*.txt' files side by side
## (';' separated) on stdout and into TRUST_TEST_SUMMARY.
## Globals read: file_note, TRUST_TEST_SUMMARY
log_make() {
  local note
  ## Format comments for web
  if [ -e "temp_${file_note}" ]; then
    ## All newlines are removed so that the comments fill exactly one row.
    ## The former trailing sed had a line break inside its expression, which
    ## made sed abort and left the comments column empty
    note=$( tr -d '\n' < "temp_${file_note}" )
    printf '%s\n' "${note}" >> "${file_note}"
  fi
  ## Construct txt file with all messenger files
  paste -d ';' mesg_*.txt | tee "${TRUST_TEST_SUMMARY}"
}
## Publish the test summary in production mode (-p|--prod).
## - Appends the summary to TRUST_TEST_LOG: the whole file (title row
##   included) when the log does not exist yet, its last line otherwise
## - When the result is 'FAILED' and TRUST_TEST_MAILING is set, writes the
##   notification in trusting.mail and sends it with 'mail'
## Globals read: TRUST_FLAG_PROD, TRUST_TEST_LOG, TRUST_TEST_SUMMARY,
##               TRUST_TEST_MAILING, TRUST_FLAG_RESULT, TRUST_FLAG_ERROR,
##               TRUST_CFG_NEW, TRUST_CFG_REF, TRUST_SVN_BRANCH,
##               TRUST_MAIN_USER, TRUST_MAIN_HPCC, TRUST_TEST_DIR,
##               TRUST_TEST_BENCHMARK, TRUST_TEST_BACKUP
prod_publish() {
  ## Production mode (-p|--prod)
  if [[ "${TRUST_FLAG_PROD}" -eq 1 ]]; then
    ## Create or append trusting logfile
    if [ -f "${TRUST_TEST_LOG}" ]; then
      tail -n 1 "${TRUST_TEST_SUMMARY}" >> "${TRUST_TEST_LOG}"
    else
      cat "${TRUST_TEST_SUMMARY}" >> "${TRUST_TEST_LOG}"
    fi
    ## Send mail only when FAILED
    if [[ -n "${TRUST_TEST_MAILING}" \
      && "${TRUST_FLAG_RESULT}" == 'FAILED' ]]; then
      ## Content: a here-document is required, otherwise the text lines
      ## below are run as commands and trusting.mail is never written
      cat <<END_MAIL > trusting.mail
Dear all,
The following trusting sequence has not completed successfully:
Testing configuration ${TRUST_CFG_NEW} based on ${TRUST_CFG_REF}.
User installation ${TRUST_MAIN_USER}
HPC environment ${TRUST_MAIN_HPCC}
Here is the running environment summary:
$( cat model.log )
For more details, look into the testing folder at:
${TRUST_TEST_DIR}
An archive is also available to share the questionable configuration:
${TRUST_TEST_BENCHMARK}/${TRUST_TEST_BACKUP}
END_MAIL
      ## Send with detailed subject; the recipients variable is left unquoted
      ## so that it may hold several addresses
      mail -s "[NEMO Trusting][${TRUST_CFG_REF}][${TRUST_SVN_BRANCH}] ${TRUST_FLAG_RESULT} ${TRUST_FLAG_ERROR}" \
        ${TRUST_TEST_MAILING} \
        < trusting.mail
    fi
  fi
}
## Close the test: translate the error code into a status message, update the
## result and status messenger files, archive the configuration when a
## production test failed, build the summary, publish it, then exit 0.
## Arguments: $1 - error code when TRUST_FLAG_RESULT is 'FAILED':
##              1 XIOS compilation   2 NEMO compilation   3 missing inputs
##              4 job submission     5 crash              6 time limit
##              7 missing benchmark  8 outputs differ     other: unknown error
## Globals read:    TRUST_TEST_DIR, TRUST_FLAG_RESULT, TRUST_FLAG_PROD,
##                  TRUST_JOB_TIMEOUT, TRUST_TEST_BACKUP, TRUST_TEST_BENCHMARK,
##                  TRUST_DIR_NEMOGCM, TRUST_CFG_NEW, file_rslt, file_stat
## Globals written: TRUST_FLAG_ERROR
get_out() {
  local time_step=0
  TRUST_FLAG_ERROR=$1
  printf "\n\nEnd of test\n"
  ## In case of compilation error
  cd "${TRUST_TEST_DIR}" \
    || echo "Warning: cannot change to '${TRUST_TEST_DIR}'" >&2
  if [ "${TRUST_FLAG_RESULT}" == 'FAILED' ]; then
    echo 'Failure'
    ## Error identification
    case "${TRUST_FLAG_ERROR}" in
      ## Compilation
      1) TRUST_FLAG_ERROR='XIOS compilation failed' ;;
      2) TRUST_FLAG_ERROR='NEMO compilation failed' ;;
      ## Submission
      3) TRUST_FLAG_ERROR='Missing input files' ;;
      4) TRUST_FLAG_ERROR='Job submission error' ;;
      ## Computing
      5) TRUST_FLAG_ERROR='Crashed at time step' ;;
      6) TRUST_FLAG_ERROR='Exceeded time limit' ;;
      ## Results
      7) TRUST_FLAG_ERROR='Missing previous outputs' ;;
      8) TRUST_FLAG_ERROR='New outputs differ' ;;
      ## Other: the pattern must stay unquoted, a quoted '*' only matches a
      ## literal asterisk and codes such as 'X' went through untranslated
      *) TRUST_FLAG_ERROR='Unknown error' ;;
    esac
  else
    echo 'Success' && TRUST_FLAG_ERROR='Code is reliable'
  fi
  ## Eventual comments from ocean.output
  if [ "${TRUST_FLAG_ERROR}" == 'Crashed at time step' ]; then
    comments 'E R R O R'
    [ -e time.step ] && time_step=$( cat time.step )
    TRUST_FLAG_ERROR+=' '$time_step
  else
    comments 'W A R N I N G'
    if [ "${TRUST_FLAG_ERROR}" == 'Exceeded time limit' ]; then
      TRUST_FLAG_ERROR+=' '$(( ${TRUST_JOB_TIMEOUT}/3600 ))'h'
    fi
  fi
  ## Last messenger files. TRUST_RESULT is honoured when set; otherwise the
  ## result flag is used so that the result line is not blanked
  sed -i "2 s/.*/${TRUST_RESULT:-${TRUST_FLAG_RESULT}}/" "${file_rslt}"
  sed -i "2 s/.*/${TRUST_FLAG_ERROR}/" "${file_stat}"
  ## Save tested configuration if trusting failed in production mode (-p|--prod)
  if [[ "${TRUST_FLAG_RESULT}" == 'FAILED' && "${TRUST_FLAG_PROD}" -eq 1 ]]; then
    echo "Creating archive ${TRUST_TEST_BACKUP} under ${TRUST_TEST_BENCHMARK}"
    tar -czf "${TRUST_TEST_BENCHMARK}/${TRUST_TEST_BACKUP}" * \
      -C "${TRUST_DIR_NEMOGCM}/CONFIG/${TRUST_CFG_NEW}/MY_SRC" . \
      -C "${TRUST_DIR_NEMOGCM}/CONFIG/${TRUST_CFG_NEW}" \
      "cpp_${TRUST_CFG_NEW}.fcm"
  fi
  ## Logfile construct & eventual sending of notification email
  printf "\nTrusting digest:\n----------------\n"
  log_make
  prod_publish
  exit 0
}