#!/bin/bash
##--------------------------------------------------------------------------------
## Messenger filenames
##--------------------------------------------------------------------------------
## One messenger file per piece of information; each receives a header line
## from init() and all 'mesg_*.txt' files are pasted together by log_make().
file_date=mesg_01_date.txt
file_rslt=mesg_02_result.txt
file_stat=mesg_03_status.txt
file_nemo=mesg_04_nemo.txt
file_xios=mesg_05_xios.txt
file_cmpf=mesg_06_compiler.txt
file_lmpi=mesg_07_mpi.txt
file_ncdf=mesg_08_netcdf.txt
file_inpt=mesg_09_inputs.txt
file_time=mesg_10_time.txt
file_memy=mesg_11_memory.txt
file_note=mesg_12_comments.txt
##--------------------------------------------------------------------------------
## Functions in order of use
##--------------------------------------------------------------------------------
## Print a section title underlined with dashes, framed by blank lines.
## Arguments: $1 - title text
## Outputs:   empty line, title, underline of the same length, empty line
step() {
  local title=$1
  local outline
  ## Pad to the exact title length; the former 'echo | wc -c' also counted
  ## the newline added by echo and made the underline one dash too long.
  printf -v outline '%*s' "${#title}" ''
  printf '\n%s\n%s\n\n' "$title" "${outline// /-}"
}
##
##--------------------------------------------------------------------------------
## Create the testing and benchmark directories, move into the testing
## directory and initialise every messenger file with its column header.
## Globals read: TRUST_TEST_DIR, TRUST_TEST_BENCHMARK, TRUST_FLAG_RESULT,
##               TRUST_TEST_DATE and the file_* messenger names
## Calls get_out B when the directories cannot be created or entered.
init() {
  local dat
  ## Paths are quoted so that directories containing blanks are handled
  mkdir -p "${TRUST_TEST_DIR}" "${TRUST_TEST_BENCHMARK}" || get_out B
  cd "${TRUST_TEST_DIR}" || get_out B
  echo "${TRUST_TEST_DIR}"
  ## First line of each messenger file is its header
  echo 'Date'             > "${file_date}"
  echo 'Result'           > "${file_rslt}"
  echo 'Status'           > "${file_stat}"
  echo 'NEMOGCM'          > "${file_nemo}"
  echo 'XIOS'             > "${file_xios}"
  echo 'Fortran'          > "${file_cmpf}"
  echo 'MPI'              > "${file_lmpi}"
  echo 'NetCDF'           > "${file_ncdf}"
  echo 'Inputs'           > "${file_inpt}"
  echo 'Time'             > "${file_time}"
  echo 'RAM (Phy./Virt.)' > "${file_memy}"
  echo 'Comments'         > "${file_note}"
  ## Second line of result/status: current result flag with 'Unknown error'
  ## by default (get_out rewrites both lines later on)
  echo "${TRUST_FLAG_RESULT}" >> "${file_rslt}"
  echo 'Unknown error'        >> "${file_stat}"
  ## UTC time zone for timestamping
  dat=$( date -ud "${TRUST_TEST_DATE}" +"%F %R %Z" )
  echo "$dat" >> "${file_date}"
}
##
##--------------------------------------------------------------------------------
## Run the configured SVN action on each essential NEMO directory and record
## the highest 'Last Changed Rev' found.
## Globals read: TRUST_SVN_UP (blank-separated directory list),
##               TRUST_SVN_ACTION (command, word-split on purpose),
##               TRUST_SVN_NEMOGCM, file_nemo
## Outputs: directory names on stdout; 'NEMOGCM <rev>' appended to model.log;
##          '<rev>' appended to ${file_nemo}
## Calls get_out C when the SVN action fails.
get_nemo_rev() {
  local dir rev_loc
  local rev=0
  ## Loop on essential NEMO directories
  for dir in ${TRUST_SVN_UP}; do
    echo "$dir"
    ${TRUST_SVN_ACTION} "${TRUST_SVN_NEMOGCM}/$dir" || get_out C
    rev_loc=$( svn info "${TRUST_SVN_NEMOGCM}/$dir" \
               | awk '/Last Changed Rev/ {print $NF}' )
    ## Ignore directories for which no numeric revision could be read
    ## (an empty value used to break the integer comparison)
    [[ ${rev_loc} =~ ^[0-9]+$ ]] || continue
    ## Keep last rev. nb
    [ "${rev_loc}" -gt "$rev" ] && rev=${rev_loc}
  done
  echo "NEMOGCM $rev" >> model.log
  echo "$rev" >> "${file_nemo}"
}
##
##--------------------------------------------------------------------------------
## Record the versions of the Fortran compiler, MPI library, NetCDF library
## and CDO in model.log, copy lines 2-4 of model.log to the compiler/MPI/NetCDF
## messenger files, print model.log and dump the environment to env.log.
## Globals read: TRUST_JOB_ENV (sourced), TRUST_COMPILE_FORTRAN,
##               TRUST_COMPILE_MPI, TRUST_COMPILE_NETCDF, TRUST_IO_CDO,
##               PATH, file_cmpf, file_lmpi, file_ncdf
## NOTE(review): the 2p/3p/4p extraction assumes model.log already holds one
## line (presumably the NEMOGCM revision) before this function runs.
get_soft_rel() {
  local ver str
  ## Sourcing environment
  . "${TRUST_JOB_ENV}" >& /dev/null
  for str in ${TRUST_COMPILE_FORTRAN} \
             ${TRUST_COMPILE_MPI} ${TRUST_COMPILE_NETCDF} \
             ${TRUST_IO_CDO}; do
    [[ -z $str ]] && continue
    ## Extract version number after searching pattern in PATH env. variable
    ver=$( echo "$PATH" | sed "s|.*\($str[0-9.]*\).*|\1|" )
    ## option --version would work for main Fortran compilers and CDO.
    ## Exact comparison: the former unanchored regex 'FORTRAN|CDO' matched
    ## every name as soon as one of the two variables was empty.
    if [[ $str == "${TRUST_COMPILE_FORTRAN}" || $str == "${TRUST_IO_CDO}" ]]; then
      ver=$( $str --version 2>&1 | grep -m1 -oe '\<[0-9. ]*\>' \
             | xargs echo $str )
    fi
    ## Cleaning characters string to display proper soft name
    ## ($ver left unquoted on purpose: collapses repeated blanks)
    ver=$( echo $ver | sed 's|[/-]| |g' )
    echo $ver >> model.log
  done
  sed -n 2p model.log >> "${file_cmpf}"
  sed -n 3p model.log >> "${file_lmpi}"
  sed -n 4p model.log >> "${file_ncdf}"
  awk '{printf "%-20s %s %s\n", $1, $2, $3}' model.log
  env | sort > env.log
}
##
##--------------------------------------------------------------------------------
## Log the XIOS working-copy revision and build XIOS with make_xios.
## Globals read: TRUST_IO_XIOS, TRUST_TEST_DIR, file_xios, TRUST_IO_XIOS_MODE,
##               TRUST_MAIN_HPCC, TRUST_COMPILE_NPROC, TRUST_MAIN_STDOUT
## Outputs: 'XIOS branch <name> rev. <rev>' on stdout and in model.log,
##          '<rev>' appended to the XIOS messenger file, 'Success' when
##          ./lib/libxios.a exists after the build.
## Calls get_out D when the XIOS directory cannot be entered or the library
## is missing after the build.
compile_xios() {
  ## Kept local, like the 'rev' of get_nemo_rev and prod_publish
  local rev
  cd "${TRUST_IO_XIOS}" || get_out D
  rev=$( svn info | awk '/Last Changed Rev/ {print $NF}' )
  printf 'XIOS branch %s rev. %s\n' \
         "$( basename "${TRUST_IO_XIOS}" )" "$rev" \
    | tee -a "${TRUST_TEST_DIR}/model.log"
  echo "$rev" >> "${TRUST_TEST_DIR}/${file_xios}"
  ## eval is needed here: the last variable is expanded as shell text
  ## (presumably an output redirection - confirm against its definition)
  eval "
  ./make_xios ${TRUST_IO_XIOS_MODE} --arch ${TRUST_MAIN_HPCC} \
  --job ${TRUST_COMPILE_NPROC} \
  ${TRUST_MAIN_STDOUT}
  "
  if [[ -e ./lib/libxios.a ]]; then
    echo 'Success'
  else
    get_out D
  fi
}
##
##--------------------------------------------------------------------------------
## Move to ${TRUST_SVN_NEMOGCM}/CONFIG and, when neither debug nor dev mode is
## set, clean the ${TRUST_CFG_NEW} configuration with makenemo if it exists.
## NOTE(review): this function looks truncated/garbled in this copy of the
## file: both 'if' statements are never closed, and the '<> ${file_inpt}'
## redirection followed by the 'Inputs' comment handling appears to belong to
## a later input-comparison step ('files_list' is not set anywhere in the
## visible part). Restore the missing section from the reference version
## before relying on this block.
compile_nemo() {
cd ${TRUST_SVN_NEMOGCM}/CONFIG
## Recompiling from scratch if not in debug or dev mode
if [[ ${TRUST_FLAG_DEBUG} == 'false' && ${TRUST_FLAG_DEV} == 'false' ]]; then
## Clean only when the configuration directory is already there
if [[ -d ${TRUST_CFG_NEW} ]]; then
./makenemo -n ${TRUST_CFG_NEW} clean_config \
> /dev/null <> ${file_inpt}
## List different files for web comment (appended to the temporary comments
## file only when 'files_list' is not empty)
[ -n "${files_list}" ] && echo 'Inputs : '${files_list}'differ
' \
>> temp_${file_note}
}
##
##--------------------------------------------------------------------------------
## Copy the job script into the testing directory and submit the job.
## Globals read:    TRUST_JOB_SCRIPT, TRUST_TEST_DIR, TRUST_JOB_SUBMIT
## Globals written: TRUST_JOB_ID (stdout of the submission command)
## Outputs: 'Success (job ID <id>)' on stdout
## Calls get_out G when the submission command returns non-zero.
job_submit() {
  ## Copy the submitting script to testing folder
  cp "${TRUST_JOB_SCRIPT}" "${TRUST_TEST_DIR}"
  ## The command string is quoted so that eval sees it unmodified (no word
  ## splitting or globbing before evaluation); the ID is printed as one
  ## argument so that a multi-word ID does not repeat the message.
  if TRUST_JOB_ID=$( eval "${TRUST_JOB_SUBMIT}" ); then
    printf 'Success (job ID %s)\n' "${TRUST_JOB_ID}"
  else
    get_out G
  fi
}
##
##--------------------------------------------------------------------------------
## Wait for the submitted job: poll its state every 30 seconds until the state
## command prints nothing or the timeout is reached.
## Globals read: TRUST_JOB_STATE (command; non-empty output = still pending),
##               TRUST_JOB_INFO (optional command logged at each poll),
##               TRUST_JOB_TIMEOUT (seconds), TRUST_JOB_KILL (command)
## Outputs: separators and job information appended to computation.log
## Calls get_out I after killing the job when the timeout is reached.
job_pending() {
  local outline time_elapsed=0 time_increment=30
  printf -v outline '%100s' ''
  outline=${outline// /#}
  sleep "${time_increment}"
  ## Append a log file while pending
  while [[ $( eval "${TRUST_JOB_STATE}" ) \
           && ${time_elapsed} -lt ${TRUST_JOB_TIMEOUT} ]]; do
    printf '\n%s\n' "$outline" >> computation.log
    [ -n "${TRUST_JOB_INFO}" ] && eval "${TRUST_JOB_INFO}" >> computation.log
    sleep "${time_increment}"
    time_elapsed=$(( time_elapsed + time_increment ))
  done
  sleep "${time_increment}"
  ## Kill remaining job & stop the test if it's too long.
  ## '-ge' instead of '-eq': the counter moves by steps of 30 s and jumps
  ## over any timeout that is not a multiple of the increment.
  if [ "${time_elapsed}" -ge "${TRUST_JOB_TIMEOUT}" ]; then
    eval "${TRUST_JOB_KILL}" &> /dev/null
    get_out I
  fi
}
##
##--------------------------------------------------------------------------------
## Report job performance figures on stdout and append them to the time and
## memory messenger files. Each figure comes from a command held in a
## variable and is skipped when that command is not configured.
job_perfs() {
  local cpu_time ram_phys ram_virt
  ## Computation time (useful to spot unusual durations)
  if [[ -n ${TRUST_JOB_TIME} ]]; then
    cpu_time=$( eval ${TRUST_JOB_TIME} )
    printf "Time: "
    echo ${cpu_time} | tee -a ${file_time}
  fi
  ## Memory peaks are reported only when both commands are available
  [[ -n ${TRUST_JOB_RAM_P} && -n ${TRUST_JOB_RAM_V} ]] || return 0
  ram_phys=$( eval ${TRUST_JOB_RAM_P} )
  ram_virt=$( eval ${TRUST_JOB_RAM_V} )
  printf "Max memory usage (physical/virtual): "
  echo ${ram_phys}' / '${ram_virt} | tee -a ${file_memy}
}
##
##--------------------------------------------------------------------------------
## Check the outcome of the run in the current directory: get_out H is called
## when time.step is absent or when ocean.output contains 'E R R O R',
## otherwise 'Success' is printed.
job_state() {
  ## ocean.output is only inspected when time.step exists
  if [[ -e time.step ]] && ! grep -q 'E R R O R' ocean.output; then
    echo 'Success' ## Must be reviewed
  else
    get_out H
  fi
}
##
##--------------------------------------------------------------------------------
## Compare ocean.output and the *.stat files of the current directory with
## their counterparts in the benchmark directory.
## Globals read:    TRUST_TEST_BENCHMARK, file_note
## Globals written: TRUST_FLAG_RESULT (set to 'OK')
## Outputs: diff messages and 'Same' (when nothing differs) on stdout; the list
##          of differing files appended to temp_${file_note}
## Calls get_out J when a benchmark file is missing.
diff_results() {
  local file
  local files_list='' mesg='Same'
  local -a files=( 'ocean.output' )
  ###################################
  ## Think of copying initial test ##
  ###################################
  ## Now test is good by default ('OK')
  TRUST_FLAG_RESULT='OK'
  ## Stat files are optional: keep only real matches, so that an unmatched
  ## '*.stat' pattern is not compared as if it were a file name
  for file in *.stat; do
    [[ -e $file ]] && files+=( "$file" )
  done
  ## Simple diff
  for file in "${files[@]}"; do
    ## Stop if no minimal benchmark files (ocean.output, eventual stat files)
    [[ -e ${TRUST_TEST_BENCHMARK}/$file ]] || get_out J
    ## Continue even if it differs
    if ! diff -q "$file" "${TRUST_TEST_BENCHMARK}/$file"; then
      mesg='Different'
      files_list+="$file "
    fi
  done
  [[ $mesg == 'Same' ]] && echo "$mesg"
  ## List different files for web comment (entry followed by an empty line,
  ## exactly as written before)
  [[ -n ${files_list} ]] \
    && printf '%s\n\n' "Results : ${files_list}differ" >> "temp_${file_note}"
}
##
##--------------------------------------------------------------------------------
## Rebuild the decomposed restart files found under the current directory and
## compare each rebuilt file with the benchmark one using 'cdo diffn'.
## Globals read: TRUST_CFG_NEW, TRUST_SVN_NEMOGCM, TRUST_COMPILE_NPROC,
##               TRUST_TEST_BENCHMARK, TRUST_FLAG_RESULT, file_note
## Outputs: one line per differing restart and 'Same' (when none differs) on
##          stdout; a summary appended to temp_${file_note}
## Calls get_out K when a rebuild fails and get_out L when TRUST_FLAG_RESULT
## is 'FAILED' at the end.
diff_restarts() {
  local dif dif_nb filebase filebases ndomain
  local files_list='' dif_sum=0
  ## Find all restart files to rebuild
  if [[ -n $( find -regex ".*_restart.*[0-9]\.nc" -print -quit ) ]]; then
    ###############################################################
    ## Think to set the configuration name in the 'namelist_cfg' ##
    ###############################################################
    filebases=$( find -regextype sed -regex ".*${TRUST_CFG_NEW}.*_[0-9]\{4\}\.nc" \
                 | sed 's/\(.*\)_.*/\1/' | sort -u )
    for filebase in $filebases; do
      ndomain=$( find -regex ".*${filebase}_[0-9]*.nc" \
                 | wc -l | awk '{print $1}' )
      #####################################################
      ## Handle 2 possibilities of 'rebuild_nemo' origin ##
      #####################################################
      if "${TRUST_SVN_NEMOGCM}/TOOLS/REBUILD_NEMO/rebuild_nemo" \
           -t "${TRUST_COMPILE_NPROC}" "$filebase" "$ndomain" \
           > /dev/null; then
        ## Possibility of remaining decomposed restarts (even after rebuild)
        rm -f "${filebase}"_[0-9]*.nc > /dev/null
      else
        get_out K
      fi
      ## Compare only when the benchmark restart file exists
      if [[ -e ${TRUST_TEST_BENCHMARK}/$filebase.nc ]]; then
        cdo diffn "$filebase.nc" "${TRUST_TEST_BENCHMARK}/$filebase.nc" \
          > cdo_diff.out 2> /dev/null
        ## Identical if cdo_diff.out is zero size
        [[ -s cdo_diff.out ]] || continue
        dif=$( grep -om1 '[0-9]* of [0-9]* records' cdo_diff.out )
        if [[ -n $dif ]]; then
          files_list+="$filebase "
          echo "$filebase.nc: $dif"
          ## Leading number = differing records. The former sed call lacked
          ## its 's' command, so the sum was never updated.
          dif_nb=${dif%% *}
          [[ $dif_nb =~ ^[0-9]+$ ]] && dif_sum=$(( dif_sum + 10#$dif_nb ))
        fi
      fi
    done
    ## List modified restart(s) for web comment with sum of differences
    ## (entry followed by an empty line, exactly as written before)
    if (( dif_sum != 0 )); then
      printf '%s\n\n' "Restarts: ${files_list}${dif_sum} record(s) differ" \
        >> "temp_${file_note}"
    else
      echo 'Same'
    fi
  fi
  [[ ${TRUST_FLAG_RESULT} == 'FAILED' ]] && get_out L
}
##
##--------------------------------------------------------------------------------
## Extract the first message of a given kind from ocean.output, print it and
## append it to the temporary comments file.
## Arguments: $1 - pattern: 'E R R O R' (matched anywhere, 4 following lines
##                 kept) or any other text such as 'W A R N I N G' (matched
##                 after '^ ', 2 following lines kept)
## Globals read: file_note
## Returns: non-zero when nothing was found
comments() {
  local line='' state=$1
  local -a grep_opts
  if [[ -e ocean.output ]]; then
    ## 'W A R N I N G' pattern by default
    grep_opts=( -A2 "^ $state" )
    [[ $state == 'E R R O R' ]] && grep_opts=( -A4 "$state" )
    ## Select first occurrence for web comment (grep called directly, no eval)
    line=$( grep -m1 "${grep_opts[@]}" ocean.output | tr -d '\n' )
  fi
  [[ -n $line ]] && (
    ## Word splitting still collapses the blanks for the terminal, but
    ## globbing is switched off so that '*' in the message stays literal
    set -f
    echo $line
    ## The message is data, never a printf format ('%' may appear in it)
    printf '%s\n' "$line" >> "temp_${file_note}"
  )
}
##
##--------------------------------------------------------------------------------
## Finalise the comments messenger file and build the test summary.
## Globals read: file_note, TRUST_TEST_SUMMARY
## Outputs: all 'mesg_*.txt' files pasted side by side with ';' on stdout and
##          in ${TRUST_TEST_SUMMARY}
log_make() {
  ## Format comments for web: every non-empty entry of the temporary file is
  ## put on ONE line (paste needs one line per messenger file), entries being
  ## separated by '<br>' with none after the last one. The former
  ## 'tr -d "\n" | sed' pipeline removed the only separator between entries
  ## and its sed script, split over two lines, was an unterminated 's' command.
  ## NOTE(review): '<br>' is assumed to be the line break wanted for the web
  ## page - confirm. Entries already ending with '<br>' are tolerated.
  if [[ -e temp_${file_note} ]]; then
    awk '{ sub(/<br>[ \t]*$/, "") }
         NF { printf "%s%s", sep, $0; sep = "<br>" }
         END { if (sep != "") print "" }' "temp_${file_note}" \
      >> "${file_note}"
  fi
  ## Construct txt file with all messenger files
  paste -d ';' mesg_*.txt | tee "${TRUST_TEST_SUMMARY}"
}
##
##--------------------------------------------------------------------------------
## In production mode, append the test summary to the trusting logfile and,
## when the test FAILED and a mailing list is set, send a notification mail.
## Globals read: TRUST_FLAG_PROD, TRUST_TEST_LOG, TRUST_TEST_SUMMARY,
##               TRUST_TEST_MAILING (blank-separated recipients),
##               TRUST_FLAG_RESULT, TRUST_FLAG_ERROR, TRUST_CFG_NEW,
##               TRUST_CFG_REF, TRUST_MAIN_USER, TRUST_MAIN_HPCC,
##               TRUST_TEST_DIR, TRUST_TEST_BENCHMARK, TRUST_TEST_BACKUP,
##               TRUST_SVN_BRANCH
## Outputs: ${TRUST_TEST_LOG} appended; trusting.mail written in the current
##          directory and piped to 'mail'
prod_publish() {
  local subject
  ## Production mode (-p|--prod)
  if [[ ${TRUST_FLAG_PROD} == 'true' ]]; then
    ## Create or append trusting logfile: whole summary (header included) for
    ## a new logfile, last line only for an existing one
    if [[ -f ${TRUST_TEST_LOG} ]]; then
      tail -n 1 "${TRUST_TEST_SUMMARY}" >> "${TRUST_TEST_LOG}"
    else
      cat "${TRUST_TEST_SUMMARY}" >> "${TRUST_TEST_LOG}"
    fi
    ## Send mail only when FAILED
    if [[ -n ${TRUST_TEST_MAILING} && ${TRUST_FLAG_RESULT} == 'FAILED' ]]; then
      ## Content: here-document up to END_MAIL (the '<<' operator had been
      ## lost, which made the shell run the message text as commands)
      cat <<END_MAIL > trusting.mail
Dear all,
The following trusting sequence has not completed successfully:
Testing configuration ${TRUST_CFG_NEW} based on ${TRUST_CFG_REF}.
User installation ${TRUST_MAIN_USER}
HPC environment ${TRUST_MAIN_HPCC}
Here is the running environment summary:
$( cat model.log )
For more details, look into the testing folder at:
${TRUST_TEST_DIR}
An archive is also available to share the questionable configuration:
${TRUST_TEST_BENCHMARK}/${TRUST_TEST_BACKUP}
END_MAIL
      ## Send with detailed subject (recipients are word-split on purpose)
      subject="[NEMO Trusting][${TRUST_CFG_REF}][${TRUST_SVN_BRANCH}] ${TRUST_FLAG_RESULT} ${TRUST_FLAG_ERROR}"
      mail -s "$subject" ${TRUST_TEST_MAILING} < trusting.mail
    fi
  fi
}
##
##--------------------------------------------------------------------------------
## End the test: translate the error code into a status message, update the
## result/status messenger files, archive the configuration when a production
## test failed, build the summary, publish it and exit.
## Arguments: $1 - one-letter error code (A..L); any other value, or none,
##                 is reported as 'Unknown error'
## Globals read:    TRUST_TEST_DIR, TRUST_FLAG_RESULT, TRUST_FLAG_PROD,
##                  TRUST_JOB_TIMEOUT, TRUST_TEST_BACKUP, TRUST_TEST_BENCHMARK,
##                  TRUST_SVN_NEMOGCM, TRUST_CFG_NEW, file_rslt, file_stat
## Globals written: TRUST_FLAG_ERROR (final status message)
## Always exits with status 0.
get_out() {
  local time_step=0
  TRUST_FLAG_ERROR=$1
  printf "\n\nEnd of test\n"
  ## In case of compilation error (current directory is then elsewhere)
  cd "${TRUST_TEST_DIR}"
  if [[ ${TRUST_FLAG_RESULT} == 'FAILED' ]]; then
    echo 'Failure'
    ## Error identification
    case "${TRUST_FLAG_ERROR}" in
      ## Initialisation
      'A') TRUST_FLAG_ERROR='Missing environment variable' ;;
      'B') TRUST_FLAG_ERROR='Unable to create testing directory' ;;
      'C') TRUST_FLAG_ERROR='SVN issue on local working copy' ;;
      ## Compilation
      'D') TRUST_FLAG_ERROR='XIOS compilation failed' ;;
      'E') TRUST_FLAG_ERROR='NEMO compilation failed' ;;
      ## Submission
      'F') TRUST_FLAG_ERROR='Missing input files' ;;
      'G') TRUST_FLAG_ERROR='Job submission error' ;;
      ## Computing
      'H') TRUST_FLAG_ERROR='Crashed at time step '
           comments 'E R R O R'
           [[ -e time.step ]] && time_step=$( cat time.step )
           TRUST_FLAG_ERROR+=${time_step:=0} ;;
      'I') TRUST_FLAG_ERROR='Exceeded time limit of '
           TRUST_FLAG_ERROR+=$(( ${TRUST_JOB_TIMEOUT}/3600 ))'h' ;;
      ## Results
      'J') TRUST_FLAG_ERROR='Missing previous outputs' ;;
      'K') TRUST_FLAG_ERROR='Restart rebuild error' ;;
      'L') TRUST_FLAG_ERROR='New outputs differ' ;;
      ## Other: the pattern must stay unquoted to act as the default branch
      ## (a quoted '*' only matches a literal asterisk)
      *)   TRUST_FLAG_ERROR='Unknown error' ;;
    esac
  else
    echo 'Success' && TRUST_FLAG_ERROR='Code is reliable'
  fi
  ## Eventual comments from ocean.output (already collected for a crash)
  [[ ! ${TRUST_FLAG_ERROR} =~ 'Crashed at time step' ]] && comments 'W A R N I N G'
  ## Last messenger files: second line holds the final result and status
  sed -i "2 s/.*/$TRUST_FLAG_RESULT/" "${file_rslt}"
  sed -i "2 s/.*/$TRUST_FLAG_ERROR/" "${file_stat}"
  ## Save tested configuration if trusting failed in production mode ('-p')
  if [[ ${TRUST_FLAG_RESULT} == 'FAILED' && ${TRUST_FLAG_PROD} == 'true' ]]; then
    echo 'Creating archive '${TRUST_TEST_BACKUP}' under '${TRUST_TEST_BENCHMARK}
    tar -czf "${TRUST_TEST_BENCHMARK}/${TRUST_TEST_BACKUP}" * \
        -C "${TRUST_SVN_NEMOGCM}/CONFIG/${TRUST_CFG_NEW}/MY_SRC" . \
        -C "${TRUST_SVN_NEMOGCM}/CONFIG/${TRUST_CFG_NEW}" \
        "cpp_${TRUST_CFG_NEW}.fcm"
  fi
  ## Logfile construct & eventual sending of notification email
  printf "\nTrusting digest:\n----------------\n"
  log_make
  prod_publish
  exit 0
}