#!/bin/bash
#######################################
# Build the "Comments" column of the trusting report.
# Globals:
#   REFE_DIR, CFG_USER, CFG_ARCH (read)
#   LAST_TIME_STEP (reset to 0; set from ./time.step and exported when
#                   $1 is 'E R R O R')
# Arguments:
#   $1 - text searched for in ./ocean.output
# Outputs:
#   "Comments" header followed by the comment text, written to stdout and to
#   mesg_11_comments_${CFG_USER}_${CFG_ARCH}.txt
# Returns:
#   Status of the final printf | tee pipeline.
#######################################
comments() {
  local state=$1
  # Start from an empty comment so a value left over from an earlier call
  # can never leak into this report.
  local line=''
  LAST_TIME_STEP=0

  if [[ "$state" == 'E R R O R' ]]; then
    if [[ -e time.step ]]; then
      # The character class is quoted: unquoted, the shell could glob-expand
      # it against a one-letter file name in the current directory.
      LAST_TIME_STEP=$(tr -d '[:space:]' < time.step)
    fi
    # Export the variable itself (exporting its *value* as a name fails).
    export LAST_TIME_STEP
  fi

  if [[ -e ocean.output ]]; then
    # First match plus the five following lines, flattened onto one line.
    line=$(grep -m1 -A5 -- "$state" ocean.output | tr -d '\n')
  fi

  # When model.log differs from the reference one, the comment becomes the
  # reference-side lines of the diff (those prefixed with '>').
  if [[ -e "${REFE_DIR}/model.log" \
    && -n "$(diff -q model.log "${REFE_DIR}/model.log")" ]]; then
    line=$(diff model.log "${REFE_DIR}/model.log" | grep '>')
  fi

  # The text comes from model output, so it is passed as data and never as
  # the printf format string.
  printf 'Comments\n%s\n' "$line" \
    | tee "mesg_11_comments_${CFG_USER}_${CFG_ARCH}.txt"
}
#######################################
# Compare the input files of this run with the reference copies.
# Compared files: inputs_list.txt plus every namelist_* and *.xml found in
# the current directory.
# Globals:
#   REFE_DIR, CFG_USER, CFG_ARCH (read)
# Outputs:
#   "Input files" header followed by 'Same' or 'Different', written to stdout
#   and to mesg_09_inputs_${CFG_USER}_${CFG_ARCH}.txt
# Returns:
#   Status of the final printf | tee pipeline.
#######################################
diff_inputs() {
  local mesg='Same'
  local file
  local -a files=(inputs_list.txt)

  # Glob directly instead of parsing `ls`, so names containing whitespace
  # stay intact; a pattern without any match is simply skipped.
  for file in namelist_* *.xml; do
    [[ -e "$file" ]] && files+=("$file")
  done

  for file in "${files[@]}"; do
    # A missing file on either side also counts as a difference.
    if ! diff -q -- "$file" "${REFE_DIR}/${file}" > /dev/null; then
      mesg='Different'
    fi
  done

  printf 'Input files\n%s\n' "$mesg" \
    | tee "mesg_09_inputs_${CFG_USER}_${CFG_ARCH}.txt"
}
#######################################
# Compare ocean.output and every *.stat file of the current directory with
# the reference copies.
# Globals:
#   REFE_DIR (read)
#   ST (set to 'FAILED' and exported on a missing or differing file)
# Outputs:
#   The `diff -q` message of each differing file, on stdout.
# Returns:
#   0; the outcome is reported through ST. When a reference file is missing,
#   get_out 7 is called.
#######################################
diff_results() {
  local file
  local -a files=(ocean.output)

  # Glob directly instead of parsing `ls`, so names containing whitespace
  # stay intact; an unmatched pattern is skipped.
  for file in *.stat; do
    [[ -e "$file" ]] && files+=("$file")
  done

  for file in "${files[@]}"; do
    if [[ ! -e "${REFE_DIR}/${file}" ]]; then
      export ST='FAILED'
      get_out 7
    fi
    if ! diff -q -- "$file" "${REFE_DIR}/${file}"; then
      export ST='FAILED'
    fi
  done
}
#######################################
# Compare the restart files of the last time step with the reference ones.
# For each of restart, restart_ice and restart_trc: count the per-domain
# files, rebuild them into a single file when there are several, then compare
# that file with the reference using `$CDO diffn`.
# Globals:
#   REFE_DIR, NEMO_TRBD, NPROC, CDO (read)
#   LAST_TIME_STEP (set from ${REFE_DIR}/time.step and exported)
#   ST (set to 'FAILED' and exported on any problem)
# Outputs:
#   Progress and comparison result of each restart file, on stdout.
# Returns:
#   Outcome is reported through ST; get_out is called with 9 (reference
#   time.step or restart file missing) or 10 (no new restart file).
#######################################
diff_restart() {
  local found base_name comp file nb_dom nc_diff

  if [[ ! -e "${REFE_DIR}/time.step" ]]; then
    export ST='FAILED'
    get_out 9
  fi

  # The character class is quoted so that the shell cannot glob-expand it
  # against a one-letter file name in the current directory.
  LAST_TIME_STEP=$(tr -d '[:space:]' < "${REFE_DIR}/time.step")
  export LAST_TIME_STEP
  echo "Last time step of standard run: ${LAST_TIME_STEP}"

  # First restart file whose name carries the zero-padded last time step.
  # The search runs once and its result is reused below.
  found=$(find . -regex ".*_0+${LAST_TIME_STEP}_restart.*\.nc" -print -quit)
  if [[ -z "$found" ]]; then
    export ST='FAILED'
    return 0
  fi

  # Keep everything up to and including "<time step>_" of the file name.
  base_name=$(printf '%s\n' "${found##*/}" \
    | sed "s/^\(.*\)${LAST_TIME_STEP}_restart.*/\1${LAST_TIME_STEP}_/")

  for comp in restart restart_ice restart_trc; do
    file="${base_name}${comp}"
    printf '%s.nc: ' "$file"

    # Number of per-domain files (<file>_<digits>.nc); the arithmetic
    # expansion strips any padding printed by wc.
    nb_dom=$(( $(find . -name "${file}_[0-9]*.nc" | wc -l) ))
    if (( nb_dom > 1 )); then
      # Remove the per-domain files only when the rebuild succeeded.
      if "${NEMO_TRBD}/rebuild_nemo" -t "$NPROC" "$file" "$nb_dom" \
        > /dev/null; then
        rm -f -- "${file}"_[0-9]*.nc
      fi
    elif (( nb_dom == 0 )); then
      export ST='FAILED'
      get_out 10
    fi

    if [[ -e "${REFE_DIR}/${file}.nc" ]]; then
      # UNIX `cmp` not suitable (filename & timestamp in .nc file)
      # $CDO is expanded unquoted on purpose so it may carry options.
      # shellcheck disable=SC2086
      nc_diff=$($CDO diffn "${file}.nc" "${REFE_DIR}/${file}.nc" 2> /dev/null \
        | awk '/records/ {print $1}')
      # `[` is used deliberately: it compares in decimal and merely fails on
      # an empty or non-numeric value, which is then reported as "identical"
      # exactly as before, but without a test syntax error on stderr.
      # NOTE(review): a cdo failure is therefore indistinguishable from
      # identical files - confirm what the cdo version in use prints.
      if [ "${nc_diff:-0}" -ne 0 ] 2> /dev/null; then
        export ST='FAILED'
        printf '%s %s\n' "$CDO" "$nc_diff"
      else
        printf 'identical\n'
      fi
    else
      export ST='FAILED'
      get_out 9
    fi
  done
}
#######################################
# Record the run time of the job in the trusting report.
# Globals:
#   JOB_TIME (read) - shell command line whose output is the run time
#   CFG_USER, CFG_ARCH (read)
#   real_cpu_time (written) - output of the JOB_TIME command
# Outputs:
#   "Real CPU time" header followed by the value, written to stdout and to
#   mesg_10_cputime_${CFG_USER}_${CFG_ARCH}.txt
# Returns:
#   Status of the final printf | tee pipeline.
#######################################
get_cpu_time() {
  # JOB_TIME is a command line, hence the eval. It is quoted so that the
  # command is evaluated exactly as written, without a preliminary round of
  # word splitting and globbing.
  real_cpu_time=$(eval "${JOB_TIME}")

  # The value is passed as data, never as the printf format string.
  printf 'Real CPU time\n%s\n' "${real_cpu_time}" \
    | tee "mesg_10_cputime_${CFG_USER}_${CFG_ARCH}.txt"
}
#######################################
# Bring the input files into the current directory and list them.
# When FORC_TAR is set, the archive ${NEMO_FORC}/${FORC_TAR} is listed and
# extracted; otherwise every entry of ${NEMO_FORC} is listed and copied.
# Globals:
#   NEMO_FORC, FORC_TAR (read)
# Outputs:
#   inputs_list.txt in the current directory (archive or directory listing).
# Returns:
#   Status of the extraction or copy command.
#######################################
get_inputs() {
  if [[ -n "${FORC_TAR}" ]]; then
    tar -tvf "${NEMO_FORC}/${FORC_TAR}" > inputs_list.txt
    tar -vxf "${NEMO_FORC}/${FORC_TAR}" > /dev/null
  else
    ls -- "${NEMO_FORC}"/* > inputs_list.txt
    # The commands are run directly instead of being stored in strings: a
    # backslash kept inside such a string is not removed on expansion, so
    # the shell looked for a command literally named '\cp'. `command`
    # bypasses any cp function or alias.
    command cp -- "${NEMO_FORC}"/* . > /dev/null
  fi
}
#######################################
# Append the release of each software component to model.log and create the
# compiler / MPI / NetCDF columns of the trusting report.
# The release is the text following "<name>/" in LOADEDMODULES, or in PATH
# when the module list has no such entry; for the compiler it is taken from
# the output of `<compiler> --version`.
# Globals:
#   CDO, COMPILER, MPI, NETCDF, LOADEDMODULES, PATH, CFG_USER, CFG_ARCH (read)
# Outputs:
#   One "<name> <release>" line per component appended to model.log;
#   mesg_06_compiler_*, mesg_07_mpi_* and mesg_08_netcdf_* files built from
#   lines 4, 5 and 6 of model.log.
#######################################
get_soft_rel() {
  local rel arch_rel

  # Unquoted on purpose: an unset component must not produce a line, since
  # the report columns below rely on fixed line numbers of model.log.
  # shellcheck disable=SC2086
  for rel in $CDO $COMPILER $MPI $NETCDF; do
    # `sed -n ... p` prints only when the pattern matched. Without it, sed
    # echoes the whole module list on a miss, the value is never empty and
    # the PATH fallback is never reached.
    arch_rel=$(printf '%s\n' "$LOADEDMODULES" \
      | sed -n "s#.*${rel}/\([^:]*\).*#\1#p")
    if [[ -z "${arch_rel}" ]]; then
      arch_rel=$(printf '%s\n' "$PATH" \
        | sed -n "s#.*${rel}/\([^/]*\).*#\1#p")
    fi
    if [[ "$rel" == "$COMPILER" ]]; then
      arch_rel=$("$rel" --version | grep -m1 -o ' [0-9.]* ')
    fi
    # Unquoted on purpose: word splitting trims the blanks that surround the
    # value extracted by grep.
    # shellcheck disable=SC2086
    echo $rel ${arch_rel} >> model.log
  done

  # Each column file is a header line followed by the matching model.log line.
  sed -n 4p model.log | sed 'i\Fortran compiler' \
    > "mesg_06_compiler_${CFG_USER}_${CFG_ARCH}.txt"
  sed -n 5p model.log | sed 'i\MPI libs' \
    > "mesg_07_mpi_${CFG_USER}_${CFG_ARCH}.txt"
  sed -n 6p model.log | sed 'i\NetCDF libs' \
    > "mesg_08_netcdf_${CFG_USER}_${CFG_ARCH}.txt"
}
#######################################
# Print the revision number of a Subversion working copy.
# The "Revision:" line of `svn info` is preferred; when no such line exists,
# the last field of line 5 is used, as the code did historically.
# Arguments:
#   $1 - working copy directory
# Outputs:
#   The revision on stdout.
#######################################
_nemo_svn_revision() {
  svn info "$1" | awk '
    /^Revision:/ { print $NF; found = 1; exit }
    NR == 5      { fifth = $NF }
    END          { if (!found) print fifth }'
}

#######################################
# Optionally update the NEMO source directories, then record the NEMOGCM and
# XIOS revisions.
# Globals:
#   NEMO_VERS (read) - a value containing 'HEAD' or 'up' updates to HEAD, a
#                      value containing digits updates to that revision,
#                      anything else only runs `svn status`
#   NEMO_ARCH, NEMO_CONF, NEMO_ENGI, NEMO_EIOI, NEMO_EFCM, NEMO_TCMP,
#   NEMO_TRBD, DIR_XIOS, CFG_USER, CFG_ARCH (read)
#   rev (written) - highest revision found among the NEMO directories
# Outputs:
#   One "<svn command> <dir>: <svn output>" line per NEMO directory on
#   stdout; 'XIOS <rev>' and 'NEMOGCM <rev>' lines appended to model.log;
#   mesg_04_nemogcm_* and mesg_05_xios_* report files.
#######################################
get_nemo_rev() {
  local svn_opt='status'
  local -a svn_cmd svn_words
  local dir rev_loc vers_digits

  # If -v|--version option has been set, modify svn command
  vers_digits=${NEMO_VERS//[[:alpha:][:punct:][:space:]]/}
  if [[ "${NEMO_VERS}" == *HEAD* || "${NEMO_VERS}" == *up* ]]; then
    svn_cmd=(svn update -r HEAD)
  elif [[ -n "${vers_digits}" ]]; then
    svn_cmd=(svn update -r "${vers_digits}")
  else
    svn_cmd=(svn "${svn_opt}")
  fi

  rev=0
  for dir in "${NEMO_ARCH}" "${NEMO_CONF}" "${NEMO_ENGI}" \
    "${NEMO_EIOI}" "${NEMO_EFCM}" \
    "${NEMO_TCMP}" "${NEMO_TRBD}" \
    "${DIR_XIOS}"; do
    # Unset directories are skipped.
    [[ -n "$dir" ]] || continue

    # For time being, just get revision number from XIOS
    if [[ "$dir" == "${DIR_XIOS}" ]]; then
      rev_loc=$(_nemo_svn_revision "$dir")
      echo "XIOS ${rev_loc}" >> model.log
      {
        printf 'XIOS rev.\n'
        printf '%s' "${rev_loc}"
      } > "mesg_05_xios_${CFG_USER}_${CFG_ARCH}.txt"
      continue
    fi

    # Run the svn command and show its output flattened onto one line.
    # Reading into an array splits on whitespace without glob-expanding
    # characters such as the '?' status flag.
    svn_words=()
    read -r -d '' -a svn_words < <("${svn_cmd[@]}" "$dir") || true
    echo "${svn_cmd[*]} ${dir}:${svn_words[*]:+ ${svn_words[*]}}"

    # The revision is read after the command above because an update
    # changes it.
    rev_loc=$(_nemo_svn_revision "$dir")
    if [[ "$rev_loc" =~ ^[0-9]+$ ]]; then
      (( rev_loc >= rev )) && rev=${rev_loc}
    else
      printf 'get_nemo_rev: no revision number found for %s\n' "$dir" >&2
    fi
  done

  echo "NEMOGCM ${rev}" >> model.log
  {
    printf 'NEMOGCM rev.\n'
    printf '%s' "$rev"
  } > "mesg_04_nemogcm_${CFG_USER}_${CFG_ARCH}.txt"
}
#######################################
# Assemble the trusting report of this run.
# Every mesg_*_${CFG_USER}_${CFG_ARCH}.txt file of the current directory
# becomes one ';'-separated column of the report.
# Globals:
#   DATESTR, CFG_USER, CFG_ARCH (read)
# Outputs:
#   trusting_${DATESTR}_${CFG_USER}_${CFG_ARCH}.txt, also printed on stdout.
# Returns:
#   Status of the final cat.
#######################################
mesg_make() {
  local report="trusting_${DATESTR}_${CFG_USER}_${CFG_ARCH}.txt"

  # Only the '*' is left unquoted, so the glob still expands while the
  # configuration values cannot be split or expanded themselves.
  paste -d ';' mesg_*_"${CFG_USER}_${CFG_ARCH}".txt > "$report"
  cat -- "$report"
}
#######################################
# Publish the trusting report: append it to the history file kept in REFE_DIR
# and, when the run failed and EMAIL is set, send a notification mail.
# Nothing is done unless PUBLISH equals 1.
# Globals:
#   PUBLISH, REFE_DIR, TEST_DIR, DATESTR, CFG_USER, CFG_ARCH, REFE_CONF,
#   EMAIL, ST, ERR, USER (read)
# Outputs:
#   ${REFE_DIR}/trusting_${CFG_USER}_${CFG_ARCH}.txt (appended);
#   trusting.mail in the current directory when a mail is sent.
#######################################
mesg_publish() {
  local report="trusting_${DATESTR}_${CFG_USER}_${CFG_ARCH}.txt"
  local history="${REFE_DIR}/trusting_${CFG_USER}_${CFG_ARCH}.txt"

  # An unset PUBLISH means "do not publish".
  [[ "${PUBLISH:-0}" -eq 1 ]] || return 0

  # An existing history already has the header line, so only the last line
  # of the report is appended; otherwise the whole report is written.
  if [[ -f "$history" ]]; then
    tail -n 1 -- "$report" >> "$history"
  else
    cat -- "$report" >> "$history"
  fi

  # Send mail only when FAILED
  if [[ -n "$EMAIL" && "$ST" == 'FAILED' ]]; then
    cat << END_MAIL > trusting.mail
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
Dear $USER,
The trusting sequence for ${REFE_CONF} has failed.
Directory: ${TEST_DIR}
Archive: ${REFE_DIR}/trusting_${DATESTR}_${CFG_USER}_${CFG_ARCH}.tar.gz
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
END_MAIL
    if [[ -e "${TEST_DIR}/${report}" ]]; then
      cat -- "${TEST_DIR}/${report}" >> trusting.mail
    fi
    # EMAIL is left unquoted on purpose so that it may hold several
    # space-separated recipients.
    # shellcheck disable=SC2086
    mail -s "[trusting ${REFE_CONF}] $ST $ERR" $EMAIL < trusting.mail
  fi
}
#######################################
# Print the "Step....." header followed by the given step description.
# Arguments:
#   $1 - step description
# Outputs:
#   Two lines on stdout.
#######################################
print_step() {
  # %b keeps backslash escapes in the description working as they did when
  # it was part of the format string, while a '%' in it is now printed
  # literally instead of being parsed as a conversion.
  printf 'Step.....\n%b\n' "$1"
}
#######################################
# Finish the trusting sequence: write the status and result columns, archive
# the tested configuration on failure, build and publish the report, exit.
# Globals:
#   ST, TEST_DIR, REFE_DIR, DATESTR, CFG_USER, CFG_ARCH (read)
#   LAST_TIME_ERR, LAST_TIME_STEP, TIME_LIMIT, TIME_LIMI (read for messages)
#   ERR (written; exported when ST is 'FAILED')
# Arguments:
#   $1 - error code (1-10) when ST is 'FAILED'; otherwise the text is written
#        unchanged as the result
# Outputs:
#   mesg_02_status_* and mesg_03_result_* files, empty placeholder columns
#   on failure, and ${REFE_DIR}/trusting_*.tar.gz on failure.
# Returns:
#   Never returns: always exits with status 1.
#######################################
get_out() {
  local entry placeholder

  ERR=$1
  # NOTE(review): this file is written before the cd below, so it lands in
  # the caller's directory if that is not TEST_DIR - confirm this is intended.
  printf 'Status\n%s\n' "$ST" > "mesg_02_status_${CFG_USER}_${CFG_ARCH}.txt"

  if [[ "$ST" == 'FAILED' ]]; then
    # Quoted so that an empty TEST_DIR can no longer send us to $HOME.
    cd "${TEST_DIR}" \
      || printf 'get_out: cannot cd to %s\n' "${TEST_DIR}" >&2

    # Columns that an interrupted sequence never produced get an empty
    # placeholder so that the report keeps all its columns.
    for entry in '09_inputs=Input files' \
      '10_cputime=Real CPU time' \
      '11_comments=Comments'; do
      placeholder="mesg_${entry%%=*}_${CFG_USER}_${CFG_ARCH}.txt"
      [[ -e "$placeholder" ]] || printf '%s\n\n' "${entry#*=}" > "$placeholder"
    done

    # Error identification
    case "$ERR" in
      # Compilation
      1) ERR='XIOS compilation failed' ;;
      2) ERR='NEMO compilation failed' ;;
      # Submission
      3) ERR='Missing input files' ;;
      4) ERR='Job submission error' ;;
      # Running
      # LAST_TIME_ERR is not assigned anywhere in the visible part of this
      # file; fall back to LAST_TIME_STEP, which comments() reads from
      # time.step for the error state.
      5) ERR="nemo.exe crashed at ${LAST_TIME_ERR:-${LAST_TIME_STEP}}" ;;
      # TIME_LIMIT is the limit converted to hours by job_pending; the raw
      # TIME_LIMI value is used only when no conversion is available.
      6) ERR="Exceeded time limit ${TIME_LIMIT:-${TIME_LIMI}}h" ;;
      # Results
      7) ERR='Missing previous outputs ' ;;
      8) ERR='New outputs differ/missing' ;;
      9) ERR='Missing previous restarts' ;;
      10) ERR='New restarts differ/missing' ;;
      # Undefined
      *) ERR='Unknown error' ;;
    esac
    export ERR
  fi

  # Values are passed as data, never as the printf format string.
  printf 'Result.....\n%s\n' "$ERR" > "mesg_03_result_${CFG_USER}_${CFG_ARCH}.txt"

  # Save tested configuration if trusting failed
  if [[ "$ST" == 'FAILED' ]]; then
    tar -czf "${REFE_DIR}/trusting_${DATESTR}_${CFG_USER}_${CFG_ARCH}.tar.gz" *
  fi

  mesg_make
  mesg_publish
  exit 1
}
#######################################
# Wait for the submitted job to leave the queue, logging its state every 30
# seconds, and abort the trusting sequence when the time limit is exceeded.
# Globals:
#   JOB_LIST, JOB_INFO, JOB_DELE (read) - command lines listing the jobs,
#                                         describing a job, deleting a job
#   JOB_ID (read)
#   TIME_LIMI (read) - time limit, in the same unit as the sleep interval
#   TIME_LIMIT (written, exported) - TIME_LIMI / 3600
#   ST (set to 'FAILED' and exported on timeout)
# Outputs:
#   Job information appended to computation.log.
# Returns:
#   0 when the job left the queue in time; on timeout the job is deleted and
#   get_out 6 is called.
#######################################
job_pending() {
  local time_elapsed=0
  local -r time_increment=30
  local timed_out=0

  sleep "${time_increment}"

  # JOB_LIST, JOB_INFO and JOB_DELE hold whole command lines, so they are
  # expanded unquoted on purpose.
  # shellcheck disable=SC2086
  while [[ -n "$(${JOB_LIST} | grep -- "${JOB_ID}")" ]]; do
    # The job is still queued or running. Comparing with >= (instead of
    # strict equality) also catches limits that are not a multiple of the
    # polling interval.
    if (( time_elapsed >= TIME_LIMI )); then
      timed_out=1
      break
    fi
    printf '\n####################################################\n' >> computation.log
    ${JOB_INFO} "${JOB_ID}" >> computation.log
    sleep "${time_increment}"
    time_elapsed=$(( time_elapsed + time_increment ))
  done
  sleep "${time_increment}"

  if (( timed_out )); then
    # shellcheck disable=SC2086
    ${JOB_DELE} "${JOB_ID}" &> /dev/null
    TIME_LIMIT=$(( TIME_LIMI / 3600 ))
    # Export the variable itself (exporting its *value* as a name fails).
    export TIME_LIMIT
    # Like the other failure paths of this file: flag the failure, then
    # hand the numeric error code to get_out.
    export ST='FAILED'
    get_out 6
  fi
}