New URL for NEMO forge! http://forge.nemo-ocean.eu

In March 2022, along with the NEMO 4.2 release, code development moved to a self-hosted GitLab.
This forge is now archived and remains online for historical reference.

trusting_func.sh in branches/2015/dev_r5092_CNRS18_TRUST/NEMOGCM/TRUST/inc – NEMO

Context Navigation

source: branches/2015/dev_r5092_CNRS18_TRUST/NEMOGCM/TRUST/inc/trusting_func.sh @ 8818

Last change on this file since 8818 was 8818, checked in by nicolasmartin, 6 years ago
Continuation of global refactoring of the tool, set cfg files mime property and modify trusting.env trying to handle default settings
Property eol-style set to `native` Property svn:executable set to ``* Property svn:keywords set to `Author Date Id Rev URL`
File size: 13.4 KB

Rev	Line
#!/bin/bash


## Messenger filenames
## One file per field of the test summary; 'log_make' pastes mesg_*.txt together
file_date='mesg_01_date.txt'
file_rslt='mesg_02_result.txt'
file_stat='mesg_03_status.txt'
file_nemo='mesg_04_nemo.txt'
file_xios='mesg_05_xios.txt'
file_cmpf='mesg_06_compiler.txt'
file_lmpi='mesg_07_mpi.txt'
file_ncdf='mesg_08_netcdf.txt'
file_inpt='mesg_09_inputs.txt'
file_time='mesg_10_time.txt'
file_memy='mesg_11_memory.txt'
file_note='mesg_12_comments.txt'
[5453]	11
[5644]	12
[5788]	13	## Functions in order of use
	14	print_step() {
	15	local char_nb=$( echo "$1" \| wc -c )
	16	local outline=$( printf "%${char_nb}s" )
[5644]	17
[5788]	18	printf "\nStep.....\n%s\n%s\n" "$1" ${outline// /-}
	19	}
[5681]	20
## (Re)create every messenger file with its column title as first line
## The result and status files also receive a provisional 2nd line: the current
## TRUST_FLAG_RESULT and 'Unknown error', i.e. what is reported by default
init_files() {
    ## Files holding only their title for now
    printf '%s\n' 'Date'                > ${file_date}
    printf '%s\n' 'NEMOGCM rev.'        > ${file_nemo}
    printf '%s\n' 'XIOS rev.'           > ${file_xios}
    printf '%s\n' 'Fortran compiler'    > ${file_cmpf}
    printf '%s\n' 'MPI libs'            > ${file_lmpi}
    printf '%s\n' 'NetCDF libs'         > ${file_ncdf}
    printf '%s\n' 'Input files'         > ${file_inpt}
    printf '%s\n' 'Elapsed time'        > ${file_time}
    printf '%s\n' 'Memory (Phy./Virt.)' > ${file_memy}
    printf '%s\n' 'Comments'            > ${file_note}

    ## Title followed by the default value
    { echo 'Result'; echo ${TRUST_FLAG_RESULT}; } > ${file_rslt}
    { echo 'Status'; echo 'Unknown error';      } > ${file_stat}
}
	41
	42	get_date() {
[5690]	43	## UTC time zone for timestamping
[8808]	44	local dat=$( date -ud "${TRUST_TEST_DATE}" +"%F %R %Z" )
[5644]	45
[5788]	46	echo $dat \
[8780]	47	>> ${file_date}
[5644]	48	}
	49
	50	get_nemo_rev() {
[5695]	51	local dir rev_loc
[5799]	52	local rev=0
[5644]	53
[5788]	54	## Loop on essential NEMO directories
[8818]	55	for dir in ${TRUST_SVN_CO} ${TRUST_IO_XIOS}; do
[5455]	56
[5690]	57	## For time being, just get revision from XIOS with no action on directory
[8818]	58	if [ $dir == ${TRUST_IO_XIOS} ]; then
[5799]	59	rev_loc=$( svn info $dir \| awk '/Last Changed Rev/ {print $NF}' )
[5689]	60	echo 'XIOS '${rev_loc} \
	61	>> model.log
[8818]	62	echo "<a href=\"https://forge.ipsl.jussieu.fr/ioserver/changeset/${rev_loc}\" target=\"_blank\">${rev_loc}</a>"
[8780]	63	>> ${file_xios}
[5644]	64	continue
	65	fi
	66
[8808]	67	echo $dir && ${TRUST_SVN_ACTION} ${TRUST_DIR_NEMOGCM}/$dir
	68	rev_loc=$( svn info ${TRUST_DIR_NEMOGCM}/$dir \
	69	\| awk '/Last Changed Rev/ {print $NF}' )
[5690]	70
	71	## Keep last rev. nb
[5689]	72	[ ${rev_loc} -gt $rev ] && rev=${rev_loc}
[5644]	73	done
	74
[5689]	75	echo 'NEMOGCM '$rev \
	76	>> model.log
[8818]	77	echo "<a href=\"https://forge.ipsl.jussieu.fr/nemo/changeset/$rev\" target=\"_blank\">$rev</a>"
[8780]	78	>> ${file_nemo}
[5438]	79	}
	80
## Log the release of the compiler, MPI, NetCDF and CDO used for the test
## Globals:   TRUST_JOB_ENV (file sourced first), TRUST_COMPILE_FORTRAN,
##            TRUST_COMPILE_MPI, TRUST_COMPILE_NETCDF, TRUST_IO_CDO,
##            file_cmpf, file_lmpi, file_ncdf (all read)
## Outputs:   one line per software appended to model.log; lines 3 to 5 of
##            model.log copied to the compiler, MPI and NetCDF messenger files
## NOTE(review): the line numbers assume that model.log already holds the
## 'XIOS' and 'NEMOGCM' lines and that the first 3 settings are all defined
get_soft_rel() {
    local ver str

    ## Sourcing environment
    [ -n "${TRUST_JOB_ENV}" ] && . "${TRUST_JOB_ENV}"

    ## Unquoted on purpose: each setting may hold a list, empty ones vanish
    for str in ${TRUST_COMPILE_FORTRAN} ${TRUST_COMPILE_MPI} \
               ${TRUST_COMPILE_NETCDF}  ${TRUST_IO_CDO}; do

        ## Extract version number after searching pattern in PATH env. variable
        ## ('-n ... p': nothing rather than the whole PATH when pattern is absent)
        ver=$( echo "$PATH" | sed -n "s|.*\($str[0-9.]*\).*|\1|p" )

        ## option --version would work for main Fortran compilers and CDO
        ## Whole-word comparison: a regex alternation with an empty TRUST_IO_CDO
        ## would match every name and run MPI/NetCDF labels as commands
        if [[ " ${TRUST_COMPILE_FORTRAN} ${TRUST_IO_CDO} " == *" $str "* ]]; then
            ver=$( $str --version 2>&1 | grep -m1 -oe '\<[0-9. ]*\>' \
                   | xargs echo $str )
        fi

        ## Cleaning characters string to display proper soft name
        ## (unquoted echo squeezes the blanks)
        ver=$( echo $ver | sed 's|[/-]| |g' )

        echo $ver >> model.log
    done

    sed -n 3p model.log >> "${file_cmpf}"
    sed -n 4p model.log >> "${file_lmpi}"
    sed -n 5p model.log >> "${file_ncdf}"
}
	117
	118	get_inputs() {
[8818]	119	# List archive content & extract it by default
[8808]	120	local inputs_list=$( eval "
[8818]	121	for archive in ${TRUST_IO_FORC_TAR}; do
	122	tar -tvf ${TRUST_IO_FORC_PATH}/\$archive >> inputs_list.txt;
	123	done
	124	" )
[8808]	125	local inputs_get=$( eval "
[8818]	126	for archive in ${TRUST_IO_FORC_TAR}; do
	127	tar -vxf ${TRUST_IO_FORC_PATH}/\$archive > /dev/null;
	128	done
	129	" )
	130
[8808]	131	## List & copy files without archive
[8818]	132	if [ -z "${TRUST_IO_FORC_TAR}" ]; then
	133	inputs_list=" ls -lh ${TRUST_IO_FORC_PATH}/* >> inputs_list.txt"
	134	inputs_get=" \cp ${TRUST_IO_FORC_PATH}/* . "
[8797]	135	fi
[5690]	136
[8808]	137	${inputs_list}; ${inputs_get}
[8797]	138
[8818]	139	# for entry in ${TRUST_IO_FORC_PATH}; do
	140	#
	141	# If path to file (assuming it is an archive)
	142	# if [ -e $entry ]; then
	143	# tar -tvf $entry >> inputs_list.txt;
	144	# tar -vxf $entry > /dev/null;
	145	# If path to directory
	146	# elif [ -d $entry ]; then
	147	# inputs_list=" ls -lh ${TRUST_IO_FORC_PATH}/* >> inputs_list.txt"
	148	# inputs_get=" \cp ${TRUST_IO_FORC_PATH}/* . "
	149	# fi
	150
	151	# done
	152
[8808]	153	if [ $( find -name '*.gz' -print -quit ) ]; then
[8797]	154	find . -name '*.gz' -exec gzip -d {} \;
	155	fi
[5644]	156	}
	157
## Compare the input files of the test with those of the benchmark
## Globals:   TRUST_TEST_BENCHMARK (benchmark directory), file_inpt, file_note
## Outputs:   'diff -q' message of each differing file (or 'Same') on stdout,
##            'Same' or 'Different' appended to ${file_inpt},
##            list of differing files appended to temp_${file_note}
## TODO: think of copying initial test
diff_inputs() {
    local dif file
    local files_list='' mesg='Same'

    ## Simple diff ('namelist_*': a bare 'namelist_' matches no file and the
    ## namelists would never be compared)
    for file in cpp_* 'inputs_list.txt' namelist_* *.xml; do

        ## Pass over unmatched patterns and files missing on either side,
        ## in particular useless file omission in benchmark directory
        [[ -e "$file" && -e "${TRUST_TEST_BENCHMARK}/$file" ]] || continue

        dif=$( diff -q "$file" "${TRUST_TEST_BENCHMARK}/$file" )

        if [ -n "$dif" ]; then
            mesg='Different'
            echo "$dif"
            files_list+="$file "
        fi

    done

    [ "$mesg" == 'Same' ] && echo "$mesg"
    echo "$mesg" >> "${file_inpt}"

    ## List different files for web comment
    ## ('if' rather than '&&' so that identical inputs do not return an error)
    if [ -n "${files_list}" ]; then
        echo "Inputs : ${files_list}differ<br>" >> "temp_${file_note}"
    fi
}
	194
## Wait for the end of the submitted job, polling every 30 seconds
## Globals:   TRUST_JOB_STATE   (command printing something while job exists),
##            TRUST_JOB_INFO    (optional command whose output is logged),
##            TRUST_JOB_TIMEOUT (maximum waiting time in seconds),
##            TRUST_JOB_KILL    (command killing the job)
## Outputs:   separator and job information appended to computation.log
## Calls 'get_out 6' after killing the job when the timeout is reached
job_pending() {
    local outline time_elapsed=0 time_increment=30

    printf -v outline '%100s' ''

    sleep ${time_increment}

    ## Append a log file while pending
    while [[ -n $( eval "${TRUST_JOB_STATE}" ) \
             && ${time_elapsed} -lt ${TRUST_JOB_TIMEOUT} ]]; do
        printf "\n%s\n" "${outline// /#}" >> computation.log
        if [ -n "${TRUST_JOB_INFO}" ]; then
            eval "${TRUST_JOB_INFO}" >> computation.log
        fi
        sleep ${time_increment}
        time_elapsed=$(( time_elapsed + time_increment ))
    done

    sleep ${time_increment}

    ## Kill remaining job & stop the test if it's too long
    ## '-ge' and not '-eq': the counter steps over a timeout which is not a
    ## multiple of the polling interval and the job would never be killed
    if [[ -n "${TRUST_JOB_TIMEOUT}" \
          && ${time_elapsed} -ge ${TRUST_JOB_TIMEOUT} ]]; then
        eval "${TRUST_JOB_KILL}" &> /dev/null
        get_out 6
    fi

}
	220
## Compare 'ocean.output' and the stat files with those of the benchmark
## Globals:   TRUST_TEST_BENCHMARK (benchmark directory), file_note
## Outputs:   'diff -q' messages (or 'Same') on stdout,
##            list of differing files appended to temp_${file_note}
## Calls 'get_out 7' when a compared file is missing in the benchmark
## TODO: think of copying initial test
diff_results() {
    local file
    local files_list='' mesg='Same'

    ## Simple diff
    for file in 'ocean.output' *.stat; do

        ## Stat files are optional: without any, the pattern is left as is
        ## and must not be looked for in the benchmark
        [[ "$file" == '*.stat' && ! -e "$file" ]] && continue

        ## Stop if no minimal benchmark files (ocean.output, eventual stat files)
        [ ! -e "${TRUST_TEST_BENCHMARK}/$file" ] && get_out 7

        ## Continue even if it differs
        if ! diff -q "$file" "${TRUST_TEST_BENCHMARK}/$file"; then
            mesg='Different'
            files_list+="$file "
        fi

    done

    [ "$mesg" == 'Same' ] && echo "$mesg"

    ## List different files for web comment
    ## ('if' rather than '&&' so that identical results do not return an error)
    if [ -n "${files_list}" ]; then
        echo "Results : ${files_list}differ<br>" >> "temp_${file_note}"
    fi
}
	247
## Rebuild the decomposed restart files and compare them with the benchmark
## Globals:   TRUST_CFG_NEW (configuration name found in restart filenames),
##            TRUST_DIR_NEMOGCM, TRUST_COMPILE_NPROC, TRUST_TEST_BENCHMARK,
##            file_note (all read)
## Outputs:   '<restart>.nc: N of M records' per differing restart, or 'Same',
##            on stdout; summary appended to temp_${file_note}
## Calls 'get_out X' when a restart cannot be rebuilt
## NOTE(review): 'X' is not one of the numbered codes listed in 'get_out'
## NOTE(review): 'cdo' is called directly although TRUST_IO_CDO exists - confirm
## TODO: think to set the configuration name in the 'namelist_cfg'
## TODO: handle 2 possibilities of 'rebuild_nemo' origin
diff_restarts() {
    local dif filebase filebases ndomain
    local files_list='' dif_sum=0

    ## Nothing to do without any restart file
    [ -n "$( find . -regex '.*_restart.*[0-9]\.nc' -print -quit )" ] || return 0

    ## Find all restart files to rebuild: names without the '_NNNN.nc' suffix
    filebases=$( find . -regextype sed \
                        -regex ".*${TRUST_CFG_NEW}.*_[0-9]\{4\}\.nc" \
                 | sed 's/\(.*\)_.*/\1/' | sort -u )

    for filebase in $filebases; do

        ## Number of subdomain files for this restart
        ndomain=$( find . -regex ".*${filebase}_[0-9]*\.nc" \
                   | wc -l | awk '{print $1}' )

        [ "$ndomain" -eq 0 ] && get_out X

        ## Possibility of remaining decomposed restarts (even after rebuild)
        if "${TRUST_DIR_NEMOGCM}/TOOLS/REBUILD_NEMO/rebuild_nemo" \
               -t "${TRUST_COMPILE_NPROC}" "$filebase" "$ndomain" \
               > /dev/null; then
            rm -f "${filebase}"_[0-9]*.nc
        else
            get_out X
        fi

        ## Skip the comparison if no benchmark file (restart file)
        [ -e "${TRUST_TEST_BENCHMARK}/$filebase.nc" ] || continue

        cdo diffn "$filebase.nc" "${TRUST_TEST_BENCHMARK}/$filebase.nc" \
            > cdo_diff.out 2> /dev/null

        ## Identical if cdo_diff.out is zero size
        [ -s cdo_diff.out ] || continue

        dif=$( grep -om1 '[0-9]* of [0-9]* records' cdo_diff.out )

        ## Sum the leading number of differing records (forced to base 10)
        if [[ "$dif" =~ ^([0-9]+)\ of ]]; then
            files_list+="$filebase "
            echo "$filebase.nc: $dif"
            dif_sum=$(( dif_sum + 10#${BASH_REMATCH[1]} ))
        fi

    done

    ## List modified restart(s) for web comment with sum of differences
    if [ "${dif_sum}" -ne 0 ]; then
        echo "Restarts: ${files_list}${dif_sum} record(s) differ<br>" \
            >> "temp_${file_note}"
    else
        echo 'Same'
    fi

}
	315
## Report the elapsed time of the job
## Globals:   TRUST_JOB_TIME (command printing the elapsed time; function is
##            a no-op when it is empty), file_time
## Outputs:   'Elapsed time: <value>' on stdout, value appended to ${file_time}
get_time() {
    [ -z "${TRUST_JOB_TIME}" ] && return

    ## Interest for checking unusual time computation
    local time_cpu=$( eval "${TRUST_JOB_TIME}" )

    printf "Elapsed time: "
    ## Real pipe to 'tee' so that the value reaches the messenger file;
    ## unquoted echo squeezes the blanks of the command output
    echo ${time_cpu} | tee -a "${file_time}"
}
[5472]	325
## Report the maximum memory usage of the job
## Globals:   TRUST_JOB_RAM_P, TRUST_JOB_RAM_V (commands printing the physical
##            and virtual memory; function is a no-op when both are empty),
##            file_memy
## Outputs:   'Memory max usage (physical/virtual): <P> / <V>' on stdout,
##            '<P> / <V>' appended to ${file_memy}
get_memy() {
    [[ -z "${TRUST_JOB_RAM_P}" && -z "${TRUST_JOB_RAM_V}" ]] && return

    ## Interest for checking unusual memory usage
    local memory_pmax=$( eval "${TRUST_JOB_RAM_P}" )
    local memory_vmax=$( eval "${TRUST_JOB_RAM_V}" )

    printf "Memory max usage (physical/virtual): "
    ## Real pipe to 'tee' so that the values reach the messenger file;
    ## unquoted echo squeezes the blanks of the command outputs
    echo ${memory_pmax}' / '${memory_vmax} | tee -a "${file_memy}"
}
	336
## Pick in 'ocean.output' the first message of the given kind for web comment
## Arguments: $1 - pattern to look for: 'E R R O R' (4 following lines kept)
##                 or any other state such as 'W A R N I N G' (searched at
##                 line start after one blank, 2 following lines kept)
## Globals:   file_note
## Outputs:   message on one line, blanks squeezed, on stdout;
##            message followed by '<br>' appended to temp_${file_note}
comments() {
    local line='' state=$1
    local -a opat words

    if [ -e ocean.output ]; then
        ## 'W A R N I N G' pattern by default
        opat=( -A2 "^ $state" )
        [ "$state" == 'E R R O R' ] && opat=( -A4 "$state" )

        ## Select first occurrence for web comment
        line=$( grep -m1 "${opat[@]}" ocean.output | tr -d '\n' )
    fi

    if [ -n "$line" ]; then
        ## Squeeze the blanks by splitting into words: an unquoted 'echo' would
        ## also expand any '*' of the message into filenames
        read -r -a words <<< "$line"
        printf '%s\n' "${words[*]}"

        ## Message passed as data: a '%' in it must not be read as a format
        printf '%s<br>' "$line" >> "temp_${file_note}"
    fi
}
	353
## Build the summary of the test from the messenger files
## Globals:   file_note, TRUST_TEST_SUMMARY (summary file, overwritten)
## Outputs:   gathered comments appended to ${file_note},
##            summary on stdout and in ${TRUST_TEST_SUMMARY}
log_make() {
    ## Format comments for web: all on one line, without the last '<br>'
    if [ -e "temp_${file_note}" ]; then
        tr -d '\n' < "temp_${file_note}" | sed 's/<br>$//' \
            >> "${file_note}"
    fi

    ## Construct txt file with all messenger files, one ';' separated column each
    paste -d ';' mesg_*.txt | tee "${TRUST_TEST_SUMMARY}"
}
	364
## Publish the summary of the test in production mode (-p|--prod)
## Globals:   TRUST_FLAG_PROD (1 in production mode, else nothing is done),
##            TRUST_TEST_SUMMARY, TRUST_TEST_LOG, TRUST_TEST_MAILING (optional
##            list of recipients), TRUST_FLAG_RESULT, TRUST_FLAG_ERROR,
##            TRUST_CFG_NEW, TRUST_CFG_REF, TRUST_SVN_BRANCH, TRUST_MAIN_USER,
##            TRUST_MAIN_HPCC, TRUST_TEST_DIR, TRUST_TEST_BENCHMARK,
##            TRUST_TEST_BACKUP (all read)
## Outputs:   summary appended to ${TRUST_TEST_LOG}; when the test has FAILED
##            and recipients are set, 'trusting.mail' is written and sent
prod_publish() {
    ## Production mode only (an unset flag counts as 0 instead of an error)
    [[ "${TRUST_FLAG_PROD:-0}" -eq 1 ]] || return 0

    ## Create or append trusting logfile: whole summary with its header line
    ## the first time, last line only afterwards
    if [ -f "${TRUST_TEST_LOG}" ]; then
        tail -n 1 "${TRUST_TEST_SUMMARY}" >> "${TRUST_TEST_LOG}"
    else
        cat "${TRUST_TEST_SUMMARY}" >> "${TRUST_TEST_LOG}"
    fi

    ## Send mail only when FAILED
    if [[ -n "${TRUST_TEST_MAILING}" \
          && "${TRUST_FLAG_RESULT}" == 'FAILED' ]]; then

        ## Content
        cat <<END_MAIL > trusting.mail
Dear all,


The following trusting sequence has not completed successfully:

Testing configuration ${TRUST_CFG_NEW} based on ${TRUST_CFG_REF}.
User installation ${TRUST_MAIN_USER}
HPC environment ${TRUST_MAIN_HPCC}

Here is the running environment summary:
$(cat model.log)

For more details, look into the testing folder at:
${TRUST_TEST_DIR}
An archive is also available to share the questionable configuration:
${TRUST_TEST_BENCHMARK}/${TRUST_TEST_BACKUP}

END_MAIL

        ## Send with detailed subject
        ## (recipients unquoted on purpose: one argument per address)
        mail -s "[NEMO Trusting][${TRUST_CFG_REF}][${TRUST_SVN_BRANCH}] ${TRUST_FLAG_RESULT} ${TRUST_FLAG_ERROR}" \
             ${TRUST_TEST_MAILING} \
             < trusting.mail
    fi
}
	413
## Replace the 2nd line (value under the title) of a messenger file
## Arguments: $1 - messenger file, $2 - new value, written literally
_set_messenger_value() {
    local file=$1

    [ -f "$file" ] || return 0

    ## Value passed through the environment: no character of it is
    ## interpreted, unlike in a sed replacement ('/', '&', '\')
    VALUE=$2 awk 'NR == 2 { print ENVIRON["VALUE"]; next } { print }' "$file" \
        > "$file.tmp" && mv "$file.tmp" "$file"
}

## End the test: final status, summary, optional backup and notification
## Arguments: $1 - error code: 1-2 compilation, 3-4 submission, 5-6 computing,
##                 7-8 results, anything else 'Unknown error'
##                 (ignored when TRUST_FLAG_RESULT is not 'FAILED')
## Globals:   TRUST_FLAG_ERROR (written), TRUST_FLAG_RESULT, TRUST_FLAG_PROD,
##            TRUST_TEST_DIR, TRUST_JOB_TIMEOUT, TRUST_TEST_BENCHMARK,
##            TRUST_TEST_BACKUP, TRUST_DIR_NEMOGCM, TRUST_CFG_NEW,
##            file_rslt, file_stat (read)
## Outputs:   progress messages on stdout, 2nd line of the result and status
##            messenger files, backup archive when FAILED in production mode
## Never returns: always ends the script with 'exit 0'
get_out() {
    local time_step=0

    TRUST_FLAG_ERROR=$1

    printf "\n\nEnd of test\n"

    ## Go to the test directory (in case of compilation error)
    cd "${TRUST_TEST_DIR}" \
        || echo "Cannot change directory to '${TRUST_TEST_DIR}'" >&2

    if [ "${TRUST_FLAG_RESULT}" == 'FAILED' ]; then
        echo 'Failure'

        ## Error identification
        case "${TRUST_FLAG_ERROR}" in
            ## Compilation
            1) TRUST_FLAG_ERROR='XIOS compilation failed'  ;;
            2) TRUST_FLAG_ERROR='NEMO compilation failed'  ;;
            ## Submission
            3) TRUST_FLAG_ERROR='Missing input files'      ;;
            4) TRUST_FLAG_ERROR='Job submission error'     ;;
            ## Computing
            5) TRUST_FLAG_ERROR='Crashed at time step'     ;;
            6) TRUST_FLAG_ERROR='Exceeded time limit'      ;;
            ## Results
            7) TRUST_FLAG_ERROR='Missing previous outputs' ;;
            8) TRUST_FLAG_ERROR='New outputs differ'       ;;
            ## Other (unquoted: a quoted '*' only matches a literal asterisk)
            *) TRUST_FLAG_ERROR='Unknown error'            ;;
        esac

    else
        echo 'Success' && TRUST_FLAG_ERROR='Code is reliable'
    fi

    ## Eventual comments from ocean.output
    if [ "${TRUST_FLAG_ERROR}" == 'Crashed at time step' ]; then
        comments 'E R R O R'
        [ -e time.step ] && time_step=$( cat time.step )
        TRUST_FLAG_ERROR+=' '$time_step
    else
        comments 'W A R N I N G'

        if [ "${TRUST_FLAG_ERROR}" == 'Exceeded time limit' ]; then
            TRUST_FLAG_ERROR+=' '$(( TRUST_JOB_TIMEOUT / 3600 ))'h'
        fi

    fi

    ## Last messenger files
    ## (result taken from TRUST_FLAG_RESULT, the variable used everywhere else)
    _set_messenger_value "${file_rslt}" "${TRUST_FLAG_RESULT}"
    _set_messenger_value "${file_stat}" "${TRUST_FLAG_ERROR}"

    ## Save tested configuration if trusting failed in production mode (-p|--prod)
    if [[ "${TRUST_FLAG_RESULT}" == 'FAILED' && "${TRUST_FLAG_PROD:-0}" -eq 1 ]]; then
        echo "Creating archive ${TRUST_TEST_BACKUP} under ${TRUST_TEST_BENCHMARK}"
        tar -czf "${TRUST_TEST_BENCHMARK}/${TRUST_TEST_BACKUP}" * \
            -C "${TRUST_DIR_NEMOGCM}/CONFIG/${TRUST_CFG_NEW}/MY_SRC" . \
            -C "${TRUST_DIR_NEMOGCM}/CONFIG/${TRUST_CFG_NEW}" \
            "cpp_${TRUST_CFG_NEW}.fcm"
    fi

    ## Logfile construct & eventual sending of notification email
    printf "\nTrusting digest:\n----------------\n"
    log_make
    prod_publish

    exit 0
}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: