Context Navigation

trusting_func.sh @ 8859

Last change on this file since 8859 was 8859, checked in by nicolasmartin, 6 years ago

Continuation of global refactoring of Trusting tool

Introduce a new 'dev' mode between 'debug' and 'prod' modes to skip XIOS compilation from scratch and work solely in a testing directory (thanks Andrew for the suggestion)
Switch cfg files from 'svn:mime-type=text/x-shellscript' to 'svn:executable' trying to get syntax highlighting under Trac
Bugfixes: correct XIOS branch 'XIOS_DEV_CMIP6' for ORCA1 and remove remaining 'TRUST_DIR_BENCHMARK' (replace by TRUST_TEST_BENCHMARK)
Improve the workflow displayed in the terminal

Property svn:executable set to *

File size: 16.9 KB

Line
1	#!/bin/bash
2
3
4	##--------------------------------------------------------------------------------
5	## Messenger filenames
6	##--------------------------------------------------------------------------------
7
## One messenger file per column of the test summary: init() writes the column
## title on the first line, the value for the current test goes on the second
## line, and log_make() pastes all 'mesg_*.txt' files together with ';'.
## The numeric infix fixes the column order in the pasted summary.
file_date=mesg_01_date.txt
file_rslt=mesg_02_result.txt
file_stat=mesg_03_status.txt
file_nemo=mesg_04_nemo.txt
file_xios=mesg_05_xios.txt
file_cmpf=mesg_06_compiler.txt
file_lmpi=mesg_07_mpi.txt
file_ncdf=mesg_08_netcdf.txt
file_inpt=mesg_09_inputs.txt
file_time=mesg_10_time.txt
file_memy=mesg_11_memory.txt
file_note=mesg_12_comments.txt
14
15
16	##--------------------------------------------------------------------------------
17	## Functions in order of use
18	##--------------------------------------------------------------------------------
19
## Print a step title underlined with dashes, surrounded by blank lines.
## Arguments: $1 - title to display
## Outputs:   "\n<title>\n<dashes>\n\n" on stdout
step() {
  local title=$1
  local outline

  ## Underline has exactly the title length: 'wc -c' used to count the
  ## trailing newline (one dash too many) and bytes instead of characters
  printf -v outline '%*s' "${#title}" ''

  printf '\n%s\n%s\n\n' "$title" "${outline// /-}"
}
26
27
28	##
29	##--------------------------------------------------------------------------------
30
## Create the testing and benchmark directories, move into the testing one and
## initialise every messenger file with its column title.
## Globals:  TRUST_TEST_DIR, TRUST_TEST_BENCHMARK, TRUST_FLAG_RESULT,
##           TRUST_TEST_DATE and the file_* messenger names (all read)
## Outputs:  the testing directory path on stdout
## Exits through 'get_out B' if a directory cannot be created or entered.
init() {
  local dat

  mkdir -p "${TRUST_TEST_DIR}" "${TRUST_TEST_BENCHMARK}" || get_out B
  ## Without this check the messenger files would be written in the caller's
  ## current directory
  cd "${TRUST_TEST_DIR}" || get_out B
  echo "${TRUST_TEST_DIR}"

  echo 'Date'             > "${file_date}"
  echo 'Result'           > "${file_rslt}"
  echo 'Status'           > "${file_stat}"
  echo 'NEMOGCM'          > "${file_nemo}"
  echo 'XIOS'             > "${file_xios}"
  echo 'Fortran'          > "${file_cmpf}"
  echo 'MPI'              > "${file_lmpi}"
  echo 'NetCDF'           > "${file_ncdf}"
  echo 'Inputs'           > "${file_inpt}"
  echo 'Time'             > "${file_time}"
  echo 'RAM (Phy./Virt.)' > "${file_memy}"
  echo 'Comments'         > "${file_note}"

  ## Current result flag with 'Unknown error' status by default
  echo "${TRUST_FLAG_RESULT}" >> "${file_rslt}"
  echo 'Unknown error'        >> "${file_stat}"

  ## UTC time zone for timestamping
  dat=$( date -ud "${TRUST_TEST_DATE}" +"%F %R %Z" )
  echo "$dat" >> "${file_date}"
}
55
56
57	##
58	##--------------------------------------------------------------------------------
59
## Run the SVN action on each essential NEMO directory and record the highest
## 'Last Changed Rev' found.
## Globals:  TRUST_SVN_UP (space-separated directory list), TRUST_SVN_ACTION,
##           TRUST_SVN_NEMOGCM, TRUST_SVN_REPO, file_nemo (all read)
## Outputs:  each directory name on stdout; 'NEMOGCM <rev>' appended to
##           model.log; an HTML changeset link appended to ${file_nemo}
## Exits through 'get_out C' when the SVN action fails.
get_nemo_rev() {
  local dir rev_loc
  local rev=0

  ## Loop on essential NEMO directories (word splitting is intended)
  for dir in ${TRUST_SVN_UP}; do

    echo "$dir"
    ## TRUST_SVN_ACTION holds a command with its options: left unquoted
    ${TRUST_SVN_ACTION} "${TRUST_SVN_NEMOGCM}/$dir" || get_out C

    rev_loc=$( svn info "${TRUST_SVN_NEMOGCM}/$dir" \
      | awk '/Last Changed Rev/ {print $NF}' )

    ## Keep last rev. nb; a missing or non-numeric answer is ignored instead of
    ## breaking the numeric test
    if [[ ${rev_loc} =~ ^[0-9]+$ ]] && [ "${rev_loc}" -gt "$rev" ]; then
      rev=${rev_loc}
    fi

  done

  echo "NEMOGCM $rev" >> model.log
  echo "<a href=\"${TRUST_SVN_REPO}/nemo/changeset/$rev\" target=\"_blank\">$rev</a>" \
    >> "${file_nemo}"
}
80
81
82	##
83	##--------------------------------------------------------------------------------
84
## Record the release of the Fortran compiler, MPI library, NetCDF library and
## CDO used for the test.
## Globals:  TRUST_JOB_ENV (file to source), TRUST_COMPILE_FORTRAN,
##           TRUST_COMPILE_MPI, TRUST_COMPILE_NETCDF, TRUST_IO_CDO, PATH,
##           file_cmpf, file_lmpi, file_ncdf (all read)
## Outputs:  one line per defined software appended to model.log, the matching
##           value appended to its messenger file, a formatted dump of
##           model.log on stdout and the sorted environment in env.log
get_soft_rel() {
  local ver str idx
  local -a softs mesgs

  ## Sourcing environment (its messages are deliberately discarded)
  if [ -n "${TRUST_JOB_ENV}" ]; then
    . "${TRUST_JOB_ENV}" > /dev/null 2>&1
  fi

  ## Each software is tied to its own messenger file (none for CDO) so that an
  ## undefined one cannot shift the values of the others
  softs=( "${TRUST_COMPILE_FORTRAN}" "${TRUST_COMPILE_MPI}" \
          "${TRUST_COMPILE_NETCDF}"  "${TRUST_IO_CDO}" )
  mesgs=( "${file_cmpf}" "${file_lmpi}" "${file_ncdf}" '' )

  for idx in 0 1 2 3; do
    str=${softs[idx]}
    ver=''

    if [ -n "$str" ]; then

      ## Extract version number after searching pattern in PATH env. variable;
      ## 'sed -n ...p' yields nothing (not the whole PATH) when it is absent
      ver=$( printf '%s\n' "$PATH" | sed -n "s|.*\(${str}[0-9.]*\).*|\1|p" )

      ## option --version would work for main Fortran compilers and CDO
      ## (exact name comparison: an empty name no longer matches everything)
      if [[ $str == "${TRUST_COMPILE_FORTRAN}" || $str == "${TRUST_IO_CDO}" ]]; then
        ver=$( "$str" --version 2>&1 | grep -m1 -oe '\<[0-9. ]*\>' \
          | xargs echo "$str" )
      fi

      ## Cleaning characters string to display proper soft name; fall back on
      ## the bare name when no version could be found
      ver=$( printf '%s\n' "${ver:-$str}" | sed 's|[/-]| |g' | tr -s ' ' )

      echo "$ver" >> model.log
    fi

    ## Always add a (possibly empty) value to keep messenger files aligned
    if [ -n "${mesgs[idx]}" ]; then
      echo "$ver" >> "${mesgs[idx]}"
    fi
  done

  awk '{printf "%-20s %s %s\n", $1, $2, $3}' model.log
  env | sort > env.log
}
123
124
125	##
126	##--------------------------------------------------------------------------------
127
## Log the XIOS revision, then build XIOS with 'make_xios'.
## Globals:  TRUST_IO_XIOS (working copy), TRUST_IO_XIOS_MODE, TRUST_MAIN_HPCC,
##           TRUST_COMPILE_NPROC, TRUST_MAIN_STDOUT (redirection appended to
##           the build command), TRUST_TEST_DIR, TRUST_SVN_REPO, file_xios
## Outputs:  'XIOS branch <name> rev. <rev>' on stdout and in model.log, an
##           HTML changeset link in ${file_xios}, 'Success' when the library
##           exists after the build
## Exits through 'get_out D' if the working copy cannot be entered or if
## lib/libxios.a is missing after the build.
compile_xios() {
  local rev

  cd "${TRUST_IO_XIOS}" || get_out D

  rev=$( svn info | awk '/Last Changed Rev/ {print $NF}' )
  printf 'XIOS branch %s rev. %s\n' \
    "$( basename "${TRUST_IO_XIOS}" )" "$rev" \
    | tee -a "${TRUST_TEST_DIR}/model.log"
  ## The closing quote of the href attribute was missing
  echo "<a href=\"${TRUST_SVN_REPO}/ioserver/changeset/$rev\" target=\"_blank\">$rev</a>" \
    >> "${TRUST_TEST_DIR}/${file_xios}"

  ## eval is required: TRUST_MAIN_STDOUT carries a shell redirection
  eval "./make_xios ${TRUST_IO_XIOS_MODE} --arch ${TRUST_MAIN_HPCC} \
    --job ${TRUST_COMPILE_NPROC} ${TRUST_MAIN_STDOUT}"

  if [ -e ./lib/libxios.a ]; then
    echo 'Success'
  else
    get_out D
  fi
}
146
147
148	##
149	##--------------------------------------------------------------------------------
150
## Build the tested NEMO configuration with 'makenemo'.
## Globals:  TRUST_SVN_NEMOGCM, TRUST_FLAG_DEBUG, TRUST_FLAG_DEV,
##           TRUST_CFG_NEW, TRUST_CFG_REF, TRUST_MAIN_HPCC,
##           TRUST_COMPILE_NPROC, TRUST_CFG_KEY_ADD, TRUST_CFG_KEY_DEL,
##           TRUST_MAIN_STDOUT (redirection appended to the build command)
## Outputs:  'Success' when the executable exists after the build
## Exits through 'get_out E' if the CONFIG directory cannot be entered or if
## nemo.exe is missing after the build.
compile_nemo() {
  ## Never run './makenemo' from an unexpected directory
  cd "${TRUST_SVN_NEMOGCM}/CONFIG" || get_out E

  ## Recompiling from scratch if not in debug or dev mode: an existing
  ## configuration is cleaned first, answering 'y' to the confirmation
  if [[ ${TRUST_FLAG_DEBUG} == 'false' && ${TRUST_FLAG_DEV} == 'false' \
     && -d ${TRUST_CFG_NEW} ]]; then
    ./makenemo -n "${TRUST_CFG_NEW}" clean_config > /dev/null <<< 'y'
  fi

  ## eval is required: TRUST_MAIN_STDOUT carries a shell redirection and the
  ## key options may embed quoted lists
  eval "./makenemo -n ${TRUST_CFG_NEW} -r ${TRUST_CFG_REF} \
    -m ${TRUST_MAIN_HPCC} -j ${TRUST_COMPILE_NPROC} \
    ${TRUST_CFG_KEY_ADD} ${TRUST_CFG_KEY_DEL} \
    ${TRUST_MAIN_STDOUT}"

  if [ -e "./${TRUST_CFG_NEW}/BLD/bin/nemo.exe" ]; then
    echo 'Success'
  else
    get_out E
  fi
}
175
176
177	##
178	##--------------------------------------------------------------------------------
179
## Gather in the testing directory the forcing files, the CPP keys file and the
## namelists/XML files of the tested configuration.
## Globals:  TRUST_TEST_DIR, TRUST_IO_FORC_PATH (forcing directory),
##           TRUST_IO_FORC_TAR (optional space-separated archive names found
##           under TRUST_IO_FORC_PATH), TRUST_SVN_NEMOGCM, TRUST_CFG_NEW
## Outputs:  a one-line description of the forcing source on stdout; the
##           listing of the forcing files appended to inputs_list.txt
## Returns:  1 if the testing directory cannot be entered, otherwise the
##           status of the last copy
get_inputs() {
  local archive

  cd "${TRUST_TEST_DIR}" || return 1

  ## Test forcing directory: unset or empty both mean 'no forcing' (an empty
  ## value used to list and copy the content of '/')
  if [ -z "${TRUST_IO_FORC_PATH}" ]; then

    echo 'No forcing files needed'

  ## Test forcing archive
  elif [ -n "${TRUST_IO_FORC_TAR}" ]; then

    echo "Forcing archive(s): ${TRUST_IO_FORC_TAR}"

    ## List archive content & extract it by default
    for archive in ${TRUST_IO_FORC_TAR}; do
      tar -tvf "${TRUST_IO_FORC_PATH}/$archive" >> inputs_list.txt
    done
    for archive in ${TRUST_IO_FORC_TAR}; do
      tar -vxf "${TRUST_IO_FORC_PATH}/$archive" > /dev/null
    done

  else

    echo "Forcing directory: ${TRUST_IO_FORC_PATH}"

    ## List & copy files without archive
    ls -lh "${TRUST_IO_FORC_PATH}"/* >> inputs_list.txt
    command cp "${TRUST_IO_FORC_PATH}"/* .

  fi

  ## Uncompress any gzipped input (no-op when there is none)
  find . -name '*.gz' -exec gzip -d {} \;

  cp "${TRUST_SVN_NEMOGCM}/CONFIG/${TRUST_CFG_NEW}"/cpp_* .
  ## Files of EXP00 whose path ends with _cfg, _ref, opa or (any char
  ## followed by) 'in' / 'xml'
  find "${TRUST_SVN_NEMOGCM}/CONFIG/${TRUST_CFG_NEW}/EXP00" \
    -regex '.*\(_cfg\|.in\|opa\|_ref\|.xml\)' \
    -exec cp {} . \;
}
230
231
232	##
233	##--------------------------------------------------------------------------------
234
## Compare the input files of the current test with those kept in the
## benchmark directory.
## Globals:  TRUST_TEST_BENCHMARK, file_inpt, file_note (all read)
## Outputs:  'diff -q' messages or 'Same' on stdout; 'Same' or 'Different'
##           appended to ${file_inpt}; the list of differing files appended
##           to temp_${file_note} for the web comment
diff_inputs() {
  local dif file
  local files_list='' mesg='Same'

  ###################################
  ## Think of copying initial test ##
  ###################################

  ## Simple diff
  for file in cpp_* 'inputs_list.txt' namelist_* *.xml; do

    ## Pass over files missing from the benchmark directory (this also skips
    ## glob patterns that matched nothing)
    [ -e "${TRUST_TEST_BENCHMARK}/$file" ] || continue

    dif=$( diff -q "$file" "${TRUST_TEST_BENCHMARK}/$file" )

    if [ -n "$dif" ]; then
      mesg='Different'
      echo "$dif"
      files_list+="$file "
    fi

  done

  if [ "$mesg" == 'Same' ]; then
    echo "$mesg"
  fi
  echo "$mesg" >> "${file_inpt}"

  ## List different files for web comment
  if [ -n "${files_list}" ]; then
    echo "Inputs : ${files_list}differ<br>" >> "temp_${file_note}"
  fi
}
271
272
273	##
274	##--------------------------------------------------------------------------------
275
## Copy the job script in the testing directory and submit the job.
## Globals:  TRUST_JOB_SCRIPT, TRUST_TEST_DIR, TRUST_JOB_SUBMIT (command line
##           evaluated to submit the job) (read); TRUST_JOB_ID (written with
##           the stdout of the submission command)
## Outputs:  'Success (job ID <id>)' on stdout
## Exits through 'get_out G' when the submission command fails.
job_submit() {
  ## Copy the submitting script to testing folder
  ## NOTE(review): TRUST_JOB_SCRIPT is left unquoted in case it lists several
  ## files - confirm against the configuration files
  cp ${TRUST_JOB_SCRIPT} "${TRUST_TEST_DIR}"

  if TRUST_JOB_ID=$( eval "${TRUST_JOB_SUBMIT}" ); then
    ## Quoted: a multi-word answer must not repeat the message for each word
    printf 'Success (job ID %s)\n' "${TRUST_JOB_ID}"
  else
    get_out G
  fi
}
283
284
285	##
286	##--------------------------------------------------------------------------------
287
## Wait for the end of the submitted job, polling its state every 30 seconds.
## Globals:  TRUST_JOB_STATE (command printing something while the job is
##           alive), TRUST_JOB_INFO (optional command whose output is logged),
##           TRUST_JOB_TIMEOUT (seconds), TRUST_JOB_KILL (command) (all read)
## Outputs:  a '#' separator and the job information appended to
##           computation.log at each poll
## Exits through 'get_out I' after killing the job when the timeout is reached.
job_pending() {
  local outline time_elapsed=0 time_increment=30

  printf -v outline '%100s' ''
  outline=${outline// /#}

  sleep "${time_increment}"

  ## Append a log file while pending
  while [[ -n $( eval "${TRUST_JOB_STATE}" ) \
        && ${time_elapsed} -lt ${TRUST_JOB_TIMEOUT} ]]; do
    printf '\n%s\n' "$outline" >> computation.log
    if [ -n "${TRUST_JOB_INFO}" ]; then
      eval "${TRUST_JOB_INFO}" >> computation.log
    fi
    sleep "${time_increment}"
    time_elapsed=$(( time_elapsed + time_increment ))
  done

  sleep "${time_increment}"

  ## Kill remaining job & stop the test if it's too long
  ## '-ge' and not '-eq': the counter steps over any timeout that is not a
  ## multiple of the polling period
  if [ "${time_elapsed}" -ge "${TRUST_JOB_TIMEOUT}" ]; then
    eval "${TRUST_JOB_KILL}" &> /dev/null
    get_out I
  fi

}
313
314
315	##
316	##--------------------------------------------------------------------------------
317
## Report the computing time and the maximum memory usage of the job.
## Globals:  TRUST_JOB_TIME, TRUST_JOB_RAM_P, TRUST_JOB_RAM_V (optional
##           commands printing the figures), file_time, file_memy (all read)
## Outputs:  the figures on stdout and appended to their messenger files
job_perfs() {
  local time_cpu memory_pmax memory_vmax

  if [ -n "${TRUST_JOB_TIME}" ]; then

    ## Interest for checking unusual time computation
    time_cpu=$( eval "${TRUST_JOB_TIME}" )

    printf "Time: "
    ## Unquoted on purpose: folds the answer on a single line, as needed by
    ## the one-value-per-line messenger file
    # shellcheck disable=SC2086
    echo ${time_cpu} | tee -a "${file_time}"

  fi

  if [[ -n "${TRUST_JOB_RAM_P}" && -n "${TRUST_JOB_RAM_V}" ]]; then

    ## Interest for checking unusual memory usage
    memory_pmax=$( eval "${TRUST_JOB_RAM_P}" )
    memory_vmax=$( eval "${TRUST_JOB_RAM_V}" )

    printf "Max memory usage (physical/virtual): "
    # shellcheck disable=SC2086
    echo ${memory_pmax}' / '${memory_vmax} | tee -a "${file_memy}"

  fi
}
339
340
341	##
342	##--------------------------------------------------------------------------------
343
## Check in the current directory that the run went through: 'time.step' must
## exist and 'ocean.output' must not contain any 'E R R O R' banner.
## Outputs:  'Success' on stdout when both conditions hold
## Exits through 'get_out H' otherwise.
job_state() {
  if [[ ! -e time.step ]] || grep -q 'E R R O R' ocean.output; then
    get_out H
  else
    echo 'Success' ## Must be reviewed
  fi
}
351
352
353	##
354	##--------------------------------------------------------------------------------
355
## Compare 'ocean.output' and the '*.stat' files of the current test with the
## benchmark ones.
## Globals:  TRUST_TEST_BENCHMARK, file_note (read); TRUST_FLAG_RESULT
##           (written: 'OK' when every file is identical, 'FAILED' otherwise)
## Outputs:  'diff -q' messages or 'Same' on stdout; the list of differing
##           files appended to temp_${file_note} for the web comment
## Exits through 'get_out J' when a benchmark file is missing.
diff_results() {
  local file
  local files_list='' mesg='Same'

  ###################################
  ## Think of copying initial test ##
  ###################################

  ## Simple diff
  for file in 'ocean.output' *.stat; do

    ## Stat files are optional: skip the glob pattern left as is when there
    ## is none
    [[ $file == 'ocean.output' || -e $file ]] || continue

    ## Stop if no minimal benchmark files (ocean.output, eventual stat files);
    ## the flag must say 'FAILED' for get_out to report the error
    if [ ! -e "${TRUST_TEST_BENCHMARK}/$file" ]; then
      TRUST_FLAG_RESULT='FAILED'
      get_out J
    fi

    ## Continue even if it differs
    if ! diff -q "$file" "${TRUST_TEST_BENCHMARK}/$file"; then
      mesg='Different'
      files_list+="$file "
    fi

  done

  ## Test is good only once every result file has been found identical
  if [ "$mesg" == 'Same' ]; then
    TRUST_FLAG_RESULT='OK'
    echo "$mesg"
  else
    TRUST_FLAG_RESULT='FAILED'
  fi

  ## List different files for web comment
  if [ -n "${files_list}" ]; then
    echo "Results : ${files_list}differ<br>" >> "temp_${file_note}"
  fi
}
385
386
387	##
388	##--------------------------------------------------------------------------------
389
## Rebuild the decomposed restart files and compare them with the benchmark
## ones using 'cdo diffn'.
## Globals:  TRUST_CFG_NEW, TRUST_SVN_NEMOGCM, TRUST_COMPILE_NPROC,
##           TRUST_TEST_BENCHMARK, file_note (read); TRUST_FLAG_RESULT (read,
##           and set to 'FAILED' on rebuild error or differing restarts)
## Outputs:  '<file>.nc: <n> of <m> records' per differing restart, or 'Same',
##           on stdout; a summary appended to temp_${file_note}
## Exits through 'get_out K' when a rebuild fails and through 'get_out L' when
## the test is flagged 'FAILED' at the end of the comparisons.
diff_restarts() {
  local dif filebase filebases ndomain
  local files_list='' dif_sum=0

  ## Find all restart files to rebuild
  if [ -n "$( find . -regex ".*_restart.*[0-9]\.nc" -print -quit )" ]; then
    ###############################################################
    ## Think to set the configuration name in the 'namelist_cfg' ##
    ###############################################################
    ## Strip the '_<domain>.nc' suffix to get one base name per restart
    filebases=$( find . -regextype sed \
        -regex ".*${TRUST_CFG_NEW}.*_[0-9]\{4\}\.nc" \
      | sed 's/\(.*\)_.*/\1/' | sort -u )

    for filebase in $filebases; do

      ndomain=$( find . -regex ".*${filebase}_[0-9]*.nc" \
        | wc -l | awk '{print $1}' )

      #####################################################
      ## Handle 2 possibilities of 'rebuild_nemo' origin ##
      #####################################################

      ## Possibility of remaining decomposed restarts (even after rebuild)
      if "${TRUST_SVN_NEMOGCM}/TOOLS/REBUILD_NEMO/rebuild_nemo" \
           -t "${TRUST_COMPILE_NPROC}" "$filebase" "$ndomain" \
           > /dev/null; then
        rm -f "${filebase}"_[0-9]*.nc
      else
        ## The flag must say 'FAILED' for get_out to report the error
        TRUST_FLAG_RESULT='FAILED'
        get_out K
      fi

      ## Compare only when the benchmark holds the same restart file
      if [ -e "${TRUST_TEST_BENCHMARK}/$filebase.nc" ]; then

        cdo diffn "$filebase.nc" "${TRUST_TEST_BENCHMARK}/$filebase.nc" \
          > cdo_diff.out 2> /dev/null

        ## Identical if cdo_diff.out is zero size
        [ -s cdo_diff.out ] || continue

        dif=$( grep -om1 '[0-9]* of [0-9]* records' cdo_diff.out )

        if [ -n "$dif" ]; then
          files_list+="$filebase "
          echo "$filebase.nc: $dif"
          ## Add the leading number of differing records (base 10 enforced)
          if [[ $dif =~ ^([0-9]+) ]]; then
            dif_sum=$(( dif_sum + 10#${BASH_REMATCH[1]} ))
          fi
        fi

      fi

    done

    ## List modified restart(s) for web comment with sum of differences
    if [ "${dif_sum}" -ne 0 ]; then
      echo "Restarts: ${files_list}${dif_sum} record(s) differ<br>" \
        >> "temp_${file_note}"
      TRUST_FLAG_RESULT='FAILED'
    else
      echo 'Same'
    fi

  fi

  if [ "${TRUST_FLAG_RESULT}" == 'FAILED' ]; then
    get_out L
  fi
}
455
456
457	##
458	##--------------------------------------------------------------------------------
459
## Pick up the first message of a given kind in 'ocean.output' (current
## directory) for the web comment.
## Arguments: $1 - banner to search: 'E R R O R' (anywhere in a line, kept with
##                 the 4 following lines) or any other banner such as
##                 'W A R N I N G' (right after a leading space at line start,
##                 kept with the 2 following lines)
## Globals:   file_note (read)
## Outputs:   the message on one line on stdout, and followed by '<br>' in
##            temp_${file_note}; nothing if there is no such message
comments() {
  local state=$1 line=''
  local -a grep_opts words

  if [ -e ocean.output ]; then
    ## 'W A R N I N G' pattern by default
    grep_opts=( -A2 -e "^ $state" )
    if [ "$state" == 'E R R O R' ]; then
      grep_opts=( -A4 -e "$state" )
    fi

    ## Select first occurrence for web comment, folded on a single line
    line=$( grep -m1 "${grep_opts[@]}" ocean.output | tr -d '\n' )
  fi

  if [ -n "$line" ]; then
    ## Terminal display with blanks squeezed (no globbing of the log text)
    read -r -a words <<< "$line"
    printf '%s\n' "${words[*]}"
    ## The log text is data, never a printf format ('%' is common in it)
    printf '%s<br>' "$line" >> "temp_${file_note}"
  fi
}
476
477
478	##
479	##--------------------------------------------------------------------------------
480
## Build the summary of the test from the messenger files.
## Globals:  file_note, TRUST_TEST_SUMMARY (read)
## Outputs:  all 'mesg_*.txt' files pasted side by side with ';' as separator,
##           on stdout and in ${TRUST_TEST_SUMMARY}
log_make() {
  local note

  ## Format comments for web: everything on one line, last '<br>' dropped
  if [ -e "temp_${file_note}" ]; then
    note=$( tr -d '\n' < "temp_${file_note}" )
    note=${note%'<br>'}
    printf '%s\n' "$note" >> "${file_note}"
  fi

  ## Construct txt file with all messenger files
  paste -d ';' mesg_*.txt | tee "${TRUST_TEST_SUMMARY}"
}
491
492
493	##
494	##--------------------------------------------------------------------------------
495
## In production mode, record the test in the trusting logfile and notify the
## mailing list when the test has failed.
## Globals:  TRUST_FLAG_PROD, TRUST_TEST_LOG, TRUST_TEST_SUMMARY,
##           TRUST_TEST_MAILING (space-separated recipients),
##           TRUST_FLAG_RESULT, TRUST_FLAG_ERROR, TRUST_CFG_NEW, TRUST_CFG_REF,
##           TRUST_MAIN_USER, TRUST_MAIN_HPCC, TRUST_SVN_BRANCH,
##           TRUST_TEST_DIR, TRUST_TEST_BENCHMARK, TRUST_TEST_BACKUP (all read)
## Outputs:  ${TRUST_TEST_LOG} appended; trusting.mail written and sent with
##           'mail' when a notification is due
prod_publish() {
  ## Production mode (-p|--prod)
  if [ "${TRUST_FLAG_PROD}" == 'true' ]; then

    ## Create or append trusting logfile: the titles line of the summary is
    ## only needed when the logfile does not exist yet
    if [ -f "${TRUST_TEST_LOG}" ]; then
      tail -n 1 "${TRUST_TEST_SUMMARY}" >> "${TRUST_TEST_LOG}"
    else
      cat "${TRUST_TEST_SUMMARY}" >> "${TRUST_TEST_LOG}"
    fi

    ## Send mail only when FAILED
    if [[ -n "${TRUST_TEST_MAILING}" \
       && ${TRUST_FLAG_RESULT} == 'FAILED' ]]; then

      ## Content
      cat > trusting.mail <<END_MAIL
Dear all,


The following trusting sequence has not completed successfully:

Testing configuration ${TRUST_CFG_NEW} based on ${TRUST_CFG_REF}.
User installation ${TRUST_MAIN_USER}
HPC environment ${TRUST_MAIN_HPCC}

Here is the running environment summary:
$( cat model.log )

For more details, look into the testing folder at:
${TRUST_TEST_DIR}
An archive is also available to share the questionable configuration:
${TRUST_TEST_BENCHMARK}/${TRUST_TEST_BACKUP}

END_MAIL

      ## Send with detailed subject (recipients list deliberately unquoted)
      # shellcheck disable=SC2086
      mail -s "[NEMO Trusting][${TRUST_CFG_REF}][${TRUST_SVN_BRANCH}] ${TRUST_FLAG_RESULT} ${TRUST_FLAG_ERROR}" \
        ${TRUST_TEST_MAILING} \
        < trusting.mail
    fi

  fi
}
544
545
546	##
547	##--------------------------------------------------------------------------------
548
## Replace the second line (the value) of a messenger file.
## Arguments: $1 - messenger file, $2 - new value (taken literally)
_mesg_set_value() {
  local mesg_file=$1
  local tmp_file="$1.tmp.$$"

  ## The value goes through the environment so that '/', '&' or '\' in it
  ## cannot be interpreted, as they were in a sed replacement
  if MESG_VALUE=$2 awk 'NR == 2 { print ENVIRON["MESG_VALUE"]; next } { print }' \
       "${mesg_file}" > "${tmp_file}"; then
    mv -f "${tmp_file}" "${mesg_file}"
  else
    rm -f "${tmp_file}"
  fi
}

## End the test: identify the error if any, finalise the messenger files, save
## the configuration of a failed production test, publish, then exit.
## Arguments: $1 - optional error code (letter 'A' to 'L'); any other value is
##                 reported as 'Unknown error'
## Globals:   TRUST_FLAG_RESULT, TRUST_TEST_DIR, TRUST_JOB_TIMEOUT,
##            TRUST_FLAG_PROD, TRUST_TEST_BACKUP, TRUST_TEST_BENCHMARK,
##            TRUST_SVN_NEMOGCM, TRUST_CFG_NEW, file_rslt, file_stat (read);
##            TRUST_FLAG_ERROR (written with the status message)
## Always exits the shell with status 0, whatever the result of the test.
get_out() {
  local time_step=0

  TRUST_FLAG_ERROR=$1

  printf "\n\nEnd of test\n"

  ## In case of compilation error
  cd "${TRUST_TEST_DIR}" \
    || echo "Cannot enter testing directory '${TRUST_TEST_DIR}'" >&2

  if [ "${TRUST_FLAG_RESULT}" == 'FAILED' ]; then
    echo 'Failure'

    ## Error identification
    case "${TRUST_FLAG_ERROR}" in

      ## Initialisation
      'A') TRUST_FLAG_ERROR='Missing environment variable'       ;;
      'B') TRUST_FLAG_ERROR='Unable to create testing directory' ;;
      'C') TRUST_FLAG_ERROR='SVN issue on local working copy'    ;;

      ## Compilation
      'D') TRUST_FLAG_ERROR='XIOS compilation failed' ;;
      'E') TRUST_FLAG_ERROR='NEMO compilation failed' ;;

      ## Submission
      'F') TRUST_FLAG_ERROR='Missing input files'  ;;
      'G') TRUST_FLAG_ERROR='Job submission error' ;;

      ## Computing
      'H')
        TRUST_FLAG_ERROR='Crashed at time step '
        comments 'E R R O R'
        ## 'read' drops the blank padding around the number
        if [ -e time.step ]; then
          read -r time_step < time.step
        fi
        TRUST_FLAG_ERROR+=${time_step:-0}
        ;;
      'I')
        TRUST_FLAG_ERROR='Exceeded time limit of '
        TRUST_FLAG_ERROR+=$(( TRUST_JOB_TIMEOUT / 3600 ))'h'
        ;;

      ## Results
      'J') TRUST_FLAG_ERROR='Missing previous outputs' ;;
      'K') TRUST_FLAG_ERROR='Restart rebuild error'    ;;
      'L') TRUST_FLAG_ERROR='New outputs differ'       ;;

      ## Other (unquoted pattern: a quoted '*' only matched a literal star)
      *) TRUST_FLAG_ERROR='Unknown error' ;;

    esac

  else
    echo 'Success'
    TRUST_FLAG_ERROR='Code is reliable'
  fi

  ## Eventual comments from ocean.output
  if [[ ${TRUST_FLAG_ERROR} != *'Crashed at time step'* ]]; then
    comments 'W A R N I N G'
  fi

  ## Last messenger files
  _mesg_set_value "${file_rslt}" "${TRUST_FLAG_RESULT}"
  _mesg_set_value "${file_stat}" "${TRUST_FLAG_ERROR}"

  ## Save tested configuration if trusting failed in production mode ('-p')
  if [[ ${TRUST_FLAG_RESULT} == 'FAILED' && ${TRUST_FLAG_PROD} == 'true' ]]; then
    echo "Creating archive ${TRUST_TEST_BACKUP} under ${TRUST_TEST_BENCHMARK}"
    tar -czf "${TRUST_TEST_BENCHMARK}/${TRUST_TEST_BACKUP}" * \
      -C "${TRUST_SVN_NEMOGCM}/CONFIG/${TRUST_CFG_NEW}/MY_SRC" . \
      -C "${TRUST_SVN_NEMOGCM}/CONFIG/${TRUST_CFG_NEW}" \
      "cpp_${TRUST_CFG_NEW}.fcm"
  fi

  ## Logfile construct & eventual sending of notification email
  printf "\nTrusting digest:\n----------------\n"
  log_make
  prod_publish

  exit 0
}

Note: See TracBrowser for help on using the repository browser.

New URL for NEMO forge! http://forge.nemo-ocean.eu

Context Navigation

source: branches/2015/dev_r5092_CNRS18_TRUST/NEMOGCM/TRUST/inc/trusting_func.sh @ 8859

Download in other formats: