New URL for NEMO forge! http://forge.nemo-ocean.eu

Since March 2022 along with NEMO 4.2 release, the code development moved to a self-hosted GitLab.
This forge is now archived and remains online for historical reference.

trusting_func.sh in branches/2015/dev_r5092_CNRS18_TRUST/NEMOGCM/TRUST/inc – NEMO

Context Navigation

source: branches/2015/dev_r5092_CNRS18_TRUST/NEMOGCM/TRUST/inc/trusting_func.sh @ 8843

Last change on this file since 8843 was 8843, checked in by nicolasmartin, 6 years ago
Review forcing inputs management + cosmetic changes
Property svn:eol-style set to `native`; Property svn:executable set to `*`; Property svn:keywords set to `Author Date Id Rev URL`
File size: 16.9 KB

Line
1	#!/bin/bash
2
3
4	##--------------------------------------------------------------------------------
5	## Messenger filenames
6	##--------------------------------------------------------------------------------
7
## One messenger file per column of the trusting summary: init() writes the
## header line of each file and log_make() pastes all 'mesg_*.txt' files side
## by side, so the two-digit infix fixes the column order.
file_date=mesg_01_date.txt
file_rslt=mesg_02_result.txt
file_stat=mesg_03_status.txt
file_nemo=mesg_04_nemo.txt
file_xios=mesg_05_xios.txt
file_cmpf=mesg_06_compiler.txt
file_lmpi=mesg_07_mpi.txt
file_ncdf=mesg_08_netcdf.txt
file_inpt=mesg_09_inputs.txt
file_time=mesg_10_time.txt
file_memy=mesg_11_memory.txt
file_note=mesg_12_comments.txt
14
15
16	##--------------------------------------------------------------------------------
17	## Functions in order of use
18	##--------------------------------------------------------------------------------
19
#######################################
# Print a step banner: two blank lines, "Step.....", the title and a dashed
# underline exactly as long as the title, followed by a blank line.
# Arguments: $1 - step title
# Outputs:   banner on stdout
#######################################
print_step() {
  local title=${1-}
  local outline

  # ${#title} is the title length; the former `echo | wc -c` also counted the
  # newline added by echo, which made the underline one dash too long.
  printf -v outline '%*s' "${#title}" ''

  printf '\n\nStep.....\n%s\n%s\n\n' "${title}" "${outline// /-}"
}
26
27
28	##
29	##--------------------------------------------------------------------------------
30
#######################################
# Create the testing and benchmark directories, move into the testing
# directory and initialise every messenger file with its header line.
# Globals:   TRUST_TEST_DIR, TRUST_TEST_BENCHMARK, TRUST_FLAG_RESULT,
#            TRUST_TEST_DATE, file_* (all read)
# Outputs:   testing directory path on stdout; messenger files in that directory
# Calls get_out B when the directories cannot be created or entered.
#######################################
init() {
  local dat

  mkdir -p "${TRUST_TEST_DIR}" "${TRUST_TEST_BENCHMARK}" || get_out B
  # Without this check a failed cd would scatter the messenger files in the
  # caller's current directory.
  cd "${TRUST_TEST_DIR}" || get_out B
  echo "${TRUST_TEST_DIR}"

  ## Header (first line) of each messenger file
  echo 'Date' > "${file_date}"
  echo 'Result' > "${file_rslt}"
  echo 'Status' > "${file_stat}"
  echo 'NEMOGCM' > "${file_nemo}"
  echo 'XIOS' > "${file_xios}"
  echo 'Fortran' > "${file_cmpf}"
  echo 'MPI' > "${file_lmpi}"
  echo 'NetCDF' > "${file_ncdf}"
  echo 'Inputs' > "${file_inpt}"
  echo 'Time' > "${file_time}"
  echo 'RAM (Phy./Virt.)' > "${file_memy}"
  echo 'Comments' > "${file_note}"

  ## Default result flag with 'Unknown error' status; get_out() rewrites
  ## line 2 of both files at the end of the test
  echo "${TRUST_FLAG_RESULT}" >> "${file_rslt}"
  echo 'Unknown error' >> "${file_stat}"

  ## UTC time zone for timestamping
  dat=$( date -ud "${TRUST_TEST_DATE}" +"%F %R %Z" )
  echo "${dat}" >> "${file_date}"
}
55
56
57	##
58	##--------------------------------------------------------------------------------
59
#######################################
# Run the configured SVN action on each essential NEMO directory and record
# the highest 'Last Changed Rev' found among them.
# Globals:   TRUST_SVN_UP (list of directories), TRUST_SVN_ACTION (command
#            line), TRUST_SVN_NEMOGCM, TRUST_SVN_REPO, file_nemo (all read)
# Outputs:   directory names on stdout; 'NEMOGCM <rev>' appended to model.log;
#            changeset HTML link appended to ${file_nemo}
# Calls get_out C when the SVN action fails on a directory.
#######################################
get_nemo_rev() {
  local dir rev_loc
  local rev=0

  ## Loop on essential NEMO directories
  # TRUST_SVN_UP and TRUST_SVN_ACTION are expanded unquoted on purpose: the
  # first is used as a list, the second as a command with its arguments.
  for dir in ${TRUST_SVN_UP}; do
    echo "${dir}"
    ${TRUST_SVN_ACTION} "${TRUST_SVN_NEMOGCM}/${dir}" || get_out C

    rev_loc=$( svn info "${TRUST_SVN_NEMOGCM}/${dir}" \
      | awk '/Last Changed Rev/ {print $NF}' )

    # An empty or non-numeric answer used to break the integer test below
    # with a shell error; skip the directory with an explicit warning instead.
    if [[ ! ${rev_loc} =~ ^[0-9]+$ ]]; then
      echo "No 'Last Changed Rev' found for ${dir}" >&2
      continue
    fi

    ## Keep last rev. nb
    if [ "${rev_loc}" -gt "${rev}" ]; then
      rev=${rev_loc}
    fi
  done

  echo "NEMOGCM ${rev}" >> model.log
  echo "<a href=\"${TRUST_SVN_REPO}/nemo/changeset/${rev}\" target=\"_blank\">${rev}</a>" \
    >> "${file_nemo}"
}
80
81
82	##
83	##--------------------------------------------------------------------------------
84
#######################################
# Log the release of the Fortran compiler, MPI library, NetCDF library and
# CDO, then dump the environment.
# Globals:   TRUST_JOB_ENV (sourced), TRUST_COMPILE_FORTRAN,
#            TRUST_COMPILE_MPI, TRUST_COMPILE_NETCDF, TRUST_IO_CDO, PATH,
#            file_cmpf, file_lmpi, file_ncdf (all read)
# Outputs:   one line per tool appended to model.log; lines 2, 3 and 4 of
#            model.log copied to the compiler/MPI/NetCDF messenger files;
#            formatted model.log on stdout; sorted environment in env.log
#######################################
get_soft_rel() {
  local ver str

  ## Sourcing environment
  . "${TRUST_JOB_ENV}" >& /dev/null

  # The four variables are expanded unquoted on purpose: an empty one simply
  # disappears from the list.
  for str in ${TRUST_COMPILE_FORTRAN} ${TRUST_COMPILE_MPI} \
             ${TRUST_COMPILE_NETCDF} ${TRUST_IO_CDO}; do

    ## Extract version number after searching pattern in PATH env. variable
    # -n/p: stay empty when the pattern is absent instead of logging all of PATH
    ver=$( printf '%s\n' "${PATH}" \
      | sed -n "s|.*\(${str}[0-9.]*\).*|\1|p" )

    ## option --version would work for main Fortran compilers and CDO
    # Each side is tested only when set: an empty alternative in the former
    # 'A|B' regex matched every string.
    if { [[ -n ${TRUST_COMPILE_FORTRAN} ]] && [[ ${str} =~ ${TRUST_COMPILE_FORTRAN} ]]; } \
      || { [[ -n ${TRUST_IO_CDO} ]] && [[ ${str} =~ ${TRUST_IO_CDO} ]]; }; then
      ver=$( "${str}" --version 2>&1 | grep -m1 -oe '\<[0-9. ]*\>' \
        | xargs echo "${str}" )
    fi

    ## Cleaning characters string to display proper soft name
    ver=$( printf '%s\n' "${ver}" | sed 's|[/-]| |g' )

    printf '%s\n' "${ver}" >> model.log
  done

  # Lines 2-4 of model.log are the first three tools logged above; line 1 is
  # expected to exist already. NOTE(review): presumably the 'NEMOGCM <rev>'
  # line written by get_nemo_rev - confirm the call order.
  sed -n 2p model.log >> "${file_cmpf}"
  sed -n 3p model.log >> "${file_lmpi}"
  sed -n 4p model.log >> "${file_ncdf}"

  awk '{printf "%-20s %s %s\n", $1, $2, $3}' model.log
  env | sort > env.log
}
123
124
125	##
126	##--------------------------------------------------------------------------------
127
#######################################
# Log the XIOS branch and revision, then build XIOS with make_xios.
# Globals:   TRUST_IO_XIOS, TRUST_TEST_DIR, TRUST_SVN_REPO, file_xios,
#            TRUST_IO_XIOS_MODE, TRUST_MAIN_HPCC, TRUST_COMPILE_NPROC,
#            TRUST_MAIN_STDOUT (all read)
# Outputs:   branch/revision line on stdout and in model.log; changeset HTML
#            link appended to the XIOS messenger file; 'Success' on stdout
# Calls get_out D when the XIOS directory cannot be entered or when
# lib/libxios.a is missing after the build.
#######################################
compile_xios() {
  local rev

  cd "${TRUST_IO_XIOS}" || get_out D

  rev=$( svn info | awk '/Last Changed Rev/ {print $NF}' )
  printf 'XIOS branch %s rev. %s\n' \
    "$( basename "${TRUST_IO_XIOS}" )" "${rev}" \
    | tee -a "${TRUST_TEST_DIR}/model.log"
  # The closing quote of the href attribute was missing, which produced a
  # malformed link.
  echo "<a href=\"${TRUST_SVN_REPO}/ioserver/changeset/${rev}\" target=\"_blank\">${rev}</a>" \
    >> "${TRUST_TEST_DIR}/${file_xios}"

  # eval is kept because TRUST_MAIN_STDOUT is spliced in as shell syntax.
  # NOTE(review): presumably an output redirection - confirm against the caller.
  eval "./make_xios ${TRUST_IO_XIOS_MODE} --arch ${TRUST_MAIN_HPCC} \
    --job ${TRUST_COMPILE_NPROC} \
    ${TRUST_MAIN_STDOUT}"

  if [[ ! -e ./lib/libxios.a ]]; then
    get_out D
  fi
  echo 'Success'
}
146
147
148	##
149	##--------------------------------------------------------------------------------
150
#######################################
# Build the tested NEMO configuration with makenemo.
# Globals:   TRUST_SVN_NEMOGCM, TRUST_CFG_NEW, TRUST_CFG_REF,
#            TRUST_FLAG_DEBUG, TRUST_MAIN_HPCC, TRUST_COMPILE_NPROC,
#            TRUST_CFG_KEY_ADD, TRUST_CFG_KEY_DEL, TRUST_MAIN_STDOUT (all read)
# Outputs:   'Success' on stdout
# Calls get_out E when the CONFIG directory cannot be entered or when
# BLD/bin/nemo.exe is missing after the build.
#######################################
compile_nemo() {
  cd "${TRUST_SVN_NEMOGCM}/CONFIG" || get_out E

  ## Recompiling from scratch if not in debug mode
  if [[ -d ${TRUST_CFG_NEW} && ${TRUST_FLAG_DEBUG} == 'false' ]]; then
    # 'y' is fed on stdin to the clean_config step
    ./makenemo -n "${TRUST_CFG_NEW}" clean_config > /dev/null <<< 'y'
  fi

  # eval is kept because TRUST_MAIN_STDOUT is spliced in as shell syntax.
  # NOTE(review): the two KEY variables presumably carry whole option groups -
  # confirm against the caller.
  eval "./makenemo -n ${TRUST_CFG_NEW} -r ${TRUST_CFG_REF} \
    -m ${TRUST_MAIN_HPCC} -j ${TRUST_COMPILE_NPROC} \
    ${TRUST_CFG_KEY_ADD} ${TRUST_CFG_KEY_DEL} \
    ${TRUST_MAIN_STDOUT}"

  if [[ ! -e ./${TRUST_CFG_NEW}/BLD/bin/nemo.exe ]]; then
    get_out E
  fi
  echo 'Success'
}
171
172
173	##
174	##--------------------------------------------------------------------------------
175
#######################################
# Gather all the inputs of the run in the testing directory: forcing files
# (from archives or from a plain directory), then the cpp keys and the
# EXP00 files of the tested configuration.
# Globals:   TRUST_TEST_DIR, TRUST_IO_FORC_PATH, TRUST_IO_FORC_TAR,
#            TRUST_SVN_NEMOGCM, TRUST_CFG_NEW (all read)
# Outputs:   progress messages on stdout; forcing listing appended to
#            inputs_list.txt; input files copied into the current directory
# Calls get_out B when the testing directory cannot be entered and get_out F
# when TRUST_IO_FORC_PATH is set but is not a directory.
#######################################
get_inputs() {
  local archive

  cd "${TRUST_TEST_DIR}" || get_out B

  ## Test forcing directory: forcing is skipped only when the variable is unset
  if [[ -z ${TRUST_IO_FORC_PATH+set} ]]; then

    echo 'No forcing files needed'

  else

    # A set-but-empty or wrong path used to list and copy '/*'
    [[ -d ${TRUST_IO_FORC_PATH} ]] || get_out F

    ## Test forcing archive
    if [[ -n ${TRUST_IO_FORC_TAR} ]]; then

      echo "Forcing archive(s): ${TRUST_IO_FORC_TAR}"

      ## List the content of every archive, then extract them
      # TRUST_IO_FORC_TAR is a whitespace-separated list: unquoted on purpose
      for archive in ${TRUST_IO_FORC_TAR}; do
        tar -tvf "${TRUST_IO_FORC_PATH}/${archive}" >> inputs_list.txt
      done
      for archive in ${TRUST_IO_FORC_TAR}; do
        tar -vxf "${TRUST_IO_FORC_PATH}/${archive}" > /dev/null
      done

    else

      echo "Forcing directory: ${TRUST_IO_FORC_PATH}"

      ## List & copy files without archive
      ls -lh "${TRUST_IO_FORC_PATH}"/* >> inputs_list.txt
      command cp "${TRUST_IO_FORC_PATH}"/* .

    fi

  fi

  ## Uncompress any gzipped input (no-op when there is none)
  find . -name '*.gz' -exec gzip -d {} \;

  ## cpp keys and EXP00 files of the tested configuration
  cp "${TRUST_SVN_NEMOGCM}/CONFIG/${TRUST_CFG_NEW}"/cpp_* .
  find "${TRUST_SVN_NEMOGCM}/CONFIG/${TRUST_CFG_NEW}/EXP00" \
    -regex '.*\(_cfg\|.in\|opa\|_ref\|.xml\)' \
    -exec cp {} . \;
}
226
227
228	##
229	##--------------------------------------------------------------------------------
230
#######################################
# Compare the inputs of the run (cpp keys, forcing listing, namelists, XML
# files) with their counterparts in the benchmark directory.
# Globals:   TRUST_TEST_BENCHMARK, file_inpt, file_note (all read)
# Outputs:   diff messages or 'Same' on stdout; 'Same'/'Different' appended
#            to the inputs messenger file; list of differing files appended
#            to temp_${file_note}
#######################################
diff_inputs() {
  local dif file
  local files_list='' mesg='Same'

  ###################################
  ## Think of copying initial test ##
  ###################################

  ## Simple diff
  # 'namelist_*' had lost its wildcard, so namelists were never compared
  for file in cpp_* 'inputs_list.txt' namelist_* *.xml; do

    ## Pass over files missing from the benchmark directory (this also
    ## covers a glob that matched nothing and stayed literal)
    [[ -e ${TRUST_TEST_BENCHMARK}/${file} ]] || continue

    # 'diff -q' prints a message on stdout only when the files differ
    dif=$( diff -q "${file}" "${TRUST_TEST_BENCHMARK}/${file}" )
    [[ -n ${dif} ]] || continue

    mesg='Different'
    echo "${dif}"
    files_list+="${file} "

  done

  if [[ ${mesg} == 'Same' ]]; then
    echo "${mesg}"
  fi
  echo "${mesg}" >> "${file_inpt}"

  ## List different files for web comment
  if [[ -n ${files_list} ]]; then
    echo "Inputs : ${files_list}differ<br>" >> "temp_${file_note}"
  fi
}
267
268
269	##
270	##--------------------------------------------------------------------------------
271
#######################################
# Copy the job script into the testing directory and submit the job.
# Globals:   TRUST_JOB_SCRIPT, TRUST_TEST_DIR, TRUST_JOB_SUBMIT (read);
#            TRUST_JOB_ID (written: stdout of the submit command)
# Outputs:   'Success (job ID <id>)' on stdout
# Calls get_out G when the submit command returns a non-zero status.
#######################################
job_submit() {
  ## Copy the submitting script to testing folder
  cp "${TRUST_JOB_SCRIPT}" "${TRUST_TEST_DIR}"

  # TRUST_JOB_SUBMIT holds a complete command line, hence eval
  if ! TRUST_JOB_ID=$( eval "${TRUST_JOB_SUBMIT}" ); then
    get_out G
  fi

  printf 'Success (job ID %s)\n' "${TRUST_JOB_ID}"
}
279
280
281	##
282	##--------------------------------------------------------------------------------
283
#######################################
# Wait for the submitted job to end, polling every 30 seconds and logging
# job information meanwhile; kill the job when it outlasts the timeout.
# Globals:   TRUST_JOB_STATE (command printing something while the job is
#            alive), TRUST_JOB_INFO, TRUST_JOB_KILL, TRUST_JOB_TIMEOUT (s)
# Outputs:   separator lines and job information appended to computation.log
# Calls get_out I when the job is still alive once the timeout is reached.
#######################################
job_pending() {
  local outline
  local time_elapsed=0 time_increment=30 timed_out='false'

  # 100 '#' characters used as separator in the log
  printf -v outline '%100s' ''
  outline=${outline// /#}

  sleep "${time_increment}"

  ## Append a log file while pending
  while [[ -n $( eval "${TRUST_JOB_STATE}" ) ]]; do

    # The timeout is flagged only while the job is still alive. The former
    # '-eq' test after the loop missed any timeout that is not a multiple of
    # the polling step, and misreported a job ending exactly at the limit.
    if [[ ${time_elapsed} -ge ${TRUST_JOB_TIMEOUT} ]]; then
      timed_out='true'
      break
    fi

    printf '\n%s\n' "${outline}" >> computation.log
    if [[ -n ${TRUST_JOB_INFO} ]]; then
      eval "${TRUST_JOB_INFO}" >> computation.log
    fi

    sleep "${time_increment}"
    time_elapsed=$(( time_elapsed + time_increment ))
  done

  sleep "${time_increment}"

  ## Kill remaining job & stop the test if it's too long
  if [[ ${timed_out} == 'true' ]]; then
    eval "${TRUST_JOB_KILL}" &> /dev/null
    get_out I
  fi
}
309
310
311	##
312	##--------------------------------------------------------------------------------
313
#######################################
# Report the computing time and the peak memory usage of the job when the
# corresponding commands are configured.
# Globals:   TRUST_JOB_TIME, TRUST_JOB_RAM_P, TRUST_JOB_RAM_V (command
#            lines), file_time, file_memy (all read)
# Outputs:   labelled values on stdout; raw values appended to the time and
#            memory messenger files
#######################################
job_perfs() {
  local time_cpu memory_pmax memory_vmax

  if [[ -n ${TRUST_JOB_TIME} ]]; then

    ## Interest for checking unusual time computation
    time_cpu=$( eval "${TRUST_JOB_TIME}" )

    printf 'Time: '
    # shellcheck disable=SC2086 -- deliberate word splitting: multi-line
    # output is flattened onto the single line a messenger file expects
    echo ${time_cpu} | tee -a "${file_time}"

  fi

  if [[ -n ${TRUST_JOB_RAM_P} && -n ${TRUST_JOB_RAM_V} ]]; then

    ## Interest for checking unusual memory usage
    memory_pmax=$( eval "${TRUST_JOB_RAM_P}" )
    memory_vmax=$( eval "${TRUST_JOB_RAM_V}" )

    printf 'Max memory usage (physical/virtual): '
    # shellcheck disable=SC2086 -- same deliberate flattening as above
    echo ${memory_pmax}' / '${memory_vmax} | tee -a "${file_memy}"

  fi
}
335
336
337	##
338	##--------------------------------------------------------------------------------
339
#######################################
# Decide whether the run crashed: no time.step file, or an 'E R R O R'
# pattern in ocean.output.
# Outputs:   'Success' on stdout when neither condition holds
# Calls get_out H when the run is considered crashed.
#######################################
job_state() {
  # grep -q only tests for presence; a missing ocean.output still counts as
  # "no error found", as before
  if [[ ! -e time.step ]] || grep -q 'E R R O R' ocean.output; then
    get_out H
  else
    echo 'Success' ## Must be reviewed
  fi
}
347
348
349	##
350	##--------------------------------------------------------------------------------
351
#######################################
# Compare ocean.output and any *.stat file of the run with the benchmark.
# Globals:   TRUST_TEST_BENCHMARK, file_note (read);
#            TRUST_FLAG_RESULT (written: set to 'OK')
# Outputs:   diff messages or 'Same' on stdout; list of differing files
#            appended to temp_${file_note}
# Calls get_out J when a compared file has no counterpart in the benchmark.
#######################################
diff_results() {
  local file
  local files_list='' mesg='Same'

  ###################################
  ## Think of copying initial test ##
  ###################################

  ## Now test is good by default ('OK')
  # NOTE(review): nothing in this function sets the flag back to 'FAILED'
  # when files differ - confirm this is intended.
  TRUST_FLAG_RESULT='OK'

  ## Simple diff
  for file in 'ocean.output' *.stat; do

    # Stat files are optional: when the glob matches nothing the literal
    # '*.stat' used to be looked up in the benchmark and triggered get_out J
    [[ ${file} == 'ocean.output' || -e ${file} ]] || continue

    ## Stop if no minimal benchmark files (ocean.output, eventual stat files)
    [[ -e ${TRUST_TEST_BENCHMARK}/${file} ]] || get_out J

    ## Continue even if it differs
    if ! diff -q "${file}" "${TRUST_TEST_BENCHMARK}/${file}"; then
      mesg='Different'
      files_list+="${file} "
    fi

  done

  if [[ ${mesg} == 'Same' ]]; then
    echo "${mesg}"
  fi

  ## List different files for web comment
  if [[ -n ${files_list} ]]; then
    echo "Results : ${files_list}differ<br>" >> "temp_${file_note}"
  fi
}
381
382
383	##
384	##--------------------------------------------------------------------------------
385
#######################################
# Rebuild the decomposed restart files of the run and compare each rebuilt
# file with the benchmark using 'cdo diffn'.
# Globals:   TRUST_CFG_NEW, TRUST_SVN_NEMOGCM, TRUST_COMPILE_NPROC,
#            TRUST_TEST_BENCHMARK, TRUST_FLAG_RESULT, file_note (all read)
# Outputs:   per-file difference or 'Same' on stdout; cdo output in
#            cdo_diff.out; summary appended to temp_${file_note}
# Calls get_out K when rebuild_nemo fails, and get_out L when
# TRUST_FLAG_RESULT is 'FAILED' at the end.
#######################################
diff_restarts() {
  local dif filebase filebases ndomain
  local files_list='' dif_sum=0

  ## Find all restart files to rebuild
  # The regex quantifiers ('*') are restored here: without them none of the
  # patterns below could match a restart file name.
  if [[ -n $( find . -regex '.*_restart.*[0-9]\.nc' -print -quit ) ]]; then
    ###############################################################
    ## Think to set the configuration name in the 'namelist_cfg' ##
    ###############################################################
    # Strip the trailing '_<4 digits>.nc' to get one base name per restart
    filebases=$( find . -regextype sed \
        -regex ".*${TRUST_CFG_NEW}.*_[0-9]\{4\}\.nc" \
      | sed 's/\(.*\)_.*/\1/' | sort -u )

    # Base names are split on whitespace on purpose (one per line from find)
    for filebase in ${filebases}; do

      ndomain=$( find . -regex ".*${filebase}_[0-9]*.nc" \
        | wc -l | awk '{print $1}' )

      #####################################################
      ## Handle 2 possibilities of 'rebuild_nemo' origin ##
      #####################################################

      ## Possibility of remaining decomposed restarts (even after rebuild)
      if "${TRUST_SVN_NEMOGCM}/TOOLS/REBUILD_NEMO/rebuild_nemo" \
        -t "${TRUST_COMPILE_NPROC}" "${filebase}" "${ndomain}" \
        > /dev/null; then
        rm -f "${filebase}"_[0-9]*.nc > /dev/null
      else
        get_out K
      fi

      ## Nothing to compare without a benchmark restart file
      [[ -e ${TRUST_TEST_BENCHMARK}/${filebase}.nc ]] || continue

      cdo diffn "${filebase}.nc" "${TRUST_TEST_BENCHMARK}/${filebase}.nc" \
        > cdo_diff.out 2> /dev/null

      ## Identical if cdo_diff.out is zero size
      [[ -s cdo_diff.out ]] || continue

      dif=$( grep -om1 '[0-9]* of [0-9]* records' cdo_diff.out )
      [[ -n ${dif} ]] || continue

      files_list+="${filebase} "
      echo "${filebase}.nc: ${dif}"

      # Add the leading count of '<n> of <m> records'; 10# keeps a value
      # with leading zeros from being read as octal
      if [[ ${dif} =~ ^([0-9]+) ]]; then
        dif_sum=$(( dif_sum + 10#${BASH_REMATCH[1]} ))
      fi

    done

    ## List modified restart(s) for web comment with sum of differences
    if (( dif_sum != 0 )); then
      echo "Restarts: ${files_list}${dif_sum} record(s) differ<br>" \
        >> "temp_${file_note}"
    else
      echo 'Same'
    fi

  fi

  if [[ ${TRUST_FLAG_RESULT} == 'FAILED' ]]; then
    get_out L
  fi
}
452
453
454	##
455	##--------------------------------------------------------------------------------
456
#######################################
# Pick the first occurrence of a pattern in ocean.output, with the lines
# that follow it, as a one-line comment.
# Arguments: $1 - pattern: 'E R R O R' (matched anywhere, 4 lines of
#                 context) or any other text such as 'W A R N I N G'
#                 (matched after one leading space at line start, 2 lines)
# Globals:   file_note (read)
# Outputs:   the comment with whitespace squeezed on stdout; the raw comment
#            followed by '<br>' appended to temp_${file_note}
#######################################
comments() {
  local state=$1 line=''
  local IFS=$' \t\n'
  local -a grep_opts words

  if [[ -e ocean.output ]]; then
    ## 'W A R N I N G' pattern by default
    grep_opts=( -A2 "^ ${state}" )
    if [[ ${state} == 'E R R O R' ]]; then
      grep_opts=( -A4 "${state}" )
    fi

    ## Select first occurrence for web comment
    line=$( grep -m1 "${grep_opts[@]}" ocean.output | tr -d '\n' )
  fi

  [[ -n ${line} ]] || return 0

  # Squeeze whitespace for the terminal without glob-expanding characters
  # such as '*', which the former unquoted echo did
  read -r -d '' -a words <<< "${line}" || true
  printf '%s\n' "${words[*]}"

  # The text is passed as data, never as the printf format: a '%' or a
  # backslash in ocean.output used to corrupt the comment
  printf '%s<br>' "${line}" >> "temp_${file_note}"
}
473
474
475	##
476	##--------------------------------------------------------------------------------
477
#######################################
# Finalise the comments messenger file and build the summary by pasting
# all messenger files side by side with ';' as separator.
# Globals:   file_note, TRUST_TEST_SUMMARY (read)
# Outputs:   summary on stdout and in ${TRUST_TEST_SUMMARY} (overwritten)
#######################################
log_make() {
  ## Format comments for web: join every comment on one line and drop the
  ## final '<br>'
  if [[ -e temp_${file_note} ]]; then
    tr -d '\n' < "temp_${file_note}" | sed 's/<br>$//' >> "${file_note}"
  fi

  ## Construct txt file with all messenger files
  paste -d ';' mesg_*.txt | tee "${TRUST_TEST_SUMMARY}"
}
488
489
490	##
491	##--------------------------------------------------------------------------------
492
#######################################
# In production mode, append the summary to the trusting logfile and send
# a notification e-mail when the test has failed.
# Globals:   TRUST_FLAG_PROD, TRUST_TEST_LOG, TRUST_TEST_SUMMARY,
#            TRUST_TEST_MAILING, TRUST_FLAG_RESULT, TRUST_FLAG_ERROR,
#            TRUST_CFG_NEW, TRUST_CFG_REF, TRUST_MAIN_USER, TRUST_MAIN_HPCC,
#            TRUST_TEST_DIR, TRUST_TEST_BENCHMARK, TRUST_TEST_BACKUP,
#            TRUST_SVN_BRANCH (all read)
# Outputs:   ${TRUST_TEST_LOG} appended; trusting.mail written and mailed
#######################################
prod_publish() {
  local subject

  ## Production mode (-p|--prod)
  [[ ${TRUST_FLAG_PROD} == 'true' ]] || return 0

  ## Create or append trusting logfile: the whole summary (header included)
  ## the first time, only its last line afterwards
  if [[ -f ${TRUST_TEST_LOG} ]]; then
    tail -n 1 "${TRUST_TEST_SUMMARY}" >> "${TRUST_TEST_LOG}"
  else
    cat "${TRUST_TEST_SUMMARY}" >> "${TRUST_TEST_LOG}"
  fi

  ## Send mail only when FAILED
  if [[ -n ${TRUST_TEST_MAILING} && ${TRUST_FLAG_RESULT} == 'FAILED' ]]; then

    ## Content
    cat > trusting.mail <<END_MAIL
Dear all,


The following trusting sequence has not completed successfully:

Testing configuration ${TRUST_CFG_NEW} based on ${TRUST_CFG_REF}.
User installation ${TRUST_MAIN_USER}
HPC environment ${TRUST_MAIN_HPCC}

Here is the running environment summary:
$(cat model.log)

For more details, look into the testing folder at:
${TRUST_TEST_DIR}
An archive is also available to share the questionable configuration:
${TRUST_TEST_BENCHMARK}/${TRUST_TEST_BACKUP}

END_MAIL

    ## Send with detailed subject
    subject="[NEMO Trusting][${TRUST_CFG_REF}][${TRUST_SVN_BRANCH}] ${TRUST_FLAG_RESULT} ${TRUST_FLAG_ERROR}"
    # TRUST_TEST_MAILING is left unquoted so that several space-separated
    # recipients become separate arguments
    mail -s "${subject}" ${TRUST_TEST_MAILING} < trusting.mail
  fi
}
541
542
543	##
544	##--------------------------------------------------------------------------------
545
#######################################
# Single exit point of the test: translate the error code into a status
# message, update the result/status messenger files, archive the test when
# it failed in production mode, publish the summary and exit.
# Arguments: $1 - one-letter error code (A to L)
# Globals:   TRUST_TEST_DIR, TRUST_FLAG_RESULT, TRUST_FLAG_PROD,
#            TRUST_JOB_TIMEOUT, TRUST_TEST_BACKUP, TRUST_TEST_BENCHMARK,
#            TRUST_SVN_NEMOGCM, TRUST_CFG_NEW, file_rslt, file_stat (read);
#            TRUST_FLAG_ERROR (written: final status message)
# Outputs:   progress and digest on stdout; line 2 of the result and status
#            messenger files rewritten
# Always terminates the shell with exit status 0, even for a failed test.
#######################################
get_out() {
  local time_step=0

  TRUST_FLAG_ERROR=$1

  printf '\n\nEnd of test\n'

  ## In case of compilation error
  cd "${TRUST_TEST_DIR}" \
    || echo "Cannot enter testing directory ${TRUST_TEST_DIR}" >&2

  if [[ ${TRUST_FLAG_RESULT} == 'FAILED' ]]; then
    echo 'Failure'

    ## Error identification
    case "${TRUST_FLAG_ERROR}" in

      ## Initialisation
      'A') TRUST_FLAG_ERROR='Missing environment variable' ;;
      'B') TRUST_FLAG_ERROR='Unable to create testing directory' ;;
      'C') TRUST_FLAG_ERROR='SVN issue on local working copy' ;;

      ## Compilation
      'D') TRUST_FLAG_ERROR='XIOS compilation failed' ;;
      'E') TRUST_FLAG_ERROR='NEMO compilation failed' ;;

      ## Submission
      'F') TRUST_FLAG_ERROR='Missing input files' ;;
      'G') TRUST_FLAG_ERROR='Job submission error' ;;

      ## Computing
      'H') TRUST_FLAG_ERROR='Crashed at time step' ;;
      'I') TRUST_FLAG_ERROR='Exceeded time limit' ;;

      ## Results
      'J') TRUST_FLAG_ERROR='Missing previous outputs' ;;
      'K') TRUST_FLAG_ERROR='Restart rebuild error' ;;
      'L') TRUST_FLAG_ERROR='New outputs differ' ;;

      ## Other
      # Unquoted so that it really is the default arm; quoted, it only
      # matched a literal asterisk
      *) TRUST_FLAG_ERROR='Unknown error' ;;

    esac

  else
    echo 'Success'
    TRUST_FLAG_ERROR='Code is reliable'
  fi

  ## Eventual comments from ocean.output
  # The multi-word status must be quoted in the tests below: unquoted, both
  # tests failed with a shell error and neither suffix was ever appended
  if [[ ${TRUST_FLAG_ERROR} == 'Crashed at time step' ]]; then
    comments 'E R R O R'
    if [[ -e time.step ]]; then
      # read strips the blanks around the first line of the file
      read -r time_step < time.step || true
    fi
    TRUST_FLAG_ERROR+=" ${time_step:-0}"
  else
    comments 'W A R N I N G'

    if [[ ${TRUST_FLAG_ERROR} == 'Exceeded time limit' ]]; then
      TRUST_FLAG_ERROR+=" $(( TRUST_JOB_TIMEOUT / 3600 ))h"
    fi

  fi

  ## Last messenger files
  sed -i "2 s/.*/${TRUST_FLAG_RESULT}/" "${file_rslt}"
  sed -i "2 s/.*/${TRUST_FLAG_ERROR}/" "${file_stat}"

  ## Save tested configuration if trusting failed in production mode (-p|--prod)
  if [[ ${TRUST_FLAG_RESULT} == 'FAILED' && ${TRUST_FLAG_PROD} == 'true' ]]; then
    echo "Creating archive ${TRUST_TEST_BACKUP} under ${TRUST_TEST_BENCHMARK}"
    tar -czf "${TRUST_TEST_BENCHMARK}/${TRUST_TEST_BACKUP}" * \
      -C "${TRUST_SVN_NEMOGCM}/CONFIG/${TRUST_CFG_NEW}/MY_SRC" . \
      -C "${TRUST_SVN_NEMOGCM}/CONFIG/${TRUST_CFG_NEW}" \
      "cpp_${TRUST_CFG_NEW}.fcm"
  fi

  ## Logfile construct & eventual sending of notification email
  printf '\nTrusting digest:\n----------------\n'
  log_make
  prod_publish

  exit 0
}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: