source: trunk/libIGCM/libIGCM_debug/libIGCM_debug.ksh @ 1290

Last change on this file since 1290 was 1290, checked in by sdipsl, 8 years ago
  • broadcast postProcessingStopLevel value to every jobs. Default value is zero.
  • Property licence set to
    The following licence information concerns ONLY the libIGCM tools
    ==================================================================

    Copyright © Centre National de la Recherche Scientifique CNRS
    Commissariat à l'Énergie Atomique CEA

    libIGCM : Library for Portable Models Computation of IGCM Group.

    IGCM Group is the french IPSL Global Climate Model Group.

    This library is a set of shell scripts and functions whose purpose is
    the management of the initialization, the launch, the transfer of
    output files, the post-processing and the monitoring of data produced
    by any numerical program on any platform.

    This software is governed by the CeCILL license under French law and
    abiding by the rules of distribution of free software. You can use,
    modify and/ or redistribute the software under the terms of the CeCILL
    license as circulated by CEA, CNRS and INRIA at the following URL
    "http://www.cecill.info".

    As a counterpart to the access to the source code and rights to copy,
    modify and redistribute granted by the license, users are provided only
    with a limited warranty and the software's author, the holder of the
    economic rights, and the successive licensors have only limited
    liability.

    In this respect, the user's attention is drawn to the risks associated
    with loading, using, modifying and/or developing or reproducing the
    software by the user in light of its specific status of free software,
    that may mean that it is complicated to manipulate, and that also
    therefore means that it is reserved for developers and experienced
    professionals having in-depth computer knowledge. Users are therefore
    encouraged to load and test the software's suitability as regards their
    requirements in conditions enabling the security of their systems and/or
    data to be ensured and, more generally, to use and operate it in the
    same conditions as regards security.

    The fact that you are presently reading this means that you have had
    knowledge of the CeCILL license and that you accept its terms.
  • Property svn:keywords set to Revision Author Date
File size: 38.0 KB
Line 
1#!/bin/ksh
2
3#**************************************************************
4# Author: Patrick Brockmann, Martial Mancip
5# Contact: Patrick.Brockmann__at__cea.fr Martial.Mancip__at__ipsl.jussieu.fr
6# $Revision::                                          $ Revision of last commit
7# $Author::                                            $ Author of last commit
8# $Date::                                              $ Date of last commit
9# IPSL (2006)
10#  This software is governed by the CeCILL licence see libIGCM/libIGCM_CeCILL.LIC
11#
12#**************************************************************
13
14#==================================================
15# The documentation of this file can be automatically generated
16# if you use the prefix #D- for comments to be extracted.
17# Extract with command: cat lib* | grep "^#D-" | cut -c "4-"
18#==================================================
19
#==================================================
# High level verbosity (integer). A value already set by the caller is kept,
# otherwise 3 is used.
typeset -i Verbosity=${Verbosity:-3}

#==================================================
# DEBUG_debug
# Low level verbosity switch. A value already set by the caller is kept,
# otherwise false is used.
: "${DEBUG_debug:=false}"

#D- postProcessingStopLevel (0,1,2,3)
#D- 3 stop if any post-processing went wrong
#D- 2 dont stop if atlas, monitoring or metrics failed
#D- 1 dont stop if atlas, monitoring, metrics, create_ts or create_se failed
#D- 0 dont stop if atlas, monitoring, metrics, create_ts, create_se, rebuild or pack_* failed
: "${postProcessingStopLevel:=0}"

#==================================================
# Random error generator switch (read-only) ; only applies if ( ${DEBUG_debug} )
readonly RandomError=false

#==================================================
# NULL_STR
# Default null string (read-only)
readonly NULL_STR="_0_"

#==================================================
# libIGCM_CurrentTag
# Current libIGCM tag (read-only), used to check compatibility with *.card
readonly libIGCMVersion="2.7"

#==================================================
# Exit Flag (internal debug)
# When true, end the master loop AFTER SAVES FILES
ExitFlag=false

#==================================================
# The simulation is not finished when we start to run
simulationIsOver=false

#==================================================
# AMQP messages are not flushed when we start to run
FlushAMQP=false

#==================================================
# Stack of function calls: three parallel arrays (name, arguments, start time)
# plus the current depth. Slot 0 of every array starts as the null string.
unset IGCM_debug_Stack IGCM_debug_StackArgs IGCM_debug_StackTiming
IGCM_debug_LenStack=0
IGCM_debug_Stack[0]=${NULL_STR}
IGCM_debug_StackArgs[0]=${NULL_STR}
IGCM_debug_StackTiming[0]=${NULL_STR}
72
73#D-#==================================================================
74#D-function IGCM_debug_getDate_ms
75#D- * Purpose: Give number of milliseconds since 01-jan-1970
function IGCM_debug_getDate_ms
{
  # Print the number of milliseconds elapsed since 01-jan-1970 on stdout.
  typeset epochNanos

  # seconds (%s) immediately followed by nanoseconds (%N) since 01-jan-1970
  epochNanos=$( date +%s%N )

  # dropping the six trailing digits turns nanoseconds into milliseconds
  echo "${epochNanos%??????}"
}
87
88#D-#==================================================================
89#D-function IGCM_debug_sizeOfTabContent
90#D- * Purpose: Give the summed size of a list of files or directories
91#D- * Usage: IGCM_debug_sizeOfTabContent entityList destination
92#D- *        where entityList is the name of a variable holding a list of files or directories
93#D- *        where destination is either a directory or a file name
function IGCM_debug_sizeOfTabContent
{
  # Print "<sizeKo>|<sizeMo>" : the summed apparent size of a list of entities.
  #   $1 : NAME of the variable holding the list (e.g. fileList or fileList[*]);
  #        it is expanded through eval, so it must be visible from here.
  #   $2 : destination, either a directory or a file name.
  # Entities that cannot be located anywhere are skipped (they count for 0).
  typeset entityListe destination iEntity sizeKo sumSizeKo sumSizeMo
  typeset i entity baseName

  eval set +A entityListe \${${1}}
  destination=${2}
  sumSizeKo=0

  # Here we will try to compute size (file or directory size) from local path and not from archive.
  for ((i = 0; i < ${#entityListe[*]}; i += 1)) ; do
    entity=${entityListe[$i]}
    # remove path /home/login/../ from the element
    baseName=${entity##/*/}
    # Start from scratch for every entity so that a miss never reuses the previous hit
    iEntity=""

    # Operands are quoted: an empty operand must make the test false,
    # whereas an unquoted empty operand degenerates into "[ -f ]" which is true.
    if [ -f "${entity}" ] ; then
      # One file or a bunch of files has been copied without renaming from a visible filesystem
      iEntity=${entity}
    elif [ -f "${baseName}" ] ; then
      # One file or a bunch of files has been copied without renaming from a non visible filesystem
      iEntity=${baseName}
    elif [ -f "${destination}" ] ; then
      # a file has been copied and renamed
      iEntity=${destination}
    elif [ -f "${destination}/${baseName}" ] ; then
      # a copy in a directory but not in ${PWD}
      iEntity=${destination}/${baseName}
    elif [ -d "${entity}" ] ; then
      # a directory has been copied from a non remote place
      iEntity=${entity}
    elif [ -d "${destination}/${baseName}" ] ; then
      # a directory has been copied from a remote archive and not renamed
      iEntity=${destination}/${baseName}
    elif [ -d "${destination}" ] ; then
      # a directory has been copied from a remote archive and renamed
      iEntity=${destination}
    fi

    # Nothing found : without this guard du would measure the current directory
    if [ -z "${iEntity}" ] ; then
      continue
    fi

    sizeKo=$( du --apparent-size -skL "${iEntity}" | gawk '{print $1}' )
    # du may fail (entity vanished, permission) : count it as 0 instead of breaking the arithmetic
    sumSizeKo=$(( sumSizeKo + ${sizeKo:-0} ))
  done
  sumSizeMo=$( echo "scale=6;${sumSizeKo}/1024" | bc )
  echo "${sumSizeKo}|${sumSizeMo}"
}
133
134#D-#==================================================================
135#D-function IGCM_debug_send_AMQP_msg__MAILTUNNEL
136#D- * Purpose: Take over AMQP C client using mail as a message recipient
137#D- * One argument : base64 encoded message
138#D- * Attach encoded config.card when starting the simulation
139
function IGCM_debug_send_AMQP_msg__MAILTUNNEL {

  # Queue one base64 encoded message ($1) in a buffer file under ${R_BUF} and
  # mail the whole buffer when one of these holds:
  #   - initBigBro is true (config.card.base64 is attached, initBigBro is reset)
  #   - the stamp file is older than the mail period
  #   - FlushAMQP is true (buffer and stamp files are removed afterwards)
  # Always returns 0.
  typeset encodedMsg recipient subject
  typeset queueFile stampFile
  typeset mustSend period
  typeset lastMailEpoch elapsed

  encodedMsg=$1

  recipient="superviseur@ipsl.jussieu.fr"
  subject="[TEMPORARY AMQP CHANNEL]"
  period=3600 # in seconds
  mustSend=0
  # its modification time records when the last mail was sent
  stampFile=${R_BUF}/.stamp.${config_UserChoices_TagName}.${config_UserChoices_JobName}
  # messages accumulate here until they are mailed
  queueFile=${R_BUF}/.buffer.${config_UserChoices_TagName}.${config_UserChoices_JobName}

  # init
  [ -f "${queueFile}" ] || touch ${queueFile}

  if [ -f "${stampFile}" ]; then
    # last time the stamp file was changed (in seconds since the epoch)
    lastMailEpoch=$(stat -c %Y ${stampFile})
    status=$?
    # The stat command might fail in some circumstance : it is considered ok to
    # continue anyway, the threshold is simply not evaluated this time.
    if [ ${status} -eq 0 ] ; then
      elapsed=$(( $(date +%s) - ${lastMailEpoch} ))
      # send messages when exceeding threshold
      if [ ${elapsed} -gt ${period} ] ; then
        mustSend=1
      fi
    fi
  else
    touch ${stampFile}
  fi

  # queue the message in the buffer
  echo ${encodedMsg} >> ${queueFile}

  # send mail
  if [ X${initBigBro} = Xtrue ] ; then
    # first message of the simulation : the encoded config.card goes along
    mailx -s "${subject}" -a ${SUBMIT_DIR}/config.card.base64 ${recipient} < ${queueFile}
    rm -f $queueFile ; touch ${queueFile}   # clear buffer
    touch ${stampFile}                      # memorize last mail date
    initBigBro=false
  elif [ ${mustSend} -eq 1 ] ; then
    mailx -s "${subject}" ${recipient}  < ${queueFile}
    rm -f ${queueFile} ; touch ${queueFile} # flush the buffer
    touch ${stampFile}                      # memorize last mail date
  fi

  if ( ${FlushAMQP} ) ; then
    mailx -s "${subject}" ${recipient}  < ${queueFile}
    # cleaning behind us
    rm -f ${queueFile}
    rm -f ${stampFile}
  fi

  # Always all good for now.
  return 0
}
205
206#D-#==================================================================
207#D-function IGCM_debug_sendAMQP_Metrics
208#D- * Purpose: Take over AMQP C client using mail as a message recipient
209#D- * Two arguments : - Directory where metrics.json files can be found
210#D- *                 - Metrics Group Name. metrics will be added to this group
211#D- * Attach encoded metrics.json files.
212
function IGCM_debug_sendAMQP_Metrics {

  # Mail a metrics message with every *json file of a directory attached.
  #   $1 : directory where the metrics json files can be found
  #   $2 : metrics group name, copied into the message body
  # Nothing is done unless ActivateBigBro is true. Always returns 0.
  # NOTE(review): code is not typeset, like in the other functions of this file.
  typeset mail_recipient encodedBody
  typeset attachmentsOptions metricsFile
  if [ X${ActivateBigBro} = Xtrue ] ; then
    mail_recipient="superviseur@ipsl.jussieu.fr"
    # Metrics tag on server side
    code=7100
    # Usual AMQP message to route messages on server side
    encodedBody=$( echo "{${genericSimulationID},\"msgCode\":\"${code}\",\"msgUID\":\"$(uuidgen)\",\"metricsGroupName\":\"${2}\",\"msgTimestamp\":\"$( date +"%Y-%m-%dT%H:%M:%S.%N%z" )\"}" |  base64 -w 0 )
    # One "-a file" option per metrics file. The glob is used directly instead
    # of parsing ls output; when nothing matches, the pattern stays literal and
    # is filtered out by the -f test, so the mail simply carries no attachment.
    attachmentsOptions=""
    for metricsFile in "${1}"/*json ; do
      [ -f "${metricsFile}" ] || continue
      attachmentsOptions="-a ${metricsFile} ${attachmentsOptions}"
    done
    IGCM_debug_Print 2 "IGCM_debug_sendAMQP_Metrics "
    # send mail
    echo ${encodedBody}|mailx -s "[TEMPORARY AMQP CHANNEL]" ${attachmentsOptions} ${mail_recipient}
  fi

  # Always all good for now.
  return 0
}
234
235#D-#==================================================================
236#D-function IGCM_debug_sendAMQP_projectAccounting
237#D- * Purpose: Take over AMQP C client using mail as a message recipient
238#D- * One argument : - File name where project accounting details are stored
239#D- * Attach encoded accounting file.
240
function IGCM_debug_sendAMQP_projectAccounting {

  # Mail a project accounting message with the accounting file attached,
  # then remove that file.
  #   $1 : name of the file where project accounting details are stored
  # Nothing is done unless ActivateBigBro is true. Always returns 0.
  typeset mail_recipient encodedBody

  # Always all good for now, supervision switched off included.
  [ X${ActivateBigBro} = Xtrue ] || return 0

  mail_recipient="superviseur@ipsl.jussieu.fr"
  # Message code used for this message on server side
  code=7010
  attachmentsOptions="-a ${1}"
  # Usual AMQP message to route messages on server side
  encodedBody=$( echo "{${genericSimulationID},\"msgCode\":\"${code}\",\"msgUID\":\"$(uuidgen)\",\"accountingProject\":\"${PROJECT}\",\"centre\":\"${CENTER}\",\"msgTimestamp\":\"$( date +"%Y-%m-%dT%H:%M:%S.%N%z" )\"}" |  base64 -w 0 )
  IGCM_debug_Print 2 "IGCM_debug_sendAMQP_projectAccounting"
  # send mail
  echo ${encodedBody}|mailx -s "[TEMPORARY AMQP CHANNEL]" ${attachmentsOptions} ${mail_recipient}
  # clean behind
  rm -f $1

  return 0
}
261
262#D-#==================================================================
263#D-function IGCM_debug_sendAMQP
264#D- * Purpose: Send the encoded message body, and optionally the encoded config.card, to rabbitMQ
function IGCM_debug_sendAMQP {

  # Send the message held in the global variable Body, base64 encoded.
  #   $1 : optional. When it is "activate", config.card is encoded and sent too.
  # Sets the globals initBigBro and status. Exits the shell with 1 when the
  # send command fails.
  typeset indent level cardOption encodedBody

  # Encode message Body
  encodedBody=$( echo "${Body}" | base64 -w 0 )

  # Send config.card ?
  case "${1}" in
  activate)
    # Encode config.card
    base64 -w 0 ${SUBMIT_DIR}/config.card > ${SUBMIT_DIR}/config.card.base64
    # Prepare additional option
    cardOption="-f ${SUBMIT_DIR}/config.card.base64"
    initBigBro=true
    ;;
  *)
    cardOption=
    initBigBro=false
    ;;
  esac

  # Only cosmetics : stack file
  if [ X${ActivateStackFilling} = Xtrue ] ; then
    # one blank per level of the call stack
    indent=""
    level=0
    while [ ${level} -lt ${IGCM_debug_LenStack} ]; do
      indent="${indent} "
      (( level = level + 1 ))
    done
    printf '%s' "${indent}" >> ${StackFileLocation}/${StackFileName}
    # Log to stack file using human readable format
    echo "${Body}" >> ${StackFileLocation}/${StackFileName}
  fi

  # Send the message, through mail or through the AMQP client
  if [ X${BigBrotherChannel} = XMAIL ] ; then
    IGCM_debug_send_AMQP_msg__MAILTUNNEL "${encodedBody}"
    status=$?
  else
    sendAMQPMsg -h localhost -p 5672 ${cardOption} -b ${encodedBody}
    status=$?
  fi

  # Any failure is fatal
  if [ ${status} -gt 0 ] ; then
    IGCM_debug_Print 2 "IGCM_debug_Push/PopStack/ActivateBigBro : command sendAMQPMsg failed error code ${status}"
    echo sendAMQPMsg -h localhost -p 5672 -b "${Body}"
    exit 1
  fi
}
315
316#D-#==================================================================
317#D-function IGCM_debug_CallStack
318#D-* Purpose: Print the call stack tree from the oldest to the youngest (opposite of the display standard)
319#D-
function IGCM_debug_CallStack {
  # Print one line per stacked call, oldest call first, each line indented by
  # its depth. Nothing is printed unless DEBUG_debug evaluates to true.
  typeset depth slot indent

  ( $DEBUG_debug ) || return 0

  depth=0
  indent=""
  while [ ${depth} -ne ${IGCM_debug_LenStack} ]; do
    # the youngest call sits in slot 0, so the oldest one is read first
    slot=$(( IGCM_debug_LenStack - depth - 1 ))
    # Cosmetics : one blank per level
    printf -- '%s' "${indent}"
    echo "${depth} - ${IGCM_debug_Stack[${slot}]}" "(${IGCM_debug_StackArgs[${slot}]})"
    indent="${indent} "
    (( depth = depth + 1 ))
  done
}
336
337#D-#==================================================================
338#D-function IGCM_debug_PushStack
339#D-* Purpose: Push a function name in the stack
340#D-
function IGCM_debug_PushStack {
  # Push a function call on top of the call stack.
  #   $1     : name of the function being entered
  #   $2 ... : its arguments, stored as one string where elements are separated by ","
  # Does nothing unless DEBUG_debug evaluates to true.
  if ( $DEBUG_debug ) ; then
    typeset decal inputs startTime_ms

    # Only cosmetics : stack file
    if [ X${ActivateStackFilling} = Xtrue ] ; then
      echo >> ${StackFileLocation}/${StackFileName}
      decal=0
      while [ ${decal} -lt ${IGCM_debug_LenStack} ]; do
        printf ' ' >> ${StackFileLocation}/${StackFileName}
        (( decal = decal + 1 ))
      done

      # Fill the stack file
      echo "> ${IGCM_debug_LenStack} : ${@}" >> ${StackFileLocation}/${StackFileName}
    fi

    # Save input list in the LOCAL indexed array declared above
    # (a global named INPUTS used to be filled here and leaked to every caller)
    set -A inputs -- "$@"

    # Get timing information
    startTime_ms=$( IGCM_debug_getDate_ms )

    # We add function call name on beginning of the stack
    set +A IGCM_debug_Stack -- ${1} ${IGCM_debug_Stack[*]}

    # Save timing in milliseconds in an indexed array
    set +A IGCM_debug_StackTiming -- ${startTime_ms} ${IGCM_debug_StackTiming[*]}

    # We include the "null" Args in the beginning of the StackArgs
    # ("--" keeps stored arguments starting with a dash from being read as options of set)
    set +A IGCM_debug_StackArgs -- ${NULL_STR} ${IGCM_debug_StackArgs[*]}

    # Then, when there are arguments, they replace the "null" placeholder.
    # Replacing blank separated list by comma separated list of quoted elements (except the first and last element)
    if [ $# -gt 1 ]; then
      IGCM_debug_StackArgs[0]=$(echo ${inputs[*]:1} | sed -e "s/\ /\",\"/g" )
    fi

    # Increment LenStack
    (( IGCM_debug_LenStack = IGCM_debug_LenStack + 1 ))

    #IGCM_debug_CallStack
  fi
}
385
386#D-#==================================================================
387#D-function IGCM_debug_PopStack
388#D-* Purpose: Pop a function name in the stack
389#D-
function IGCM_debug_PopStack {
  # Pop a function call from the call stack.
  #   $1 : name of the function being left; it must match the top of the stack,
  #        otherwise IGCM_debug_Exit is called with the received arguments.
  # For copy, rebuild and NCO/CDO commands, the size of the handled entities and
  # the timing are handed to IGCM_debug_PrintInfosActions.
  # When ExitFlag is true and ActivateBigBro is true, a failure message is sent.
  # Does nothing unless DEBUG_debug evaluates to true.
  if ( $DEBUG_debug ) ; then
    typeset i decal command arguments startTime_ms endTime_ms
    typeset instrumentation dest prefix keepGoing
    # they are not typeset because they are sent "by address" to child functions
    # we unset them to avoid "memory effect"
    unset fileList source

    # INTRODUCE SIMPLE ERROR GENERATOR TO TEST SUPERVISOR
    # PROBABILITY ERROR IS 0.0001 PER COMMAND OR FUNCTION CALL
    # THERE ARE ~500 COMMAND OR FUNCTION CALL PER PERIOD
    # ONLY WHEN TaskType is "computing".
    if [ X${ActivateBigBro} = Xtrue ] ; then
      if [ X${TaskType} = Xcomputing ]; then
        if ( ${RandomError} ) ; then
          if [ $((RANDOM%10000)) -le 10 ] ; then
            IGCM_debug_Print 1 "Random error has been triggered"
            if [ X${ActivateStackFilling} = Xtrue ] ; then
              echo "RANDOM ERROR" >> ${StackFileLocation}/${StackFileName}
            fi
            ExitFlag=true
          fi
        fi
      fi
    fi

    if [ "${IGCM_debug_Stack[0]}" = "${1}" ]; then
      # Everything is cool

      # Get timing information
      endTime_ms=$( IGCM_debug_getDate_ms )

      # Save Stack information before popping the stack
      command=${IGCM_debug_Stack[0]}

      # Go from comma separated list of quoted elements (except the first and the last element)
      # to unquoted space separated elements in an array
      set -A arguments -- $( echo ${IGCM_debug_StackArgs[0]} | sed -e "s/\",\"/\ /g" )

      # Save Stack information before popping the stack
      startTime_ms=${IGCM_debug_StackTiming[0]}

      # Pop the stack
      (( IGCM_debug_LenStack = IGCM_debug_LenStack - 1 ))
      set -A IGCM_debug_Stack -- ${IGCM_debug_Stack[*]:1}
      set -A IGCM_debug_StackArgs -- ${IGCM_debug_StackArgs[*]:1}
      set -A IGCM_debug_StackTiming -- ${IGCM_debug_StackTiming[*]:1}
    else
      echo 'IGCM_debug_Exit : stack is corrupted ! LenStack =' ${IGCM_debug_LenStack}
      IGCM_debug_Exit $@
    fi

    # Special actions depending on command to prepare IGCM_debug_PrintInfosActions call
    # We are interested in:
    #  0. Which command performs the work
    #  1. Size of entity we are working with
    #  2. Where are we reading
    #  3. Where are we writing
    #  4. How long it took

    instrumentation=false

    case ${command} in
    # Classical copy (only files are given to IGCM_sys_Cp as options)
    IGCM_sys_Cp)
      instrumentation=true
      # All but the latest
      fileList=${arguments[*]:0:${#arguments[*]}-1}
      # just need the first file to get the directory
      source=${arguments[0]}
      # Nothing but the latest
      dest=${arguments[${#arguments[*]}-1]}
      # Size of file whose name are stored in a list
      entitySize=$( IGCM_debug_sizeOfTabContent fileList ${dest} )
      ;;

    # Copy from archive machine or from buffer
    IGCM_sys_Get|IGCM_sys_GetBuffer)
      instrumentation=true
      if [ ${#arguments[*]} -eq 2 ] ; then
        source=${arguments[0]}
        dest=${arguments[1]}
        # Size of file whose name are stored in a variable
        entitySize=$( IGCM_debug_sizeOfTabContent source ${dest} )
      elif ( [ ${#arguments[*]} -eq 3 ] && [ "${arguments[0]}" = '/l' ] ) ; then
        # IGCM_sys_Get /l liste_file[*] /ccc/scratch/cont003/dsm/p86denv/RUN_DIR/985998_14754/
        # Keep the array name hosting the all list
        eval set +A fileList \${${arguments[1]}}
        # just need the first file to get the directory
        source=${fileList[0]}
        dest=${arguments[2]}
        # Size of file whose name are stored in a list
        entitySize=$( IGCM_debug_sizeOfTabContent fileList[*] ${dest} )
      elif [ ${#arguments[*]} -ge 3 ] ; then
        # Several files followed by the destination.
        # (this test used to read "[ [ ... ]", which is malformed, so the branch was never taken)
        # All but the latest
        fileList=${arguments[*]:0:${#arguments[*]}-1}
        # just need the first file to get the directory
        source=${arguments[0]}
        # Nothing but the latest
        dest=${arguments[${#arguments[*]}-1]}
        # Size of file whose name are stored in a list
        entitySize=$( IGCM_debug_sizeOfTabContent fileList ${dest} )
      fi
      ;;

    # Copy from compute node or copy to archive/buffer
    IGCM_sys_Get_Master|IGCM_sys_Get_Dir|IGCM_sys_Put_Out|IGCM_sys_PutBuffer_Out)
      instrumentation=true
      source=${arguments[0]}
      dest=${arguments[1]}
      # Size of file whose name are stored in a variable
      entitySize=$( IGCM_debug_sizeOfTabContent source ${dest} )
      ;;

    # Rebuild command
    IGCM_sys_rebuild|IGCM_sys_rebuild_station)
      instrumentation=true
      # All but the first
      fileList=${arguments[*]:1:${#arguments[*]}-1}
      # just need a file to get the directory
      source=${arguments[1]}
      # Nothing but the first
      dest=${arguments[0]}
      # Size of file whose name are stored in a list
      entitySize=$( IGCM_debug_sizeOfTabContent fileList ${dest} )
      ;;

    # NCO commands
    IGCM_sys_ncrcat|IGCM_sys_ncecat|IGCM_sys_ncra|IGCM_sys_ncks|IGCM_sys_cdo)
      # Example of what we want to catch : only filenames in those command lines
      # IGCM_sys_ncrcat -O -v ${list_var_final_ncrcat} ${OUT_SE[*]} ${RESULT_SE}
      # IGCM_sys_ncrcat --hst -v ${liste_coord}${var} ${file1} ${liste_file_tmp[*]} ${file_out}
      # IGCM_sys_ncrcat -p ${dir} ${liste_file_tmp} --output ${output}
      # IGCM_sys_ncrcat -x -v ${list_var} -p ${dir} ${liste_file_tmp} --output ${output}
      instrumentation=true
      keepGoing=true
      prefix=.
      i=0
      while ( ${keepGoing} ) ; do
        # The last one is not interesting. "-ge" rather than "-eq" : the "-p"
        # branch moves i forward by two and may step over the last index, in
        # which case an equality test would never stop the loop.
        if [ ${i} -ge $(( ${#arguments[*]} - 1 )) ] ; then
          keepGoing=false
        # look after "-p" option. Path prefix is the following arguments
        elif [ "${arguments[${i}]}" = "-p" ] ; then
          ((i = i + 1))
          prefix=${arguments[${i}]}
          ((i = i + 1))
        # looking for files
        elif [ -f "${prefix}/${arguments[${i}]}" ] ; then
          fileList="${fileList} ${prefix}/${arguments[${i}]}"
          ((i = i + 1))
        # other options are not interesting
        else
          ((i = i + 1))
        fi
      done

      # just need one file to get the directory
      source=$( echo ${fileList} | gawk '{print $1}' )
      # Nothing but the latest
      dest=${arguments[${#arguments[*]}-1]}
      # Size of file whose name are stored in a list
      entitySize=$( IGCM_debug_sizeOfTabContent fileList ${dest} )
      ;;
    esac

    # Print information related to instrumentation
    ( ${instrumentation} ) && IGCM_debug_PrintInfosActions ${command} ${entitySize} ${startTime_ms} ${endTime_ms} ${dest} ${source}

    # Only cosmetics : stack file
    if [ X${ActivateStackFilling} = Xtrue ] ; then
      decal=0
      while [ ${decal} -lt ${IGCM_debug_LenStack} ]; do
        printf ' ' >> ${StackFileLocation}/${StackFileName}
        (( decal = decal + 1 ))
      done
    fi

    if ( ${ExitFlag} ) ; then
      # Inform the stack file
      if [ X${ActivateStackFilling} = Xtrue ] ; then
        echo '!!! ExitFlag has been activated !!!' >> ${StackFileLocation}/${StackFileName}
      fi

      # Unplugged message 4900 handling for now. To ease downstream treatment.
      if [ X${ActivateBigBro} = Xtrue ] ; then
        if [ X${TaskType} = Xcomputing ]; then
          # RabbitMQ message code "COMPUTING JOBs COMMAND FAILURE"
          code=1900
        elif [ X${TaskType} = Xpost-processing ]; then
          # RabbitMQ message code "POST-PROCESSING JOBs COMMAND FAILURE"
          code=2900
        elif [ X${TaskType} = Xchecking ]; then
          # RabbitMQ message code "POST-PROCESSING FROM CHECKER JOBs COMMAND FAILURE"
          code=3900
        fi
        # RabbitMQ message body
        Body=$( echo "{${genericSimulationID},\"msgCode\":\"${code}\",\"msgUID\":\"$(uuidgen)\",\"command\":\"${command}\",\"msgTimestamp\":\"$( date +"%Y-%m-%dT%H:%M:%S.%N%z" )\"}" )

        # Fill the rabbitMQ queue
        IGCM_debug_sendAMQP
      fi
    else
      # Inform the stack file
      if [ X${ActivateStackFilling} = Xtrue ] ; then
        echo "< ${IGCM_debug_LenStack} : ${@}" >> ${StackFileLocation}/${StackFileName}
      fi
    fi

    # Reset array if necessary
    if [ ${IGCM_debug_LenStack} = 0 ]; then
      #echo
      #IGCM_debug_Print 3 "Clean stack array"
      #echo
      unset IGCM_debug_Stack
      unset IGCM_debug_StackArgs
      unset IGCM_debug_StackTiming
      IGCM_debug_Stack[0]=${NULL_STR}
      IGCM_debug_StackArgs[0]=${NULL_STR}
      IGCM_debug_StackTiming[0]=${NULL_STR}
    fi
  fi
  #IGCM_debug_CallStack
}
617
618#D-#==================================================================
619#D-function IGCM_debug_BigBro_Initialize
620#D-* Purpose: switch rabbitMQ on
621#D-
function IGCM_debug_BigBro_Initialize {
  # Switch supervision messages on for this job. Only acts when BigBrother is true:
  #   - builds the global genericSimulationID used as the head of every message
  #   - sends the "job started" message matching TaskType (computing or post-processing)
  #   - saves and sends the project accounting details
  #   - sets ActivateBigBro to true
  IGCM_debug_PushStack "IGCM_debug_BigBro_Initialize"

  typeset postProcessingIDLength postProcessingName postProcessingDate postProcessingDimn postProcessingComp postProcessingFile
  typeset accountingFile

# Message type standard fields:
# https://github.com/Prodiguer/prodiguer-docs/wiki/MQ-Standard-Message-Fields

# Message type dictionary and custom fields:
# https://github.com/Prodiguer/prodiguer-docs/wiki/Monitoring-Message-Dictionary

  if [ X${BigBrother} = Xtrue ] ; then
    # create a unique ID for this specific job
    jobuid=$(uuidgen)

    # get the assigned id by the scheduler for that job
    IGCM_sys_getJobSchedulerID jobSchedulerID

    if [ X${TaskType} = Xcomputing ]; then
      if ( ${FirstInitialize} ) ; then
        # RabbitMQ message code "BEGIN A SIMULATION"
        code=0000
        # create and persist a unique id for this simulation
        simuid=$(uuidgen)
        IGCM_card_WriteOption ${SUBMIT_DIR}/run.card Configuration simuid ${simuid}
        # Standard fields for the first message
        genericSimulationID=$( echo "\"msgApplication\":\"monitoring\",\"msgProducer\":\"libigcm\",\"msgProducerVersion\":\"${libIGCMVersion}\",\"activity\":\"IPSL\",\"name\":\"${config_UserChoices_JobName}\",\"experiment\":\"${config_UserChoices_ExperimentName}\",\"space\":\"${config_UserChoices_SpaceName}\",\"model\":\"${config_UserChoices_TagName}\",\"startDate\":\"${config_UserChoices_DateBegin}\",\"endDate\":\"${config_UserChoices_DateEnd}\",\"login\":\"${LOGIN}\",\"centre\":\"${CENTER}\",\"machine\":\"${MASTER}\",\"simuid\":\"${simuid}\",\"jobuid\":\"${jobuid}\"" )
        # RabbitMQ message body with specific fields associated message codes treated here
        Body=$( echo "{${genericSimulationID},\"msgCode\":\"${code}\",\"accountingProject\":\"${PROJECT}\",\"jobWarningDelay\":\"${jobWarningDelay}\",\"jobSchedulerID\":\"${jobSchedulerID}\",\"jobSubmissionPath\":\"${SUBMIT_DIR}\",\"msgUID\":\"$(uuidgen)\",\"msgTimestamp\":\"$( date +"%Y-%m-%dT%H:%M:%S.%N%z" )\"}" )
        # Fill the rabbitMQ queue (the config.card in use will be sent)
        IGCM_debug_sendAMQP activate
      else
        # RabbitMQ message code "A NEW COMPUTING JOB IS RUNNING PART OF A SIMULATION"
        code=1000
        # retrieve this simulation's unique id
        IGCM_card_DefineVariableFromOption ${SUBMIT_DIR}/run.card Configuration simuid
        simuid=${run_Configuration_simuid}
        # Using standard fields for message others than the first one. Still subject to change
        genericSimulationID=$( echo "\"msgApplication\":\"monitoring\",\"msgProducer\":\"libigcm\",\"msgProducerVersion\":\"${libIGCMVersion}\",\"simuid\":\"${simuid}\",\"jobuid\":\"${jobuid}\"" )
        # RabbitMQ message body with specific fields associated message codes treated here
        Body=$( echo "{${genericSimulationID},\"msgCode\":\"${code}\",\"accountingProject\":\"${PROJECT}\",\"jobWarningDelay\":\"${jobWarningDelay}\",\"jobSchedulerID\":\"${jobSchedulerID}\",\"jobSubmissionPath\":\"${SUBMIT_DIR}\",\"msgUID\":\"$(uuidgen)\",\"msgTimestamp\":\"$( date +"%Y-%m-%dT%H:%M:%S.%N%z" )\"}" )
        # Fill the rabbitMQ queue
        IGCM_debug_sendAMQP
      fi

      # NOT VERY NICE BUT ... IT WORKS
      # Be sure that the genericSimulationID will be small from now on
      # Using standard fields for messages others than the first one. Still subject to change
      genericSimulationID=$( echo "\"msgApplication\":\"monitoring\",\"msgProducer\":\"libigcm\",\"msgProducerVersion\":\"${libIGCMVersion}\",\"simuid\":\"${simuid}\",\"jobuid\":\"${jobuid}\"" )

    elif [ X${TaskType} = Xpost-processing ]; then
      # RabbitMQ message code "A NEW POST-PROCESSING JOB IS RUNNING PART OF A SIMULATION"
      code=2000
      # retrieve this simulation's unique id
      IGCM_card_DefineVariableFromOption ${SUBMIT_DIR}/run.card Configuration simuid
      simuid=${run_Configuration_simuid}
      # Using standard fields for message others than the first one. Still subject to change
      genericSimulationID=$( echo "\"msgApplication\":\"monitoring\",\"msgProducer\":\"libigcm\",\"msgProducerVersion\":\"${libIGCMVersion}\",\"simuid\":\"${simuid}\",\"jobuid\":\"${jobuid}\"" )

      # Specify the post-processing task we are dealing with.
      # Script_Post_Output is split on dots : field 1 is the name, field 2 the date;
      # the number of dots tells which extra fields are present.
      postProcessingIDLength=$( echo "${Script_Post_Output}" | tr -d -c "\." | wc -c )
      postProcessingName=$( echo "${Script_Post_Output}" | gawk -F. '{print $1}' )
      postProcessingDate=$( echo "${Script_Post_Output}" | gawk -F. '{print $2}' )
      postProcessingDimn="null"
      postProcessingComp="null"
      postProcessingFile="null"
      if [ ${postProcessingIDLength} -eq 2 ] ; then
        postProcessingDimn=$( echo "${Script_Post_Output}" | gawk -F. '{print $3}' )
      elif [ ${postProcessingIDLength} -eq 4 ] ; then
        postProcessingComp=$( echo "${Script_Post_Output}" | gawk -F. '{print $4}' )
        postProcessingFile=$( echo "${Script_Post_Output}" | gawk -F. '{print $5}' )
      fi

      # RabbitMQ message body with specific fields associated message codes treated here
      Body=$( echo "{${genericSimulationID},\"msgCode\":\"${code}\",\"accountingProject\":\"${PROJECT}\",\"jobWarningDelay\":\"${jobWarningDelay}\",\"jobSchedulerID\":\"${jobSchedulerID}\",\"jobSubmissionPath\":\"${SUBMIT_DIR}\",\"msgUID\":\"$(uuidgen)\",\"msgTimestamp\":\"$( date +"%Y-%m-%dT%H:%M:%S.%N%z" )\",\"postProcessingName\":\"${postProcessingName}\",\"postProcessingDate\":\"${postProcessingDate}\",\"postProcessingDimn\":\"${postProcessingDimn}\",\"postProcessingComp\":\"${postProcessingComp}\",\"postProcessingFile\":\"${postProcessingFile}\"}" )
      # Fill the rabbitMQ queue
      IGCM_debug_sendAMQP
    fi

    # The accounting file name carries the current minute : build it ONCE so that
    # the file written and the file sent are the same one even if the clock moves
    # to the next minute in between.
    accountingFile=cpt_${CENTER}_${PROJECT}_$( date +"%Y%m%d_%H%M" ).dat

    # Save project accounting details in a file
    IGCM_sys_projectAccounting ${accountingFile}

    # And send it
    IGCM_debug_sendAMQP_projectAccounting ${accountingFile}

    # Turn the flag on
    ActivateBigBro=true
  fi
  IGCM_debug_PopStack "IGCM_debug_BigBro_Initialize"
}
712
713#D-#==================================================================
714#D-function IGCM_debug_BigBro_Finalize
715#D-* Purpose: Finalize rabbitMQ messages exchanges
716#D-
function IGCM_debug_BigBro_Finalize {
  IGCM_debug_PushStack "IGCM_debug_BigBro_Finalize"

  # Build the closing monitoring message for the current task and hand it
  # to IGCM_debug_sendAMQP. Nothing is sent unless DEBUG_debug evaluates to
  # true and ActivateBigBro equals "true".
  #
  # Reads : DEBUG_debug ActivateBigBro TaskType simulationIsOver ExitFlag
  #         genericSimulationID
  # Writes: code FlushAMQP Body
  #
  # Message type standard fields:
  # https://github.com/Prodiguer/prodiguer-docs/wiki/MQ-Standard-Message-Fields
  # Message type dictionary and custom fields:
  # https://github.com/Prodiguer/prodiguer-docs/wiki/Monitoring-Message-Dictionary

  typeset msgUID msgTimestamp

  if ( $DEBUG_debug ) && [ X${ActivateBigBro} = Xtrue ] ; then
    # Pick the message code from the task type and the way the task ended.
    # For any other TaskType, code and FlushAMQP keep their previous values.
    case "${TaskType}" in
    computing)
      if ( ${simulationIsOver} ) ; then
        # "SIMULATION ENDS"
        code=0100
        FlushAMQP=true
      elif ( ${ExitFlag} ) ; then
        # "EXIT THE JOBS BECAUSE ERROR(S) HAS BEEN TRIGGERED"
        code=1999
        FlushAMQP=true
      else
        # "COMPUTING JOB ENDS" (FlushAMQP is left untouched here)
        code=1100
      fi
      ;;
    post-processing)
      # "POST-PROCESSING JOB FAILS" when ExitFlag is set,
      # "POST-PROCESSING JOB ENDS" otherwise.
      if ( ${ExitFlag} ) ; then
        code=2999
      else
        code=2100
      fi
      FlushAMQP=true
      ;;
    checking)
      # Same scheme for the checking task: 3999 when ExitFlag is set,
      # 3100 otherwise.
      if ( ${ExitFlag} ) ; then
        code=3999
      else
        code=3100
      fi
      FlushAMQP=true
      ;;
    esac

    # RabbitMQ message body
    msgUID=$(uuidgen)
    msgTimestamp=$( date +"%Y-%m-%dT%H:%M:%S.%N%z" )
    Body=$( echo "{${genericSimulationID},\"msgCode\":\"${code}\",\"msgUID\":\"${msgUID}\",\"msgTimestamp\":\"${msgTimestamp}\"}" )

    # Fill the rabbitMQ queue
    IGCM_debug_sendAMQP
  fi

  IGCM_debug_PopStack "IGCM_debug_BigBro_Finalize"
}
771
772#D-#==================================================================
773#D-function IGCM_debug_Exit
774#D-* Purpose: Print Call Stack and set ExitFlag to true
775#D-
function IGCM_debug_Exit {
  # Report an error: echo the caller-supplied message, print a banner and
  # the call stack, then raise ExitFlag. This function does not terminate
  # the shell itself.
  #
  # Arguments: free-form message words, echoed after "IGCM_debug_Exit : ".
  # Writes   : ExitFlag (set to true)
  IGCM_debug_PushStack "IGCM_debug_Exit"

  echo "IGCM_debug_Exit : " "${@}"

  # Banner framed by one empty line above and one below.
  printf '%s\n' \
    "" \
    "!!!!!!!!!!!!!!!!!!!!!!!!!!" \
    "!!   ERROR TRIGGERED    !!" \
    "!!   EXIT FLAG SET      !!" \
    "!------------------------!" \
    ""

  IGCM_debug_CallStack
  ExitFlag=true

  IGCM_debug_PopStack "IGCM_debug_Exit"
}
789
790#D-#==================================================
791#D-function IGCM_debug_Verif_Exit
792#D-* Purpose: exit with number 1 if ExitFlag is true
793#D-
function IGCM_debug_Verif_Exit {
  # React to a previously raised ExitFlag according to the task type.
  #
  #  - computing       : write PeriodState "Fatal" in run.card, print the
  #                      call stack, send the mail and the final rabbitMQ
  #                      message, then exit 1.
  #  - post-processing : depending on postProcessingName and
  #                      postProcessingStopLevel, write PeriodState
  #                      "Fatal.${Script_Post_Output}" in run.card; then
  #                      exit 1 only when SpaceName is PROD.
  #  - checking        : only prints a message.
  #
  # Reads : ExitFlag TaskType SUBMIT_DIR postProcessingName
  #         postProcessingStopLevel Script_Post_Output
  #         config_UserChoices_SpaceName
  # Writes: StopAll
  # NOTE(review): postProcessingName is expected to have been set before
  # this function runs; when it is empty no pattern matches and run.card is
  # left untouched.
  if ( ${ExitFlag} ) ; then
    echo "IGCM_debug_Verif_Exit : Something wrong happened previously."
    echo "IGCM_debug_Verif_Exit : ERROR and EXIT keyword will help find out where."
    # Only computing TaskType stops the job for now.
    if [ X${TaskType} = Xcomputing ] ; then
      IGCM_card_WriteOption ${SUBMIT_DIR}/run.card Configuration PeriodState "Fatal"
      echo "                        EXIT THE JOB."
      echo
      IGCM_debug_CallStack

      # Mail notification
      IGCM_sys_SendMail

      # Inform the rabbitMQ queue
      IGCM_debug_BigBro_Finalize

      # And Good Bye
      date
      exit 1

    elif [ X${TaskType} = Xpost-processing ] ; then
      # Test if we need to stop the computing job.
      # The flag starts out false and is tested under the very same name
      # below: an unset flag inside "( ... )" would expand to an empty
      # subshell, which succeeds and would always mark the run as Fatal.
      StopAll=false
      # An unset stop level is treated as 0, i.e. never stop the computing job.
      typeset stopLevel=${postProcessingStopLevel:-0}
      case ${postProcessingName} in
      atlas*|monitoring*|metrics*)
        [ ${stopLevel} -gt 2 ] && StopAll=true ;;
      create_*)
        [ ${stopLevel} -gt 1 ] && StopAll=true ;;
      rebuild*|pack_*)
        [ ${stopLevel} -gt 0 ] && StopAll=true ;;
      esac
      # Notify the computing job that something wrong happened and stop it.
      if [ X${StopAll} = Xtrue ] ; then
        IGCM_card_WriteOption ${SUBMIT_DIR}/run.card Configuration PeriodState "Fatal.${Script_Post_Output}"
      fi

      # If SpaceName is PROD we stop when post_processing failed
      if [ X${config_UserChoices_SpaceName} = XPROD ] ; then
        echo "                        EXIT THE POST-PROCESSING JOB."
        echo
        IGCM_debug_CallStack

        # Inform the rabbitMQ queue
        IGCM_debug_BigBro_Finalize

        # And Good Bye
        date
        exit 1
      else
        echo "In config.card the variable SpaceName is not in PROD"
        echo "              SO WE DO NOT EXIT THE JOB."
        echo
        date
      fi
    elif [ X${TaskType} = Xchecking ] ; then
      echo "Nothing will happen for now"
    fi
  fi
}
851
852#D-#==================================================================
853#D-function IGCM_debug_Print
854#D-* Purpose: Print arguments according to a level of verbosity.
855#D-
function IGCM_debug_Print
{
  # Print each remaining argument on its own line, prefixed by the current
  # date/time and a tag that depends on the level.
  #
  # $1      : level of the message; printed only when level <= Verbosity
  #           and level is 1, 2 or 3.
  # $2      : optional "-e", in which case "echo -e" is used for printing.
  # others  : the messages, one output line per argument.
  typeset level=$1
  shift

  typeset cmd_echo="echo"
  if [ X"${1}" = X"-e" ]; then
    cmd_echo="echo -e"
    shift
  fi

  typeset i tag=""

  if [ ${level} -le ${Verbosity} ] ; then
    # The tag gets more leading dashes as the level increases.
    case "${level}" in
    1) tag="--Debug1-->" ;;
    2) tag="--------Debug2-->" ;;
    3) tag="--------------Debug3-->" ;;
    esac

    # Levels other than 1, 2 and 3 have no tag and print nothing.
    if [ -n "${tag}" ] ; then
      for i in "$@" ; do
        # ${i} is left unquoted on purpose: the message is word-split
        # before being echoed, exactly as before.
        ${cmd_echo} $(date +"%Y-%m-%d %T") "${tag}" ${i}
      done
    fi
  fi
}
883
884#D-#==================================================================
885#D-function IGCM_debug_PrintVariables
#D-* Purpose: Print the shell variables whose name matches a pattern,
#D-           according to a level of verbosity.
function IGCM_debug_PrintVariables
{
  # Print, through IGCM_debug_Print, the lines of the "set" output that
  # start with a given pattern, with every single quote removed.
  #
  # $1 : level passed on to IGCM_debug_Print
  # $2 : pattern anchored at the beginning of each "set" output line
  #
  # Writes: list (not local; holds the matching lines)
  typeset level=$1
  shift
  typeset pattern=$1

  list=$( set | grep ^${pattern} | tr -d "'" )

  # Nothing matched: stay silent.
  if [ -n "${list}" ] ; then
    IGCM_debug_Print ${level} ${list}
  fi
}
899
900#D-#==================================================================
901#D-function IGCM_debug_PrintInfosActions
902#D-* Purpose: Print information related to instrumentation
function IGCM_debug_PrintInfosActions
{
  # Record instrumentation about one action: size, duration, throughput
  # and the directories involved. The record is appended to the stack file
  # when ActivateStackFilling is "true" and sent through IGCM_debug_sendAMQP
  # (message code 7000) when ActivateBigBro is "true".
  #
  # $1 : action name
  # $2 : entity size given as "<size in Ko>|<size in Mo>"
  # $3 : start time in milliseconds
  # $4 : end time in milliseconds
  # $5 : destination (file or directory)
  # $6 : source (file or directory)
  #
  # Writes: instrumentationContent, Body (not local)
  typeset actionType=$1
  typeset entitySize=$2
  typeset start_ms=$3
  typeset end_ms=$4

  typeset dest=$5
  typeset source=$6

  typeset diff_ms divisor_ms entitySizeKo entitySizeMo flux_Ko_ms flux_Ko_s flux_Mo_s
  typeset dirFrom dirTo

  diff_ms=$(( ${end_ms:-0} - ${start_ms:-0} ))

  # An action can complete within the same millisecond. Dividing by zero
  # would leave the throughput empty and break the computations below, so
  # the throughput is computed over at least 1 ms. The reported duration
  # stays the measured one.
  divisor_ms=${diff_ms}
  [ ${divisor_ms} -gt 0 ] || divisor_ms=1

  # Split "<Ko>|<Mo>" into its first and second field.
  entitySizeKo=${entitySize%%\|*}
  case "${entitySize}" in
  *\|*)
    entitySizeMo=${entitySize#*\|}
    entitySizeMo=${entitySizeMo%%\|*}
    ;;
  *)
    entitySizeMo=""
    ;;
  esac

  # Throughput in Ko / ms
  flux_Ko_ms=$( echo "scale=6;${entitySizeKo:-0}/${divisor_ms}" | bc )

  # Throughput in Ko / s. Done with bc as well: the value is a decimal
  # number, which integer-only shell arithmetic cannot handle.
  flux_Ko_s=$( echo "scale=6;${flux_Ko_ms}*1000" | bc )

  # Throughput in Mo / s
  flux_Mo_s=$( echo "scale=6;${flux_Ko_s}/1024" | bc )

  # Resolve the directory of the destination: the path itself when it is a
  # directory, its parent otherwise. An empty argument gives an empty result.
  dirTo=""
  if [ -d "${dest}" ] ; then
    dirTo=$( readlink -f "${dest}" )
  elif [ -n "${dest}" ] ; then
    dirTo=$( readlink -f "$( dirname "${dest}" )" )
  fi

  # Same for the source.
  dirFrom=""
  if [ -d "${source}" ] ; then
    dirFrom=$( readlink -f "${source}" )
  elif [ -n "${source}" ] ; then
    dirFrom=$( readlink -f "$( dirname "${source}" )" )
  fi

  instrumentationContent=$( echo "\"actionName\":\"${actionType}\",\"size_Mo\":\"${entitySizeMo}\",\"duration_ms\":\"${diff_ms}\",\"throughput_Mo_s\":\"${flux_Mo_s}\",\"dirFrom\":\"${dirFrom}\",\"dirTo\":\"${dirTo}\"" )

  if [ X${ActivateStackFilling} = Xtrue ] ; then
    echo "{${instrumentationContent}}" >> ${StackFileLocation}/${StackFileName}
  fi

  # Inform the rabbitMQ queue
  if [ X${ActivateBigBro} = Xtrue ] ; then
    # RabbitMQ message body
    Body=$( echo "{${genericSimulationID},\"msgCode\":\"7000\",\"msgUID\":\"$(uuidgen)\",${instrumentationContent},\"msgTimestamp\":\"$( date +"%Y-%m-%dT%H:%M:%S.%N%z" )\"}" )
    # Fill the rabbitMQ queue
    IGCM_debug_sendAMQP
  fi
}
961
962#D-#==================================================================
963#D-function IGCM_debug_Check
964#D- * Purpose: Check the present file by comparison with a reference file
function IGCM_debug_Check
{
  # Self-test of libIGCM_debug: run IGCM_debug_Test.ksh and compare its
  # output, timestamps removed, with the reference file IGCM_debug_Test.ref.
  #
  # Exits 2 when libIGCM is not defined, 3 when Verbosity is not defined and
  # 4 when the output differs from the reference. In the last case the
  # output is saved in IGCM_debug_Test.ref.failed in the current directory,
  # as announced by the error message.
  #
  # Writes: status (not local)

  #---------------------
  if [ ! -n "${libIGCM}" ] ; then
    echo "Check libIGCM_debug ..........................................[ FAILED ]"
    echo "--Error--> libIGCM variable is not defined"
    exit 2
  fi

  #---------------------
  if [ ! -n "${Verbosity}" ] ; then
    echo "Check libIGCM_debug ..........................................[ FAILED ]"
    echo "--Error--> Verbosity variable is not defined"
    exit 3
  fi

  #---------------------
  typeset testDir="${libIGCM}/libIGCM_debug"
  # sed expression removing the leading "YYYY-MM-DD HH:MM:SS " timestamps,
  # which differ from one run to the next.
  typeset stripTimestamps="s:[0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9] [0-9][0-9]\:[0-9][0-9]\:[0-9][0-9] ::g"

  diff "${testDir}/IGCM_debug_Test.ref" <( "${testDir}/IGCM_debug_Test.ksh" | sed -e "${stripTimestamps}" ) > /dev/null 2>&1
  status=$?

  if [ ${status} -eq 0 ] ; then
    echo "Check libIGCM_debug ..............................................[ OK ]"
  else
    echo "Check libIGCM_debug ..........................................[ FAILED ]"
    echo "--Error--> Execution of ${libIGCM}/libIGCM_debug/IGCM_debug_Test.ksh"
    echo "           has produced the file IGCM_debug_Test.ref.failed"
    echo "           Please analyse differences with the reference file by typing:"
    echo "           diff IGCM_debug_Test.ref.failed ${libIGCM}/libIGCM_debug/IGCM_debug_Test.ref"
    echo "           Report errors to the author: Patrick.Brockmann@cea.fr"
    # Actually produce the file the message refers to, then show the
    # differences with the reference.
    "${testDir}/IGCM_debug_Test.ksh" | sed -e "${stripTimestamps}" > IGCM_debug_Test.ref.failed
    diff "${testDir}/IGCM_debug_Test.ref" IGCM_debug_Test.ref.failed
    exit 4
  fi
  #---------------------
}
Note: See TracBrowser for help on using the repository browser.