source: trunk/libIGCM/libIGCM_debug/libIGCM_debug.ksh @ 1282

Last change on this file since 1282 was 1282, checked in by sdipsl, 8 years ago
  • Add a postProcessingStopLevel mechanism. see #276
  • Property licence set to
    The following licence information concerns ONLY the libIGCM tools
    ==================================================================

    Copyright © Centre National de la Recherche Scientifique CNRS
    Commissariat à l'Énergie Atomique CEA

    libIGCM : Library for Portable Models Computation of IGCM Group.

    IGCM Group is the french IPSL Global Climate Model Group.

    This library is a set of shell scripts and functions whose purpose is
    the management of the initialization, the launch, the transfer of
    output files, the post-processing and the monitoring of data produced
    by any numerical program on any platform.

    This software is governed by the CeCILL license under French law and
    abiding by the rules of distribution of free software. You can use,
    modify and/ or redistribute the software under the terms of the CeCILL
    license as circulated by CEA, CNRS and INRIA at the following URL
    "http://www.cecill.info".

    As a counterpart to the access to the source code and rights to copy,
    modify and redistribute granted by the license, users are provided only
    with a limited warranty and the software's author, the holder of the
    economic rights, and the successive licensors have only limited
    liability.

    In this respect, the user's attention is drawn to the risks associated
    with loading, using, modifying and/or developing or reproducing the
    software by the user in light of its specific status of free software,
    that may mean that it is complicated to manipulate, and that also
    therefore means that it is reserved for developers and experienced
    professionals having in-depth computer knowledge. Users are therefore
    encouraged to load and test the software's suitability as regards their
    requirements in conditions enabling the security of their systems and/or
    data to be ensured and, more generally, to use and operate it in the
    same conditions as regards security.

    The fact that you are presently reading this means that you have had
    knowledge of the CeCILL license and that you accept its terms.
  • Property svn:keywords set to Revision Author Date
File size: 36.3 KB
Line 
1#!/bin/ksh
2
3#**************************************************************
4# Author: Patrick Brockmann, Martial Mancip
5# Contact: Patrick.Brockmann__at__cea.fr Martial.Mancip__at__ipsl.jussieu.fr
6# $Revision::                                          $ Revision of last commit
7# $Author::                                            $ Author of last commit
8# $Date::                                              $ Date of last commit
9# IPSL (2006)
10#  This software is governed by the CeCILL licence see libIGCM/libIGCM_CeCILL.LIC
11#
12#**************************************************************
13
14#==================================================
15# The documentation of this file can be automatically generated
16# if you use the prefix #D- for comments to be extracted.
17# Extract with command: cat lib* | grep "^#D-" | cut -c "4-"
18#==================================================
19
#==================================================
# Verbosity
# High level verbosity: an already defined value is kept, otherwise 3 is used
typeset -i Verbosity=${Verbosity:=3}

#==================================================
# DEBUG_debug
# Low level verbosity: an already defined value is kept, otherwise false is used
: "${DEBUG_debug:=false}"

#==================================================
# RandomError
# GENERATE RANDOM ERROR ; only applies if ( ${DEBUG_debug} )
typeset -r RandomError=false

#==================================================
# NULL_STR
# Default null string
typeset -r NULL_STR="_0_"

#==================================================
# libIGCMVersion
# Current libIGCM tag, check compatibility with *.card
typeset -r libIGCMVersion="2.7"

#==================================================
# ExitFlag (internal debug)
# When true, end the master loop AFTER SAVING FILES
ExitFlag=false

#==================================================
# simulationIsOver
# When we start to run, the simulation is not finished
simulationIsOver=false

#==================================================
# FlushAMQP
# When we start to run, we do not flush AMQP messages
FlushAMQP=false

#==================================================
# Stack of function calls: names, arguments and start times.
# Each array starts with the null string in slot 0 and the stack length is 0.
unset IGCM_debug_Stack IGCM_debug_StackArgs IGCM_debug_StackTiming
IGCM_debug_LenStack=0
IGCM_debug_Stack[0]=${NULL_STR}
IGCM_debug_StackArgs[0]=${NULL_STR}
IGCM_debug_StackTiming[0]=${NULL_STR}
65
#D-#==================================================================
#D-function IGCM_debug_getDate_ms
#D- * Purpose: Print the number of milliseconds since 01-jan-1970
function IGCM_debug_getDate_ms
{
  typeset epochNano keepLen

  # seconds since 01-jan-1970 immediately followed by the nanoseconds field
  epochNano=$( date +%s%N )

  # dropping the 6 trailing digits turns nanoseconds into milliseconds
  keepLen=$(( ${#epochNano} - 6 ))

  echo "${epochNano:0:${keepLen}}"
}
80
#D-#==================================================================
#D-function IGCM_debug_sizeOfTabContent
#D- * Purpose: Give summed size of a list of files
#D- * Usage: IGCM_debug_sizeOfTabContent entityList destination
#D- *        where entityList is the NAME of a variable holding a list of files or directories
#D- *        where destination is either a directory or a file name
#D- * Output: "<sum of du -k sizes>|<that sum divided by 1024>" on stdout
#D- *         Entities that cannot be located locally do not contribute to the sum
function IGCM_debug_sizeOfTabContent
{
  typeset entityListe destination iEntity entity baseName sizeKo sumSizeKo sumSizeMo
  typeset -i i

  # Expand the variable whose name is given as first argument into a local array
  eval "entityListe=( \${${1}} )"
  destination=${2}
  sumSizeKo=0

  # Here we will try to compute size (file or directory size) from local path and not from archive.
  for (( i = 0; i < ${#entityListe[*]}; i += 1 )) ; do
    entity=${entityListe[$i]}
    # remove path /home/login/../ from absolute entity names
    baseName=${entity##/*/}
    # start from scratch so that an entity we cannot locate is neither measured
    # as the current directory nor as the previous entity
    iEntity=""

    if [ -f "${entity}" ] ; then
      # One file or a bunch of files has been copied without renaming from a visible filesystem
      iEntity=${entity}
    elif [ -f "${baseName}" ] ; then
      # One file or a bunch of files has been copied without renaming from a non visible filesystem
      iEntity=${baseName}
    elif [ -f "${destination}" ] ; then
      # a file has been copied and renamed
      iEntity=${destination}
    elif [ -f "${destination}/${baseName}" ] ; then
      # a copy in a directory but not in ${PWD}
      iEntity=${destination}/${baseName}
    elif [ -d "${entity}" ] ; then
      # a directory has been copied from a non remote place
      iEntity=${entity}
    elif [ -d "${destination}/${baseName}" ] ; then
      # a directory has been copied from a remote archive and not renamed
      iEntity=${destination}/${baseName}
    elif [ -d "${destination}" ] ; then
      # a directory has been copied from a remote archive and renamed
      iEntity=${destination}
    fi

    # Nothing found locally for this entity: its size is unknown, skip it
    if [ -z "${iEntity}" ] ; then
      continue
    fi

    sizeKo=$( du --apparent-size -skL "${iEntity}" | gawk '{print $1}' )
    # du may fail (entity vanished, permission denied): do not break the sum
    if [ -n "${sizeKo}" ] ; then
      sumSizeKo=$(( sumSizeKo + sizeKo ))
    fi
  done
  sumSizeMo=$( echo "scale=6;${sumSizeKo}/1024" | bc )
  echo "${sumSizeKo}|${sumSizeMo}"
}
126
#D-#==================================================================
#D-function IGCM_debug_send_AMQP_msg__MAILTUNNEL
#D- * Purpose: Take over AMQP C client using mail as a message recipient
#D- * One argument : base64 encoded message
#D- * Attach encoded config.card when starting the simulation
#D- * Messages are queued in a buffer file under ${R_BUF}; the buffer is mailed
#D- * at initialisation, when the last mail is older than one hour, or on flush

function IGCM_debug_send_AMQP_msg__MAILTUNNEL {

  typeset b64_encoded_msg mail_recipient mail_subject
  typeset buffer send_messages mail_frequency
  typeset last_mail_date__file
  typeset secondsBetweenRefAndLastMail secondsSinceLastMail

  b64_encoded_msg=$1

  mail_recipient="superviseur@ipsl.jussieu.fr"
  mail_subject="[TEMPORARY AMQP CHANNEL]"
  send_messages=0
  mail_frequency=3600 # in seconds
  # used to keep track of when the last mail was sent (maybe to be replaced with global variable)
  last_mail_date__file=${R_BUF}/.stamp.${config_UserChoices_TagName}.${config_UserChoices_JobName}
  # used to accumulate messages before sending them
  buffer=${R_BUF}/.buffer.${config_UserChoices_TagName}.${config_UserChoices_JobName}

  # init
  if [ ! -f "${buffer}" ]; then
    touch "${buffer}"
  fi

  if [ ! -f "${last_mail_date__file}" ]; then
    touch "${last_mail_date__file}"
  else
    # compute last time the file was changed (in seconds)
    secondsBetweenRefAndLastMail=$( stat -c %Y "${last_mail_date__file}" )
    status=$?
    #
    # Only execute this block when the stat command succeeded.
    # The stat command might fail in some circumstance but we consider it is ok to continue anyway.
    if [ ${status} -eq 0 ] ; then
      secondsSinceLastMail=$(( $(date +%s) - secondsBetweenRefAndLastMail ))
      # send message when exceeding threshold
      [ ${secondsSinceLastMail} -gt ${mail_frequency} ] && send_messages=1
    fi
  fi

  # queue messages in the buffer
  echo ${b64_encoded_msg} >> "${buffer}"

  # send mail
  if [ "X${initBigBro}" = Xtrue ] ; then
    # first mail of the simulation: the encoded config.card is attached
    mailx -s "${mail_subject}" -a "${SUBMIT_DIR}/config.card.base64" "${mail_recipient}" < "${buffer}" # send buffer
    rm -f "${buffer}" ; touch "${buffer}"                              # clear buffer
    touch "${last_mail_date__file}"                                    # memorize last mail date
    initBigBro=false
  elif [ ${send_messages} -eq 1 ] ; then
    mailx -s "${mail_subject}" "${mail_recipient}" < "${buffer}"       # send buffer
    rm -f "${buffer}" ; touch "${buffer}"                              # flush the buffer
    touch "${last_mail_date__file}"                                    # memorize last mail date
  fi

  if ( ${FlushAMQP} ) ; then
    # The buffer is empty when it has just been mailed above:
    # do not send a second mail with an empty body in that case.
    if [ -s "${buffer}" ] ; then
      mailx -s "${mail_subject}" "${mail_recipient}" < "${buffer}"     # send buffer
    fi
    rm -f "${buffer}"                                                  # cleaning behind us
    rm -f "${last_mail_date__file}"                                    # cleaning behind us
  fi

  # Always all good for now.
  return 0
}
198
#D-#==================================================================
#D-function IGCM_debug_sendAMQP_Metrics
#D- * Purpose: Mail the metrics files of a directory together with an encoded routing message
#D- * Two arguments : - Directory where metrics.json files can be found
#D- *                 - Metrics Group Name. metrics will be added to this group
#D- * Attach every file matching <directory>/*json.
#D- * Does nothing unless ActivateBigBro is true.

function IGCM_debug_sendAMQP_Metrics {

  # code, attachmentsOptions and metricsFile are kept local so that the
  # caller's variables of the same name are left untouched
  typeset mail_recipient encodedBody code attachmentsOptions metricsFile

  if [ "X${ActivateBigBro}" = Xtrue ] ; then
    mail_recipient="superviseur@ipsl.jussieu.fr"
    # Metrics tag on server side
    code=7100
    # Usual AMQP message to route messages on server side
    encodedBody=$( echo "{${genericSimulationID},\"msgCode\":\"${code}\",\"msgUID\":\"$(uuidgen)\",\"metricsGroupName\":\"${2}\",\"msgTimestamp\":\"$( date +"%Y-%m-%dT%H:%M:%S.%N%z" )\"}" |  base64 -w 0 )
    # Build one "-a file" option per metrics file.
    # Let the shell expand the pattern; when nothing matches the pattern itself
    # is returned and is filtered out by the -f test.
    attachmentsOptions=""
    for metricsFile in "${1}"/*json ; do
      [ -f "${metricsFile}" ] || continue
      attachmentsOptions="-a ${metricsFile} ${attachmentsOptions}"
    done
    IGCM_debug_Print 2 "IGCM_debug_sendAMQP_Metrics "
    # send mail (attachmentsOptions is deliberately unquoted: one word per option)
    echo ${encodedBody}|mailx -s "[TEMPORARY AMQP CHANNEL]" ${attachmentsOptions} ${mail_recipient}
  fi

  # Always all good for now.
  return 0
}
227
#D-#==================================================================
#D-function IGCM_debug_sendAMQP
#D- * Purpose: Send body; encoded body and config.card to rabbitMQ
#D- * Usage: IGCM_debug_sendAMQP [activate]
#D- *        The message is read from the global variable Body.
#D- *        With "activate" the base64 encoded config.card is sent as well.
#D- * Exits the shell with code 1 when the message cannot be sent.
function IGCM_debug_sendAMQP {

  typeset encodedBody sendConfigCard stackFile

  # Encode message Body
  encodedBody=$( echo "${Body}" | base64 -w 0 )

  # Send config.card ?
  if [ "X${1}" = Xactivate ] ; then
    # Encode config.card
    base64 -w 0 < "${SUBMIT_DIR}/config.card" > "${SUBMIT_DIR}/config.card.base64"
    sendConfigCard=true
    # read by the mail tunnel to decide whether config.card must be attached
    initBigBro=true
  else
    sendConfigCard=false
    #
    initBigBro=false
  fi

  # Only cosmetics : stack file
  if [ "X${ActivateStackFilling}" = Xtrue ] ; then
    stackFile="${StackFileLocation}/${StackFileName}"
    # one blank per stack level
    printf '%*s' "${IGCM_debug_LenStack}" '' >> "${stackFile}"
    # Log to stack file using human readable format
    echo "${Body}" >> "${stackFile}"
  fi

  # Send the message
  # status is left global, as it has always been
  if [ "X${BigBrotherChannel}" = XMAIL ] ; then
    IGCM_debug_send_AMQP_msg__MAILTUNNEL "${encodedBody}"
    status=$?
  elif ( ${sendConfigCard} ) ; then
    # the file name is passed as its own word so that a path containing blanks survives
    sendAMQPMsg -h localhost -p 5672 -f "${SUBMIT_DIR}/config.card.base64" -b "${encodedBody}"
    status=$?
  else
    sendAMQPMsg -h localhost -p 5672 -b "${encodedBody}"
    status=$?
  fi

  if [ ${status} -gt 0 ] ; then
    IGCM_debug_Print 2 "IGCM_debug_Push/PopStack/ActivateBigBro : command sendAMQPMsg failed error code ${status}"
    echo sendAMQPMsg -h localhost -p 5672 -b "${Body}"
    exit 1
  fi
}
281
#D-#==================================================================
#D-function IGCM_debug_CallStack
#D-* Purpose: Print the call stack tree from the oldest to the youngest (opposite of the display standard)
#D-
function IGCM_debug_CallStack {
  if ( $DEBUG_debug ) ; then
    typeset depth slot
    for (( depth = 0; depth != IGCM_debug_LenStack; depth += 1 )) ; do
      # Cosmetics: one blank per level of depth
      printf '%*s' ${depth} ''
      # the oldest call sits at the end of the arrays
      slot=$(( IGCM_debug_LenStack - depth - 1 ))
      echo "${depth} - ${IGCM_debug_Stack[${slot}]}" "(${IGCM_debug_StackArgs[${slot}]})"
    done
  fi
}
302
#D-#==================================================================
#D-function IGCM_debug_PushStack
#D-* Purpose: Push a function name in the stack
#D-* Usage: IGCM_debug_PushStack functionName [arguments ...]
#D-*        Records the name, its arguments and the start time in milliseconds.
#D-*        Does nothing unless DEBUG_debug is true.
#D-
function IGCM_debug_PushStack {
  if ( $DEBUG_debug ) ; then
    typeset inputs startTime_ms stackFile

    # Only cosmetics : stack file
    if [ "X${ActivateStackFilling}" = Xtrue ] ; then
      stackFile="${StackFileLocation}/${StackFileName}"
      echo >> "${stackFile}"
      # one blank per stack level
      printf '%*s' "${IGCM_debug_LenStack}" '' >> "${stackFile}"

      # Fill the stack file
      echo "> ${IGCM_debug_LenStack} : ${@}" >> "${stackFile}"
    fi

    # Save input list in a local indexed array (nothing is left behind in the global scope)
    inputs=( $@ )

    # Get timing information
    startTime_ms=$( IGCM_debug_getDate_ms )

    # We add function call name on beginning of the stack
    IGCM_debug_Stack=( ${1} ${IGCM_debug_Stack[*]} )

    # Save timing in milliseconds in an indexed array
    IGCM_debug_StackTiming=( ${startTime_ms} ${IGCM_debug_StackTiming[*]} )

    # We include the "null" Args in the beginning of the StackArgs
    IGCM_debug_StackArgs=( ${NULL_STR} ${IGCM_debug_StackArgs[*]} )

    # Then, when arguments were given, they replace the "null" Args.
    # Replacing blank separated list by comma separated list of quoted elements (except the first and last element)
    if [ $# -gt 1 ]; then
      IGCM_debug_StackArgs[0]=$(echo ${inputs[*]:1} | sed -e "s/\ /\",\"/g" )
    fi

    # Increment LenStack
    (( IGCM_debug_LenStack = IGCM_debug_LenStack + 1 ))

    #IGCM_debug_CallStack
  fi
}
351
#D-#==================================================================
#D-function IGCM_debug_PopStack
#D-* Purpose: Pop a function name in the stack
#D-* Usage: IGCM_debug_PopStack functionName
#D-*        functionName must be the name on top of the stack, otherwise
#D-*        IGCM_debug_Exit is called. For the instrumented commands the size
#D-*        handled and the timing are passed to IGCM_debug_PrintInfosActions.
#D-*        Does nothing unless DEBUG_debug is true.
#D-
function IGCM_debug_PopStack {
  if ( $DEBUG_debug ) ; then
    typeset i command arguments startTime_ms endTime_ms
    typeset instrumentation dest prefix keepGoing
    # fileList and source are not typeset because they are sent "by address"
    # (by name) to child functions; we unset them to avoid "memory effect"
    unset fileList source

    # SIMPLE ERROR GENERATOR TO TEST SUPERVISOR
    # AN ERROR IS RAISED WHEN RANDOM%10000 IS <= 10, I.E. 11 DRAWS OUT OF 10000
    # PER COMMAND OR FUNCTION CALL
    # THERE ARE ~500 COMMAND OR FUNCTION CALL PER PERIOD
    # ONLY WHEN TaskType is "computing".
    if [ "X${ActivateBigBro}" = Xtrue ] ; then
      if [ "X${TaskType}" = Xcomputing ]; then
        if ( ${RandomError} ) ; then
          if [ $((RANDOM%10000)) -le 10 ] ; then
            IGCM_debug_Print 1 "Random error has been triggered"
            if [ "X${ActivateStackFilling}" = Xtrue ] ; then
              echo "RANDOM ERROR" >> "${StackFileLocation}/${StackFileName}"
            fi
            ExitFlag=true
          fi
        fi
      fi
    fi

    if [ "${IGCM_debug_Stack[0]}" = "${1}" ]; then
      # Everything is cool

      # Get timing information
      endTime_ms=$( IGCM_debug_getDate_ms )

      # Save Stack information before popping the stack
      command=${IGCM_debug_Stack[0]}

      # Go from comma separated list of quoted elements (except the first and the last element)
      # to unquoted space separated elements in an array
      arguments=( $( echo ${IGCM_debug_StackArgs[0]} | sed -e "s/\",\"/\ /g" ) )

      # Save Stack information before popping the stack
      startTime_ms=${IGCM_debug_StackTiming[0]}

      # Pop the stack
      (( IGCM_debug_LenStack = IGCM_debug_LenStack - 1 ))
      IGCM_debug_Stack=( ${IGCM_debug_Stack[*]:1} )
      IGCM_debug_StackArgs=( ${IGCM_debug_StackArgs[*]:1} )
      IGCM_debug_StackTiming=( ${IGCM_debug_StackTiming[*]:1} )
    else
      echo 'IGCM_debug_Exit : stack is corrupted ! LenStack =' ${IGCM_debug_LenStack}
      IGCM_debug_Exit $@
    fi

    # Special actions depending on command to prepare IGCM_debug_PrintInfosActions call
    # We are interested in:
    #  0. Which command performs the work
    #  1. Size of entity we are working with
    #  2. Where are we reading
    #  3. Where are we writing
    #  4. How long it took

    instrumentation=false

    case ${command} in
    # Classical copy (only files are given to IGCM_sys_Cp as options)
    IGCM_sys_Cp)
      instrumentation=true
      # All but the latest
      fileList=${arguments[*]:0:${#arguments[*]}-1}
      # just need the first file to get the directory
      source=${arguments[0]}
      # Nothing but the latest
      dest=${arguments[${#arguments[*]}-1]}
      # Size of file whose name are stored in a list
      entitySize=$( IGCM_debug_sizeOfTabContent fileList ${dest} )
      ;;

    # Copy from archive machine or from buffer
    IGCM_sys_Get|IGCM_sys_GetBuffer)
      instrumentation=true
      if [ ${#arguments[*]} -eq 2 ] ; then
        source=${arguments[0]}
        dest=${arguments[1]}
        # Size of file whose name are stored in a variable
        entitySize=$( IGCM_debug_sizeOfTabContent source ${dest} )
      elif ( [ ${#arguments[*]} -eq 3 ] && [ ${arguments[0]} = '/l' ] ) ; then
        # IGCM_sys_Get /l liste_file[*] /ccc/scratch/cont003/dsm/p86denv/RUN_DIR/985998_14754/
        # Keep the array name hosting the whole list
        eval "fileList=( \${${arguments[1]}} )"
        # just need the first file to get the directory
        source=${fileList[0]}
        dest=${arguments[2]}
        # Size of file whose name are stored in a list
        entitySize=$( IGCM_debug_sizeOfTabContent fileList[*] ${dest} )
      elif [ ${#arguments[*]} -ge 3 ] ; then
        # Several files followed by the destination
        # All but the latest
        fileList=${arguments[*]:0:${#arguments[*]}-1}
        # just need the first file to get the directory
        source=${arguments[0]}
        # Nothing but the latest
        dest=${arguments[${#arguments[*]}-1]}
        # Size of file whose name are stored in a list
        entitySize=$( IGCM_debug_sizeOfTabContent fileList ${dest} )
      fi
      ;;

    # Copy from compute node or copy to archive/buffer
    IGCM_sys_Get_Master|IGCM_sys_Get_Dir|IGCM_sys_Put_Out|IGCM_sys_PutBuffer_Out)
      instrumentation=true
      source=${arguments[0]}
      dest=${arguments[1]}
      # Size of file whose name are stored in a variable
      entitySize=$( IGCM_debug_sizeOfTabContent source ${dest} )
      ;;

    # Rebuild command
    IGCM_sys_rebuild|IGCM_sys_rebuild_station)
      instrumentation=true
      # All but the first
      fileList=${arguments[*]:1:${#arguments[*]}-1}
      # just need a file to get the directory
      source=${arguments[1]}
      # Nothing but the first
      dest=${arguments[0]}
      # Size of file whose name are stored in a list
      entitySize=$( IGCM_debug_sizeOfTabContent fileList ${dest} )
      ;;

    # NCO commands
    IGCM_sys_ncrcat|IGCM_sys_ncecat|IGCM_sys_ncra|IGCM_sys_ncks|IGCM_sys_cdo)
      # Example of what we want to catch : only filenames in those command lines
      # IGCM_sys_ncrcat -O -v ${list_var_final_ncrcat} ${OUT_SE[*]} ${RESULT_SE}
      # IGCM_sys_ncrcat --hst -v ${liste_coord}${var} ${file1} ${liste_file_tmp[*]} ${file_out}
      # IGCM_sys_ncrcat -p ${dir} ${liste_file_tmp} --output ${output}
      # IGCM_sys_ncrcat -x -v ${list_var} -p ${dir} ${liste_file_tmp} --output ${output}
      instrumentation=true
      keepGoing=true
      prefix=.
      i=0
      while ( ${keepGoing} ) ; do
        # the last one is not interesting.
        # ">=" because the "-p" handling moves two steps at once and may jump over the last index
        if (( i >= ${#arguments[*]} - 1 )) ; then
          keepGoing=false
        # look after "-p" option. Path prefix is the following arguments
        elif [ ${arguments[${i}]} = "-p" ] ; then
          ((i = i + 1))
          prefix=${arguments[${i}]}
          ((i = i + 1))
        # looking for files
        elif [ -f ${prefix}/${arguments[${i}]} ] ; then
          fileList="${fileList} ${prefix}/${arguments[${i}]}"
          ((i = i + 1))
        # other options are not interesting
        else
          ((i = i + 1))
        fi
      done

      # just need one file to get the directory
      source=$( echo ${fileList} | gawk '{print $1}' )
      # Nothing but the latest
      dest=${arguments[${#arguments[*]}-1]}
      # Size of file whose name are stored in a list
      entitySize=$( IGCM_debug_sizeOfTabContent fileList ${dest} )
      ;;
    esac

    # Print information related to instrumentation
    ( ${instrumentation} ) && IGCM_debug_PrintInfosActions ${command} ${entitySize} ${startTime_ms} ${endTime_ms} ${dest} ${source}

    # Only cosmetics : stack file (one blank per stack level)
    if [ "X${ActivateStackFilling}" = Xtrue ] ; then
      printf '%*s' "${IGCM_debug_LenStack}" '' >> "${StackFileLocation}/${StackFileName}"
    fi

    if ( ${ExitFlag} ) ; then
      # Inform the stack file
      if [ "X${ActivateStackFilling}" = Xtrue ] ; then
        echo '!!! ExitFlag has been activated !!!' >> "${StackFileLocation}/${StackFileName}"
      fi

      # Unplugged message 4900 handling for now. To ease downstream treatment.
      if [ "X${ActivateBigBro}" = Xtrue ] ; then
        if [ "X${TaskType}" = Xcomputing ]; then
          # RabbitMQ message code "COMPUTING JOBs COMMAND FAILURE"
          code=1900
        elif [ "X${TaskType}" = Xpost-processing ]; then
          # RabbitMQ message code "POST-PROCESSING JOBs COMMAND FAILURE"
          code=2900
        elif [ "X${TaskType}" = Xchecking ]; then
          # RabbitMQ message code "POST-PROCESSING FROM CHECKER JOBs COMMAND FAILURE"
          code=3900
        fi
        # RabbitMQ message body
        Body=$( echo "{${genericSimulationID},\"msgCode\":\"${code}\",\"msgUID\":\"$(uuidgen)\",\"command\":\"${command}\",\"msgTimestamp\":\"$( date +"%Y-%m-%dT%H:%M:%S.%N%z" )\"}" )

        # Fill the rabbitMQ queue
        IGCM_debug_sendAMQP
      fi
    else
      # Inform the stack file
      if [ "X${ActivateStackFilling}" = Xtrue ] ; then
        echo "< ${IGCM_debug_LenStack} : ${@}" >> "${StackFileLocation}/${StackFileName}"
      fi
    fi

    # Reset array if necessary
    if [ ${IGCM_debug_LenStack} = 0 ]; then
      #echo
      #IGCM_debug_Print 3 "Clean stack array"
      #echo
      unset IGCM_debug_Stack
      unset IGCM_debug_StackArgs
      unset IGCM_debug_StackTiming
      IGCM_debug_Stack[0]=${NULL_STR}
      IGCM_debug_StackArgs[0]=${NULL_STR}
      IGCM_debug_StackTiming[0]=${NULL_STR}
    fi
  fi
  #IGCM_debug_CallStack
}
583
#D-#==================================================================
#D-function IGCM_debug_BigBro_Initialize
#D-* Purpose: switch rabbitMQ on
#D-
function IGCM_debug_BigBro_Initialize {
  IGCM_debug_PushStack "IGCM_debug_BigBro_Initialize"

  typeset postProcessingIDLength postProcessingName postProcessingDate postProcessingDimn postProcessingComp postProcessingFile
  typeset cardOption

  # Message type standard fields:
  # https://github.com/Prodiguer/prodiguer-docs/wiki/MQ-Standard-Message-Fields

  # Message type dictionary and custom fields:
  # https://github.com/Prodiguer/prodiguer-docs/wiki/Monitoring-Message-Dictionary

  if [ X${BigBrother} = Xtrue ] ; then
    # create a unique ID for this specific job
    jobuid=$(uuidgen)

    # get the assigned id by the scheduler for that job
    IGCM_sys_getJobSchedulerID jobSchedulerID

    case X${TaskType} in
    Xcomputing)
      if ( ${FirstInitialize} ) ; then
        # RabbitMQ message code "BEGIN A SIMULATION"
        code=0000
        # create and persist a unique id for this simulation
        simuid=$(uuidgen)
        IGCM_card_WriteOption ${SUBMIT_DIR}/run.card Configuration simuid ${simuid}
        # Standard fields for the first message
        genericSimulationID=$( echo "\"msgApplication\":\"monitoring\",\"msgProducer\":\"libigcm\",\"msgProducerVersion\":\"${libIGCMVersion}\",\"activity\":\"IPSL\",\"name\":\"${config_UserChoices_JobName}\",\"experiment\":\"${config_UserChoices_ExperimentName}\",\"space\":\"${config_UserChoices_SpaceName}\",\"model\":\"${config_UserChoices_TagName}\",\"startDate\":\"${config_UserChoices_DateBegin}\",\"endDate\":\"${config_UserChoices_DateEnd}\",\"login\":\"${LOGIN}\",\"centre\":\"${CENTER}\",\"machine\":\"${MASTER}\",\"simuid\":\"${simuid}\",\"jobuid\":\"${jobuid}\"" )
        # the config.card in use will be sent along with the first message
        cardOption=activate
      else
        # RabbitMQ message code "A NEW COMPUTING JOB IS RUNNING PART OF A SIMULATION"
        code=1000
        # retrieve this simulation's unique id
        IGCM_card_DefineVariableFromOption ${SUBMIT_DIR}/run.card Configuration simuid
        simuid=${run_Configuration_simuid}
        # Using standard fields for message others than the first one. Still subject to change
        genericSimulationID=$( echo "\"msgApplication\":\"monitoring\",\"msgProducer\":\"libigcm\",\"msgProducerVersion\":\"${libIGCMVersion}\",\"simuid\":\"${simuid}\",\"jobuid\":\"${jobuid}\"" )
        cardOption=
      fi

      # RabbitMQ message body with specific fields associated message codes treated here
      Body=$( echo "{${genericSimulationID},\"msgCode\":\"${code}\",\"accountingProject\":\"${PROJECT}\",\"jobWarningDelay\":\"${jobWarningDelay}\",\"jobSchedulerID\":\"${jobSchedulerID}\",\"jobSubmissionPath\":\"${SUBMIT_DIR}\",\"msgUID\":\"$(uuidgen)\",\"msgTimestamp\":\"$( date +"%Y-%m-%dT%H:%M:%S.%N%z" )\"}" )
      # Fill the rabbitMQ queue
      IGCM_debug_sendAMQP ${cardOption}

      # NOT VERY NICE BUT ... IT WORKS
      # Be sure that the genericSimulationID will be small from now on
      # Using standard fields for messages others than the first one. Still subject to change
      genericSimulationID=$( echo "\"msgApplication\":\"monitoring\",\"msgProducer\":\"libigcm\",\"msgProducerVersion\":\"${libIGCMVersion}\",\"simuid\":\"${simuid}\",\"jobuid\":\"${jobuid}\"" )
      ;;

    Xpost-processing)
      # RabbitMQ message code "A NEW POST-PROCESSING JOB IS RUNNING PART OF A SIMULATION"
      code=2000
      # retrieve this simulation's unique id
      IGCM_card_DefineVariableFromOption ${SUBMIT_DIR}/run.card Configuration simuid
      simuid=${run_Configuration_simuid}
      # Using standard fields for message others than the first one. Still subject to change
      genericSimulationID=$( echo "\"msgApplication\":\"monitoring\",\"msgProducer\":\"libigcm\",\"msgProducerVersion\":\"${libIGCMVersion}\",\"simuid\":\"${simuid}\",\"jobuid\":\"${jobuid}\"" )

      # Specify the post-processing task we are dealing with:
      # Script_Post_Output is split on dots, the number of dots selects the optional fields
      postProcessingDimn="null"
      postProcessingComp="null"
      postProcessingFile="null"
      postProcessingIDLength=$( echo "${Script_Post_Output}" | tr -d -c "\." | wc -c )
      postProcessingName=$( echo "${Script_Post_Output}" | gawk -F. '{print $1}' )
      postProcessingDate=$( echo "${Script_Post_Output}" | gawk -F. '{print $2}' )
      if [ ${postProcessingIDLength} -eq 2 ] ; then
        postProcessingDimn=$( echo "${Script_Post_Output}" | gawk -F. '{print $3}' )
      elif [ ${postProcessingIDLength} -eq 4 ] ; then
        postProcessingComp=$( echo "${Script_Post_Output}" | gawk -F. '{print $4}' )
        postProcessingFile=$( echo "${Script_Post_Output}" | gawk -F. '{print $5}' )
      fi

      # RabbitMQ message body with specific fields associated message codes treated here
      Body=$( echo "{${genericSimulationID},\"msgCode\":\"${code}\",\"accountingProject\":\"${PROJECT}\",\"jobWarningDelay\":\"${jobWarningDelay}\",\"jobSchedulerID\":\"${jobSchedulerID}\",\"jobSubmissionPath\":\"${SUBMIT_DIR}\",\"msgUID\":\"$(uuidgen)\",\"msgTimestamp\":\"$( date +"%Y-%m-%dT%H:%M:%S.%N%z" )\",\"postProcessingName\":\"${postProcessingName}\",\"postProcessingDate\":\"${postProcessingDate}\",\"postProcessingDimn\":\"${postProcessingDimn}\",\"postProcessingComp\":\"${postProcessingComp}\",\"postProcessingFile\":\"${postProcessingFile}\"}" )
      # Fill the rabbitMQ queue
      IGCM_debug_sendAMQP
      ;;
    esac

    # Turn the flag on
    ActivateBigBro=true
  fi
  IGCM_debug_PopStack "IGCM_debug_BigBro_Initialize"
}
671
#D-#==================================================================
#D-function IGCM_debug_BigBro_Finalize
#D-* Purpose: Finalize rabbitMQ message exchanges
#D-
function IGCM_debug_BigBro_Finalize {
  IGCM_debug_PushStack "IGCM_debug_BigBro_Finalize"

  # Sends the closing monitoring message of the current job.
  #
  # Globals read   : DEBUG_debug, ActivateBigBro, TaskType, simulationIsOver,
  #                  ExitFlag, genericSimulationID
  # Globals written: code, FlushAMQP (only set, never cleared here), Body
  #
  # Message layout references:
  #   standard fields            : https://github.com/Prodiguer/prodiguer-docs/wiki/MQ-Standard-Message-Fields
  #   dictionary / custom fields : https://github.com/Prodiguer/prodiguer-docs/wiki/Monitoring-Message-Dictionary

  typeset msgUID msgTimestamp

  # Nothing is sent unless debugging is on and the messaging flag has been raised.
  if ( $DEBUG_debug ) && [ X${ActivateBigBro} = Xtrue ] ; then

    # Select the message code matching the kind of job and the way it ends.
    case "${TaskType}" in
    computing)
      if ( ${simulationIsOver} ) ; then
        # "SIMULATION ENDS"
        code=0100
        FlushAMQP=true
      elif ( ${ExitFlag} ) ; then
        # "EXIT THE JOBS BECAUSE ERROR(S) HAVE BEEN TRIGGERED"
        code=1999
        FlushAMQP=true
      else
        # "COMPUTING JOB ENDS" (FlushAMQP is left as it was)
        code=1100
      fi
      ;;
    post-processing)
      # "POST-PROCESSING JOB FAILS" (2999) or "POST-PROCESSING JOB ENDS" (2100)
      if ( ${ExitFlag} ) ; then
        code=2999
      else
        code=2100
      fi
      FlushAMQP=true
      ;;
    checking)
      # "CHECKING JOB FAILS" (3999) or "CHECKING JOB ENDS" (3100)
      if ( ${ExitFlag} ) ; then
        code=3999
      else
        code=3100
      fi
      FlushAMQP=true
      ;;
    esac

    # RabbitMQ message body: generic identification, code, unique id, timestamp
    msgUID=$(uuidgen)
    msgTimestamp=$( date +"%Y-%m-%dT%H:%M:%S.%N%z" )
    Body=$( echo "{${genericSimulationID},\"msgCode\":\"${code}\",\"msgUID\":\"${msgUID}\",\"msgTimestamp\":\"${msgTimestamp}\"}" )

    # Fill the rabbitMQ queue
    IGCM_debug_sendAMQP
  fi

  IGCM_debug_PopStack "IGCM_debug_BigBro_Finalize"
}
730
#D-#==================================================================
#D-function IGCM_debug_Exit
#D-* Purpose: Print Call Stack and set ExitFlag to true
#D-
function IGCM_debug_Exit {
  IGCM_debug_PushStack "IGCM_debug_Exit"

  # Arguments: free text describing the error; echoed after the function name.
  # This function does not terminate the job by itself: it only raises
  # ExitFlag, which IGCM_debug_Verif_Exit acts upon.
  echo "IGCM_debug_Exit : " "${@}"

  # Error banner, framed by one empty line above and one below
  printf '%s\n' \
    "" \
    "!!!!!!!!!!!!!!!!!!!!!!!!!!" \
    "!!   ERROR TRIGGERED    !!" \
    "!!   EXIT FLAG SET      !!" \
    "!------------------------!" \
    ""

  # Show where the error comes from, then raise the flag
  IGCM_debug_CallStack
  ExitFlag=true

  IGCM_debug_PopStack "IGCM_debug_Exit"
}
748
#D-#==================================================
#D-function IGCM_debug_Verif_Exit
#D-* Purpose: exit with number 1 if ExitFlag is true
#D-
function IGCM_debug_Verif_Exit {
  # Acts on a previously raised ExitFlag, depending on TaskType:
  #   computing       : run.card PeriodState is set to "Fatal", a mail and the
  #                     closing rabbitMQ message are sent, then exit 1.
  #   post-processing : run.card is marked "Fatal <Script_Post_Output>" ONLY
  #                     when postProcessingStopLevel is high enough for this
  #                     kind of post-processing; the post-processing job itself
  #                     exits with 1 only when SpaceName is PROD.
  #   checking        : a message is printed, nothing else.
  # Does nothing when ExitFlag is not true.
  #
  # Globals read   : ExitFlag, TaskType, SUBMIT_DIR, postProcessingName,
  #                  postProcessingStopLevel, Script_Post_Output,
  #                  config_UserChoices_SpaceName
  # Globals written: StopAll
  if ( ${ExitFlag} ) ; then
    echo "IGCM_debug_Verif_Exit : Something wrong happened previously."
    echo "IGCM_debug_Verif_Exit : ERROR and EXIT keyword will help find out where."
    # Only computing TaskType stops the job for now.
    if [ X${TaskType} = Xcomputing ] ; then
      IGCM_card_WriteOption ${SUBMIT_DIR}/run.card Configuration PeriodState "Fatal"
      echo "                        EXIT THE JOB."
      echo
      IGCM_debug_CallStack

      # Mail notification
      IGCM_sys_SendMail

      # Inform the rabbitMQ queue
      IGCM_debug_BigBro_Finalize

      # And Good Bye
      date
      exit 1

    elif [ X${TaskType} = Xpost-processing ] ; then
      # Test if we need to stop the computing job.
      # The flag is explicitly reset first: an unset flag used as a command
      # expands to an empty command, which succeeds, and would wrongly stop
      # the computing job.
      StopAll=false
      # Threshold per family of post-processing; an unset stop level is
      # treated as 0, i.e. never stop the computing job.
      case "${postProcessingName}" in
      atlas*|monitoring*|metrics*)
        [ "${postProcessingStopLevel:-0}" -gt 2 ] && StopAll=true ;;
      create_*)
        [ "${postProcessingStopLevel:-0}" -gt 1 ] && StopAll=true ;;
      rebuild*|pack_*)
        [ "${postProcessingStopLevel:-0}" -gt 0 ] && StopAll=true ;;
      esac
      # Notify the computing job that something wrong happened and stop it.
      if ( ${StopAll} ) ; then
        IGCM_card_WriteOption ${SUBMIT_DIR}/run.card Configuration PeriodState "Fatal ${Script_Post_Output}"
      fi

      # If SpaceName is PROD we stop when post_processing failed
      if [ X${config_UserChoices_SpaceName} = XPROD ] ; then
        echo "                        EXIT THE POST-PROCESSING JOB."
        echo
        IGCM_debug_CallStack

        # Inform the rabbitMQ queue
        IGCM_debug_BigBro_Finalize

        # And Good Bye
        date
        exit 1
      else
        echo "In config.card the variable SpaceName is not in PROD"
        echo "              SO WE DO NOT EXIT THE JOB."
        echo
        date
      fi
    elif [ X${TaskType} = Xchecking ] ; then
      echo "Nothing will happen for now"
    fi
  fi
}
810
#D-#==================================================================
#D-function IGCM_debug_Print
#D-* Purpose: Print arguments according to a level of verbosity.
#D-
function IGCM_debug_Print
{
  # Usage: IGCM_debug_Print level [-e] message...
  #   level   : 1, 2 or 3; messages are shown only when level <= Verbosity.
  #             Any other level prints nothing.
  #   -e      : optional, forwarded to echo.
  #   message : each argument is printed on its own time-stamped line.
  typeset level=$1
  shift

  # Optional echo flag, left empty when not requested
  typeset echoFlag=""
  if [ X"${1}" = X"-e" ]; then
    echoFlag="-e"
    shift
  fi

  # Too detailed for the current verbosity: stay silent
  [ ${level} -le ${Verbosity} ] || return 0

  # Prefix marking the debug level; deeper levels are indented further
  typeset tag
  case "${level}" in
  1) tag="--Debug1-->" ;;
  2) tag="--------Debug2-->" ;;
  3) tag="--------------Debug3-->" ;;
  *) return 0 ;;
  esac

  # The message is intentionally expanded unquoted, as echo arguments
  typeset msg
  for msg in "$@" ; do
    echo ${echoFlag} $(date +"%Y-%m-%d %T") "${tag}" ${msg}
  done
}
842
#D-#==================================================================
#D-function IGCM_debug_PrintVariables
#D-* Purpose: Print arguments when match a pattern
#D-           according to a level of verbosity.
function IGCM_debug_PrintVariables
{
  # Usage: IGCM_debug_PrintVariables level prefix
  #   level  : verbosity level handed over to IGCM_debug_Print
  #   prefix : beginning of the names of the shell variables to display
  # The result of the search is left in the global variable "list".
  typeset level=$1
  shift

  # Lines of the "set" output starting with the prefix, single quotes removed
  list=$( set | grep ^$1 | sed -e "s/'//g" )

  # Print only when something matched; the list is split into words on purpose,
  # so that every word is displayed on a line of its own.
  [ "X${list}" = X ] || IGCM_debug_Print ${level} ${list}
}
858
#D-#==================================================================
#D-function IGCM_debug_PrintInfosActions
#D-* Purpose: Print information related to instrumentation
function IGCM_debug_PrintInfosActions
{
  # Arguments:
  #   $1 actionType : name of the instrumented action
  #   $2 entitySize : "<size in Ko>|<size in Mo>"
  #   $3 start_ms   : start of the action, in milliseconds
  #   $4 end_ms     : end of the action, in milliseconds
  #   $5 dest       : destination file or directory
  #   $6 source     : source file or directory
  # Globals read   : ActivateStackFilling, StackFileLocation, StackFileName,
  #                  ActivateBigBro, genericSimulationID
  # Globals written: instrumentationContent, Body
  typeset actionType=$1
  typeset entitySize=$2
  typeset start_ms=$3
  typeset end_ms=$4

  typeset dest=$5
  typeset source=$6

  typeset diff_ms rate_ms entitySizeKo entitySizeMo flux_Mo_s
  typeset dirFrom dirTo

  # Missing timestamps count as 0 instead of breaking the arithmetic
  diff_ms=$(( ${end_ms:-0} - ${start_ms:-0} ))

  entitySizeKo=$( echo ${entitySize} | gawk -F"|" '{print $1}' )
  entitySizeMo=$( echo ${entitySize} | gawk -F"|" '{print $2}' )

  # An action shorter than the clock resolution (or a clock step backwards)
  # gives a null or negative duration. The real duration is still reported,
  # but the throughput is computed over 1 ms to avoid a division by zero.
  rate_ms=${diff_ms}
  [ "${rate_ms}" -gt 0 ] 2>/dev/null || rate_ms=1

  # Throughput in Mo/s = ( Ko / ms ) * 1000 / 1024.
  # Everything is done by bc: the intermediate value is a decimal number that
  # integer-only shell arithmetic cannot handle.
  flux_Mo_s=$( echo "scale=6; f=${entitySizeKo:-0}/${rate_ms}; (f*1000)/1024" | bc )

  # Directory holding the destination: dest itself when it is a directory,
  # its parent otherwise. Left empty when no destination is given.
  dirTo=""
  if [ -n "${dest}" ] ; then
    if [ -d "${dest}" ] ; then
      dirTo=$( readlink -f "${dest}" )
    else
      dirTo=$( readlink -f "$( dirname "${dest}" )" )
    fi
  fi

  # Same rule for the source
  dirFrom=""
  if [ -n "${source}" ] ; then
    if [ -d "${source}" ] ; then
      dirFrom=$( readlink -f "${source}" )
    else
      dirFrom=$( readlink -f "$( dirname "${source}" )" )
    fi
  fi

  instrumentationContent=$( echo "\"actionName\":\"${actionType}\",\"size_Mo\":\"${entitySizeMo}\",\"duration_ms\":\"${diff_ms}\",\"throughput_Mo_s\":\"${flux_Mo_s}\",\"dirFrom\":\"${dirFrom}\",\"dirTo\":\"${dirTo}\"" )

  # Keep a trace in the stack file
  if [ X${ActivateStackFilling} = Xtrue ] ; then
    echo "{${instrumentationContent}}" >> "${StackFileLocation}/${StackFileName}"
  fi

  # Inform the rabbitMQ queue
  if [ X${ActivateBigBro} = Xtrue ] ; then
    # RabbitMQ message body
    Body=$( echo "{${genericSimulationID},\"msgCode\":\"7000\",\"msgUID\":\"$(uuidgen)\",${instrumentationContent},\"msgTimestamp\":\"$( date +"%Y-%m-%dT%H:%M:%S.%N%z" )\"}" )
    # Fill the rabbitMQ queue
    IGCM_debug_sendAMQP
  fi
}
920
#D-#==================================================================
#D-function IGCM_debug_Check
#D- * Purpose: Check the present file by comparison with a reference file
function IGCM_debug_Check
{
  # Runs ${libIGCM}/libIGCM_debug/IGCM_debug_Test.ksh once, removes the
  # timestamps from its output and compares the result with
  # ${libIGCM}/libIGCM_debug/IGCM_debug_Test.ref.
  #
  # The filtered output is stored in IGCM_debug_Test.ref.failed in the current
  # directory (which therefore has to be writable); the file is removed when
  # the check succeeds and kept for analysis when it fails.
  #
  # Exit codes: 2 libIGCM is not defined, 3 Verbosity is not defined,
  #             4 the output differs from the reference.
  # Globals written: status
  typeset checkDir failedFile

  #---------------------
  if [ ! -n "${libIGCM}" ] ; then
    echo "Check libIGCM_debug ..........................................[ FAILED ]"
    echo "--Error--> libIGCM variable is not defined"
    exit 2
  fi

  #---------------------
  if [ ! -n "${Verbosity}" ] ; then
    echo "Check libIGCM_debug ..........................................[ FAILED ]"
    echo "--Error--> Verbosity variable is not defined"
    exit 3
  fi

  #---------------------
  checkDir=${libIGCM}/libIGCM_debug
  failedFile=IGCM_debug_Test.ref.failed

  # Need to remove timestamps here
  "${checkDir}/IGCM_debug_Test.ksh" | sed -e "s:[0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9] [0-9][0-9]\:[0-9][0-9]\:[0-9][0-9] ::g" > "${failedFile}"

  diff "${checkDir}/IGCM_debug_Test.ref" "${failedFile}" > /dev/null 2>&1
  status=$?

  if [ ${status} -eq 0 ] ; then
    # Nothing to analyse: do not leave the temporary output behind
    rm -f "${failedFile}"
    echo "Check libIGCM_debug ..............................................[ OK ]"
  else
    echo "Check libIGCM_debug ..........................................[ FAILED ]"
    echo "--Error--> Execution of ${libIGCM}/libIGCM_debug/IGCM_debug_Test.ksh"
    echo "           has produced the file IGCM_debug_Test.ref.failed"
    echo "           Please analyse differences with the reference file by typing:"
    echo "           diff IGCM_debug_Test.ref.failed ${libIGCM}/libIGCM_debug/IGCM_debug_Test.ref"
    echo "           Report errors to the author: Patrick.Brockmann@cea.fr"
    # Show the differences using the output already produced
    diff "${checkDir}/IGCM_debug_Test.ref" "${failedFile}"
    exit 4
  fi
  #---------------------
}
Note: See TracBrowser for help on using the repository browser.