source: trunk/libIGCM/libIGCM_debug/libIGCM_debug.ksh @ 1287

Last change on this file since 1287 was 1287, checked in by sdipsl, 8 years ago
  • easier to handle
  • Property licence set to
    The following licence information concerns ONLY the libIGCM tools
    ==================================================================

    Copyright © Centre National de la Recherche Scientifique CNRS
    Commissariat à l'Énergie Atomique CEA

    libIGCM : Library for Portable Models Computation of IGCM Group.

    IGCM Group is the french IPSL Global Climate Model Group.

    This library is a set of shell scripts and functions whose purpose is
    the management of the initialization, the launch, the transfer of
    output files, the post-processing and the monitoring of data produced
    by any numerical program on any platform.

    This software is governed by the CeCILL license under French law and
    abiding by the rules of distribution of free software. You can use,
    modify and/ or redistribute the software under the terms of the CeCILL
    license as circulated by CEA, CNRS and INRIA at the following URL
    "http://www.cecill.info".

    As a counterpart to the access to the source code and rights to copy,
    modify and redistribute granted by the license, users are provided only
    with a limited warranty and the software's author, the holder of the
    economic rights, and the successive licensors have only limited
    liability.

    In this respect, the user's attention is drawn to the risks associated
    with loading, using, modifying and/or developing or reproducing the
    software by the user in light of its specific status of free software,
    that may mean that it is complicated to manipulate, and that also
    therefore means that it is reserved for developers and experienced
    professionals having in-depth computer knowledge. Users are therefore
    encouraged to load and test the software's suitability as regards their
    requirements in conditions enabling the security of their systems and/or
    data to be ensured and, more generally, to use and operate it in the
    same conditions as regards security.

    The fact that you are presently reading this means that you have had
    knowledge of the CeCILL license and that you accept its terms.
  • Property svn:keywords set to Revision Author Date
File size: 37.6 KB
Line 
1#!/bin/ksh
2
3#**************************************************************
4# Author: Patrick Brockmann, Martial Mancip
5# Contact: Patrick.Brockmann__at__cea.fr Martial.Mancip__at__ipsl.jussieu.fr
6# $Revision::                                          $ Revision of last commit
7# $Author::                                            $ Author of last commit
8# $Date::                                              $ Date of last commit
9# IPSL (2006)
10#  This software is governed by the CeCILL licence see libIGCM/libIGCM_CeCILL.LIC
11#
12#**************************************************************
13
14#==================================================
15# The documentation of this file can be automatically generated
16# if you use the prefix #D- for comments to be extracted.
17# Extract with command: cat lib* | grep "^#D-" | cut -c "4-"
18#==================================================
19
#==================================================
# Verbosity
# High level verbosity; a value already set by the caller wins, 3 otherwise
typeset -i Verbosity=${Verbosity:-3}

#==================================================
# DEBUG_debug
# Low level verbosity (call stack tracing); false unless already set
DEBUG_debug=${DEBUG_debug:-false}

#==================================================
# RandomError
# Generate random errors ; only applies if ( ${DEBUG_debug} )
typeset -r RandomError=false

#==================================================
# NULL_STR
# Default null string, also used as the bottom marker of the call stack
typeset -r NULL_STR='_0_'

#==================================================
# libIGCMVersion
# Current libIGCM tag, check compatibility with *.card
typeset -r libIGCMVersion='2.7'

#==================================================
# ExitFlag (internal debug)
# When true, end the master loop AFTER SAVING FILES
ExitFlag=false

#==================================================
# simulationIsOver
# When we start to run, the simulation is not finished
simulationIsOver=false

#==================================================
# FlushAMQP
# When we start to run, we do not flush AMQP messages
FlushAMQP=false

#==================================================
# Call stack: three parallel arrays (function names, arguments, start times)
# plus the current depth. Index 0 is the top of the stack.
unset IGCM_debug_Stack IGCM_debug_StackArgs IGCM_debug_StackTiming
IGCM_debug_Stack[0]=${NULL_STR}
IGCM_debug_StackArgs[0]=${NULL_STR}
IGCM_debug_StackTiming[0]=${NULL_STR}
IGCM_debug_LenStack=0
65
#D-#==================================================================
#D-function IGCM_debug_getDate_ms
#D- * Purpose: Give number of milliseconds since 01-jan-1970
#D- * Output: the number of milliseconds, on stdout
function IGCM_debug_getDate_ms
{
  typeset nanosecs ms

  # nanoseconds since 01-jan-1970 (%N is an extension, not every date has it)
  nanosecs=$( date +%s%N )

  case ${nanosecs} in
  ''|*[!0-9]*)
    # %N was not understood (the output is not purely numeric):
    # fall back on whole seconds, expressed in milliseconds
    ms="$( date +%s )000"
    ;;
  *)
    # drop the last 6 digits to get milliseconds since 01-jan-1970
    ms=${nanosecs%??????}
    ;;
  esac

  echo "$ms"
}
80
#D-#==================================================================
#D-function IGCM_debug_sizeOfTabContent
#D- * Purpose: Give the summed size of a list of files or directories
#D- * Usage: IGCM_debug_sizeOfTabContent entityList destination
#D- *        where entityList is the NAME of a variable holding a list of files or directories
#D- *        where destination is either a directory or a file name
#D- * Output: "sizeInKo|sizeInMo" on stdout (du -k units, and that value divided by 1024)
function IGCM_debug_sizeOfTabContent
{
  typeset entityListe destination iEntity sizeKo sumSizeKo sumSizeMo
  typeset i entity baseName duOutput

  # ${1} is a variable name (possibly carrying a [*] subscript): expand it by name.
  # The expansion is left unquoted on purpose so that the list is split on blanks.
  eval "entityListe=( \${${1}} )"
  destination=${2}
  sumSizeKo=0

  # Here we will try to compute size (file or directory size) from local path and not from archive.
  for (( i = 0; i < ${#entityListe[*]}; i += 1 )) ; do
    entity=${entityListe[$i]}
    # remove path /home/login/../ from the element
    baseName=${entity##/*/}
    # forget the previous iteration: an entity that cannot be located must not be counted
    iEntity=""

    if [ -f "${entity}" ] ; then
      # One file or a bunch of files has been copied without renaming from a visible filesystem
      iEntity=${entity}
    elif [ -f "${baseName}" ] ; then
      # One file or a bunch of files has been copied without renaming from a non visible filesystem
      iEntity=${baseName}
    elif [ -f "${destination}" ] ; then
      # a file has been copied and renamed
      iEntity=${destination}
    elif [ -f "${destination}/${baseName}" ] ; then
      # a copy in a directory but not in ${PWD}
      iEntity=${destination}/${baseName}
    elif [ -d "${entity}" ] ; then
      # a directory has been copied from a non remote place
      iEntity=${entity}
    elif [ -d "${destination}/${baseName}" ] ; then
      # a directory has been copied from a remote archive and not renamed
      iEntity=${destination}/${baseName}
    elif [ -d "${destination}" ] ; then
      # a directory has been copied from a remote archive and renamed
      iEntity=${destination}
    fi

    # Nothing found locally for this entity: skip it instead of measuring
    # the previous entity again (or the current directory)
    if [ -z "${iEntity}" ] ; then
      continue
    fi

    # du prints "<size><TAB><name>": keep the leading digits only
    duOutput=$( du --apparent-size -skL "${iEntity}" )
    sizeKo=${duOutput%%[!0-9]*}
    if [ -n "${sizeKo}" ] ; then
      sumSizeKo=$(( sumSizeKo + sizeKo ))
    fi
  done
  sumSizeMo=$( echo "scale=6;${sumSizeKo}/1024" | bc )
  echo "${sumSizeKo}|${sumSizeMo}"
}
126
#D-#==================================================================
#D-function IGCM_debug_send_AMQP_msg__MAILTUNNEL
#D- * Purpose: Take over AMQP C client using mail as a message recipient
#D- * One argument : base64 encoded message
#D- * Attach encoded config.card when starting the simulation
#D- * Messages are queued in a buffer file under ${R_BUF}. The buffer is mailed
#D- * when initBigBro is true, when the last mail is older than one hour,
#D- * or when FlushAMQP is true.

function IGCM_debug_send_AMQP_msg__MAILTUNNEL {

  typeset b64_encoded_msg mail_recipient
  typeset buffer send_messages mail_frequency
  typeset last_mail_date__file
  typeset secondsBetweenRefAndLastMail secondsSinceLastMail

  b64_encoded_msg=$1

  mail_recipient="superviseur@ipsl.jussieu.fr"
  send_messages=0
  mail_frequency=3600 # in seconds
  # used to keep track of when the last mail was sent (maybe to be replaced with global variable)
  last_mail_date__file=${R_BUF}/.stamp.${config_UserChoices_TagName}.${config_UserChoices_JobName}
  # used to accumulate messages before sending them
  buffer=${R_BUF}/.buffer.${config_UserChoices_TagName}.${config_UserChoices_JobName}

  # init
  [ -f "${buffer}" ] || touch "${buffer}"

  if [ ! -f "${last_mail_date__file}" ]; then
    touch "${last_mail_date__file}"
  elif secondsBetweenRefAndLastMail=$( stat -c %Y "${last_mail_date__file}" ) ; then
    # Only executed when the stat command succeeded.
    # The stat command might fail in some circumstance but we consider it is ok to continue anyway.
    secondsSinceLastMail=$(( $( date +%s ) - secondsBetweenRefAndLastMail ))
    # send message when exceeding threshold
    if [ ${secondsSinceLastMail} -gt ${mail_frequency} ] ; then
      send_messages=1
    fi
  fi

  # queue messages in the buffer
  printf '%s\n' "${b64_encoded_msg}" >> "${buffer}"

  # send mail
  if [ X${initBigBro} = Xtrue ] ; then
    # first message of the simulation: the encoded config.card is attached
    mailx -s "[TEMPORARY AMQP CHANNEL]" -a "${SUBMIT_DIR}/config.card.base64" ${mail_recipient} < "${buffer}" # send buffer
    rm -f "${buffer}" ; touch "${buffer}"                              # clear buffer
    touch "${last_mail_date__file}"                                    # memorize last mail date
    initBigBro=false
  elif [ ${send_messages} -eq 1 ] ; then
    mailx -s "[TEMPORARY AMQP CHANNEL]" ${mail_recipient} < "${buffer}" # send buffer
    rm -f "${buffer}" ; touch "${buffer}"                              # flush the buffer
    touch "${last_mail_date__file}"                                    # memorize last mail date
  fi

  if [ X${FlushAMQP} = Xtrue ] ; then
    # The buffer is empty when one of the branches above has just sent it:
    # do not send a second, empty, mail in that case.
    if [ -s "${buffer}" ] ; then
      mailx -s "[TEMPORARY AMQP CHANNEL]" ${mail_recipient} < "${buffer}" # send buffer
    fi
    rm -f "${buffer}"                                                  # cleaning behind us
    rm -f "${last_mail_date__file}"                                    # cleaning behind us
  fi

  # Always all good for now.
  return 0
}
198
#D-#==================================================================
#D-function IGCM_debug_sendAMQP_Metrics
#D- * Purpose: Take over AMQP C client using mail as a message recipient
#D- * Two arguments : - Directory where metrics.json files can be found
#D- *                 - Metrics Group Name. metrics will be added to this group
#D- * Attach encoded metrics.json files.
#D- * Does nothing unless ActivateBigBro is true.

function IGCM_debug_sendAMQP_Metrics {

  typeset mail_recipient encodedBody
  # kept local so that the global message "code" of the caller is not clobbered
  typeset code attachmentsOptions metricsFile

  if [ X${ActivateBigBro} = Xtrue ] ; then
    mail_recipient="superviseur@ipsl.jussieu.fr"
    # Metrics tag on server side
    code=7100
    # Usual AMQP message to route messages on server side
    encodedBody=$( echo "{${genericSimulationID},\"msgCode\":\"${code}\",\"msgUID\":\"$(uuidgen)\",\"metricsGroupName\":\"${2}\",\"msgTimestamp\":\"$( date +"%Y-%m-%dT%H:%M:%S.%N%z" )\"}" |  base64 -w 0 )
    # one "-a file" option per json file found in the directory
    attachmentsOptions=""
    for metricsFile in "${1}"/*json ; do
      # an unmatched pattern is left as is by the shell: skip it
      [ -f "${metricsFile}" ] || continue
      attachmentsOptions="-a ${metricsFile} ${attachmentsOptions}"
    done
    IGCM_debug_Print 2 "IGCM_debug_sendAMQP_Metrics "
    # send mail (attachmentsOptions is left unquoted on purpose: one word per option)
    echo ${encodedBody}|mailx -s "[TEMPORARY AMQP CHANNEL]" ${attachmentsOptions} ${mail_recipient}
  fi

  # Always all good for now.
  return 0
}
227
#D-#==================================================================
#D-function IGCM_debug_sendAMQP_projectAccounting
#D- * Purpose: Take over AMQP C client using mail as a message recipient
#D- * One argument : - File name where project accounting details are stored
#D- * Attach encoded accounting file.
#D- * Does nothing unless ActivateBigBro is true.

function IGCM_debug_sendAMQP_projectAccounting {

  typeset mail_recipient encodedBody
  # kept local so that the global message "code" of the caller is not clobbered
  typeset code attachmentsOptions

  if [ X${ActivateBigBro} = Xtrue ] ; then
    mail_recipient="superviseur@ipsl.jussieu.fr"
    # Project accounting tag on server side
    code=7010
    # Usual AMQP message to route messages on server side
    encodedBody=$( echo "{${genericSimulationID},\"msgCode\":\"${code}\",\"msgUID\":\"$(uuidgen)\",\"accountingProject\":\"${PROJECT}\",\"centre\":\"${CENTER}\",\"msgTimestamp\":\"$( date +"%Y-%m-%dT%H:%M:%S.%N%z" )\"}" |  base64 -w 0 )
    # the accounting file travels as an attachment
    attachmentsOptions="-a ${1}"
    IGCM_debug_Print 2 "IGCM_debug_sendAMQP_projectAccounting"
    # send mail (attachmentsOptions is left unquoted on purpose: one word per option)
    echo ${encodedBody}|mailx -s "[TEMPORARY AMQP CHANNEL]" ${attachmentsOptions} ${mail_recipient}
  fi

  # Always all good for now.
  return 0
}
252
#D-#==================================================================
#D-function IGCM_debug_sendAMQP
#D- * Purpose: Send body; encoded body and config.card to rabbitMQ
#D- * Optional argument : "activate" to also send the base64 encoded config.card
#D- * The message is read from the global variable Body.
#D- * Exits with status 1 when the message could not be sent.
function IGCM_debug_sendAMQP {

  typeset decal additionnalOption encodedBody status

  # Encode message Body
  encodedBody=$( echo "${Body}" | base64 -w 0 )

  # Send config.card ?
  if [ "X${1}" = Xactivate ] ; then
    # Encode config.card
    base64 -w 0 < "${SUBMIT_DIR}/config.card" > "${SUBMIT_DIR}/config.card.base64"
    # Prepare additionnal option
    additionnalOption="-f ${SUBMIT_DIR}/config.card.base64"
    # The mail tunnel attaches config.card.base64 when this flag is true
    initBigBro=true
  else
    additionnalOption=
    #
    initBigBro=false
  fi

  # Only cosmetics : stack file
  if [ X${ActivateStackFilling} = Xtrue ] ; then
    # indent by one blank per stack level
    decal=0
    while [ ${decal} -lt ${IGCM_debug_LenStack} ]; do
      printf ' ' >> "${StackFileLocation}/${StackFileName}"
      (( decal = decal + 1 ))
    done
    # Log to stack file using human readable format
    echo "${Body}" >> "${StackFileLocation}/${StackFileName}"
  fi

  # Send the message
  if [ X${BigBrotherChannel} = XMAIL ] ; then
    IGCM_debug_send_AMQP_msg__MAILTUNNEL "${encodedBody}"
    status=$?
  else
    # additionnalOption is left unquoted on purpose: it is either empty or "-f file"
    sendAMQPMsg -h localhost -p 5672 ${additionnalOption} -b ${encodedBody}
    status=$?
  fi

  if [ ${status} -gt 0 ] ; then
    IGCM_debug_Print 2 "IGCM_debug_Push/PopStack/ActivateBigBro : command sendAMQPMsg failed error code ${status}"
    echo sendAMQPMsg -h localhost -p 5672 -b "${Body}"
    exit 1
  fi
}
306
#D-#==================================================================
#D-function IGCM_debug_CallStack
#D-* Purpose: Print the call stack tree from the oldest to the youngest (opposite of the display standard)
#D-* One line per stack level, indented by its depth; only active when DEBUG_debug is true
#D-
function IGCM_debug_CallStack {
  if ( $DEBUG_debug ) ; then
    typeset depth pad slot
    depth=0
    until [ ${depth} -eq ${IGCM_debug_LenStack} ]; do
      # Cosmetics: one leading blank per level
      pad=0
      while [ ${pad} -lt ${depth} ]; do
        printf -- ' '
        (( pad = pad + 1 ))
      done
      # index 0 is the youngest call, so walk the arrays backwards
      slot=$(( $IGCM_debug_LenStack-$depth-1 ))
      echo "${depth} - ${IGCM_debug_Stack[${slot}]}" "(${IGCM_debug_StackArgs[${slot}]})"
      (( depth = depth + 1 ))
    done
  fi
}
327
#D-#==================================================================
#D-function IGCM_debug_PushStack
#D-* Purpose: Push a function name in the stack
#D-* Arguments: the function name, followed by the arguments it was called with
#D-* Only active when DEBUG_debug is true
#D-
function IGCM_debug_PushStack {
  if ( $DEBUG_debug ) ; then
    typeset decal inputs startTime_ms

    # Only cosmetics : stack file
    if [ X${ActivateStackFilling} = Xtrue ] ; then
      echo >> "${StackFileLocation}/${StackFileName}"
      # indent by one blank per stack level
      decal=0
      while [ ${decal} -lt ${IGCM_debug_LenStack} ]; do
        printf ' ' >> "${StackFileLocation}/${StackFileName}"
        (( decal = decal + 1 ))
      done

      # Fill the stack file
      echo "> ${IGCM_debug_LenStack} : ${@}" >> "${StackFileLocation}/${StackFileName}"
    fi

    # Save input list in a local indexed array.
    # Left unquoted on purpose: arguments are stored as a blank separated list.
    inputs=( $@ )

    # Get timing information
    startTime_ms=$( IGCM_debug_getDate_ms )

    # We add function call name on beginning of the stack
    IGCM_debug_Stack=( ${1} ${IGCM_debug_Stack[*]} )

    # Save timing in milliseconds in an indexed array
    IGCM_debug_StackTiming=( ${startTime_ms} ${IGCM_debug_StackTiming[*]} )

    # We include the "null" Args in the beginning of the StackArgs
    IGCM_debug_StackArgs=( ${NULL_STR} ${IGCM_debug_StackArgs[*]} )

    # Then, when there are arguments, we overwrite that "null" entry:
    # the blank separated list becomes a single word whose elements are joined by ","
    # (quotes included), so that it survives the unquoted array expansions above.
    if [ $# -gt 1 ]; then
      IGCM_debug_StackArgs[0]=$(echo ${inputs[*]:1} | sed -e "s/\ /\",\"/g" )
    fi

    # Increment LenStack
    (( IGCM_debug_LenStack = IGCM_debug_LenStack + 1 ))

    #IGCM_debug_CallStack
  fi
}
376
#D-#==================================================================
#D-function IGCM_debug_PopStack
#D-* Purpose: Pop a function name from the stack
#D-* Argument: name of the function being left; it must match the top of the stack
#D-* For copy, rebuild and NCO/CDO wrappers, size and timing information
#D-* is handed over to IGCM_debug_PrintInfosActions
#D-* Only active when DEBUG_debug is true
#D-
function IGCM_debug_PopStack {
  if ( $DEBUG_debug ) ; then
    typeset i decal command arguments startTime_ms endTime_ms
    typeset instrumentation dest prefix
    typeset entitySize lastIndex
    # fileList and source are not typeset because they are sent "by name"
    # to IGCM_debug_sizeOfTabContent; we unset them to avoid "memory effect"
    unset fileList source

    # INTRODUCE SIMPLE ERROR GENERATOR TO TEST SUPERVISOR
    # THE TEST BELOW FIRES FOR 11 VALUES OUT OF 10000 (~0.0011 PER COMMAND OR FUNCTION CALL)
    # THERE ARE ~500 COMMAND OR FUNCTION CALL PER PERIOD
    # ONLY WHEN TaskType is "computing".
    if [ X${ActivateBigBro} = Xtrue ] ; then
      if [ X${TaskType} = Xcomputing ]; then
        if ( ${RandomError} ) ; then
          if [ $((RANDOM%10000)) -le 10 ] ; then
            IGCM_debug_Print 1 "Random error has been triggered"
            if [ X${ActivateStackFilling} = Xtrue ] ; then
              echo "RANDOM ERROR" >> "${StackFileLocation}/${StackFileName}"
            fi
            ExitFlag=true
          fi
        fi
      fi
    fi

    if [ "${IGCM_debug_Stack[0]}" = "${1}" ]; then
      # Everything is cool

      # Get timing information
      endTime_ms=$( IGCM_debug_getDate_ms )

      # Save Stack information before popping the stack
      command=${IGCM_debug_Stack[0]}

      # Go from comma separated list of quoted elements (except the first and the last element)
      # to unquoted space separated elements in an array
      arguments=( $( echo ${IGCM_debug_StackArgs[0]} | sed -e "s/\",\"/\ /g" ) )

      # Save Stack information before popping the stack
      startTime_ms=${IGCM_debug_StackTiming[0]}

      # Pop the stack
      (( IGCM_debug_LenStack = IGCM_debug_LenStack - 1 ))
      IGCM_debug_Stack=( ${IGCM_debug_Stack[*]:1} )
      IGCM_debug_StackArgs=( ${IGCM_debug_StackArgs[*]:1} )
      IGCM_debug_StackTiming=( ${IGCM_debug_StackTiming[*]:1} )
    else
      echo 'IGCM_debug_Exit : stack is corrupted ! LenStack =' ${IGCM_debug_LenStack}
      IGCM_debug_Exit $@
    fi

    # Special actions depending on command to prepare IGCM_debug_PrintInfosActions call
    # We are interested in:
    #  0. Which command performs the work
    #  1. Size of entity we are working with
    #  2. Where are we reading
    #  3. Where are we writing
    #  4. How long it took

    instrumentation=false

    case ${command} in
    # Classical copy (only files are given to IGCM_sys_Cp as options)
    IGCM_sys_Cp)
      instrumentation=true
      # All but the latest
      fileList=${arguments[*]:0:${#arguments[*]}-1}
      # just need the first file to get the directory
      source=${arguments[0]}
      # Nothing but the latest
      dest=${arguments[${#arguments[*]}-1]}
      # Size of file whose name are stored in a list
      entitySize=$( IGCM_debug_sizeOfTabContent fileList ${dest} )
      ;;

    # Copy from archive machine or from buffer
    IGCM_sys_Get|IGCM_sys_GetBuffer)
      instrumentation=true
      if [ ${#arguments[*]} -eq 2 ] ; then
        source=${arguments[0]}
        dest=${arguments[1]}
        # Size of file whose name are stored in a variable
        entitySize=$( IGCM_debug_sizeOfTabContent source ${dest} )
      elif [ ${#arguments[*]} -eq 3 ] && [ "${arguments[0]}" = '/l' ] ; then
        # IGCM_sys_Get /l liste_file[*] /ccc/scratch/cont003/dsm/p86denv/RUN_DIR/985998_14754/
        # arguments[1] is the NAME of the array hosting the whole list: expand it by name
        eval "fileList=( \${${arguments[1]}} )"
        # just need the first file to get the directory
        source=${fileList[0]}
        dest=${arguments[2]}
        # Size of file whose name are stored in a list
        entitySize=$( IGCM_debug_sizeOfTabContent fileList[*] ${dest} )
      elif [ ${#arguments[*]} -ge 3 ] ; then
        # All but the latest
        fileList=${arguments[*]:0:${#arguments[*]}-1}
        # just need the first file to get the directory
        source=${arguments[0]}
        # Nothing but the latest
        dest=${arguments[${#arguments[*]}-1]}
        # Size of file whose name are stored in a list
        entitySize=$( IGCM_debug_sizeOfTabContent fileList ${dest} )
      fi
      ;;

    # Copy from compute node or copy to archive/buffer
    IGCM_sys_Get_Master|IGCM_sys_Get_Dir|IGCM_sys_Put_Out|IGCM_sys_PutBuffer_Out)
      instrumentation=true
      source=${arguments[0]}
      dest=${arguments[1]}
      # Size of file whose name are stored in a variable
      entitySize=$( IGCM_debug_sizeOfTabContent source ${dest} )
      ;;

    # Rebuild command
    IGCM_sys_rebuild|IGCM_sys_rebuild_station)
      instrumentation=true
      # All but the first
      fileList=${arguments[*]:1:${#arguments[*]}-1}
      # just need a file to get the directory
      source=${arguments[1]}
      # Nothing but the first
      dest=${arguments[0]}
      # Size of file whose name are stored in a list
      entitySize=$( IGCM_debug_sizeOfTabContent fileList ${dest} )
      ;;

    # NCO commands
    IGCM_sys_ncrcat|IGCM_sys_ncecat|IGCM_sys_ncra|IGCM_sys_ncks|IGCM_sys_cdo)
      # Example of what we want to catch : only filenames in those command lines
      # IGCM_sys_ncrcat -O -v ${list_var_final_ncrcat} ${OUT_SE[*]} ${RESULT_SE}
      # IGCM_sys_ncrcat --hst -v ${liste_coord}${var} ${file1} ${liste_file_tmp[*]} ${file_out}
      # IGCM_sys_ncrcat -p ${dir} ${liste_file_tmp} --output ${output}
      # IGCM_sys_ncrcat -x -v ${list_var} -p ${dir} ${liste_file_tmp} --output ${output}
      instrumentation=true
      prefix=.
      # the last argument is not interesting here (it is the destination)
      lastIndex=$(( ${#arguments[*]} - 1 ))
      i=0
      # "-lt" (and not "-eq") so that the scan also stops when "-p" makes the index jump over lastIndex
      while [ ${i} -lt ${lastIndex} ] ; do
        if [ "${arguments[${i}]}" = "-p" ] ; then
          # look after "-p" option. Path prefix is the following argument
          (( i = i + 1 ))
          prefix=${arguments[${i}]}
        elif [ -f "${prefix}/${arguments[${i}]}" ] ; then
          # looking for files
          fileList="${fileList} ${prefix}/${arguments[${i}]}"
        fi
        # other options are not interesting
        (( i = i + 1 ))
      done

      # just need one file to get the directory:
      # first word of the blank separated list (which starts with a blank)
      source=${fileList# }
      source=${source%% *}
      # Nothing but the latest
      dest=${arguments[${#arguments[*]}-1]}
      # Size of file whose name are stored in a list
      entitySize=$( IGCM_debug_sizeOfTabContent fileList ${dest} )
      ;;
    esac

    # Print information related to instrumentation
    if [ "${instrumentation}" = true ] ; then
      IGCM_debug_PrintInfosActions ${command} ${entitySize} ${startTime_ms} ${endTime_ms} ${dest} ${source}
    fi

    # Only cosmetics : stack file (indent by one blank per stack level)
    if [ X${ActivateStackFilling} = Xtrue ] ; then
      decal=0
      while [ ${decal} -lt ${IGCM_debug_LenStack} ]; do
        printf ' ' >> "${StackFileLocation}/${StackFileName}"
        (( decal = decal + 1 ))
      done
    fi

    if ( ${ExitFlag} ) ; then
      # Inform the stack file
      if [ X${ActivateStackFilling} = Xtrue ] ; then
        echo '!!! ExitFlag has been activated !!!' >> "${StackFileLocation}/${StackFileName}"
      fi

      # Unplugged message 4900 handling for now. To ease downstream treatment.
      if [ X${ActivateBigBro} = Xtrue ] ; then
        if [ X${TaskType} = Xcomputing ]; then
          # RabbitMQ message code "COMPUTING JOBs COMMAND FAILURE"
          code=1900
        elif [ X${TaskType} = Xpost-processing ]; then
          # RabbitMQ message code "POST-PROCESSING JOBs COMMAND FAILURE"
          code=2900
        elif [ X${TaskType} = Xchecking ]; then
          # RabbitMQ message code "POST-PROCESSING FROM CHECKER JOBs COMMAND FAILURE"
          code=3900
        fi
        # RabbitMQ message body (global: it is read by IGCM_debug_sendAMQP)
        Body=$( echo "{${genericSimulationID},\"msgCode\":\"${code}\",\"msgUID\":\"$(uuidgen)\",\"command\":\"${command}\",\"msgTimestamp\":\"$( date +"%Y-%m-%dT%H:%M:%S.%N%z" )\"}" )

        # Fill the rabbitMQ queue
        IGCM_debug_sendAMQP
      fi
    else
      # Inform the stack file
      if [ X${ActivateStackFilling} = Xtrue ] ; then
        echo "< ${IGCM_debug_LenStack} : ${@}" >> "${StackFileLocation}/${StackFileName}"
      fi
    fi

    # Reset array if necessary
    if [ ${IGCM_debug_LenStack} = 0 ]; then
      #echo
      #IGCM_debug_Print 3 "Clean stack array"
      #echo
      unset IGCM_debug_Stack
      unset IGCM_debug_StackArgs
      unset IGCM_debug_StackTiming
      IGCM_debug_Stack[0]=${NULL_STR}
      IGCM_debug_StackArgs[0]=${NULL_STR}
      IGCM_debug_StackTiming[0]=${NULL_STR}
    fi
  fi
  #IGCM_debug_CallStack
}
608
#D-#==================================================================
#D-function IGCM_debug_BigBro_Initialize
#D-* Purpose: switch rabbitMQ on
#D-* Sends the first message of the job (code 0000, 1000 or 2000 depending on
#D-* TaskType and FirstInitialize), then the project accounting details,
#D-* and finally sets ActivateBigBro to true. Does nothing unless BigBrother is true.
#D-
function IGCM_debug_BigBro_Initialize {
  IGCM_debug_PushStack "IGCM_debug_BigBro_Initialize"

  typeset postProcessingIDLength postProcessingName postProcessingDate postProcessingDimn postProcessingComp postProcessingFile
  typeset accountingFile

# Message type standard fields:
# https://github.com/Prodiguer/prodiguer-docs/wiki/MQ-Standard-Message-Fields

# Message type dictionnary and custom fields:
# https://github.com/Prodiguer/prodiguer-docs/wiki/Monitoring-Message-Dictionary

  if [ X${BigBrother} = Xtrue ] ; then
    # create a unique ID for this specific job
    jobuid=$(uuidgen)

    # get the assigned id by the scheduler for that job
    IGCM_sys_getJobSchedulerID jobSchedulerID

    if [ X${TaskType} = Xcomputing ]; then
      if ( ${FirstInitialize} ) ; then
        # RabbitMQ message code "BEGIN A SIMULATION"
        code=0000
        # create and persist a unique id for this simulation
        simuid=$(uuidgen)
        IGCM_card_WriteOption ${SUBMIT_DIR}/run.card Configuration simuid ${simuid}
        # Standard fields for the first message
        genericSimulationID=$( echo "\"msgApplication\":\"monitoring\",\"msgProducer\":\"libigcm\",\"msgProducerVersion\":\"${libIGCMVersion}\",\"activity\":\"IPSL\",\"name\":\"${config_UserChoices_JobName}\",\"experiment\":\"${config_UserChoices_ExperimentName}\",\"space\":\"${config_UserChoices_SpaceName}\",\"model\":\"${config_UserChoices_TagName}\",\"startDate\":\"${config_UserChoices_DateBegin}\",\"endDate\":\"${config_UserChoices_DateEnd}\",\"login\":\"${LOGIN}\",\"centre\":\"${CENTER}\",\"machine\":\"${MASTER}\",\"simuid\":\"${simuid}\",\"jobuid\":\"${jobuid}\"" )
        # RabbitMQ message body with specific fields associated message codes treated here
        Body=$( echo "{${genericSimulationID},\"msgCode\":\"${code}\",\"accountingProject\":\"${PROJECT}\",\"jobWarningDelay\":\"${jobWarningDelay}\",\"jobSchedulerID\":\"${jobSchedulerID}\",\"jobSubmissionPath\":\"${SUBMIT_DIR}\",\"msgUID\":\"$(uuidgen)\",\"msgTimestamp\":\"$( date +"%Y-%m-%dT%H:%M:%S.%N%z" )\"}" )
        # Fill the rabbitMQ queue (the config.card in use will be sent)
        IGCM_debug_sendAMQP activate
      else
        # RabbitMQ message code "A NEW COMPUTING JOB IS RUNNING PART OF A SIMULATION"
        code=1000
        # retrieve this simulation's unique id
        IGCM_card_DefineVariableFromOption ${SUBMIT_DIR}/run.card Configuration simuid
        simuid=${run_Configuration_simuid}
        # Using standard fields for message others than the first one. Still subject to change
        genericSimulationID=$( echo "\"msgApplication\":\"monitoring\",\"msgProducer\":\"libigcm\",\"msgProducerVersion\":\"${libIGCMVersion}\",\"simuid\":\"${simuid}\",\"jobuid\":\"${jobuid}\"" )
        # RabbitMQ message body with specific fields associated message codes treated here
        Body=$( echo "{${genericSimulationID},\"msgCode\":\"${code}\",\"accountingProject\":\"${PROJECT}\",\"jobWarningDelay\":\"${jobWarningDelay}\",\"jobSchedulerID\":\"${jobSchedulerID}\",\"jobSubmissionPath\":\"${SUBMIT_DIR}\",\"msgUID\":\"$(uuidgen)\",\"msgTimestamp\":\"$( date +"%Y-%m-%dT%H:%M:%S.%N%z" )\"}" )
        # Fill the rabbitMQ queue
        IGCM_debug_sendAMQP
      fi

      # NOT VERY NICE BUT ... IT WORKS
      # Be sure that the genericSimulationID will be small from now on
      # Using standard fields for messages others than the first one. Still subject to change
      genericSimulationID=$( echo "\"msgApplication\":\"monitoring\",\"msgProducer\":\"libigcm\",\"msgProducerVersion\":\"${libIGCMVersion}\",\"simuid\":\"${simuid}\",\"jobuid\":\"${jobuid}\"" )

    elif [ X${TaskType} = Xpost-processing ]; then
      # RabbitMQ message code "A NEW POST-PROCESSING JOB IS RUNNING PART OF A SIMULATION"
      code=2000
      # retrieve this simulation's unique id
      IGCM_card_DefineVariableFromOption ${SUBMIT_DIR}/run.card Configuration simuid
      simuid=${run_Configuration_simuid}
      # Using standard fields for message others than the first one. Still subject to change
      genericSimulationID=$( echo "\"msgApplication\":\"monitoring\",\"msgProducer\":\"libigcm\",\"msgProducerVersion\":\"${libIGCMVersion}\",\"simuid\":\"${simuid}\",\"jobuid\":\"${jobuid}\"" )

      # Specify the post-processing task we are dealing with.
      # Script_Post_Output is split on dots; the number of dots tells which fields are present.
      postProcessingIDLength=$( echo "${Script_Post_Output}" | tr -d -c "\." | wc -c )
      postProcessingName=$( echo "${Script_Post_Output}" | gawk -F. '{print $1}' )
      postProcessingDate=$( echo "${Script_Post_Output}" | gawk -F. '{print $2}' )
      postProcessingDimn="null"
      postProcessingComp="null"
      postProcessingFile="null"
      if [ ${postProcessingIDLength} -eq 2 ] ; then
        postProcessingDimn=$( echo "${Script_Post_Output}" | gawk -F. '{print $3}' )
      elif [ ${postProcessingIDLength} -eq 4 ] ; then
        postProcessingComp=$( echo "${Script_Post_Output}" | gawk -F. '{print $4}' )
        postProcessingFile=$( echo "${Script_Post_Output}" | gawk -F. '{print $5}' )
      fi

      # RabbitMQ message body with specific fields associated message codes treated here
      Body=$( echo "{${genericSimulationID},\"msgCode\":\"${code}\",\"accountingProject\":\"${PROJECT}\",\"jobWarningDelay\":\"${jobWarningDelay}\",\"jobSchedulerID\":\"${jobSchedulerID}\",\"jobSubmissionPath\":\"${SUBMIT_DIR}\",\"msgUID\":\"$(uuidgen)\",\"msgTimestamp\":\"$( date +"%Y-%m-%dT%H:%M:%S.%N%z" )\",\"postProcessingName\":\"${postProcessingName}\",\"postProcessingDate\":\"${postProcessingDate}\",\"postProcessingDimn\":\"${postProcessingDimn}\",\"postProcessingComp\":\"${postProcessingComp}\",\"postProcessingFile\":\"${postProcessingFile}\"}" )
      # Fill the rabbitMQ queue
      IGCM_debug_sendAMQP
    fi

    # The file name embeds the current minute: build it once, so that the file
    # which is written and the file which is sent are guaranteed to be the same
    accountingFile=cpt_${CENTER}_${PROJECT}_$( date +"%Y%m%d_%H%M" ).dat

    # Save project accounting details in a file
    IGCM_sys_projectAccounting ${accountingFile}

    # And send it
    IGCM_debug_sendAMQP_projectAccounting ${accountingFile}

    # Turn the flag on
    ActivateBigBro=true
  fi
  IGCM_debug_PopStack "IGCM_debug_BigBro_Initialize"
}
703
#D-#==================================================================
#D-function IGCM_debug_BigBro_Finalize
#D-* Purpose: Finalize rabbitMQ messages exchanges
#D-
function IGCM_debug_BigBro_Finalize {
  IGCM_debug_PushStack "IGCM_debug_BigBro_Finalize"

  # Message type standard fields:
  # https://github.com/Prodiguer/prodiguer-docs/wiki/MQ-Standard-Message-Fields
  #
  # Message type dictionary and custom fields:
  # https://github.com/Prodiguer/prodiguer-docs/wiki/Monitoring-Message-Dictionary

  typeset bbUID bbStamp

  # A closing message is only emitted in debug mode and when the
  # initialization step switched the ActivateBigBro flag on.
  if ( $DEBUG_debug ) && [ X${ActivateBigBro} = Xtrue ] ; then

    # Select the message code from the task type and its outcome.
    # FlushAMQP is raised for every outcome except a plain
    # "COMPUTING JOB ENDS", which leaves it untouched.
    case "${TaskType}" in
    computing)
      if ( ${simulationIsOver} ) ; then
        # "SIMULATION ENDS"
        code=0100
        FlushAMQP=true
      elif ( ${ExitFlag} ) ; then
        # "EXIT THE JOBS BECAUSE ERROR(S) HAS BEEN TRIGGERED"
        code=1999
        FlushAMQP=true
      else
        # "COMPUTING JOB ENDS"
        code=1100
      fi
      ;;
    post-processing)
      if ( ${ExitFlag} ) ; then
        # "POST-PROCESSING JOB FAILS"
        code=2999
      else
        # "POST-PROCESSING JOB ENDS"
        code=2100
      fi
      FlushAMQP=true
      ;;
    checking)
      # NOTE(review): presumably the checking counterparts of 2999/2100;
      # confirm the wording against the monitoring message dictionary.
      if ( ${ExitFlag} ) ; then
        code=3999
      else
        code=3100
      fi
      FlushAMQP=true
      ;;
    esac

    # RabbitMQ message body: generic fields, code, unique id, timestamp
    bbUID=$(uuidgen)
    bbStamp=$( date +"%Y-%m-%dT%H:%M:%S.%N%z" )
    Body=$( echo "{${genericSimulationID},\"msgCode\":\"${code}\",\"msgUID\":\"${bbUID}\",\"msgTimestamp\":\"${bbStamp}\"}" )

    # Fill the rabbitMQ queue
    IGCM_debug_sendAMQP
  fi

  IGCM_debug_PopStack "IGCM_debug_BigBro_Finalize"
}
762
#D-#==================================================================
#D-function IGCM_debug_Exit
#D-* Purpose: Print Call Stack and set ExitFlag to true
#D-
function IGCM_debug_Exit {
  IGCM_debug_PushStack "IGCM_debug_Exit"

  # First line: the caller supplied message(s)
  echo "IGCM_debug_Exit : " "${@}"

  # Error banner, framed by one blank line above and one below
  printf '%s\n' \
    "" \
    "!!!!!!!!!!!!!!!!!!!!!!!!!!" \
    "!!   ERROR TRIGGERED    !!" \
    "!!   EXIT FLAG SET      !!" \
    "!------------------------!" \
    ""

  IGCM_debug_CallStack

  # Only the flag is raised here; this function does not leave the shell.
  ExitFlag=true

  IGCM_debug_PopStack "IGCM_debug_Exit"
}
780
#D-#==================================================
#D-function IGCM_debug_Verif_Exit
#D-* Purpose: exit with number 1 if ExitFlag is true
#D-
function IGCM_debug_Verif_Exit {
  # Nothing to do unless an error has been flagged (ExitFlag=true).
  if ( ${ExitFlag} ) ; then
    echo "IGCM_debug_Verif_Exit : Something wrong happened previously."
    echo "IGCM_debug_Verif_Exit : ERROR and EXIT keyword will help find out where."
    # Only computing TaskType stops the job for now.
    if [ X${TaskType} = Xcomputing ] ; then
      # Record the failure in run.card before leaving
      IGCM_card_WriteOption ${SUBMIT_DIR}/run.card Configuration PeriodState "Fatal"
      echo "                        EXIT THE JOB."
      echo
      IGCM_debug_CallStack

      # Mail notification
      IGCM_sys_SendMail

      # Inform the rabbitMQ queue
      IGCM_debug_BigBro_Finalize

      # And Good Bye
      date
      exit 1

    elif [ X${TaskType} = Xpost-processing ] ; then
      # Test if we need to stop the computing job: the threshold that
      # postProcessingStopLevel must exceed depends on the kind of
      # post-processing that failed.
      case ${postProcessingName} in
      atlas*|monitoring*|metrics*)
        [ ${postProcessingStopLevel} -gt 2 ] && StopAll=true ;;
      create_*)
        [ ${postProcessingStopLevel} -gt 1 ] && StopAll=true ;;
      rebuild*|pack_*)
        [ ${postProcessingStopLevel} -gt 0 ] && StopAll=true ;;
      esac
      # Notify the computing job that something wrong happened and stop it.
      # The flag tested here must be the one set just above (StopAll).
      # Running an unset variable as a command succeeds, so the former
      # "( ${StopALL} ) &&" guard wrote the Fatal state unconditionally.
      if [ "X${StopAll}" = Xtrue ] ; then
        IGCM_card_WriteOption ${SUBMIT_DIR}/run.card Configuration PeriodState "Fatal.${Script_Post_Output}"
      fi

      # If SpaceName is PROD we stop when post_processing failed
      if [ X${config_UserChoices_SpaceName} = XPROD ] ; then
        echo "                        EXIT THE POST-PROCESSING JOB."
        echo
        IGCM_debug_CallStack

        # Inform the rabbitMQ queue
        IGCM_debug_BigBro_Finalize

        # And Good Bye
        date
        exit 1
      else
        echo "In config.card the variable SpaceName is not in PROD"
        echo "              SO WE DO NOT EXIT THE JOB."
        echo
        date
      fi
    elif [ X${TaskType} = Xchecking ] ; then
      echo "Nothing will happen for now"
    fi
  fi
}
842
#D-#==================================================================
#D-function IGCM_debug_Print
#D-* Purpose: Print arguments according to a level of verbosity.
#D-
function IGCM_debug_Print
{
  # $1 is the verbosity level of the message(s); an optional "-e" right
  # after it selects "echo -e"; each remaining argument is printed on
  # its own time-stamped line.
  typeset level=$1
  shift

  typeset cmd_echo="echo"
  if [ X"${1}" = X"-e" ]; then
    cmd_echo="echo -e"
    shift
  fi

  # Messages above the current Verbosity are dropped
  if [ ${level} -le ${Verbosity} ] ; then
    typeset msg tag

    # Only levels 1, 2 and 3 have a prefix; any other level prints nothing
    case "${level}" in
    1) tag="--Debug1-->" ;;
    2) tag="--------Debug2-->" ;;
    3) tag="--------------Debug3-->" ;;
    *) return 0 ;;
    esac

    # ${cmd_echo} and ${msg} are left unquoted on purpose: the former
    # must split into command + option, the latter keeps the historical
    # output format (runs of blanks collapsed by word splitting).
    for msg in "$@" ; do
      ${cmd_echo} $(date +"%Y-%m-%d %T") "${tag}" ${msg}
    done
  fi
}
874
#D-#==================================================================
#D-function IGCM_debug_PrintVariables
#D-* Purpose: Print arguments when match a pattern
#D-           according to a level of verbosity.
function IGCM_debug_PrintVariables
{
  # $1 : verbosity level handed over to IGCM_debug_Print
  # $2 : prefix looked for at the beginning of each line of "set" output
  #
  # Local names are part of the behaviour here: this function's own
  # variables show up in the "set" listing that is being filtered.
  typeset level=$1
  shift

  # Matching "name=value" lines, with every single quote removed.
  # "list" is not declared local, so it remains set after the call.
  list=$( set | grep ^$1 | tr -d "'" )

  # Unquoted expansion: every blank-separated word of the listing is
  # passed to IGCM_debug_Print as a separate message.
  if [ -n "${list}" ] ; then
    IGCM_debug_Print ${level} ${list}
  fi
}
890
#D-#==================================================================
#D-function IGCM_debug_PrintInfosActions
#D-* Purpose: Print information related to instrumentation
function IGCM_debug_PrintInfosActions
{
  # $1 : name of the instrumented action
  # $2 : entity size given as two "|" separated fields; the first one
  #      feeds the throughput, the second one is reported as size_Mo
  #      (presumably the same size in Ko and in Mo - confirm with callers)
  # $3 : start time (ms)
  # $4 : end time (ms)
  # $5 : destination path
  # $6 : source path
  typeset actionType=$1
  typeset entitySize=$2
  typeset start_ms=$3
  typeset end_ms=$4

  typeset dest=$5
  typeset source=$6

  typeset diff_ms rate_ms entitySizeKo entitySizeMo flux_Mo_s
  typeset dirFrom dirTo

  diff_ms=$(( $end_ms - $start_ms ))

  entitySizeKo=$( echo ${entitySize} | gawk -F"|" '{print $1}' )
  entitySizeMo=$( echo ${entitySize} | gawk -F"|" '{print $2}' )

  # An action that starts and ends within the same millisecond has a
  # measured duration of 0. Dividing by it made bc fail and the
  # throughput computation abort, so the rate is computed over at
  # least 1 ms. duration_ms below still reports the measured value.
  rate_ms=${diff_ms}
  if [ "${rate_ms}" -eq 0 ] ; then
    rate_ms=1
  fi

  # Throughput in Mo/s : Ko/ms -> Ko/s (x1000) -> Mo/s (/1024).
  # Everything is done by bc with 6 decimals, the intermediate values
  # are fractional and cannot go through integer shell arithmetic.
  flux_Mo_s=$( echo "scale=6; ${entitySizeKo} / ${rate_ms} * 1000 / 1024" | bc )

  # Report directories: the path itself when it is a directory,
  # its parent directory otherwise.
  if [ -d $dest ] ; then
    dirTo=$( readlink -f ${dest} )
  else
    dirTo=$( readlink -f $( dirname ${dest} ) )
  fi

  if [ -d $source ] ; then
    dirFrom=$( readlink -f ${source} )
  else
    dirFrom=$( readlink -f $( dirname ${source} ) )
  fi

  # Not declared local: stays visible to the caller after the call.
  instrumentationContent=$( echo "\"actionName\":\"${actionType}\",\"size_Mo\":\"${entitySizeMo}\",\"duration_ms\":\"${diff_ms}\",\"throughput_Mo_s\":\"${flux_Mo_s}\",\"dirFrom\":\"${dirFrom}\",\"dirTo\":\"${dirTo}\"" )

  # Append the record to the stack file when stack filling is active
  if [ X${ActivateStackFilling} = Xtrue ] ; then
    echo "{${instrumentationContent}}" >> ${StackFileLocation}/${StackFileName}
  fi

  # Inform the rabbitMQ queue
  if [ X${ActivateBigBro} = Xtrue ] ; then
    # RabbitMQ message body
    Body=$( echo "{${genericSimulationID},\"msgCode\":\"7000\",\"msgUID\":\"$(uuidgen)\",${instrumentationContent},\"msgTimestamp\":\"$( date +"%Y-%m-%dT%H:%M:%S.%N%z" )\"}" )
    # Fill the rabbitMQ queue
    IGCM_debug_sendAMQP
  fi
}
952
#D-#==================================================================
#D-function IGCM_debug_Check
#D- * Purpose: Check the present file by comparison with a reference file
function IGCM_debug_Check
{
  # Exit codes: 2 = libIGCM undefined, 3 = Verbosity undefined,
  #             4 = test output differs from the reference file.
  # On failure the actual (timestamp free) test output is kept in
  # ./IGCM_debug_Test.ref.failed, as announced by the error message.
  typeset refFile="${libIGCM}/libIGCM_debug/IGCM_debug_Test.ref"
  typeset failedFile="IGCM_debug_Test.ref.failed"

  #---------------------
  if [ ! -n "${libIGCM}" ] ; then
    echo "Check libIGCM_debug ..........................................[ FAILED ]"
    echo "--Error--> libIGCM variable is not defined"
    exit 2
  fi

  #---------------------
  if [ ! -n "${Verbosity}" ] ; then
    echo "Check libIGCM_debug ..........................................[ FAILED ]"
    echo "--Error--> Verbosity variable is not defined"
    exit 3
  fi

  #---------------------
  # Run the test script once only and keep its output.
  # Need to remove timestamps here
  "${libIGCM}/libIGCM_debug/IGCM_debug_Test.ksh" \
    | sed -e "s:[0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9] [0-9][0-9]\:[0-9][0-9]\:[0-9][0-9] ::g" > "${failedFile}"

  diff "${refFile}" "${failedFile}" > /dev/null 2>&1
  status=$?

  if [ ${status} -eq 0 ] ; then
    echo "Check libIGCM_debug ..............................................[ OK ]"
    # Nothing to analyse: do not leave the output file behind
    rm -f "${failedFile}"
  else
    echo "Check libIGCM_debug ..........................................[ FAILED ]"
    echo "--Error--> Execution of ${libIGCM}/libIGCM_debug/IGCM_debug_Test.ksh"
    echo "           has produced the file IGCM_debug_Test.ref.failed"
    echo "           Please analyse differences with the reference file by typing:"
    echo "           diff IGCM_debug_Test.ref.failed ${libIGCM}/libIGCM_debug/IGCM_debug_Test.ref"
    echo "           Report errors to the author: Patrick.Brockmann@cea.fr"
    # Show the differences right away, from the saved output
    diff "${refFile}" "${failedFile}"
    exit 4
  fi
  #---------------------
}
Note: See TracBrowser for help on using the repository browser.