source: trunk/libIGCM/AA_RunChecker @ 762

Last change on this file since 762 was 762, checked in by labetoulle, 12 years ago

RunChecker:

  • Bugfix in -p option processing ;
  • Do not search for post-treatment jobs if job is "Waiting"
  • Property svn:executable set to *
  • Property svn:keywords set to Revision Author Date
File size: 15.8 KB
Line 
1#!/bin/ksh
2
3#**************************************************************
4# Author: Sonia Labetoulle
5# Contact: sonia.labetoulle__at__ipsl.jussieu.fr
6# $Revision::                                          $ Revision of last commit
7# $Author::                                            $ Author of last commit
8# $Date::                                              $ Date of last commit
9# IPSL (2012)
10#  This software is governed by the CeCILL licence see libIGCM/libIGCM_CeCILL.LIC
11#
12#**************************************************************
13
14#======================================================================#
15# Display a report on how a simulation is faring.
16
17
#----------------------------------------------------------------------#
# ChangeUsr path
#
# Print <path> with the first occurrence of the current user name
# replaced by the target user name, and the first occurrence of the
# current group name replaced by the target group name.
#
# Globals read : CurrentUsr, TargetUsr, CurrentGrp, TargetGrp
# Argument     : $1 - path to translate
# Output       : translated path on stdout
#----------------------------------------------------------------------#
function ChangeUsr {

  if [ -n "${CurrentGrp}" ] ; then
    printf '%s\n' "$1" | sed -e "s/${CurrentUsr}/${TargetUsr}/" \
                             -e "s/${CurrentGrp}/${TargetGrp}/"
  else
    # An empty group would give the expression "s//.../", which makes sed
    # reuse the previous pattern (the user name) and corrupt the path.
    printf '%s\n' "$1" | sed -e "s/${CurrentUsr}/${TargetUsr}/"
  fi

}
24
25
#----------------------------------------------------------------------#
# SearchCatalog
#
# Locate the submission directory of a simulation and resolve its name.
# Candidates are looked up, in this order, stopping at the first source
# that yields at least one candidate:
#   1) the catalog file (entries of the target user on this host only;
#      an exact job-name match is preferred, otherwise JobName is used
#      as a regular expression, as before);
#   2) the Job_* files of ConfigPath, when -p was given (fg_path);
#   3) a search of WORKDIR for Job_<JobName>, when -s was given
#      (fg_search).
# If several candidates remain the user is asked to pick one.
#
# Globals read    : JobName, SimuCatalog, TargetUsr, CurrentUsr, HostName,
#                   fg_path, ConfigPath, fg_search, WORKDIR
# Globals written : JobName, SUBMIT_DIR, FileList, NbOcc, fg_new,
#                   SEARCH_DIR, Choice
# Side effects    : a newly discovered job is appended to the catalog
#                   (only if that exact line is not already there);
#                   exits the script if nothing is found, if the user
#                   answers 'q', or if the answer is invalid.
#----------------------------------------------------------------------#
function SearchCatalog {

  typeset num Entry Matches
  unset SUBMIT_DIR
  unset FileList

  fg_new=false
  NbOcc=0

  # 1) Catalog lookup.
  # Every candidate is stored as "<submit dir>/Job_<name>" so that catalog
  # entries and job files found on disk are handled the same way below.
  if [ -n "${JobName}" ] ; then
    Matches=$( awk -v JobName="${JobName}" \
                   -v Usr="${TargetUsr}" \
                   -v Host="${HostName}" '
                 NF < 4 || $2 != Usr || $3 != Host { next }
                 $1 == JobName { exact[++ne] = $4 "/Job_" $1 }
                 $1 ~  JobName { fuzzy[++nf] = $4 "/Job_" $1 }
                 END {
                   if ( ne > 0 ) { for ( i = 1 ; i <= ne ; i++ ) print exact[i] }
                   else          { for ( i = 1 ; i <= nf ; i++ ) print fuzzy[i] }
                 }' "${SimuCatalog}" | LC_ALL=C sort -u )
    for Entry in ${Matches} ; do
      FileList[${NbOcc}]=${Entry}
      (( NbOcc = NbOcc + 1 ))
    done
  fi

  # 2) Job files of the directory given with -p
  if [ ${NbOcc} -eq 0 ] && ${fg_path} ; then
    for Entry in "${ConfigPath}"/Job_* ; do
      # The test also discards the literal pattern left by an unmatched glob.
      if [ -f "${Entry}" ] ; then
        FileList[${NbOcc}]=${Entry}
        (( NbOcc = NbOcc + 1 ))
        fg_new=true
      fi
    done
  fi

  # 3) Search of the working directory requested with -s
  if [ ${NbOcc} -eq 0 ] && ${fg_search} ; then
    SEARCH_DIR=${WORKDIR}
    if [ "${TargetUsr}" != "${CurrentUsr}" ] ; then
      SEARCH_DIR=$( ChangeUsr "${SEARCH_DIR}" )
    fi
    echo "${JobName} not in Catalog, we'll try to find it in ${SEARCH_DIR}"

    for Entry in $( find "${SEARCH_DIR}/" \
                         -path "${SEARCH_DIR}/IGCM_OUT" -prune -o \
                         -name "Job_${JobName}" -print ) ; do
      FileList[${NbOcc}]=${Entry}
      (( NbOcc = NbOcc + 1 ))
      fg_new=true
    done
  fi

  # Several candidates : let the user choose
  if [ ${NbOcc} -gt 1 ] ; then
    echo "More than one job"
    num=0
    while [ ${num} -lt ${NbOcc} ] ; do
      printf '%2i) %-30s\n' ${num} "${FileList[${num}]}"
      (( num = num + 1 ))
    done
    echo "Give your choice number or 'q' to quit : "
    read Choice
    case "${Choice}" in
      q)
        exit ;;
      ''|*[!0-9]*)
        echo "Invalid choice '${Choice}'."
        exit 1 ;;
    esac
    if [ "${Choice}" -ge ${NbOcc} ] ; then
      echo "Invalid choice '${Choice}'."
      exit 1
    fi
    FileList[0]=${FileList[${Choice}]}
    NbOcc=1
  fi

  if [ ${NbOcc} -eq 0 ] ; then
    echo "${JobName} not found."
    echo "You can try : *) '-s' option to automatically search your \$WORKDIR, "
    echo "              *) '-p' option to provide the config.card path, "
    echo "              *)  manually editing your ${SimuCatalog}"
    exit 1
  fi

  # Exactly one candidate left : derive job name and submit directory.
  # Only the "Job_" prefix is stripped, so names containing '_' stay whole.
  JobName=$( basename "${FileList[0]}" )
  JobName=${JobName#Job_}
  SUBMIT_DIR=$( dirname "${FileList[0]}" )

  if ${fg_new} ; then
    Entry="${JobName} ${TargetUsr} ${HostName} ${SUBMIT_DIR}"
    echo "${Entry}"
    # Record the job only once, whatever the number of -p / -s runs.
    grep -q -x -F -- "${Entry}" "${SimuCatalog}" || echo "${Entry}" >> "${SimuCatalog}"
  fi

  return

}
119
120
#----------------------------------------------------------------------#
# AffichResult -f | -j | -l
#
# Print one part of the report table :
#   -f, --first : header (job name, run.card time stamp, period state,
#                 pending rebuilds) and, if NbLines > 0, the title of
#                 the post-processing section;
#   -j, --job   : the post-processing line of index ${ind};
#   -l, --last  : closing line followed by the current date.
# If several options are given, only the first of -f, -j, -l (in that
# order) is honoured.
#
# Globals read : JobName, LastWrite, DateBegin, DateEnd, PeriodState,
#                PeriodDateBegin, PeriodDateEnd, CumulPeriod, NbRebuild,
#                FirstRebuild, LastRebuild, NbLines, ind, JobType_list,
#                <JobType>_Date / _Status / _Nb arrays, monitoring_Date,
#                atlas_Date, DateFormat
# Globals written : fg_first, fg_last, fg_job and the Col* colour codes
# Every printf argument is quoted so that an empty value still fills its
# column instead of shifting the following ones.
#----------------------------------------------------------------------#
function AffichResult {

  typeset len Color Date Status Nb JobType

  fg_first=false
  fg_last=false
  fg_job=false

  while [ $# -ne 0 ] ; do
    case ${1} in
      -f|--first)
        fg_first=true
        shift 1 ;;
      -l|--last)
        fg_last=true
        shift 1 ;;
      -j|--job)
        fg_job=true
        shift 1 ;;
      -*)
        echo "usage: ${0}"
        echo "       options = -f; -l; -j"
        exit ;;
      *)
        break ;;
    esac
  done


  # Define colors
  # =============
  ColEsc="\033["
  ColNon="${ColEsc}0m"       # Return to normal
  ColExp="${ColEsc}1m"       # White - bold
  ColFat="${ColEsc}1;31m"    # Fatal
  ColCpl="${ColEsc}1;32m"    # Completed
  ColAtt="${ColEsc}1;30m"    # Waiting
  ColDef="${ColEsc}1;34m"    # Default
  ColRbl="${ColEsc}31m"      # Rebuild


  # Print header
  # ============
  if ( ${fg_first} ) ; then
    # Padding between the job name and the run.card time stamp;
    # never negative, even for very long job names.
    (( len = 67 - ${#JobName} ))
    if [ ${len} -lt 0 ] ; then
      len=0
    fi

    echo "|===========================================================================================================|"
    printf "| JobName = ${ColExp}%s${ColNon}" "${JobName}"
    if [ ${len} -gt 0 ] ; then
      printf "%-${len}s" ""
    fi
    printf "run.card : ${ColExp}%-18s${ColNon}|\n" "${LastWrite}"
    echo "|-------------------------|-------------|-------------------------|-------------|-----:----------:----------|"
    echo "|                         |             |                         |             |     Pending Rebuilds      |"
    echo "| Date Begin - DateEnd    | PeriodState | Current Period          | CumulPeriod | Nb  : from     : to       |"
    echo "|-------------------------|-------------|-------------------------|-------------|-----:----------:----------|"

    printf "| %-10s - %-10s | " \
           "${DateBegin}" "${DateEnd}"

    case "${PeriodState}" in
      Fatal)
        Color=${ColFat} ;;
      Completed)
        Color=${ColCpl} ;;
      Waiting|OnQueue)
        Color=${ColAtt} ;;
      *)
        Color=${ColDef} ;;
    esac
    printf "${Color}%-11s${ColNon} | " "${PeriodState}"

    printf "%-10s - %-10s | %11s | " \
           "${PeriodDateBegin}" "${PeriodDateEnd}" "${CumulPeriod}"

    # Pending rebuilds are highlighted when there are some
    if [ "X${NbRebuild}" != X. ] && [ "X${NbRebuild}" != X0 ] ; then
      printf "${ColRbl}%3s : %-8s : %-8s${ColNon} |\n" \
             "${NbRebuild}" "${FirstRebuild}" "${LastRebuild}"
    else
      printf "%3s : %-8s : %-8s |\n" \
             "${NbRebuild}" "${FirstRebuild}" "${LastRebuild}"
    fi

    if [ "${NbLines:-0}" -gt 0 ] ; then
      printf "|-----------------------------------------------------------------------------------------------------------|\n"
      printf "|                                                      Last                                                 |\n"
      printf "|     Rebuild      |   Pack_Output    |   Pack_Restart   |    Pack_Debug    |  Monitoring  |     Atlas      |\n"
      printf "|------------------|------------------|------------------|------------------|--------------|----------------|\n"
    fi

    return
  fi

  # Print Post-processing job status
  # ================================
  if ( ${fg_job} ) ; then
    printf "|"

    # Print rebuild and pack jobs
    # ---------------------------
    for JobType in ${JobType_list[*]} ; do
      # Indirect read of the per-job-type arrays filled by the caller
      eval Date=\${${JobType}_Date[${ind}]}
      eval Status=\${${JobType}_Status[${ind}]}
      eval Nb=\${${JobType}_Nb[${ind}]}

      if [ "X${Status}" == XOK ] ; then
        Color=${ColCpl}
      else
        Color=${ColFat}
      fi
      printf "  ${Color}%-8s${ColNon} : %3s  |" "${Date}" "${Nb}"
    done

    Color=${ColExp}

    # Print monitoring jobs (single value, shown on the first line only)
    # ------------------------------------------------------------------
    JobType=monitoring
    if [ "${ind}" -eq 0 ] ; then
      eval Date=\${${JobType}_Date}
    else
      Date=""
    fi
    printf "     ${Color}%-4s${ColNon}     |" "${Date}"

    # Print atlas jobs
    # ----------------
    JobType=atlas
    eval Date=\${${JobType}_Date[${ind}]}
    printf "  ${Color}%-12s${ColNon}  |" "${Date}"

    printf "\n"

    return
  fi

  # Print footer
  # ============
  if ( ${fg_last} ) ; then
    printf "|===========================================================================================================|\n"
    date +"${DateFormat}"
    return
  fi

}
267
268#======================================================================#
269
#D- Task type (computing or post-processing)
TaskType=post-processing
typeset -i Verbosity=0

# Name used in the help text; saved here because $0 may be redefined
# inside a function.
ScriptName=$0

CurrentUsr=$( whoami )
# The third word of "user : grp1 grp2 ..." is taken as the user's group
CurrentGrp=$( groups "${CurrentUsr}" | awk '{print $3}' )

# Print the command line help on stdout.
function Usage {
  echo "usage: ${ScriptName} [-u user] [-q] [-j n] [-s] job_name"
  echo "       ${ScriptName} [-u user] [-q] [-j n] -p config.card_path"
  echo ""
  echo "options :"
  echo "  -h : print this help and exit"
  echo "  -u : owner of the job"
  echo "  -q : quiet"
  echo "  -j : print n post-processing jobs (default is 10)"
  echo "  -s : search for a new job in \$WORKDIR and fill in "
  echo "       the catalog before printing information"
  echo "  -p : give the directory (absolute path) containing "
  echo "       the config.card instead of the job name."
}

if [ $# -eq 0 ] ; then
  Usage
  exit
fi

# Arguments
# =========
# Default argument values
# -----------------------
TargetUsr=${CurrentUsr}
# Host name without its trailing node number
HostName=$( hostname | sed -e "s/[0-9].*//" )

fg_color=true
fg_search=false
fg_quiet=false
fg_path=false
NbHisto=10

# Get arguments from command line
# -------------------------------
while [ $# -ne 0 ] ; do
  case $1 in
    -h|--help|-help)
      Usage
      exit ;;
#    -b|-nocolor)
#      fg_color=false
#      shift 1 ;;
    -j|-job-number)
      # Options taking a value are rejected when the value is missing
      if [ $# -lt 2 ] ; then
        Usage
        exit 1
      fi
      NbHisto="$2"
      shift 2 ;;
    -p|-config-path)
      if [ $# -lt 2 ] ; then
        Usage
        exit 1
      fi
      ConfigPath="$2"
      fg_path=true
      shift 2 ;;
    -q|-quiet)
      fg_quiet=true
      shift 1 ;;
    -s|-search)
      fg_search=true
      shift 1 ;;
    -u|-user)
      if [ $# -lt 2 ] ; then
        Usage
        exit 1
      fi
      TargetUsr="$2"
      shift 2 ;;
    -*)
      Usage
      exit 1 ;;
    *)
      break ;;
  esac
done

# NbHisto is later handed to "tail -n" : it must be a plain number
case "${NbHisto}" in
  ''|*[!0-9]*)
    echo "Option -j expects a number, got '${NbHisto}'"
    exit 1 ;;
esac

# A job name is mandatory unless the configuration path is given
if ! ${fg_path} && [ $# -lt 1 ] ; then
  Usage
  exit 1
fi

if ${fg_path} && ${fg_search} ; then
  echo "You cannot use -s and -p at the same time"
  exit 1
fi
350
351
# Load libIGCM library
# ====================
# "::modipsl::" is kept verbatim: it is used as the default location
# when libIGCM is not already set in the environment.
libIGCM=${libIGCM:=::modipsl::/libIGCM}

. "${libIGCM}/libIGCM_debug/libIGCM_debug.ksh"
     ( ${DEBUG_debug} ) && IGCM_debug_Check
. "${libIGCM}/libIGCM_card/libIGCM_card.ksh"
     ( ${DEBUG_debug} ) && IGCM_card_Check
. "${libIGCM}/libIGCM_date/libIGCM_date.ksh"
     ( ${DEBUG_debug} ) && IGCM_date_Check
#-------
. "${libIGCM}/libIGCM_sys/libIGCM_sys.ksh"
. "${libIGCM}/libIGCM_config/libIGCM_config.ksh"


# First remaining argument (empty when only -p was given)
JobName=$1

# Quiet mode : only the most recent post-processing job is shown
if ( ${fg_quiet} ) ; then
  NbHisto=1
fi

echo "Target user = ${TargetUsr}"

# The third word of "user : grp1 grp2 ..." is taken as the user's group
TargetGrp=$( groups "${TargetUsr}" | awk '{print $3}' )

# Define the catalog in which the known simulations are stored
SimuCatalog="$( ccc_home )/.simucatalog.dat"
if [ ! -s "${SimuCatalog}" ] ; then
  touch "${SimuCatalog}"
fi

# Date format
DateFormat="%d/%m/%y %R:%S"

# Find SUBMIT_DIR in catalog
# ==========================
SearchCatalog
389
# Build and print the report once the submission directory is known
if [ -n "${SUBMIT_DIR}" ] ; then

  echo "Submit:  >${SUBMIT_DIR}<"
  cd "${SUBMIT_DIR}" || exit 1


  # Extract useful information from run.card and config.card
  # ========================================================

  RunFile="${SUBMIT_DIR}/run.card"
  ConfFile="${SUBMIT_DIR}/config.card"

  IGCM_config_CommonConfiguration "${ConfFile}"


  # Without a (non empty) run.card the job is reported as "Waiting"
  if [ -s "${RunFile}" ] ; then
    IGCM_card_DefineVariableFromOption "${RunFile}" Configuration PeriodState
    PeriodState=${run_Configuration_PeriodState}
  else
    PeriodState="Waiting"
  fi

  # A job declared Running or OnQueue that the batch system does not
  # list is reported as Fatal
  if [ "X${PeriodState}" == XRunning ] || [ "X${PeriodState}" == XOnQueue ] ; then
    NbRun=$( ccc_mstat -f | grep -c -- "${JobName}" )

    if [ "${NbRun}" -eq 0 ] ; then
      PeriodState="Fatal"
    fi
  fi

  DateBegin=${config_UserChoices_DateBegin}
  DateEnd=${config_UserChoices_DateEnd}
  TagName=${config_UserChoices_TagName}
  ExperimentName=${config_UserChoices_ExperimentName}
  SpaceName=${config_UserChoices_SpaceName}


  if [ "X${PeriodState}" != XWaiting ] && [ "X${PeriodState}" != XCompleted ] ; then
    IGCM_card_DefineVariableFromOption "${RunFile}" Configuration PeriodDateBegin
    IGCM_card_DefineVariableFromOption "${RunFile}" Configuration PeriodDateEnd
    IGCM_card_DefineVariableFromOption "${RunFile}" Configuration CumulPeriod
    PeriodDateBegin=${run_Configuration_PeriodDateBegin}
    PeriodDateEnd=${run_Configuration_PeriodDateEnd}
    CumulPeriod=${run_Configuration_CumulPeriod}
  else
    PeriodDateBegin="."
    PeriodDateEnd="."
    CumulPeriod="."
  fi

  DATA_DIR=${R_SAVE}
  POST_DIR=${R_BUFR}/Out
  CWORK_DIR=${R_FIGR}
  if [ "X${config_Post_RebuildFromArchive}" = Xtrue ] ; then
    REBUILD_DIR=${R_SAVE}/TMP
    RebuildJob="rebuild_fromArchive"
  else
    REBUILD_DIR=${BIG_DIR}/${config_UserChoices_TagName}/${config_UserChoices_JobName}
    RebuildJob="rebuild_fromWorkdir"
  fi

  # The directories above are those of the current user : translate
  # them when another user's simulation is inspected
  if [ "${TargetUsr}" != "${CurrentUsr}" ] ; then
    DATA_DIR=$( ChangeUsr "${DATA_DIR}" )
    POST_DIR=$( ChangeUsr "${POST_DIR}" )
    CWORK_DIR=$( ChangeUsr "${CWORK_DIR}" )
    REBUILD_DIR=$( ChangeUsr "${REBUILD_DIR}" )
  fi

  echo "Data:    >${DATA_DIR}<"
  echo "Rebuild: >${REBUILD_DIR}<"
  echo "Post:    >${POST_DIR}<"
  echo "Work:    >${CWORK_DIR}<"

  # Values displayed when nothing is pending or the job is "Waiting"
  NbRebuild="."
  FirstRebuild="."
  LastRebuild="."
  NbLines=0

  if [ "${PeriodState}" != "Waiting" ] ; then

    # Check pending rebuilds
    # ======================

    set -A RebuildList $( find "${REBUILD_DIR}/" -name "REBUILD_*" | sort )
    if [ ${#RebuildList[*]} -gt 0 ] ; then
      NbRebuild=$( IGCM_sys_CountFileArchive "${REBUILD_DIR}" )

      FirstRebuild=$( basename "${RebuildList[0]}" | cut -f2 -d_ )
      # The last pending rebuild is the last element of the sorted list.
      # NOTE(review): the index used to be computed with an assignment,
      # "(( NbRebuild=${NbRebuild}-1 ))", which also lowered the displayed
      # count by one; the count is now left untouched - confirm that the
      # decrement was not intended.
      (( LastIdx = ${#RebuildList[*]} - 1 ))
      LastRebuild=$( basename "${RebuildList[${LastIdx}]}" | cut -f2 -d_ )
    fi

    # Check last REBUILD and PACK* jobs
    # =================================
    set -A JobType_list "${RebuildJob}" "pack_output" "pack_restart" "pack_debug"

    for JobType in ${JobType_list[*]} ; do
      # Name looked for in the first field of the job output lines
      if [ "X${JobType}" == "X${RebuildJob}" ] ; then
        String=IGCM_sys_PutBuffer_Out
      else
        String=IGCM_sys_Put_Out
      fi

      # Keep the NbHisto most recent job outputs of this type
      set -A FileList $( ls "${POST_DIR}/${JobType}".*.out | tail -n "${NbHisto}" )

      if [ ${#FileList[*]} -gt ${NbLines} ] ; then
        NbLines=${#FileList[*]}
      fi

      (( ind = 0 ))
      for FileName in ${FileList[*]} ; do
        # The date is the last but one dot-separated field of the file name
        LastDate=$( basename "${FileName}" | awk -F"." '{ print $(NF-1) }' )

        Error=$( awk -v String="${String}" \
                     'BEGIN { x=0 } ($1~String) && ($3~"error.") { x=x+1 } END { print x }' \
                     "${FileName}" )

        Match=$( awk -v String="${String}" \
                     'BEGIN { x=0 } ($1~String) && ($3!~"error.") { x=x+1 } END { print x }' \
                     "${FileName}" )
        (( Nb = ${Match} - ${Error} ))

        if [ "${Error}" -eq 0 ] && [ "${Nb}" -gt 0 ] ; then
          Status=OK
        else
          Status=KO
        fi

        # Results are stored in arrays named after the job type; the
        # value is expanded by eval itself so it is never word-split.
        eval "${JobType}_Date[${ind}]=\${LastDate}"
        eval "${JobType}_Status[${ind}]=\${Status}"
        eval "${JobType}_Nb[${ind}]=\${Nb}"

        (( ind = ind + 1 ))
      done
    done

    # Check last MONITORING jobs
    # ==========================
    JobType=monitoring
    if [ -d "${CWORK_DIR}/MONITORING" ] ; then
      # Keep only the last year reported, whatever the output layout
      LastDate=$( cdo showyear "${CWORK_DIR}/MONITORING/files/ATM_bils_global_ave.nc" 2> /dev/null | \
                      awk 'NF { year = $NF } END { print year }' )
      monitoring_Date=${LastDate}
    fi

    # Check last ATLAS jobs
    # =====================
    JobType=atlas
    if [ -d "${CWORK_DIR}/ATLAS" ] ; then
      set -A FileList $( ls "${CWORK_DIR}/ATLAS" | tail -n "${NbHisto}" )

      if [ ${#FileList[*]} -gt ${NbLines} ] ; then
        NbLines=${#FileList[*]}
      fi

      (( ind = 0 ))
      for FileName in ${FileList[*]} ; do
        atlas_Date[${ind}]=${FileName}
        (( ind = ind + 1 ))
      done
    fi


    # Time of last write on run.card
    # ==============================
    LastWrite=$( ls -l --time-style=+"${DateFormat}" "${RunFile}" | awk '{print $6 " " $7}' )

  fi


  # Print results
  # =============
  AffichResult -f
  ind=0
  while [ ${ind} -lt ${NbLines} ] ; do
    AffichResult -j
    (( ind = ind + 1 ))
  done
  AffichResult -l

fi
597
Note: See TracBrowser for help on using the repository browser.