source: trunk/libIGCM/AA_RunChecker @ 834

Last change on this file since 834 was 834, checked in by labetoulle, 11 years ago

Look for the right string to check rebuild jobs when SpaceName=TEST. See #106.

  • Property svn:executable set to *
  • Property svn:keywords set to Revision Author Date
File size: 16.7 KB
Line 
#!/bin/ksh

#**************************************************************
# Author: Sonia Labetoulle
# Contact: sonia.labetoulle__at__ipsl.jussieu.fr
# $Revision::                                          $ Revision of last commit
# $Author::                                            $ Author of last commit
# $Date::                                              $ Date of last commit
# IPSL (2012)
#  This software is governed by the CeCILL licence see libIGCM/libIGCM_CeCILL.LIC
#
#**************************************************************

#======================================================================#
# Display a report on how a simulation is faring.


#----------------------------------------------------------------------#
# ChangeUsr path
#   Convert a path of ${CurrentUsr} into the matching path of
#   ${TargetUsr}: the first occurrence of the current user name is
#   replaced by the target user name, then the first occurrence of the
#   current user's group is replaced by the target user's group.
#
#   Globals read : CurrentUsr, TargetUsr
#   Argument     : $1 - path to convert
#   Output       : the converted path, on stdout
#----------------------------------------------------------------------#
function ChangeUsr {

  typeset CurrentGrp TargetGrp NewPath

  # The group is taken from the 3rd field of "groups <user>"
  # (GNU prints "<user> : grp1 grp2 ...").
  CurrentGrp=$( groups "${CurrentUsr}" | gawk '{print $3}' )
  TargetGrp=$( groups "${TargetUsr}" | gawk '{print $3}' )

  # "$1" is quoted so blanks and wildcard characters in the path survive.
  NewPath=$( printf '%s\n' "$1" | sed -e "s/${CurrentUsr}/${TargetUsr}/" )

  # An empty group would give sed an empty pattern (error) or silently
  # delete the group from the path: only swap groups when both are known.
  if [ -n "${CurrentGrp}" ] && [ -n "${TargetGrp}" ] ; then
    NewPath=$( printf '%s\n' "${NewPath}" | sed -e "s/${CurrentGrp}/${TargetGrp}/" )
  fi

  printf '%s\n' "${NewPath}"

}


#----------------------------------------------------------------------#
# SearchCatalog
#   Find the submission directory of the simulation to check.
#   Candidates are looked for, in this order, until some are found:
#     1) in the catalog ${SimuCatalog}, among the entries of ${TargetUsr}
#        on ${HostName}; an exact job name match wins over a pattern
#        match on ${JobName};
#     2) with '-p' (fg_path), among the Job_* files of ${ConfigPath};
#     3) with '-s' (fg_search), among the Job_${JobName} files found
#        under ${WORKDIR} (IGCM_OUT excluded).
#   When several candidates remain the user is asked to pick one.
#   Jobs found by 2) or 3) are appended to the catalog.
#
#   Globals read    : JobName, SimuCatalog, TargetUsr, CurrentUsr,
#                     HostName, fg_path, ConfigPath, fg_search, WORKDIR
#   Globals written : SUBMIT_DIR, JobName, NbOcc, fg_new, SEARCH_DIR
#   Exits the script when nothing is found, when the user answers 'q'
#   or when the answer is not a valid choice number.
#----------------------------------------------------------------------#
function SearchCatalog {

  typeset Choice Selected JobFile ind

  unset SUBMIT_DIR

  fg_new=false

  # The candidates are held in the positional parameters of the function,
  # each one written as <submit directory>/Job_<job name>.
  set --

  # 1) Look in the catalog
  # ----------------------
  # Only entries of the target user on this host are kept, so the same
  # job name registered for somebody else does not count as a duplicate.
  if [ -n "${JobName}" ] ; then
    set -- $( gawk -v JobName="${JobName}" \
                   -v Usr="${TargetUsr}" \
                   -v Host="${HostName}" '
                NF >= 4 && $2 == Usr && $3 == Host {
                  if ( $1 == JobName ) {
                    exact = exact $4 "/Job_" $1 "\n"
                  } else if ( $1 ~ JobName ) {
                    partial = partial $4 "/Job_" $1 "\n"
                  }
                }
                END { printf "%s", ( exact != "" ? exact : partial ) }' \
              "${SimuCatalog}" )
  fi
  NbOcc=$#

  # 2) Look in the directory given with '-p'
  # ----------------------------------------
  if [ ${NbOcc} -eq 0 ] && ( ${fg_path} ) ; then
    set -- "${ConfigPath}"/Job_*
    # An unmatched pattern is left as is by the shell: discard it.
    if [ ! -f "$1" ] ; then
      set --
    fi
    NbOcc=$#
    if [ ${NbOcc} -gt 0 ] ; then
      fg_new=true
    fi
  fi

  # 3) Search the working directory ('-s')
  # --------------------------------------
  if [ ${NbOcc} -eq 0 ] && ( ${fg_search} ) ; then
    SEARCH_DIR=${WORKDIR}
    if [ "${TargetUsr}" != "${CurrentUsr}" ] ; then
      SEARCH_DIR=$( ChangeUsr "${SEARCH_DIR}" )
    fi
    echo "${JobName} not in Catalog, we'll try to find it in ${SEARCH_DIR}"

    set -- $( find "${SEARCH_DIR}/" \
                   -path "${SEARCH_DIR}/IGCM_OUT" -prune -o \
                   -name "Job_${JobName}" -print )
    NbOcc=$#
    if [ ${NbOcc} -gt 0 ] ; then
      fg_new=true
    fi
  fi

  # Several candidates : let the user choose
  # ----------------------------------------
  if [ ${NbOcc} -gt 1 ] ; then
    echo "More than one job"
    ind=0
    for Selected in "$@" ; do
      printf '%2i) %-30s\n' "${ind}" "${Selected}"
      (( ind = ind + 1 ))
    done
    echo "Give your choice number or 'q' to quit : "
    read Choice
    case "${Choice}" in
      q)
        exit ;;
      ''|*[!0-9]*)
        echo "Invalid choice : ${Choice}"
        exit 1 ;;
    esac
    if [ "${Choice}" -ge "${NbOcc}" ] ; then
      echo "Invalid choice : ${Choice}"
      exit 1
    fi
    # Drop the candidates listed before the chosen one.
    shift "${Choice}"
    NbOcc=1
  fi

  # Nothing found
  # -------------
  if [ ${NbOcc} -eq 0 ] ; then
    echo "${JobName} not found."
    echo "You can try : *) '-s' option to automatically search your \$WORKDIR, "
    echo "              *) '-p' option to provide the config.card path, "
    echo "              *)  manually editing your ${SimuCatalog}"
    exit
  fi

  # One candidate left
  # ------------------
  Selected=$1
  JobFile=$( basename "${Selected}" )
  SUBMIT_DIR=$( dirname "${Selected}" )

  if ( ${fg_new} ) ; then
    # Job found outside the catalog: keep the name given by the user if
    # any, otherwise take everything after "Job_" (names containing '_'
    # are kept whole), then register the job in the catalog.
    JobName=${JobName:-${JobFile#Job_}}
    echo "${JobName} ${TargetUsr} ${HostName} ${SUBMIT_DIR}"
    echo "${JobName} ${TargetUsr} ${HostName} ${SUBMIT_DIR}" >> "${SimuCatalog}"
    sort -u "${SimuCatalog}" > "${SimuCatalog}.tmp"
    mv "${SimuCatalog}.tmp" "${SimuCatalog}"
  else
    # Job read from the catalog: use the full name stored there.
    JobName=${JobFile#Job_}
  fi

  return

}


#----------------------------------------------------------------------#
# AffichResult -f | -j | -l
#   Print one part of the report table.
#     -f, --first : header and simulation status line
#     -j, --job   : one line of post-processing job status, for index ${ind}
#     -l, --last  : footer and current date
#   When several options are given only the first matching part
#   (in the order -f, -j, -l) is printed.
#
#   Globals read    : JobName, LastWrite, DateBegin, DateEnd, PeriodState,
#                     PeriodDateBegin, PeriodDateEnd, CumulPeriod,
#                     NbRebuild, FirstRebuild, LastRebuild, NbLines,
#                     JobType_list, <JobType>_Date/_Status/_Nb, ind,
#                     monitoring_Date, atlas_Date, DateFormat
#   Globals written : fg_first, fg_last, fg_job, Col* colour strings
#   Output          : the table part, on stdout
#----------------------------------------------------------------------#
function AffichResult {

  typeset Color len Date Status Nb JobType

  fg_first=false
  fg_last=false
  fg_job=false

  while [ $# -ne 0 ] ; do
    case ${1} in
      -f|--first)
        fg_first=true
        shift 1 ;;
      -l|--last)
        fg_last=true
        shift 1 ;;
      -j|--job)
        fg_job=true
        shift 1 ;;
      -*)
        echo "usage: ${0}"
        echo "       options = -f; -l"
        exit ;;
      *)
        break ;;
    esac
  done


  # Define colors
  # =============
  ColEsc="\033["
  ColNon="${ColEsc}0m"       # Return to normal
  ColExp="${ColEsc}1m"       # White - bold
  ColFat="${ColEsc}1;31m"    # Fatal
  ColCpl="${ColEsc}1;32m"    # Completed
  ColAtt="${ColEsc}1;30m"    # Waiting
  ColDef="${ColEsc}1;34m"    # Default
  ColRbl="${ColEsc}31m"      # Rebuild


  # Print header
  # ============
  # Every value is quoted when passed to printf: an empty value must
  # still fill its own column instead of shifting the next ones.
  if ( ${fg_first} ) ; then
    # Padding between the job name and the "run.card" field.
    (( len = 67 - ${#JobName} ))
    echo "|===========================================================================================================|"
    printf "| JobName = ${ColExp}%-${#JobName}s${ColNon}" "${JobName}"
    if [ ${len} -gt 0 ] ; then
      printf "%${len}s" ""
    fi
    printf "run.card : ${ColExp}%-18s${ColNon}|\n" "${LastWrite}"
    echo "|-------------------------|-------------|-------------------------|-------------|-----:----------:----------|"
    echo "|                         |             |                         |             |     Pending Rebuilds      |"
    echo "| Date Begin - DateEnd    | PeriodState | Current Period          | CumulPeriod | Nb  : from     : to       |"
    echo "|-------------------------|-------------|-------------------------|-------------|-----:----------:----------|"

    printf "| %-10s - %-10s | " \
           "${DateBegin}" "${DateEnd}"

    case "${PeriodState}" in
      Fatal)
        Color=${ColFat} ;;
      Completed)
        Color=${ColCpl} ;;
      Waiting|OnQueue)
        Color=${ColAtt} ;;
      *)
        Color=${ColDef} ;;
    esac
    printf "${Color}%-11s${ColNon} | " "${PeriodState}"

    printf "%-10s - %-10s | %11s | " \
           "${PeriodDateBegin}" "${PeriodDateEnd}" "${CumulPeriod}"

    # Pending rebuilds are highlighted when there are some.
    if [ "X${NbRebuild}" != X. ] && [ "X${NbRebuild}" != X0 ] ; then
      printf "${ColRbl}%3s : %-8s : %-8s${ColNon} |\n" \
             "${NbRebuild}" "${FirstRebuild}" "${LastRebuild}"
    else
      printf "%3s : %-8s : %-8s |\n" \
             "${NbRebuild}" "${FirstRebuild}" "${LastRebuild}"
    fi

    if [ "${NbLines:-0}" -gt 0 ] ; then
      printf "|-----------------------------------------------------------------------------------------------------------|\n"
      printf "|                                                      Last                                                 |\n"
      printf "|     Rebuild      |   Pack_Output    |   Pack_Restart   |    Pack_Debug    |  Monitoring  |     Atlas      |\n"
      printf "|------------------|------------------|------------------|------------------|--------------|----------------|\n"
    fi

    return
  fi

  # Print Post-processing job status
  # ================================
  if ( ${fg_job} ) ; then
    printf "|"

    # Print rebuild and pack jobs
    # ---------------------------
    for JobType in ${JobType_list[*]} ; do
      eval Date=\${${JobType}_Date[${ind}]}
      eval Status=\${${JobType}_Status[${ind}]}
      eval Nb=\${${JobType}_Nb[${ind}]}

      if [ "X${Status}" = XOK ] ; then
        Color=${ColCpl}
      else
        Color=${ColFat}
      fi
      printf "  ${Color}%-8s${ColNon} : %3s  |" "${Date}" "${Nb}"
    done

    Color=${ColExp}

    # Print monitoring jobs
    # ---------------------
    # Only one value is known for monitoring: it goes on the first line.
    JobType=monitoring
    if [ "${ind}" -eq 0 ] ; then
      eval Date=\${${JobType}_Date}
    else
      Date=""
    fi
    printf "     ${Color}%-4s${ColNon}     |" "${Date}"

    # Print atlas jobs
    # ----------------
    JobType=atlas
    eval Date=\${${JobType}_Date[${ind}]}
    printf "  ${Color}%-12s${ColNon}  |" "${Date}"

    printf "\n"

    return
  fi

  # Print footer
  # ============
  if ( ${fg_last} ) ; then
    printf "|===========================================================================================================|\n"
    date +"${DateFormat}"
    return
  fi

}

#======================================================================#

#D- Task type (computing ,post-processing or checking)
TaskType=checking
typeset -i Verbosity=0

CurrentUsr=$( whoami )

# Without any argument, print the help and stop.
if [ $# -eq 0 ] ; then
  "$0" -h
  exit
fi

# Arguments
# =========
# Default argument values
# -----------------------
TargetUsr=${CurrentUsr}
# Host name with everything from its first digit onwards removed.
HostName=$( hostname | sed -e "s/[0-9].*//" )

fg_color=true
fg_search=false
fg_quiet=false
fg_path=false
NbHisto=20

# Get arguments from command line
# -------------------------------
while [ $# -ne 0 ] ; do
  case $1 in
    -h|--help|-help)
      echo "usage: $0 [-u user] [-q] [-j n] [-s] job_name"
      echo "       $0 [-u user] [-q] [-j n] -p config.card_path"
      echo ""
      echo "options :"
      echo "  -h : print this help and exit"
      echo "  -u : owner of the job"
      echo "  -q : quiet"
      echo "  -j : print n post-processing jobs (default is 20)"
      echo "  -s : search for a new job in \$WORKDIR and fill in "
      echo "       the catalog before printing information"
      echo "  -p : give the directory (absolute path) containing "
      echo "       the config.card instead of the job name."
      exit ;;
    -j|-job-number|-p|-config-path|-u|-user)
      # These options need a value: without this check "shift 2" fails
      # when the option is the last word of the command line.
      if [ $# -lt 2 ] ; then
        "$0" -h
        exit 1
      fi
      case $1 in
        -j|-job-number)
          NbHisto="$2" ;;
        -p|-config-path)
          ConfigPath="$2"
          fg_path=true ;;
        -u|-user)
          TargetUsr="$2" ;;
      esac
      shift 2 ;;
    -q|-quiet)
      fg_quiet=true
      shift 1 ;;
    -s|-search)
      fg_search=true
      shift 1 ;;
    -*)
      "$0" -h
      exit ;;
    *)
      break ;;
  esac
done

# The number of jobs to print is later handed to "tail -n": it must be
# a plain number.
case "${NbHisto}" in
  ''|*[!0-9]*)
    "$0" -h
    exit 1 ;;
esac

# A job name is mandatory unless a config.card path was given.
if ( ! ${fg_path} ) && [ $# -lt 1 ] ; then
  "$0" -h
  exit
fi

if ( ${fg_path} ) && ( ${fg_search} ) ; then
  echo "You cannot use -s and -p at the same time"
  exit
fi


# Load libIGCM library
# ====================
# NOTE(review): "::modipsl::" looks like a placeholder replaced when the
# job is installed - confirm. An already defined libIGCM is kept.
libIGCM=${libIGCM:=::modipsl::/libIGCM}

DEBUG_debug=false

. "${libIGCM}/libIGCM_debug/libIGCM_debug.ksh"
. "${libIGCM}/libIGCM_card/libIGCM_card.ksh"
. "${libIGCM}/libIGCM_date/libIGCM_date.ksh"
#-------
. "${libIGCM}/libIGCM_sys/libIGCM_sys.ksh"
. "${libIGCM}/libIGCM_config/libIGCM_config.ksh"
#-------
# The library self-checks are run only when DEBUG_debug is true.
( ${DEBUG_debug} ) && IGCM_debug_Check
( ${DEBUG_debug} ) && IGCM_card_Check
( ${DEBUG_debug} ) && IGCM_date_Check

# First remaining word of the command line (empty when '-p' is used alone).
JobName=$1

# Quiet mode: only the latest post-processing job of each kind is shown.
if ( ${fg_quiet} ) ; then
  NbHisto=1
fi

echo "Target user = ${TargetUsr}"

# Define the catalog in which the known simulations are stored
SimuCatalog="${HOME}/.simucatalog.dat"
if [ ! -s "${SimuCatalog}" ] ; then
  touch "${SimuCatalog}"
fi

# Date format
DateFormat="%d/%m/%y %R:%S"

# Find SUBMIT_DIR in catalog
# ==========================
SearchCatalog

# Build and print the report, once the submission directory is known.
if [ -n "${SUBMIT_DIR}" ] ; then

  echo "Submit:  >${SUBMIT_DIR}<"
  cd "${SUBMIT_DIR}" || exit 1


  # Extract useful information from run.card and config.card
  # ========================================================

  RunFile="${SUBMIT_DIR}/run.card"
  ConfFile="${SUBMIT_DIR}/config.card"

  IGCM_config_CommonConfiguration "${SUBMIT_DIR}/config.card"


  # Without a run.card the simulation has not started yet.
  if [ -s "${RunFile}" ] ; then
    IGCM_card_DefineVariableFromOption "${RunFile}" Configuration PeriodState
    PeriodState=${run_Configuration_PeriodState}
  else
    PeriodState="Waiting"
  fi

  # A simulation declared running or queued but with no job in the
  # queue is reported as "Fatal".
  if [ "X${PeriodState}" = XRunning ] || [ "X${PeriodState}" = XOnQueue ] ; then
    #NbRun=$( ccc_mstat -f | grep -c ${JobName} )
    IGCM_sys_CountJobInQueue ${JobName} NbRun

    if [ ${NbRun} -eq 0 ] ; then
      PeriodState="Fatal"
    fi
  fi

  DateBegin=${config_UserChoices_DateBegin}
  DateEnd=${config_UserChoices_DateEnd}
  TagName=${config_UserChoices_TagName}
  ExperimentName=${config_UserChoices_ExperimentName}
  SpaceName=${config_UserChoices_SpaceName}


  if [ "X${PeriodState}" != XWaiting ] && [ "X${PeriodState}" != XCompleted ] ; then
    IGCM_card_DefineVariableFromOption "${RunFile}" Configuration PeriodDateBegin
    IGCM_card_DefineVariableFromOption "${RunFile}" Configuration PeriodDateEnd
    IGCM_card_DefineVariableFromOption "${RunFile}" Configuration CumulPeriod
    PeriodDateBegin=${run_Configuration_PeriodDateBegin}
    PeriodDateEnd=${run_Configuration_PeriodDateEnd}
    CumulPeriod=${run_Configuration_CumulPeriod}
  else
    PeriodDateBegin="."
    PeriodDateEnd="."
    CumulPeriod="."
  fi

  # NOTE(review): R_SAVE, R_BUFR, R_FIGR and REBUILD_DIR are not set in
  # this script; presumably they come from the libIGCM calls above - confirm.
  DATA_DIR=${R_SAVE}
  POST_DIR=${R_BUFR}/Out
  CWORK_DIR=${R_FIGR}
  if [ "X${config_Post_RebuildFromArchive}" = Xtrue ] ; then
    RebuildJob="rebuild_fromArchive"
  else
    RebuildJob="rebuild_fromWorkdir"
  fi

  # Checking somebody else's simulation: point the directories to that user.
  if [ "${TargetUsr}" != "${CurrentUsr}" ] ; then
    DATA_DIR=$( ChangeUsr "${DATA_DIR}" )
    POST_DIR=$( ChangeUsr "${POST_DIR}" )
    CWORK_DIR=$( ChangeUsr "${CWORK_DIR}" )
    REBUILD_DIR=$( ChangeUsr "${REBUILD_DIR}" )
  fi

  echo "Data:    >${DATA_DIR}<"
  echo "Rebuild: >${REBUILD_DIR}<"
  echo "Post:    >${POST_DIR}<"
  echo "Work:    >${CWORK_DIR}<"

  # Are packs activated or not ?
  # ============================
  if [ "X${config_Post_PackFrequency}" != "X${NULL_STR}" ] && \
     [ "X${config_Post_PackFrequency}" != XNONE ] ; then
    Pack=true
  else
    Pack=false
  fi


  # Check pending rebuilds
  # ======================
  NbRebuild="."
  FirstRebuild="."
  LastRebuild="."

  if [ "${PeriodState}" != "Waiting" ] ; then

    set -A RebuildList $( find "${REBUILD_DIR}/" -name "REBUILD_*" | sort )
    if [ ${#RebuildList[*]} -gt 0 ] ; then
      NbRebuild=$( IGCM_sys_CountFileArchive ${REBUILD_DIR} )

      # The last entry is indexed from the list itself, so it is always
      # defined even if the count above differs from the list length.
      (( LastInd = ${#RebuildList[*]} - 1 ))
      FirstRebuild=$( basename "${RebuildList[0]}" | cut -f2 -d_ )
      LastRebuild=$( basename "${RebuildList[${LastInd}]}" | cut -f2 -d_ )
    fi
  fi


  if [ "${PeriodState}" != "Waiting" ] ; then

    # Check last REBUILD and PACK* jobs
    # =================================
    # Define input parameters
    # -----------------------
    set -A JobType_list "${RebuildJob}" "pack_output" "pack_restart" "pack_debug"

    # For each job type: the string searched in its output files
    # (<JobType>_String), a field number (<JobType>_Field) and whether
    # the job type is in use (<JobType>_Activ).
    for JobType in ${JobType_list[*]} ; do
      typeset    name1="${JobType}_String"
      typeset    name2="${JobType}_Field"
      typeset    name3="${JobType}_Activ"
      if [ "X${JobType}" = "X${RebuildJob}" ] ; then
        if ( ${Pack} ) ; then
          eval ${name1}=IGCM_sys_PutBuffer_Out
        else
          eval ${name1}=IGCM_sys_Put_Out
        fi
        eval ${name2}=4
        eval ${name3}=true
      else
        eval ${name1}=IGCM_sys_Put_Out
        eval ${name2}=3
        if ( ${Pack} ) ; then
          eval ${name3}=true
        else
          eval ${name3}=false
        fi
      fi
    done

    # Check jobs
    # ----------
    NbLines=0
    for JobType in ${JobType_list[*]} ; do
      eval String=\${${JobType}_String}
      eval Field=\${${JobType}_Field}
      eval Activ=\${${JobType}_Activ}

      if ( ${Activ} ) ; then

        # Last ${NbHisto} output files of this job type; no file at all
        # is not an error.
        set -A FileList $( ls "${POST_DIR}/${JobType}".*.out 2> /dev/null | tail -n ${NbHisto} )

        if [ ${#FileList[*]} -gt ${NbLines} ] ; then
          NbLines=${#FileList[*]}
        fi

        (( ind = 0 ))
        for FileName in ${FileList[*]} ; do
          # The date is the last but one dot-separated field of the file name.
          LastDate=$( basename "${FileName}" | gawk -F"." '{ print $(NF-1) }' )

          # Count the lines whose 1st field matches ${String}, split
          # according to whether the 3rd field matches "error.".
          set -- $( gawk -v String="${String}" '
                      BEGIN { nb_ok = 0 ; nb_ko = 0 }
                      ( $1 ~ String ) {
                        if ( $3 !~ /error./ ) {
                          nb_ok = nb_ok + 1
                        } else {
                          nb_ko = nb_ko + 1
                        }
                      }
                      END { print nb_ok " " nb_ko }' \
                    "${POST_DIR}/${JobType}.${LastDate}.out" )
          Match=${1:-0}
          Error=${2:-0}

          # NOTE(review): Match already excludes the error lines, so the
          # subtraction lowers the count further when errors exist - confirm
          # that this is intended.
          (( Nb = ${Match} - ${Error} ))

          if [ ${Error} -eq 0 ] && [ ${Nb} -gt 0 ] ; then
            Status=OK
          else
            Status=KO
          fi

          eval ${JobType}_Date[$ind]=${LastDate}
          eval ${JobType}_Status[$ind]=${Status}
          eval ${JobType}_Nb[$ind]=${Nb}

          (( ind = ind + 1 ))
        done

      else

          eval ${JobType}_Date[0]=""
          eval ${JobType}_Status[0]=""
          eval ${JobType}_Nb[0]=""

      fi

    done

    # Check last MONITORING jobs
    # ==========================
    # Last year listed by "cdo showyear" for one monitoring file.
    JobType=monitoring
    if [ -d "${CWORK_DIR}/MONITORING" ] ; then
      LastDate=$( cdo showyear "${CWORK_DIR}/MONITORING/files/ATM_bils_global_ave.nc" 2> /dev/null | \
                      gawk '{ print $NF }' )
      eval ${JobType}_Date=${LastDate}
    fi

    # Check last ATLAS jobs
    # =====================
    JobType=atlas
    if [ -d "${CWORK_DIR}/ATLAS" ] ; then
      set -A FileList $( ls "${CWORK_DIR}/ATLAS" | tail -n ${NbHisto} )

      if [ ${#FileList[*]} -gt ${NbLines} ] ; then
        NbLines=${#FileList[*]}
      fi

      (( ind = 0 ))
      for FileName in ${FileList[*]} ; do
        eval ${JobType}_Date[$ind]=${FileName}
        (( ind = ind + 1 ))
      done
    fi


    # Time of last write on run.card
    # ==============================
    LastWrite=$( ls -l --time-style=+"${DateFormat}" "${SUBMIT_DIR}/run.card" | gawk '{print $6 " " $7}' )

  else

    NbLines=0

  fi


  # Print results
  # =============
  # Header, then one line per post-processing job index, then footer.
  AffichResult -f
  ind=0
  while [ ${ind} -lt ${NbLines} ] ; do
    AffichResult -j
    (( ind = ind + 1 ))
  done
  AffichResult -l

fi

Note: See TracBrowser for help on using the repository browser.