source: tags/libIGCM_v2.0_rc2/AA_RunChecker @ 764

Last change on this file since 764 was 763, checked in by labetoulle, 11 years ago

RunChecker: use sort -u to make sure there are no duplicate lines in the simulation catalog.

  • Property svn:executable set to *
  • Property svn:keywords set to Revision Author Date
File size: 15.9 KB
Line 
#!/bin/ksh

#**************************************************************
# Author: Sonia Labetoulle
# Contact: sonia.labetoulle__at__ipsl.jussieu.fr
# $Revision::                                          $ Revision of last commit
# $Author::                                            $ Author of last commit
# $Date::                                              $ Date of last commit
# IPSL (2012)
#  This software is governed by the CeCILL licence see libIGCM/libIGCM_CeCILL.LIC
#
#**************************************************************

#======================================================================#
# Display a report on how a simulation is faring.
16
17
#----------------------------------------------------------------------#
# ChangeUsr PATH
#   Print PATH with the first occurrence of the current user's login
#   replaced by the target user's login and the first occurrence of the
#   current user's group replaced by the target user's group.
#
#   Globals read : CurrentUsr, TargetUsr, CurrentGrp, TargetGrp
#   Output       : the rewritten path on stdout
#
#   The user and group names are used as sed regular expressions, exactly
#   as before; names containing '/' or regex metacharacters are not
#   supported.
function ChangeUsr {

  typeset path_in="$1"

  # printf + quoting keeps blanks, glob characters and backslashes of the
  # path intact (an unquoted 'echo $1' would split and expand them).
  if [ -n "${CurrentGrp}" ] ; then
    printf '%s\n' "${path_in}" | sed -e "s/${CurrentUsr}/${TargetUsr}/" \
                                     -e "s/${CurrentGrp}/${TargetGrp}/"
  else
    # An empty pattern ("s//grp/") makes sed reuse the previous regular
    # expression, i.e. the user name: skip the group substitution when
    # the current group is unknown.
    printf '%s\n' "${path_in}" | sed -e "s/${CurrentUsr}/${TargetUsr}/"
  fi

}
24
25
#----------------------------------------------------------------------#
# SearchCatalog
#   Find the submission directory of the requested simulation.
#
#   Lookup order:
#     1) the simulation catalog (JobName is matched as a regular
#        expression against the first column, so a partial name works);
#     2) with '-p' (fg_path)  : the Job_* file(s) found in ConfigPath;
#     3) with '-s' (fg_search): a Job_<JobName> file found under WORKDIR
#        (IGCM_OUT is pruned from the search).
#   A job found by 2) or 3) is appended to the catalog.
#
#   Globals read   : JobName, SimuCatalog, ConfigPath, WORKDIR, TargetUsr,
#                    CurrentUsr, HostName, fg_path, fg_search
#   Globals written: SUBMIT_DIR, JobName (completed / derived), NbOcc,
#                    fg_new, FileList, SEARCH_DIR
#   Exits the script when nothing usable is found or the user quits.
function SearchCatalog {

  typeset idx JobFile Choice
  unset SUBMIT_DIR

  fg_new=false

  # 1) Catalog lookup
  if [ -n "${JobName}" ] ; then
    NbOcc=$( awk -v JobName="${JobName}" \
             'BEGIN {x=0}  $1 ~ JobName {++x} END {print x}' "${SimuCatalog}" )
  else
    NbOcc=0
  fi
  NbOcc=${NbOcc:-0}

  # Several catalog entries match: there is no file list to choose from at
  # this point, and going on would append a line without directory to the
  # catalog. Show the candidates and stop instead.
  if [ "${NbOcc}" -gt 1 ] ; then
    echo "More than one job matches '${JobName}' in ${SimuCatalog} :"
    awk -v JobName="${JobName}" '$1 ~ JobName {print "  " $0}' "${SimuCatalog}"
    echo "Please give a more specific job name."
    exit 1
  fi

  # 2) '-p' : use the Job_* file(s) of the given directory
  if [ "${NbOcc}" -eq 0 ] && ${fg_path} ; then
    set -A FileList "${ConfigPath}"/Job_*
    # Without any match the pattern is left as is: check the first entry.
    if [ -r "${FileList[0]}" ] ; then
      NbOcc=${#FileList[@]}
      fg_new=true
    else
      unset FileList
      NbOcc=0
    fi
  fi

  # 3) '-s' : search the work directory (of the target user if needed)
  if [ "${NbOcc}" -eq 0 ] && ${fg_search} ; then
    SEARCH_DIR=${WORKDIR}
    if [ "${TargetUsr}" != "${CurrentUsr}" ] ; then
      SEARCH_DIR=$( ChangeUsr "${SEARCH_DIR}" )
    fi
    echo "${JobName} not in Catalog, we'll try to find it in ${SEARCH_DIR}"

    set -A FileList $( find "${SEARCH_DIR}/" \
                            -path "${SEARCH_DIR}/IGCM_OUT" -prune -o \
                            -name "Job_${JobName}" -print )
    if [ -z "${FileList[0]}" ] ; then
      NbOcc=0
    else
      NbOcc=${#FileList[@]}
      fg_new=true
    fi
  fi

  # Several candidate files: let the user pick one
  if [ "${NbOcc}" -gt 1 ] ; then
    echo "More than one job"
    idx=0
    while [ ${idx} -lt ${NbOcc} ] ; do
      printf '%2i) %-30s\n' ${idx} "${FileList[${idx}]}"
      (( idx = idx + 1 ))
    done
    echo "Give your choice number or 'q' to quit : "
    read Choice
    case "${Choice}" in
      q)
        exit ;;
      ''|*[!0-9]*|0?*)
        # empty, not a number, or a number with leading zeros
        echo "Invalid choice '${Choice}'."
        exit 1 ;;
    esac
    if [ "${Choice}" -ge "${NbOcc}" ] ; then
      echo "Invalid choice '${Choice}'."
      exit 1
    fi
    fg_new=true
    FileList=${FileList[${Choice}]}
    NbOcc=1
  fi

  # Nothing found at all
  if [ "${NbOcc}" -eq 0 ] ; then
    echo "${JobName} not found."
    echo "You can try : *) '-s' option to automatically search your \$WORKDIR, "
    echo "              *) '-p' option to provide the config.card path, "
    echo "              *)  manually editing your ${SimuCatalog}"
    exit
  fi

  # Exactly one candidate left
  if ${fg_new} ; then
    # Job file found outside the catalog: register it.
    JobFile=${FileList[0]}
    if [ -z "${JobName}" ] ; then
      # Strip only the 'Job_' prefix so that job names containing
      # underscores are kept whole.
      JobName=$( basename "${JobFile}" )
      JobName=${JobName#Job_}
    fi
    SUBMIT_DIR=$( dirname "${JobFile}" )
    echo "${JobName} ${TargetUsr} ${HostName} ${SUBMIT_DIR}"
    echo "${JobName} ${TargetUsr} ${HostName} ${SUBMIT_DIR}" >> "${SimuCatalog}"
    # Keep the catalog free of duplicate lines
    sort -u "${SimuCatalog}" > "${SimuCatalog}.tmp" \
      && mv "${SimuCatalog}.tmp" "${SimuCatalog}"
  elif [ "${TargetUsr}" = "$( awk -v JobName="${JobName}" \
                                  '$1 ~ JobName {print $2}' \
                                  "${SimuCatalog}" )" ] \
    && [ "${HostName}"  = "$( awk -v JobName="${JobName}" \
                                  '$1 ~ JobName {print $3}' \
                                  "${SimuCatalog}" )" ] ; then
    # Catalog entry belongs to the target user on this host: complete the
    # (possibly partial) job name and read the submission directory.
    JobName=$( awk -v JobName="${JobName}" '$1 ~ JobName {print $1}' "${SimuCatalog}" )
    SUBMIT_DIR=$( awk -v JobName="${JobName}" '$1 ~ JobName {print $4}' "${SimuCatalog}" )
  else
    echo "${JobName} not in Catalog."
    exit
  fi

  return

}
121
122
#----------------------------------------------------------------------#
# AffichResult -f | -j | -l
#   Print one part of the report table.
#     -f, --first : header (job name, run.card date, simulation state and
#                   pending rebuilds)
#     -j, --job   : one line of post-processing job status, for the line
#                   index held in the global 'ind'
#     -l, --last  : footer and current date
#   When several options are given, the first applicable one in the order
#   -f, -j, -l is used.
#
#   Globals read : JobName, LastWrite, DateBegin, DateEnd, PeriodState,
#                  PeriodDateBegin, PeriodDateEnd, CumulPeriod, NbRebuild,
#                  FirstRebuild, LastRebuild, NbLines, ind, JobType_list,
#                  <JobType>_Date / _Status / _Nb arrays, monitoring_Date,
#                  atlas_Date, DateFormat
#   Globals written : fg_first, fg_job, fg_last, Col* colour codes
function AffichResult {

  typeset JobType Date Status Nb Color len

  fg_first=false
  fg_last=false
  fg_job=false

  while [ $# -ne 0 ] ; do
    case "${1}" in
      -f|--first)
        fg_first=true
        shift 1 ;;
      -l|--last)
        fg_last=true
        shift 1 ;;
      -j|--job)
        fg_job=true
        shift 1 ;;
      -*)
        echo "usage: ${0}"
        echo "       options = -f; -j; -l"
        exit ;;
      *)
        break ;;
    esac
  done


  # Define colors
  # =============
  ColEsc="\033["
  ColNon="${ColEsc}0m"       # Return to normal
  ColExp="${ColEsc}1m"       # White - bold
  ColFat="${ColEsc}1;31m"    # Fatal
  ColCpl="${ColEsc}1;32m"    # Completed
  ColAtt="${ColEsc}1;30m"    # Waiting
  ColDef="${ColEsc}1;34m"    # Default
  ColRbl="${ColEsc}31m"      # Rebuild


  # Print header
  # ============
  if ${fg_first} ; then
    # Padding between the job name and the run.card date
    (( len = 67 - ${#JobName} ))
    echo "|===========================================================================================================|"
    printf "| JobName = ${ColExp}%s${ColNon}" "${JobName}"
    # A very long job name leaves no room for padding: never pass a
    # negative width to printf.
    if [ ${len} -gt 0 ] ; then
      printf "%${len}s" ""
    fi
    printf "run.card : ${ColExp}%-18s${ColNon}|\n" "${LastWrite}"
    echo "|-------------------------|-------------|-------------------------|-------------|-----:----------:----------|"
    echo "|                         |             |                         |             |     Pending Rebuilds      |"
    echo "| Date Begin - DateEnd    | PeriodState | Current Period          | CumulPeriod | Nb  : from     : to       |"
    echo "|-------------------------|-------------|-------------------------|-------------|-----:----------:----------|"

    # All values are quoted so that an empty one still fills its own
    # column instead of shifting the following ones to the left.
    printf "| %-10s - %-10s | " \
           "${DateBegin}" "${DateEnd}"

    case "${PeriodState}" in
      Fatal)
        Color=${ColFat} ;;
      Completed)
        Color=${ColCpl} ;;
      Waiting|OnQueue)
        Color=${ColAtt} ;;
      *)
        Color=${ColDef} ;;
    esac
    printf "${Color}%-11s${ColNon} | " "${PeriodState}"

    printf "%-10s - %-10s | %11s | " \
           "${PeriodDateBegin}" "${PeriodDateEnd}" "${CumulPeriod}"

    # Highlight the rebuild columns when rebuilds are pending
    if [ "X${NbRebuild}" != X. ] && [ "X${NbRebuild}" != X0 ] ; then
      printf "${ColRbl}%3s : %-8s : %-8s${ColNon} |\n" \
             "${NbRebuild}" "${FirstRebuild}" "${LastRebuild}"
    else
      printf "%3s : %-8s : %-8s |\n" \
             "${NbRebuild}" "${FirstRebuild}" "${LastRebuild}"
    fi

    # Column titles of the post-processing section, only if it has lines
    if [ "${NbLines:-0}" -gt 0 ] ; then
      printf "|-----------------------------------------------------------------------------------------------------------|\n"
      printf "|                                                      Last                                                 |\n"
      printf "|     Rebuild      |   Pack_Output    |   Pack_Restart   |    Pack_Debug    |  Monitoring  |     Atlas      |\n"
      printf "|------------------|------------------|------------------|------------------|--------------|----------------|\n"
    fi

    return
  fi

  # Print Post-processing job status
  # ================================
  if ${fg_job} ; then
    printf "|"

    # Print rebuild and pack jobs
    # ---------------------------
    for JobType in ${JobType_list[*]} ; do
      # Indirect read of <JobType>_Date[ind], _Status[ind] and _Nb[ind]
      eval Date=\"\${${JobType}_Date[${ind}]}\"
      eval Status=\"\${${JobType}_Status[${ind}]}\"
      eval Nb=\"\${${JobType}_Nb[${ind}]}\"

      if [ "X${Status}" = XOK ] ; then
        Color=${ColCpl}
      else
        Color=${ColFat}
      fi
      printf "  ${Color}%-8s${ColNon} : %3s  |" "${Date}" "${Nb}"
    done

    Color=${ColExp}

    # Print monitoring jobs
    # ---------------------
    # The monitoring date is a single value: show it on the first line only
    if [ "${ind}" -eq 0 ] ; then
      Date=${monitoring_Date}
    else
      Date=""
    fi
    printf "     ${Color}%-4s${ColNon}     |" "${Date}"

    # Print atlas jobs
    # ----------------
    Date=${atlas_Date[${ind}]}
    printf "  ${Color}%-12s${ColNon}  |" "${Date}"

    printf "\n"

    return
  fi

  # Print footer
  # ============
  if ${fg_last} ; then
    printf "|===========================================================================================================|\n"
    date +"${DateFormat}"
    return
  fi

}
269
#======================================================================#

#D- Task type (computing or post-processing)
TaskType=post-processing
typeset -i Verbosity=0

CurrentUsr=$( whoami )
# Third field of the 'groups <user>' output.
# NOTE(review): assumes the "<user> : <group1> <group2> ..." layout, which
# makes field 3 the first group - confirm on the target machines.
CurrentGrp=$( groups "${CurrentUsr}" | awk '{print $3}' )

# Without any argument, show the help and stop
if [ $# -eq 0 ] ; then
  "$0" -h
  exit
fi

# Arguments
# =========
# Default argument values
# -----------------------
TargetUsr=${CurrentUsr}
# Host name truncated at its first digit
HostName=$( hostname | sed -e "s/[0-9].*//" )

fg_color=true
fg_search=false
fg_quiet=false
fg_path=false
NbHisto=10

# Get arguments from command line
# -------------------------------
while [ $# -ne 0 ] ; do

  # Options taking a value: make sure the value is there before 'shift 2'
  case "$1" in
    -j|-job-number|-p|-config-path|-u|-user)
      if [ $# -lt 2 ] ; then
        echo "Option $1 requires an argument."
        "$0" -h
        exit 1
      fi ;;
  esac

  case "$1" in
    -h|--help|-help)
      echo "usage: $0 [-u user] [-q] [-j n] [-s] job_name"
      echo "       $0 [-u user] [-q] [-j n] -p config.card_path"
      echo ""
      echo "options :"
      echo "  -h : print this help and exit"
      echo "  -u : owner of the job"
      echo "  -q : quiet"
      echo "  -j : print n post-processing jobs (default is 10)"
      echo "  -s : search for a new job in \$WORKDIR and fill in "
      echo "       the catalog before printing information"
      echo "  -p : give the directory (absolute path) containing "
      echo "       the config.card instead of the job name."
      exit ;;
#    -b|-nocolor)
#      fg_color=false
#      shift 1 ;;
    -j|-job-number)
      NbHisto="$2"
      shift 2 ;;
    -p|-config-path)
      ConfigPath="$2"
      fg_path=true
      shift 2 ;;
    -q|-quiet)
      fg_quiet=true
      shift 1 ;;
    -s|-search)
      fg_search=true
      shift 1 ;;
    -u|-user)
      TargetUsr="$2"
      shift 2 ;;
    -*)
      "$0" -h
      exit ;;
    *)
      # First non-option argument: the job name, handled later
      break ;;
  esac
done

# NbHisto is later handed to 'tail -n': it has to be a plain integer
case "${NbHisto}" in
  ''|*[!0-9]*)
    echo "Option -j expects an integer (got '${NbHisto}')."
    exit 1 ;;
esac

# A job name is mandatory unless a config.card path is given
if ! ${fg_path} && [ $# -lt 1 ] ; then
  "$0" -h
  exit
fi

if ${fg_path} && ${fg_search} ; then
  echo "You cannot use -s and -p at the same time"
  exit
fi
352
353
# Load libIGCM library
# ====================
# NOTE(review): '::modipsl::' looks like a placeholder replaced when the
# script is installed from its AA_ template - confirm.
libIGCM=${libIGCM:=::modipsl::/libIGCM}

. "${libIGCM}/libIGCM_debug/libIGCM_debug.ksh"
     ( ${DEBUG_debug} ) && IGCM_debug_Check
. "${libIGCM}/libIGCM_card/libIGCM_card.ksh"
     ( ${DEBUG_debug} ) && IGCM_card_Check
. "${libIGCM}/libIGCM_date/libIGCM_date.ksh"
     ( ${DEBUG_debug} ) && IGCM_date_Check
#-------
. "${libIGCM}/libIGCM_sys/libIGCM_sys.ksh"
. "${libIGCM}/libIGCM_config/libIGCM_config.ksh"


# First remaining argument (empty when only '-p' was given)
JobName=$1

# Quiet mode: only the most recent post-processing job of each type
if ${fg_quiet} ; then
  NbHisto=1
fi

echo "Target user = ${TargetUsr}"

# Same field as for CurrentGrp, for the target user
TargetGrp=$( groups "${TargetUsr}" | awk '{print $3}' )

# Define the catalog in which the known simulations are stored
SimuCatalog="$( ccc_home )/.simucatalog.dat"
# Create it when it is missing (or empty)
if [ ! -s "${SimuCatalog}" ] ; then
  touch "${SimuCatalog}"
fi

# Date format
DateFormat="%d/%m/%y %R:%S"

# Find SUBMIT_DIR in catalog
# ==========================
SearchCatalog
391
# Build and print the report once the submission directory is known
# =================================================================
if [ -n "${SUBMIT_DIR}" ] ; then

  echo "Submit:  >${SUBMIT_DIR}<"
  if ! cd "${SUBMIT_DIR}" ; then
    echo "Cannot change directory to ${SUBMIT_DIR}"
    exit 1
  fi


  # Extract useful information from run.card and config.card
  # ========================================================

  RunFile="${SUBMIT_DIR}/run.card"
  ConfFile="${SUBMIT_DIR}/config.card"

  IGCM_config_CommonConfiguration "${ConfFile}"


  # No (or empty) run.card: the simulation has not started yet
  if [ -s "${RunFile}" ] ; then
    IGCM_card_DefineVariableFromOption "${RunFile}" Configuration PeriodState
    PeriodState=${run_Configuration_PeriodState}
  else
    PeriodState="Waiting"
  fi

  # A job said to be running or queued but not listed by ccc_mstat is
  # reported as Fatal
  if [ "X${PeriodState}" = XRunning ] || [ "X${PeriodState}" = XOnQueue ] ; then
    NbRun=$( ccc_mstat -f | grep -c -- "${JobName}" )

    if [ "${NbRun}" -eq 0 ] ; then
      PeriodState="Fatal"
    fi
  fi

  DateBegin=${config_UserChoices_DateBegin}
  DateEnd=${config_UserChoices_DateEnd}
  TagName=${config_UserChoices_TagName}
  ExperimentName=${config_UserChoices_ExperimentName}
  SpaceName=${config_UserChoices_SpaceName}


  # Current period, read from run.card unless waiting or completed
  if [ "X${PeriodState}" != XWaiting ] && [ "X${PeriodState}" != XCompleted ] ; then
    IGCM_card_DefineVariableFromOption "${RunFile}" Configuration PeriodDateBegin
    IGCM_card_DefineVariableFromOption "${RunFile}" Configuration PeriodDateEnd
    IGCM_card_DefineVariableFromOption "${RunFile}" Configuration CumulPeriod
    PeriodDateBegin=${run_Configuration_PeriodDateBegin}
    PeriodDateEnd=${run_Configuration_PeriodDateEnd}
    CumulPeriod=${run_Configuration_CumulPeriod}
  else
    PeriodDateBegin="."
    PeriodDateEnd="."
    CumulPeriod="."
  fi

  # Directories used by the simulation
  DATA_DIR=${R_SAVE}
  POST_DIR=${R_BUFR}/Out
  CWORK_DIR=${R_FIGR}
  if [ "X${config_Post_RebuildFromArchive}" = Xtrue ] ; then
    REBUILD_DIR=${R_SAVE}/TMP
    RebuildJob="rebuild_fromArchive"
  else
    REBUILD_DIR=${BIG_DIR}/${config_UserChoices_TagName}/${config_UserChoices_JobName}
    RebuildJob="rebuild_fromWorkdir"
  fi

  # Looking at someone else's simulation: point the paths to that user
  if [ "${TargetUsr}" != "${CurrentUsr}" ] ; then
    DATA_DIR=$( ChangeUsr "${DATA_DIR}" )
    POST_DIR=$( ChangeUsr "${POST_DIR}" )
    CWORK_DIR=$( ChangeUsr "${CWORK_DIR}" )
    REBUILD_DIR=$( ChangeUsr "${REBUILD_DIR}" )
  fi

  echo "Data:    >${DATA_DIR}<"
  echo "Rebuild: >${REBUILD_DIR}<"
  echo "Post:    >${POST_DIR}<"
  echo "Work:    >${CWORK_DIR}<"

  # Values shown when the simulation is waiting or nothing is found
  NbRebuild="."
  FirstRebuild="."
  LastRebuild="."
  NbLines=0

  if [ "${PeriodState}" != "Waiting" ] ; then

    # Check pending rebuilds
    # ======================

    set -A RebuildList $( find "${REBUILD_DIR}/" -name "REBUILD_*" | sort )
    if [ ${#RebuildList[*]} -gt 0 ] ; then
      NbRebuild=$( IGCM_sys_CountFileArchive "${REBUILD_DIR}" )

      # The last element is addressed through the size of the list itself:
      # the count above comes from another command and need not agree
      # with what 'find' returned.
      (( LastIdx = ${#RebuildList[*]} - 1 ))
      FirstRebuild=$( basename "${RebuildList[0]}" | cut -f2 -d_ )
      LastRebuild=$( basename "${RebuildList[${LastIdx}]}" | cut -f2 -d_ )
    fi

    # Check last REBUILD and PACK* jobs
    # =================================
    set -A JobType_list "${RebuildJob}" "pack_output" "pack_restart" "pack_debug"

    for JobType in ${JobType_list[*]} ; do

      # Name looked for in the first field of the job output lines
      if [ "X${JobType}" = "X${RebuildJob}" ] ; then
        String=IGCM_sys_PutBuffer_Out
      else
        String=IGCM_sys_Put_Out
      fi

      # Output files of the NbHisto last jobs of this type
      set -A FileList $( ls "${POST_DIR}/${JobType}".*.out | tail -n "${NbHisto}" )

      if [ ${#FileList[*]} -gt ${NbLines} ] ; then
        NbLines=${#FileList[*]}
      fi

      ind=0
      for FileName in ${FileList[*]} ; do
        # Date = last but one dot-separated field of the file name
        LastDate=$( basename "${FileName}" | awk -F"." '{ print $(NF-1) }' )

        # One pass over the file: lines whose first field matches String,
        # counted apart depending on whether the third field matches
        # "error." or not. Prints "<matches> <errors>".
        Counts=$( awk -v String="${String}" \
                      'BEGIN { ok=0 ; ko=0 }
                       ($1~String) { if ($3~"error.") ko=ko+1 ; else ok=ok+1 }
                       END { print ok, ko }' \
                      "${FileName}" )
        Match=${Counts% *}
        Error=${Counts#* }
        (( Nb = Match - Error ))

        if [ "${Error}" -eq 0 ] && [ "${Nb}" -gt 0 ] ; then
          Status=OK
        else
          Status=KO
        fi

        # Store the result in <JobType>_Date / _Status / _Nb [ind]
        eval "${JobType}_Date[${ind}]=\${LastDate}"
        eval "${JobType}_Status[${ind}]=\${Status}"
        eval "${JobType}_Nb[${ind}]=\${Nb}"

        (( ind = ind + 1 ))
      done
    done

    # Check last MONITORING jobs
    # ==========================
    if [ -d "${CWORK_DIR}/MONITORING" ] ; then
      # Last year listed by cdo for the reference monitoring file
      monitoring_Date=$( cdo showyear "${CWORK_DIR}/MONITORING/files/ATM_bils_global_ave.nc" 2> /dev/null | \
                             awk '{ print $NF }' )
    fi

    # Check last ATLAS jobs
    # =====================
    if [ -d "${CWORK_DIR}/ATLAS" ] ; then
      set -A FileList $( ls "${CWORK_DIR}/ATLAS" | tail -n "${NbHisto}" )

      if [ ${#FileList[*]} -gt ${NbLines} ] ; then
        NbLines=${#FileList[*]}
      fi

      ind=0
      for FileName in ${FileList[*]} ; do
        atlas_Date[${ind}]=${FileName}
        (( ind = ind + 1 ))
      done
    fi


    # Time of last write on run.card
    # ==============================
    LastWrite=$( date -r "${RunFile}" +"${DateFormat}" )

  fi


  # Print results
  # =============
  AffichResult -f
  ind=0
  while [ ${ind} -lt ${NbLines} ] ; do
    AffichResult -j
    (( ind = ind + 1 ))
  done
  AffichResult -l

fi
599
Note: See TracBrowser for help on using the repository browser.