New URL for NEMO forge!   http://forge.nemo-ocean.eu

Since March 2022, along with the NEMO 4.2 release, code development has moved to a self-hosted GitLab.
This forge is now archived and remains online for historical reference.
Changeset 14825 for utils/CI – NEMO

Changeset 14825 for utils/CI


Ignore:
Timestamp:
2021-05-10T16:42:39+02:00 (3 years ago)
Author:
acc
Message:

Update sette for ARCHER2 and remove support for the de-commissioned ARCHER platform. Also fixed reporting for tracer.stat files in sette_rpt.sh and added reporting of average cpu timings from timing.output (if available). Uses red/green highlighting for increases/decreases from the reference.

Location:
utils/CI/sette
Files:
1 added
1 deleted
3 edited

Legend:

Unmodified
Added
Removed
  • utils/CI/sette/BATCH_TEMPLATE/batch-X86_ARCHER2-Cray

    r13790 r14825  
    11#!/bin/bash 
    2 #SBATCH --job-name=sette_test 
    3 #SBATCH --time=0:20:0 
    4 #SBATCH --nodes=NODES 
    5 #SBATCH --ntasks=TOTAL_NPROCS 
    6 #SBATCH --account=n01 
    7 #SBATCH --partition=standard 
    8 #SBATCH --qos=standard 
    9 ##BATCH --reservation=shortqos 
    10 ##BATCH --qos=short 
    11 module -s restore /work/n01/shared/acc/n01_modules/ucx_env 
    122# 
    13   export XIO_HOME=/work/n01/shared/acc/xios-2.5 
     3# A batch script will be generated using: 
     4# /work/n01/shared/acc/mkslurm_settejob -S $NXIO_PROC -s 8 -m 4 -C $NB_PROC -g 2 -a n01-CLASS -j sette_job -t 20:00 > ${SETTE_DIR}/job_batch_template 
     5# by prepare_job.sh 
    146# 
    15   export OMP_NUM_THREADS=1 
    16   export OCORES=NPROCS 
    17   export XCORES=NXIOPROCS 
    18   export SETTE_DIR=DEF_SETTE_DIR 
    19 # 
    20 # load sette functions (only post_test_tidyup needed) 
    21 # 
    22   . ${SETTE_DIR}/all_functions.sh 
    23 ############################################################### 
    24 # 
    25 # set up mpp computing environment 
    26 # 
    27 # 
    28 # Local settings. These settings are for a particular machine (the MOBILIS ClusterVision  
    29 # system at NOCS) at a particular stage of that machine's evolution. This template file 
    30 # is provided for illustration purposes only and will not work on any other machine. There 
    31 # should, however, be sufficient similarity with other MPP platforms and batch systems 
    32 # for this example to provide a useful guide for experienced users 
    33 # 
    34 # Don't remove neither change the following line 
    35 # BODY 
    36 # 
    37 # Test specific settings. Do not hand edit these lines; the fcm_job.sh script will set these 
    38 # (via sed operating on this template job file). Note that the number of compute nodes required 
    39 # is also set by the fcm_job.sh on the PBS select header line above. 
    40 # 
    41 # These variables are needed by post_test_tidyup function in all_functions.sh 
    42 # 
    43   export INPUT_DIR=DEF_INPUT_DIR 
    44   export CONFIG_DIR=DEF_CONFIG_DIR 
    45   export TOOLS_DIR=DEF_TOOLS_DIR 
    46   export NEMO_VALIDATION_DIR=DEF_NEMO_VALIDATION 
    47   export NEW_CONF=DEF_NEW_CONF 
    48   export CMP_NAM=DEF_CMP_NAM 
    49   export TEST_NAME=DEF_TEST_NAME 
    50   export EXE_DIR=DEF_EXE_DIR 
    51 # 
    52 # end of set up 
    53 ############################################################### 
    54 # 
    55 # change to the working directory  
    56 # 
    57   cd $EXE_DIR 
    58   echo Directory is `pwd` 
    59   if [ $XCORES -eq 0 ]; then 
    60 # 
    61 # Run SPMD case 
    62 # 
    63        echo srun --cpu-bind=v,rank_ldom -n $OCORES ./nemo 
    64        srun --cpu-bind=v,rank_ldom -n $OCORES ./nemo 
    65   else 
    66        if [ ! -f ./xios_server.exe ] && [ -f ${XIO_HOME}/bin/xios_server.exe ]; then 
    67           cp ${XIO_HOME}/bin/xios_server.exe . 
    68        fi 
    69        if [ ! -f ./xios_server.exe ]; then 
    70           echo "./xios_server.exe not found" 
    71           echo "run aborted" 
    72           exit 
    73        fi 
    74 # 
    75 #  Run MPMD case 
    76 # 
    77 cat > myscript_wrapper2.sh << EOFB 
    78 #!/bin/ksh 
    79 # 
    80 set -A map ./xios_server.exe ./nemo 
    81 exec_map=( 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 ) 
    82 # 
    83 exec \${map[\${exec_map[\$SLURM_PROCID]}]} 
    84 ## 
    85 EOFB 
    86 chmod u+x ./myscript_wrapper2.sh 
    87 # 
    88 srun --mem-bind=local --cpu-bind=v,map_cpu:00,0x4,0x8,0xc,0x10,0x12,0x14,0x16,0x18,0x1a,0x1c,0x1e,0x20,0x22,0x24,0x26,0x28,0x2a,0x2c,0x2e,0x30,0x32,0x34,0x36,0x38,0x3a,0x3c,0x3e,0x40,0x42,0x44,0x46,0x48,0x4a,0x4c,0x4e,0x50,0x52,0x54,0x56,0x58,0x5a,0x5c,0x5e,0x60,0x62,0x64,0x66,0x68,0x6a,0x6c,0x6e,0x70,0x72,0x74,0x76,0x78,0x7a,0x7c,0x7e, ./myscript_wrapper2.sh 
    89 # 
    90   fi 
    91 # 
    92   post_test_tidyup 
    93 # END_BODY 
    94 # Don't remove neither change the previous line 
    95   exit 
  • utils/CI/sette/prepare_job.sh

    r14507 r14825  
    200200            ;; 
    201201         X86_ARCHER2*) 
    202                                 OSPACE=2 
    203                                 SSPACE=4 
    204                                 NB_REM=$( echo $NB_PROC $OSPACE $NXIO_PROC $SSPACE | awk '{print ( $1 * $2 + $3 * $4 ) % 128}') 
    205                                 echo "NB_REM= "$NB_REM 
    206                if [ ${NB_REM} == 0 ] ; then 
    207                # number of processes required is an integer multiple of 128 
    208                # 
    209                NB_NODES=$( echo $NB_PROC $OSPACE $NXIO_PROC $SSPACE | awk '{print ( $1 * $2 + $3 * $4 ) / 128}') 
    210             else 
    211                # 
    212                # number of processes required is not an integer multiple of 128 
    213                # round up the number of nodes required. 
    214                # 
    215                NB_NODES=$( echo $NB_PROC $OSPACE $NXIO_PROC $SSPACE | awk '{printf("%d",( $1 * $2 + $3 * $4 ) / 128 + 1 )}') 
    216                   fi 
     202                                MK_TEMPLATE=$( /work/n01/shared/acc/mkslurm_settejob -S $NXIO_PROC -s 8 -m 4 -C $NB_PROC -g 2 -a n01-CLASS -j sette_job -t 20:00 > ${SETTE_DIR}/job_batch_template ) 
    217203            ;; 
    218                         XC_ARCHER_INTEL) 
    219                                 # ocean cores are packed 24 to a node 
    220                                 NB_REM=$( echo $NB_PROC | awk '{print ( $1 % 24 ) }') 
    221                                 if [ ${NB_REM} == 0 ] ; then 
    222                                         # number of processes required is an integer multiple of 24 
    223                                         # 
    224                                         NB_NODES=$( echo $NB_PROC $NXIO_PROC | awk '{print ($1) / 24}') 
    225                                 else 
    226                                         # 
    227                                         # number of processes required is not an integer multiple of 24 
    228                                         # round up the number of nodes required. 
    229                                         # 
    230                                         NB_NODES=$( echo $NB_PROC | awk '{printf("%d",($1) / 24 + 1 )}') 
    231                                 fi 
    232                                 # xios cores are sparsely packed at 4 to a node 
    233                                 # but can not share nodes with the ocean cores 
    234                                 NB_REM=$( echo $NXIO_PROC | awk '{print ( $2 % 4 ) }') 
    235                                 if [ ${NB_REM} == 0 ] ; then 
    236                                         # number of processes required is an integer multiple of 4                            
    237                                         # 
    238                                         NB_NODES=$( echo $NB_NODES $NXIO_PROC | awk '{print ($1 + ( $2 / 4 ))}')                     
    239                                 else 
    240                                         # 
    241                                         # number of processes required is not an integer multiple of 4                              
    242                                         # round up the number of nodes required. 
    243                                         # 
    244                                         NB_NODES=$( echo $NB_NODES $NXIO_PROC | awk '{print ($1 + ( $2 / 4 ) + 1)}')  
    245                                 fi 
    246                                 ;; 
    247204                        XC40_METO*) #Setup for Met Office XC40 with any compiler 
    248205                                # ocean cores are packed 32 to a node 
  • utils/CI/sette/sette_rpt.sh

    r14244 r14825  
    2525function get_ktdiff() { 
    2626  ktdiff=`diff ${1} ${2} | head -2 | grep it | awk '{ print $4 }'` 
     27} 
     28 
     29function get_ktdiff2() { 
     30  ktdiff=`diff ${1} ${2} |  head -2 | tail -1l | awk '{print $2}'` 
    2731} 
    2832 
     
    107111        fi 
    108112      else 
    109         get_ktdiff f1.tmp$$ $f2t 
     113        get_ktdiff2 f1.tmp$$ $f2t 
    110114        printf "\e[38;5;196m%-27s %s %s %s %-5s %s\e[0m\n" $nam  " tracer.stat    restartability  FAILED : " $dorv " (results are different after " $ktdiff " time steps)" 
    111115# 
     
    213217        fi 
    214218      else 
    215         get_ktdiff $f1t $f2t 
     219        get_ktdiff2 $f1t $f2t 
    216220        printf "\e[38;5;196m%-27s %s %s %s %-5s %s\e[0m\n" $nam  " tracer.stat reproducibility FAILED : " $dorv " (results are different after " $ktdiff " time steps)" 
    217221# 
     
    266270    f1s=$vdir/$nam/$mach/$dorv/LONG/run.stat 
    267271    f1t=$vdir/$nam/$mach/$dorv/LONG/tracer.stat 
     272    f1a=$vdir/$nam/$mach/$dorv/LONG/timing.output 
    268273    f2s=$vdirref/$nam/$mach/$dorvref/LONG/run.stat 
    269274    f2t=$vdirref/$nam/$mach/$dorvref/LONG/tracer.stat 
     275    f2a=$vdirref/$nam/$mach/$dorvref/LONG/timing.output 
    270276    if  [ ! -f $f1s ] && [ ! -f $f1t ] ; then 
    271277      printf "%-20s %s\n" $nam " incomplete test"; 
     
    310316        fi 
    311317      else 
    312         get_ktdiff $f1t $f2t 
     318        get_ktdiff2 $f1t $f2t 
    313319        printf "%-20s %s %s %-5s %s\n" $nam  " tracer.stat files are DIFFERENT (results are different after " $ktdiff " time steps) " 
    314320# 
     
    319325          read y 
    320326          sdiff $f1t $f2t 
     327        fi 
     328      fi 
     329    fi 
     330# 
     331# Report average CPU time differences (if available) 
     332# 
     333    if  [ -f $f1a ] && [ -f $f2a ] ; then 
     334      tnew=$(grep 'Average ' $f1a  | awk '{print $5}') 
     335      tref=$(grep 'Average ' $f2a  | awk '{print $5}') 
     336      if [ $? == 0 ]; then 
     337        if [ $pass == 0 ]; then 
     338          tdif=$( echo ${tnew} ${tref} | awk '{print $1 - $2}') 
     339          if (( $(echo "$tnew > $tref" |bc -l) )); then 
     340            printf "%-20s %14s %10s %14s %10s %14s \\e[41;33;196m%10s\\e[0m\n" $nam  " ref. time:" $tref "cur. time:" $tnew "diff.:" $tdif 
     341          else 
     342            printf "%-20s %14s %10s %14s %10s %14s \\e[42;01;196m%10s\\e[0m\n" $nam  " ref. time:" $tref "cur. time:" $tnew "diff.:" $tdif 
     343          fi 
    321344        fi 
    322345      fi 
Note: See TracChangeset for help on using the changeset viewer.