Changeset 1659


Ignore:
Timestamp:
10/29/24 16:38:13 (7 weeks ago)
Author:
aclsce
Message:

Merged with branches/libIGCM_PREPOST to split main Job into 3 Jobs (prerun, compute and postrun) on JeanZay supercomputer at IDRIS.
Prerun and postrun run on prepost partition to have access to STORE space.

Location:
trunk/libIGCM
Files:
1 added
4 edited

Legend:

Unmodified
Added
Removed
  • trunk/libIGCM/AA_job

    r1619 r1659  
    6868#-Q- jeanzay #SBATCH --output=Script_Output_::Jobname::.000001    # standard output 
    6969#-Q- jeanzay #SBATCH --error=Script_Output_::Jobname::.000001    # error output 
    70 #-Q- jeanzay #SBATCH --ntasks=::JobNumProcTot::   # Number of MPI tasks 
    71 #-Q- jeanzay #SBATCH --cpus-per-task=::openMPthreads::  # Number of openMP threads. 
    72 #-Q- jeanzay #SBATCH --hint=nomultithread         # 1 processus MPI par par physical core (no hyperthreading)  
     70#-Q- jeanzay #SBATCH -n 1   # Number of MPI tasks 
     71#-Q- jeanzay #SBATCH --partition=prepost 
    7372#-Q- jeanzay #SBATCH --time=::WallTime::             # Wall clock limit (minutes) 
    7473#-Q- jeanzay #SBATCH --account ::default_project::@cpu 
    7574#-Q- jeanzay 
    76 #-Q- jeanzay ##BATCH_NUM_PROC_TOT=$BRIDGE_SBATCH_NPROC 
    7775#-Q- jeanzay set +x 
    7876#-Q- ada #!/bin/ksh 
     
    157155 
    158156date 
     157#-Q- jeanzay if [ ${job_type} = "postrun" ] ; then 
     158#-Q- jeanzay    echo 
     159#-Q- jeanzay    echo "#######################################" 
     160#-Q- jeanzay    echo "#       START OF POSTRUN     #" 
     161#-Q- jeanzay    echo "#######################################" 
     162#-Q- jeanzay    echo 
     163#-Q- jeanzay else 
    159164echo 
    160165echo "#######################################" 
     
    162167echo "#######################################" 
    163168echo 
    164  
     169#-Q- jeanzay fi 
    165170#D--------------------------------------------------------------------== 
    166171#D- 
     
    200205 
    201206#D- Number of execution in one job 
     207#-Q- jeanzay #D- Note : on JeanZay supercomputer, NbPeriodsPerJob will be forced to 1. 
    202208NbPeriodsPerJob=1 
    203209 
     
    315321#D--------------------------------------------------------------------== 
    316322IGCM_config_CommonConfiguration ${SUBMIT_DIR}/config.card 
    317  
     323#-Q- jeanzay if [ ${job_type} != "postrun" ] ; then 
    318324if [ ! -r ${SUBMIT_DIR}/run.card ] ; then 
    319325  #================================================# 
     
    328334  IGCM_debug_Print 2 "run.card exists" 
    329335fi 
    330  
     336#-Q- jeanzay fi 
    331337# ------------------------------------------------------------------ 
    332338# Activate BigBrother so as to supervise this simulation 
     
    349355# Define, create and cd RUN_DIR 
    350356# --------------------------------------------------------------------== 
    351 RUN_DIR=${RUN_DIR_PATH}/${config_UserChoices_JobName}.${$} 
     357RUN_DIR=${RUN_DIR:=${RUN_DIR_PATH}/${config_UserChoices_JobName}.${$}} 
    352358IGCM_sys_MkdirWork ${RUN_DIR} 
    353359IGCM_sys_Cd ${RUN_DIR} 
     
    431437  IGCM_comp_PeriodStart 
    432438 
     439#-Q- jeanzay if [ ${job_type} = "prerun" ] ; then 
     440   
    433441  #D- 
    434442  # ------------------------------------------------------------------ 
     
    539547  else 
    540548      if [ ${DRYRUN} -le 1 ] ; then 
     549#-Q- jeanzay if [ ${job_type} = "prerun" ] ; then 
     550#-Q- jeanzay IGCM_sys_launch_job_compute 
     551#-Q- jeanzay fi 
    541552          REAL_DATE_INIT=$( date ) 
    542553          echo                                                                                   > ${Exe_Output} 
     
    577588  fi 
    578589  echo "========================================================================" 
     590#-Q- jeanzay fi 
    579591 
    580592  echo 
  • trunk/libIGCM/ins_job

    r1633 r1659  
    119119F_JOB_DEBUG=${libIGCM}'/AA_job_debug'; 
    120120[[ ! -f ${F_JOB_DEBUG} ]] && { print - "${F_JOB_DEBUG} unreachable"; exit 3; } 
     121F_JOB_COMPUTE=${libIGCM}'/AA_job_compute'; 
     122[[ ! -f ${F_JOB_COMPUTE} ]] && { print - "${F_JOB_COMPUTE} unreachable"; exit 3; } 
    121123F_RCI=${libIGCM}'/run.card.init'; 
    122124[[ ! -f ${F_RCI} ]] && { print - "${F_RCI} unreachable"; exit 3; } 
     
    593595  IGCM_sys_updateHeaders ${libIGCM}'/'${n_f} 
    594596 
     597  # File name for Job_compute 
     598  n_f='Job_compute_'${JobName}; 
     599  [[ ${x_v} = 'verbose' ]] && print - "\nWorking with file ${F_CFG}\nin directory ${j}\nfor ${n_f}"; 
     600  sed -e "/^${W_W} */ s///" \ 
     601      -e "/^${W_P}/d"       \ 
     602      -e "s%::modipsl::%${F_MOD}%" \ 
     603      -e "s/::Jobname::/${JobName}/" \ 
     604      -e "s/::default_project::/${ProjectID}/" \ 
     605      -e "s/::WallTime::/${WallTime}/" \ 
     606      ${F_JOB_COMPUTE} > ${libIGCM}'/'${n_f} 
     607  chmod u+x ${libIGCM}'/'${n_f} 
     608 
     609  # update Headers so that ressources description are accurate (MPMD/SPMD/...) 
     610  IGCM_sys_updateHeaders ${libIGCM}'/'${n_f} 
     611 
    595612  # File name for Job 
    596613  n_f='Job_'${JobName}; 
     
    620637  [[ ${i_f} = 'AA_job' ]] && { continue; } 
    621638  [[ ${i_f} = 'AA_job_debug' ]]  && { continue; } 
     639  [[ ${i_f} = 'AA_job_compute' ]]  && { continue; } 
    622640  j=${i%/*}; n_f=${i_f#AA_}'.job'; 
    623641  [[ ${x_f} = 'false' ]] && [[ -f ${j}'/'${n_f} ]] && { ins_job_Warning; continue; } 
  • trunk/libIGCM/libIGCM_config/libIGCM_config.ksh

    r1645 r1659  
    487487  IGCM_debug_Print 3  "R_BC=${R_BC}" 
    488488  NbPeriodsPerJob=${config_UserChoices_NbPeriodsPerJob:=${NbPeriodsPerJob}} 
     489  # Specific for JeanZay because of the 3 jobs (prerun, compute, postrun). 
     490  if [ X${MASTER} = Xjeanzay ] ; then 
     491     NbPeriodsPerJob=1 
     492     IGCM_debug_Print 3  "On JeanZay supercomputer, we force NbPeriodsPerJob=1" 
     493  fi 
    489494  IGCM_debug_Print 3  "Loop in main Job with ${NbPeriodsPerJob} period(s)" 
    490495   
     
    739744  else 
    740745      Pack=false 
    741       if ( [ X${MASTER} = Xjeanzay ] && [ X${TaskType} = Xcomputing ] ) ; then 
    742           if [ ! X${config_UserChoices_SpaceName} = XTEST ]; then 
    743               IGCM_debug_Exit "ERROR On JeanZay it is not possible to run without Pack" 
    744               IGCM_debug_Verif_Exit 
    745           fi 
    746       fi 
    747            
    748746  fi 
    749747  # Loop over components 
     
    12691267    IGCM_sys_build_execution_scripts 
    12701268  fi 
    1271  
    1272   ExecutionFail=false 
    1273  
     1269  if [ X${job_type} != Xpostrun ] ; then 
     1270    ExecutionFail=false 
     1271  fi 
    12741272  # Update the rabbitMQ queue 
    12751273  IGCM_debug_BigBro_Update 
  • trunk/libIGCM/libIGCM_sys/libIGCM_sys_jeanzay.ksh

    r1648 r1659  
    161161typeset RUN_DIR_PATH=${RUN_DIR_PATH:=${SCRATCH}/RUN_DIR/${SLURM_JOBID}_${$}} 
    162162 
     163typeset job_type=${job_type:="prerun"} 
     164 
    163165#==================================================== 
    164166#- OUTCOMMAND_PATH : tmp place to store command lines standard error and outputs 
     
    217219  IGCM_debug_Print 1 "Data project for output to be used: ${DataProject}" 
    218220 
    219 #==================================================== 
    220 #- set OLDARCHIVE to access OLDSTORE  
    221 OLDARCHIVE=/gpfsstore/rech/${DataProject}/${LOGIN} 
    222  
    223 #==================================================== 
    224 #- set TMPARCHIVE to access TMPSTORE  
    225 TMPARCHIVE=/lustre/fsnomig/ipsl/tmpstore/rech/${DataProject}/${LOGIN} 
    226221 
    227222#==================================================== 
     
    462457  typeset options status 
    463458  options="-o ${SUBMIT_DIR}/${Script_Output} -e ${SUBMIT_DIR}/${Script_Output}" 
    464  
    465   /usr/bin/time sbatch ${options} $1 > ${OUTCOMMAND_PATH}/out_command_${LOGIN}.$$ 2>&1 
     459  export job_type="prerun" 
     460  unset RUN_DIR 
     461 
     462  sbatch ${options} $1 > ${OUTCOMMAND_PATH}/out_command_${LOGIN}.$$ 2>&1 
    466463  status=$? 
    467464 
     
    474471  fi 
    475472  IGCM_debug_PopStack "IGCM_sys_Qsub" 
     473} 
     474 
     475#D-#================================================== 
     476#D-function IGCM_sys_Qsub_job_compute 
     477#D-* Purpose: Qsub new computing job 
     478#D-* Examples: 
     479#D- 
     480function IGCM_sys_Qsub_job_compute { 
     481  IGCM_debug_PushStack "IGCM_sys_Qsub_job_compute" $@ 
     482  if ( $DEBUG_sys ) ; then 
     483    echo "IGCM_sys_Qsub_job_compute :" $@ 
     484  fi 
     485  typeset options status 
     486  (( requested_time_seconds = SLURM_JOB_END_TIME - SLURM_JOB_START_TIME )) 
     487  (( requested_time_minute = requested_time_seconds / 60 )) 
     488  options="-o ${SUBMIT_DIR}/Script_Output_${config_UserChoices_JobName}.$( printf "%06d" ${CumulPeriod} ) -e ${SUBMIT_DIR}/Script_Output_${config_UserChoices_JobName}.$( printf "%06d" ${CumulPeriod} ) --open-mode=append --time=${requested_time_minute}" 
     489 
     490  export job_type="compute" 
     491  export RUN_DIR=${RUN_DIR} 
     492  export EXPERIMENT=${SUBMIT_DIR} 
     493  export CumulPeriod=${CumulPeriod} 
     494  export FirstInitialize=${FirstInitialize} 
     495  export PREFIX=${PREFIX} 
     496  unset SLURM_HOSTFILE 
     497 
     498  sbatch ${options} $1 > ${OUTCOMMAND_PATH}/out_command_${LOGIN}.$$ 2>&1 
     499  status=$? 
     500 
     501  cat ${OUTCOMMAND_PATH}/out_command_${LOGIN}.$$ 
     502  if [ ${status} -gt 0 ] ; then 
     503    IGCM_debug_Print 2 "IGCM_sys_Qsub ${options} $1 : error code ${status}" 
     504    IGCM_debug_Exit "IGCM_sys_Qsub" 
     505  else 
     506    JobID=$( gawk {'print $4'} ${OUTCOMMAND_PATH}/out_command_${LOGIN}.$$ ) 
     507  fi 
     508  IGCM_debug_PopStack "IGCM_sys_Qsub_job_compute" 
     509} 
     510 
     511#D-#================================================== 
     512#D-function IGCM_sys_Qsub_postrun 
     513#D-* Purpose: Qsub new job 
     514#D-* Examples: 
     515#D- 
     516function IGCM_sys_Qsub_job_postrun { 
     517  IGCM_debug_PushStack "IGCM_sys_Qsub_job_postrun" $@ 
     518  if ( $DEBUG_sys ) ; then 
     519    echo "IGCM_sys_Qsub_job_postrun :" $@ 
     520  fi 
     521  typeset options status 
     522  options="-o ${EXPERIMENT}/Script_Output_${JobName}.$( printf "%06d" ${CumulPeriod} ) -e ${EXPERIMENT}/Script_Output_${JobName}.$( printf "%06d" ${CumulPeriod} ) --open-mode=append" 
     523 
     524  export job_type="postrun" 
     525  export RUN_DIR=${RUN_DIR} 
     526  export ExecutionFail=${ExecutionFail} 
     527  export FirstInitialize=${FirstInitialize} 
     528  export executionType=${executionType} 
     529  unset SLURM_HOSTFILE 
     530 
     531  sbatch ${options} $1 > ${OUTCOMMAND_PATH}/out_command_${LOGIN}.$$ 2>&1 
     532  status=$? 
     533 
     534  cat ${OUTCOMMAND_PATH}/out_command_${LOGIN}.$$ 
     535  if [ ${status} -gt 0 ] ; then 
     536    IGCM_debug_Print 2 "IGCM_sys_Qsub ${options} $1 : error code ${status}" 
     537    IGCM_debug_Exit "IGCM_sys_Qsub" 
     538  else 
     539    JobID=$( gawk {'print $4'} ${OUTCOMMAND_PATH}/out_command_${LOGIN}.$$ ) 
     540  fi 
     541  IGCM_debug_PopStack "IGCM_sys_Qsub_job_postrun" 
    476542} 
    477543 
     
    11561222 
    11571223  ulimit -s unlimited 
    1158   if [ ${executionType} -eq 2 ] ; then 
     1224  if ( [ ${executionType} == 2 ] && [ ${job_type} == "compute" ]) ; then 
    11591225    export SLURM_HOSTFILE=./hostlist 
    11601226  fi 
     
    14431509             (( index_host = current_core / NB_CORE_PER_NODE )) 
    14441510             host_value=${listnodes[${index_host}]} 
     1511             if [ ${job_type} == "prerun" ] ; then 
     1512                 echo "node_${index_host}_X" >> hostlist_template 
     1513             else 
    14451514             echo "$host_value" >> hostlist 
    1446              if [ ${DRYRUN_DEBUG} = 4 ] ; then 
    1447                  echo "node_${index_host}_X" >> hostlist_template 
    14481515             fi 
    14491516             (( current_core = current_core + offset_comp_proc_loc )) 
     
    18311898} 
    18321899 
     1900#  
     1901#D-#================================================== 
     1902#D-function IGCM_sys_launch_job_compute 
     1903#D-* Purpose: launch the computing part of the simulation on computing partition 
     1904#D-* Examples: 
     1905#D- 
     1906function IGCM_sys_launch_job_compute { 
     1907  IGCM_debug_PushStack "IGCM_sys_launch_job_compute" 
     1908  if ( $DEBUG_sys ) ; then 
     1909    echo "IGCM_sys_launch_job_compute :" 
     1910  fi 
     1911  if [ -f ${libIGCM}/Job_compute_${config_UserChoices_JobName} ] ; then 
     1912 
     1913   SUBMIT_DIRECTORY=$(grep SUBMIT_DIR ${libIGCM}/libIGCM_sys/libIGCM_sys_${SYSTEM}.ksh | grep -m1 typeset | cut -dx -f2) 
     1914      sed -e "s%::EXECUTION::%${EXECUTION}%g" \ 
     1915          -e "s%::EXECUTION_TYPE::%${executionType}%" \ 
     1916          -e "s%::JOBNAME::%${config_UserChoices_JobName}%" \ 
     1917          ${libIGCM}/Job_compute_${config_UserChoices_JobName} > ${RUN_DIR}/Job_compute_${config_UserChoices_JobName} 
     1918      cp ${EnvFile} ${RUN_DIR}/. 
     1919      cd ${RUN_DIR} ; IGCM_sys_Qsub_job_compute Job_compute_${config_UserChoices_JobName} 
     1920      IGCM_debug_Verif_Exit 
     1921      echo 
     1922      echo "############################################" 
     1923      echo "#    END OF PRERUN STEP   #" 
     1924      echo "############################################" 
     1925      echo 
     1926      exit 
     1927  else 
     1928      IGCM_debug_Exit "ERROR with computing job !" 
     1929      IGCM_debug_Print 2 "Computing job is missing" 
     1930      IGCM_debug_Verif_Exit 
     1931  fi 
     1932  
     1933  IGCM_debug_PopStack "IGCM_sys_launch_job_compute" 
     1934} 
Note: See TracChangeset for help on using the changeset viewer.