Changeset 1652


Ignore:
Timestamp:
10/22/24 13:28:01 (3 months ago)
Author:
aclsce
Message:

Modifications to split main Job into 3 Jobs (prerun, compute and postrun) at IDRIS.
Prerun and postrun run on the prepost partition to have access to the STORE space.

Location:
branches/libIGCM_PREPOST
Files:
1 added
3 edited

Legend:

Unmodified
Added
Removed
  • branches/libIGCM_PREPOST/AA_job

    r1619 r1652  
    6868#-Q- jeanzay #SBATCH --output=Script_Output_::Jobname::.000001    # standard output 
    6969#-Q- jeanzay #SBATCH --error=Script_Output_::Jobname::.000001    # error output 
    70 #-Q- jeanzay #SBATCH --ntasks=::JobNumProcTot::   # Number of MPI tasks 
    71 #-Q- jeanzay #SBATCH --cpus-per-task=::openMPthreads::  # Number of openMP threads. 
    72 #-Q- jeanzay #SBATCH --hint=nomultithread         # 1 processus MPI par par physical core (no hyperthreading)  
     70#-Q- jeanzay #SBATCH --ntasks=1   # Number of MPI tasks 
     71#-Q- jeanzay #SBATCH --partition=prepost 
    7372#-Q- jeanzay #SBATCH --time=::WallTime::             # Wall clock limit (minutes) 
    7473#-Q- jeanzay #SBATCH --account ::default_project::@cpu 
    7574#-Q- jeanzay 
    76 #-Q- jeanzay ##BATCH_NUM_PROC_TOT=$BRIDGE_SBATCH_NPROC 
    7775#-Q- jeanzay set +x 
    7876#-Q- ada #!/bin/ksh 
     
    157155 
    158156date 
     157#-Q- jeanzay if [ ${job_type} = "postrun" ] ; then 
     158#-Q- jeanzay    echo 
     159#-Q- jeanzay    echo "#######################################" 
     160#-Q- jeanzay    echo "#       START OF POSTRUN     #" 
     161#-Q- jeanzay    echo "#######################################" 
     162#-Q- jeanzay    echo 
     163#-Q- jeanzayelse 
    159164echo 
    160165echo "#######################################" 
     
    162167echo "#######################################" 
    163168echo 
    164  
     169#-Q- jeanzay fi 
    165170#D--------------------------------------------------------------------== 
    166171#D- 
     
    315320#D--------------------------------------------------------------------== 
    316321IGCM_config_CommonConfiguration ${SUBMIT_DIR}/config.card 
    317  
     322#-Q- jeanzay if [ ${job_type} != "postrun" ] ; then 
    318323if [ ! -r ${SUBMIT_DIR}/run.card ] ; then 
    319324  #================================================# 
     
    328333  IGCM_debug_Print 2 "run.card exists" 
    329334fi 
    330  
     335#-Q- jeanzay fi 
    331336# ------------------------------------------------------------------ 
    332337# Activate BigBrother so as to supervise this simulation 
     
    349354# Define, create and cd RUN_DIR 
    350355# --------------------------------------------------------------------== 
    351 RUN_DIR=${RUN_DIR_PATH}/${config_UserChoices_JobName}.${$} 
     356RUN_DIR=${RUN_DIR:=${RUN_DIR_PATH}/${config_UserChoices_JobName}.${$}} 
    352357IGCM_sys_MkdirWork ${RUN_DIR} 
    353358IGCM_sys_Cd ${RUN_DIR} 
     
    431436  IGCM_comp_PeriodStart 
    432437 
     438#-Q- jeanzay  if [ ${job_type} = "prerun" ] ; then 
     439   
    433440  #D- 
    434441  # ------------------------------------------------------------------ 
     
    539546  else 
    540547      if [ ${DRYRUN} -le 1 ] ; then 
     548#-Q- jeanzay      if [ ${job_type} = "prerun" ] ; then 
     549#-Q- jeanzay          IGCM_sys_launch_job_compute 
     550#-Q- jeanzay      fi 
    541551          REAL_DATE_INIT=$( date ) 
    542552          echo                                                                                   > ${Exe_Output} 
     
    577587  fi 
    578588  echo "========================================================================" 
     589  fi 
    579590 
    580591  echo 
  • branches/libIGCM_PREPOST/ins_job

    r1633 r1652  
    119119F_JOB_DEBUG=${libIGCM}'/AA_job_debug'; 
    120120[[ ! -f ${F_JOB_DEBUG} ]] && { print - "${F_JOB_DEBUG} unreachable"; exit 3; } 
     121F_JOB_COMPUTE=${libIGCM}'/AA_job_compute'; 
     122[[ ! -f ${F_JOB_COMPUTE} ]] && { print - "${F_JOB_COMPUTE} unreachable"; exit 3; } 
    121123F_RCI=${libIGCM}'/run.card.init'; 
    122124[[ ! -f ${F_RCI} ]] && { print - "${F_RCI} unreachable"; exit 3; } 
     
    593595  IGCM_sys_updateHeaders ${libIGCM}'/'${n_f} 
    594596 
     597  # File name for Job_compute 
     598  n_f='Job_compute_'${JobName}; 
     599  [[ ${x_v} = 'verbose' ]] && print - "\nWorking with file ${F_CFG}\nin directory ${j}\nfor ${n_f}"; 
     600  sed -e "/^${W_W} */ s///" \ 
     601      -e "/^${W_P}/d"       \ 
     602      -e "s%::modipsl::%${F_MOD}%" \ 
     603      -e "s/::Jobname::/${JobName}/" \ 
     604      -e "s/::default_project::/${ProjectID}/" \ 
     605      -e "s/::WallTime::/${WallTime}/" \ 
     606      ${F_JOB_COMPUTE} > ${libIGCM}'/'${n_f} 
     607  chmod u+x ${libIGCM}'/'${n_f} 
     608 
      609  # update Headers so that resource descriptions are accurate (MPMD/SPMD/...) 
     610  IGCM_sys_updateHeaders ${libIGCM}'/'${n_f} 
     611 
    595612  # File name for Job 
    596613  n_f='Job_'${JobName}; 
     
    620637  [[ ${i_f} = 'AA_job' ]] && { continue; } 
    621638  [[ ${i_f} = 'AA_job_debug' ]]  && { continue; } 
     639  [[ ${i_f} = 'AA_job_compute' ]]  && { continue; } 
    622640  j=${i%/*}; n_f=${i_f#AA_}'.job'; 
    623641  [[ ${x_f} = 'false' ]] && [[ -f ${j}'/'${n_f} ]] && { ins_job_Warning; continue; } 
  • branches/libIGCM_PREPOST/libIGCM_sys/libIGCM_sys_jeanzay.ksh

    r1648 r1652  
    161161typeset RUN_DIR_PATH=${RUN_DIR_PATH:=${SCRATCH}/RUN_DIR/${SLURM_JOBID}_${$}} 
    162162 
     163typeset job_type=${job_type:="prerun"} 
     164 
    163165#==================================================== 
    164166#- OUTCOMMAND_PATH : tmp place to store command lines standard error and outputs 
     
    217219  IGCM_debug_Print 1 "Data project for output to be used: ${DataProject}" 
    218220 
    219 #==================================================== 
    220 #- set OLDARCHIVE to access OLDSTORE  
    221 OLDARCHIVE=/gpfsstore/rech/${DataProject}/${LOGIN} 
    222  
    223 #==================================================== 
    224 #- set TMPARCHIVE to access TMPSTORE  
    225 TMPARCHIVE=/lustre/fsnomig/ipsl/tmpstore/rech/${DataProject}/${LOGIN} 
    226221 
    227222#==================================================== 
     
    462457  typeset options status 
    463458  options="-o ${SUBMIT_DIR}/${Script_Output} -e ${SUBMIT_DIR}/${Script_Output}" 
    464  
    465   /usr/bin/time sbatch ${options} $1 > ${OUTCOMMAND_PATH}/out_command_${LOGIN}.$$ 2>&1 
     459  export job_type="prerun" 
     460  unset RUN_DIR 
     461 
     462  sbatch ${options} $1 > ${OUTCOMMAND_PATH}/out_command_${LOGIN}.$$ 2>&1 
    466463  status=$? 
    467464 
     
    474471  fi 
    475472  IGCM_debug_PopStack "IGCM_sys_Qsub" 
     473} 
     474 
     475#D-#================================================== 
     476#D-function IGCM_sys_Qsub_job_compute 
     477#D-* Purpose: Qsub new computing job 
     478#D-* Examples: 
     479#D- 
     480function IGCM_sys_Qsub_job_compute { 
     481  IGCM_debug_PushStack "IGCM_sys_Qsub_job_compute" $@ 
     482  if ( $DEBUG_sys ) ; then 
     483    echo "IGCM_sys_Qsub_job_compute :" $@ 
     484  fi 
     485  typeset options status 
     486  options="-o ${SUBMIT_DIR}/Script_Output_${config_UserChoices_JobName}.$( printf "%06d" ${CumulPeriod} ) -e ${SUBMIT_DIR}/Script_Output_${config_UserChoices_JobName}.$( printf "%06d" ${CumulPeriod} ) --open-mode=append" 
     487 
     488  export job_type="compute" 
     489  export RUN_DIR=${RUN_DIR} 
     490  export EXPERIMENT=${SUBMIT_DIR} 
     491  export CumulPeriod=${CumulPeriod} 
     492  export FirstInitialize=${FirstInitialize} 
     493  export PREFIX=${PREFIX} 
     494  unset SLURM_HOSTFILE 
     495 
     496  sbatch ${options} $1 > ${OUTCOMMAND_PATH}/out_command_${LOGIN}.$$ 2>&1 
     497  status=$? 
     498 
     499  cat ${OUTCOMMAND_PATH}/out_command_${LOGIN}.$$ 
     500  if [ ${status} -gt 0 ] ; then 
     501    IGCM_debug_Print 2 "IGCM_sys_Qsub ${options} $1 : error code ${status}" 
     502    IGCM_debug_Exit "IGCM_sys_Qsub" 
     503  else 
     504    JobID=$( gawk {'print $4'} ${OUTCOMMAND_PATH}/out_command_${LOGIN}.$$ ) 
     505  fi 
     506  IGCM_debug_PopStack "IGCM_sys_Qsub_job_compute" 
     507} 
     508 
     509#D-#================================================== 
     510#D-function IGCM_sys_Qsub_postrun 
     511#D-* Purpose: Qsub new job 
     512#D-* Examples: 
     513#D- 
     514function IGCM_sys_Qsub_job_postrun { 
     515  IGCM_debug_PushStack "IGCM_sys_Qsub_job_postrun" $@ 
     516  if ( $DEBUG_sys ) ; then 
     517    echo "IGCM_sys_Qsub_job_postrun :" $@ 
     518  fi 
     519  typeset options status 
     520  options="-o ${EXPERIMENT}/Script_Output_${JobName}.$( printf "%06d" ${CumulPeriod} ) -e ${EXPERIMENT}/Script_Output_${JobName}.$( printf "%06d" ${CumulPeriod} ) --open-mode=append" 
     521 
     522  export job_type="postrun" 
     523  export RUN_DIR=${RUN_DIR} 
     524  export ExecutionFail=${ExecutionFail} 
     525  export FirstInitialize=${FirstInitialize} 
     526  export executionType=${executionType} 
     527  unset SLURM_HOSTFILE 
     528 
     529  sbatch ${options} $1 > ${OUTCOMMAND_PATH}/out_command_${LOGIN}.$$ 2>&1 
     530  status=$? 
     531 
     532  cat ${OUTCOMMAND_PATH}/out_command_${LOGIN}.$$ 
     533  if [ ${status} -gt 0 ] ; then 
     534    IGCM_debug_Print 2 "IGCM_sys_Qsub ${options} $1 : error code ${status}" 
     535    IGCM_debug_Exit "IGCM_sys_Qsub" 
     536  else 
     537    JobID=$( gawk {'print $4'} ${OUTCOMMAND_PATH}/out_command_${LOGIN}.$$ ) 
     538  fi 
     539  IGCM_debug_PopStack "IGCM_sys_Qsub_job_postrun" 
    476540} 
    477541 
     
    11561220 
    11571221  ulimit -s unlimited 
    1158   if [ ${executionType} -eq 2 ] ; then 
     1222  if ( [ ${executionType} == 2 ] && [ ${job_type} == "compute" ]) ; then 
    11591223    export SLURM_HOSTFILE=./hostlist 
    11601224  fi 
     
    14431507             (( index_host = current_core / NB_CORE_PER_NODE )) 
    14441508             host_value=${listnodes[${index_host}]} 
     1509             if [ ${job_type} == "prerun" ] ; then 
     1510                 echo "node_${index_host}_X" >> hostlist_template 
     1511             else 
    14451512             echo "$host_value" >> hostlist 
    1446              if [ ${DRYRUN_DEBUG} = 4 ] ; then 
    1447                  echo "node_${index_host}_X" >> hostlist_template 
    14481513             fi 
    14491514             (( current_core = current_core + offset_comp_proc_loc )) 
     
    18311896} 
    18321897 
     1898#  
      1899#D-#================================================== 
      1900#D-function IGCM_sys_launch_job_compute 
      1901#D-* Purpose: launch the computing part of the simulation on computing partition. Requires ${libIGCM}/Job_compute_<JobName>; generates Job_debug_<JobName> in RUN_DIR from its template, copies ${EnvFile} there, submits Job_compute_<JobName> through IGCM_sys_Qsub_job_compute and then exits the calling job. 
      1902#D-* Examples: IGCM_sys_launch_job_compute   (takes no argument) 
      1903#D- 
      1904function IGCM_sys_launch_job_compute { 
      1905  IGCM_debug_PushStack "IGCM_sys_launch_job_compute" 
      1906  if ( $DEBUG_sys ) ; then 
      1907    echo "IGCM_sys_launch_job_compute :" 
      1908  fi 
      1909  if [ -f ${libIGCM}/Job_compute_${config_UserChoices_JobName} ] ; then 
      1910 
      1911   SUBMIT_DIRECTORY=$(grep SUBMIT_DIR ${libIGCM}/libIGCM_sys/libIGCM_sys_${SYSTEM}.ksh | grep -m1 typeset | cut -dx -f2)   # NOTE(review): not referenced again inside this function - confirm it is still needed 
      1912      sed -e "s%::EXECUTION::%${EXECUTION}%" \ 
      1913          -e "s%::EXECUTION_TYPE::%${executionType}%" \ 
      1914          -e "s%::JOBNAME::%${config_UserChoices_JobName}%" \ 
      1915          ${libIGCM}/Job_debug_${config_UserChoices_JobName} > ${RUN_DIR}/Job_debug_${config_UserChoices_JobName}   # NOTE(review): this instantiates Job_debug_*, while the script submitted below is Job_compute_* - confirm Job_compute_* is reachable from RUN_DIR 
      1916      cp ${EnvFile} ${RUN_DIR}/. 
      1917      cd ${RUN_DIR} ; IGCM_sys_Qsub_job_compute Job_compute_${config_UserChoices_JobName} 
      1918      IGCM_debug_Verif_Exit 
      1919      echo 
      1920      echo "############################################" 
      1921      echo "#    END OF PRERUN STEP   #" 
      1922      echo "############################################" 
      1923      echo 
      1924      exit   # the calling job ends here; the PopStack call at the end of the function is not reached on this path 
      1925  else 
      1926      IGCM_debug_Exit "ERROR with computing job !"   # NOTE(review): Exit is called before the explanatory Print below - confirm the intended order 
      1927      IGCM_debug_Print 2 "Computing job is missing" 
      1928      IGCM_debug_Verif_Exit 
      1929  fi 
      1930  
      1931  IGCM_debug_PopStack "IGCM_sys_launch_job_compute" 
      1932} 
Note: See TracChangeset for help on using the changeset viewer.