#!/bin/ksh

#**************************************************************
# Author: Sebastien Denvil, Martial Mancip, Arnaud Caubel
# Contact: Arnaud.Caubel__at__lsce.ipsl.fr
# $Revision:: $ Revision of last commit
# $Author:: $ Author of last commit
# $Date:: $ Date of last commit
# IPSL (2006)
#  This software is governed by the CeCILL licence see libIGCM/libIGCM_CeCILL.LIC
#
#**************************************************************

#=========================================================
# The documentation of this file can be automatically generated
# if you use the prefix #D- for comments to be extracted.
# Extract with command: cat lib* | grep "^#D-" | cut -c "4-"
#=========================================================

#D-#==================================================
#D-LibIGCM_sys for Jean-Zay
#D-#==================================================
#D-
#D- This ksh library is a layer on top of some useful
#D-environment variables and shell commands.
#D-All those definitions depend on host particularities.
#D-It manages a stack mechanism and tests the validity of operations.
#D-All functions described below must be prefixed by IGCM_sys.

#====================================================
# libIGCM_sys PARAMETERS
#====================================================

#====================================================
# set DEBUG_sys to true to output calls of function
typeset -r DEBUG_sys=${DEBUG_sys:=true}

#====================================================
# Turn in dry run mode ? (sys_Put_Rest, sys_Put_Out, sys_Get)
typeset -r DRYRUN=${DRYRUN:=0}

# YOU MUST COMPILE YOUR EXE FILES FOR DRYRUN MODE !
# -------------------------------------------------------------------------------------
# | DRYRUN=  |  Date computations, | sys_Get    |  Exe    | sys_Put_Out; sys_Put_Rest |
# |          |  Cp/Exe/param/files |            |         |                           |
# |          |  Chmod Qsub         |            |         |                           |
# -------------------------------------------------------------------------------------
# |    0     |       yes           |    yes     |  yes    |      yes                  |
# -------------------------------------------------------------------------------------
# |    1     |       yes           |    yes     |  yes    |      no                   |
# -------------------------------------------------------------------------------------
# |    2     |       yes           |    yes     |  no     |      no                   |
# -------------------------------------------------------------------------------------
# |    3     |       yes           |    no      |  no     |      no                   |
# -------------------------------------------------------------------------------------

#=====================================================
# Global Variables :
#=====================================================
# Language : "fr" or "en"
typeset -r MYLANG="fr"

#=====================================================
# Host user names project maxCpuTime
# $hostname ou hostname
typeset HOST=${HOST:=$( hostname )}
# $username ou whoami
typeset LOGIN=${LOGIN:=$( whoami )}
# $hostname of the MASTER job
typeset MASTER=jeanzay
# add default project set by ins_job
# (only the first three characters of the account name are kept)
typeset PROJECT=$( echo ${SLURM_JOB_ACCOUNT:=::default_project::} | cut -c "1-3" )
### jobWarningDelay in seconds
##typeset jobWarningDelay=${BRIDGE_MSUB_MAXTIME}

#D-
#D-#==================================================
#D-Program used in libIGCM
#D-#==================================================

# Submit command
typeset SUBMIT=${SUBMIT:=sbatch}
# rsync with path
typeset -r RSYNC=/usr/bin/rsync
# RSYNC_opt args to rsync
typeset -r RSYNC_opt="-va"
# ie storage filesystem
typeset -r STOREHOST=${MASTER}
typeset -r REMOTE_RSYNC=/usr/bin/rsync

#====================================================
# Access to module command
#====================================================
# See ticket #348
# Following path should be used according to assist IDRIS
. /gpfslocalsup/spack_soft/environment-modules/current/init/ksh
# Following old path gives error messages while loading modules but it still works
#. /usr/share/Modules/init/ksh

#====================================================
# Set environment tools (ferret, nco, cdo, rebuild, ...)
#====================================================
if [ X${TaskType} = Xcomputing ] ; then
  IGCM_debug_Print 1 "Modules will be loaded later in IGCM_sys_activ_variables."
else
  module purge > /dev/null 2>&1

  . /gpfswork/rech/psl/commun/MachineEnvironment/jeanzay/env_jeanzay > /dev/null 2>&1
  . /gpfswork/rech/psl/commun/MachineEnvironment/jeanzay/env_atlas_jeanzay > /dev/null 2>&1
#  export PCMDI_MP=/ccc/work/cont003/igcmg/igcmg/PCMDI-MP
  export UVCDAT_ANONYMOUS_LOG=FALSE
fi

# FYI
[ ! X${TaskType} = Xchecking ] && IGCM_debug_Print 1 "List of loaded modules:"
[ ! X${TaskType} = Xchecking ] && module list

# For AddNoise (not yet AddPertu3DOCE)
export PATH=${PATH}:/gpfswork/rech/psl/commun/Tools/AddNoise/bin
# For rebuild
export PATH=${PATH}:/gpfswork/rech/psl/commun/Tools/rebuild/modipsl_IOIPSL_PLUS_v2_2_4/bin

#====================================================
# Host specific DIRECTORIES
#====================================================

#====================================================
#- MirrorlibIGCM for frontend
typeset -r MirrorlibIGCM=${MirrorlibIGCM:=false}

#====================================================
#- libIGCM_POST for frontend
typeset -r libIGCM_POST=${libIGCM}

#====================================================
#- R_EXE   (==> BIN_DIR = ${MODIPSL}/bin )
typeset -r R_EXE="${MODIPSL}/bin"

#====================================================
#- SUBMIT_DIR : submission dir
typeset -x SUBMIT_DIR=${SUBMIT_DIR:=${SLURM_SUBMIT_DIR}}

#====================================================
#- IN
typeset -r R_IN=${R_IN:=/gpfswork/rech/psl/commun/IGCM}

#====================================================
#- RUN_DIR_PATH : Temporary working directory (=> TMP)
typeset -r RUN_DIR_PATH=${RUN_DIR_PATH:=${SCRATCH}/RUN_DIR/${SLURM_JOBID}_${$}}

#====================================================
#- OUTCOMMAND_PATH : tmp place to store command lines standard error and outputs
typeset -r OUTCOMMAND_PATH=/tmp

#====================================================
#- HOST_MPIRUN_COMMAND
typeset -r HOST_MPIRUN_COMMAND=${HOST_MPIRUN_COMMAND:="/usr/bin/time srun "}

#====================================================
#- Max number of arguments passed to nco operator or demigration command
UNIX_MAX_LIMIT=360

#====================================================
#- set PackDefault to true
PackDefault=true

#====================================================
#- Default number of MPI task for IPSL coupled model
#- required for backward compatibility
#-
DEFAULT_NUM_PROC_OCE=5
DEFAULT_NUM_PROC_CPL=1
(( DEFAULT_NUM_PROC_ATM = BATCH_NUM_PROC_TOT - DEFAULT_NUM_PROC_OCE - DEFAULT_NUM_PROC_CPL ))

#====================================================
#- Number of computing cores per node
#-
NB_CORE_PER_NODE=40

#D-#==================================================
#D-function IGCM_sys_defineArchives
#D-* Purpose:
#D-* Load dfldatadir module : According to project used for submission (default) or set in config.card (optional)
#D-* Define ARCHIVE : Dedicated to large files
#D-* Define STORAGE : Dedicated to small/medium files
#D-* Define R_OUT   : Output tree located on ARCHIVE
#D-* Define R_FIG   : Output tree located on STORAGE hosting figures (monitoring and atlas, and/or small files)
#D-* Define R_BUF   : Output tree located on SCRATCH hosting files waiting for rebuild or pack processes
#D-* if SpaceName=TEST everything is stored on SCRATCH
#D-* Examples:
#D-
function IGCM_sys_defineArchives {
  IGCM_debug_PushStack "IGCM_sys_defineArchives"

  # Select the data project: the one used for submission (default) or the one
  # set in config.card (optional).
  # The right-hand side carries the same "X" guard prefix as the left-hand
  # side; without it the DEFAULT keyword could never match.
  if [ X${config_UserChoices_DataProject} = X ] || [ X${config_UserChoices_DataProject} = XDEFAULT ]; then
    # The variable DataProject is not set in section UserChoices in config.card or it is set to DEFAULT
    # Default option: Set DataProject according to the project used for submission
    DataProject=${PROJECT}
  else
    # Use the project set by the variable DataProject in section UserChoices in config.card
    IGCM_debug_Print 1 "Use DataProject as set in config.card : ${config_UserChoices_DataProject}"
    DataProject=${config_UserChoices_DataProject}
  fi
  IGCM_debug_Print 1 "Data project for output to be used: ${DataProject}"

  #====================================================
  #- RUN_DIR_PATH : Temporary working directory (=> TMP)
  #- DataProject = for is the data project for Training accounts at IDRIS;
  #- they are not stored in rech but in another partition.
  # NOTE(review): RUN_DIR_PATH is already declared read-only at file level;
  # this typeset presumably only creates a function-local copy under ksh93
  # and has no effect on the global value -- confirm the intent.
  if [ X${DataProject} = Xfor ]; then
    typeset -r RUN_DIR_PATH=$SCRATCH/RUN_DIR/${SLURM_JOBID}_${$}
  else
    typeset -r RUN_DIR_PATH=${RUN_DIR_PATH:=/gpfsscratch/rech/${DataProject}/${LOGIN}/RUN_DIR/${SLURM_JOBID}_${$}}
  fi

  if [ ! X${config_UserChoices_ARCHIVE} = X ]; then
    #====================================================
    #- ARCHIVE (dedicated to large files)
    ARCHIVE=${config_UserChoices_ARCHIVE}
  else
    if [ X${DataProject} = Xfor ]; then
      ARCHIVE=$STORE
    else
      #====================================================
      #- ARCHIVE (dedicated to large files)
      ARCHIVE=/gpfsstore/rech/${DataProject}/${LOGIN}
    fi
  fi

  if [ ! X${config_UserChoices_STORAGE} = X ]; then
    #====================================================
    #- STORAGE (dedicated to small/medium files)
    STORAGE=${config_UserChoices_STORAGE}
  else
    #====================================================
    #- STORAGE (dedicated to small/medium files)
    if [ X${DataProject} = Xfor ]; then
      STORAGE=$WORK
    else
      STORAGE=/gpfswork/rech/${DataProject}/${LOGIN}
    fi
  fi

  if [ X${config_UserChoices_SpaceName} = XTEST ]; then
    #====================================================
    #- R_OUT
    if [ X${DataProject} = Xfor ]; then
      R_OUT=$SCRATCH/IGCM_OUT
    else
      R_OUT=/gpfsscratch/rech/${DataProject}/${LOGIN}/IGCM_OUT
    fi
    #====================================================
    #- R_FIG (hosting figures : monitoring and atlas, and/or small files)
    R_FIG=${R_OUT}

    IGCM_debug_Print 1 "SpaceName=TEST ==> OVERRULE destination path directories"

  else
    #====================================================
    #- R_OUT
    R_OUT=${ARCHIVE}/IGCM_OUT

    #====================================================
    #- R_FIG (hosting figures : monitoring and atlas, and/or small files)
    R_FIG=${STORAGE}/IGCM_OUT
  fi

  #====================================================
  #- CMIP6 (hosting CMIP6 files produced by XIOS2 and configured by dr2xml)
  CMIP6_BUF=${STORAGE}/IGCM_OUT

  #====================================================
  #- R_BUF (ONLY FOR double copy on scratch)
  if [ X${DataProject} = Xfor ]; then
    R_BUF=$SCRATCH/IGCM_OUT
  else
    R_BUF=/gpfsscratch/rech/${DataProject}/${LOGIN}/IGCM_OUT
  fi

  IGCM_debug_Print 1 "R_OUT has been defined = ${R_OUT}"
  IGCM_debug_Print 1 "R_BUF has been defined = ${R_BUF}"
  IGCM_debug_Print 1 "R_FIG has been defined = ${R_FIG}"

  IGCM_debug_PopStack "IGCM_sys_defineArchives"
}

#D-#==================================================
#D-function IGCM_sys_RshArchive
#D-* Purpose: Archive rsh command
#D-* Examples:
#D-
function IGCM_sys_RshArchive {
  IGCM_debug_PushStack "IGCM_sys_RshArchive" $@
  # The arguments are run as a command line by a fresh local ksh.
  # The here-document terminator must stay in column 0 (<<- strips tabs only).
  /bin/ksh <<-EOF
    ${@}
EOF
  status=$?
  if [ ${status} -gt 0 ] ; then
    IGCM_debug_Print 2 "IGCM_sys_RshArchive : command failed error code ${status}"
    IGCM_debug_Exit "IGCM_sys_RshArchive"
  fi
  IGCM_debug_PopStack "IGCM_sys_RshArchive"
}

#D-#==================================================
#D-function IGCM_sys_RshArchive_NoError
#D-* Purpose: Archive rsh command, without error
#D-*          used only in monitoring.job
#D-* Examples:
#D-
function IGCM_sys_RshArchive_NoError {
  IGCM_debug_PushStack "IGCM_sys_RshArchive_NoError" $@
  # Same as IGCM_sys_RshArchive, but stderr is discarded and the exit status
  # of the command is not checked.
  /bin/ksh <<-EOF
    ${@} 2> /dev/null
EOF
  IGCM_debug_PopStack "IGCM_sys_RshArchive_NoError"
}

#D-#==================================================
#D-function IGCM_sys_MkdirArchive
#D-* Purpose: Mkdir on Archive
#D-* Examples:
#D-
function IGCM_sys_MkdirArchive {
  IGCM_debug_PushStack "IGCM_sys_MkdirArchive" $@
  if ( $DEBUG_sys ) ; then
    echo "IGCM_sys_MkdirArchive :" $@
  fi
  #- create the directory on the file server (only when it does not exist yet)
  if [ ! -d ${1} ]; then
    \mkdir -p $1
    status=$?

    if [ ${status} -gt 0 ] ; then
      IGCM_debug_Print 2 "IGCM_sys_MkdirArchive : mkdir failed error code ${status}"
      IGCM_debug_Exit "IGCM_sys_MkdirArchive"
    fi
  fi
  IGCM_debug_PopStack "IGCM_sys_MkdirArchive"
}

#D-#==================================================
#D-function IGCM_sys_TestDirArchive
#D-* Purpose: Test Directory that must exists on Archive
#D-*          Returns 0 when $1 is a directory, 1 otherwise
#D-* Examples:
#D-
function IGCM_sys_TestDirArchive {
  IGCM_debug_PushStack "IGCM_sys_TestDirArchive" $@
  if ( $DEBUG_sys ) ; then
    echo "IGCM_sys_TestDirArchive :" $@
  fi
  typeset ExistFlag
  ExistFlag=$( [ -d $1 ] && echo 0 || echo 1 )
  IGCM_debug_PopStack "IGCM_sys_TestDirArchive"

  return ${ExistFlag}
}

#D-#==================================================
#D-function IGCM_sys_IsFileArchived
#D-* Purpose: Test file that must NOT EXISTS on Archive based on filename only
#D-* Examples:
#D-
function IGCM_sys_IsFileArchived {
  IGCM_debug_PushStack "IGCM_sys_IsFileArchived" $@
  if ( $DEBUG_sys ) ; then
    echo "IGCM_sys_IsFileArchived :" $@
  fi
  typeset IsArchivedFlag
  # To be implemented on Jean Zay
  # NOTE(review): the pattern below still matches a /ccc/store prefix, so the
  # flag is 1 for every path that does not start with it -- confirm that this
  # statement is meant to be active on Jean Zay.
  IsArchivedFlag=$( [ X$( echo $@ | grep ^\/ccc\/store ) != X ] && echo 0 || echo 1 )
  IGCM_debug_PopStack "IGCM_sys_IsFileArchived"

  return ${IsArchivedFlag}
}

#D-#==================================================
#D-function IGCM_sys_TestFileArchive
#D-* Purpose: Test file that must NOT EXISTS on Archive
#D-*          Returns 0 when $1 is a regular file, 1 otherwise
#D-* Examples:
#D-
function IGCM_sys_TestFileArchive {
  IGCM_debug_PushStack "IGCM_sys_TestFileArchive" $@
  typeset ExistFlag
  ExistFlag=$( [ -f $1 ] && echo 0 || echo 1 )
  IGCM_debug_PopStack "IGCM_sys_TestFileArchive"

  return ${ExistFlag}
}

#D-#==================================================
#D-function IGCM_sys_CountFileArchive
#D-* Purpose: Count files on Archive filesystem
#D-*          The count is written on standard output
#D-* Examples:
#D-
function IGCM_sys_CountFileArchive {
  IGCM_debug_PushStack "IGCM_sys_CountFileArchive" $@
  ls ${@} 2>/dev/null | wc -l
  # $? is the status of wc, the last command of the pipeline
  if [ $? -gt 0 ] ; then
    echo "IGCM_sys_CountFileArchive : erreur."
  fi
  IGCM_debug_PopStack "IGCM_sys_CountFileArchive"
}

#D-#==================================================
#D-function IGCM_sys_Tree
#D-* Purpose: Tree directories with files on ${ARCHIVE}
#D-* Examples: IGCM_sys_Tree ${R_IN} ${R_OUT}
#D-
function IGCM_sys_Tree {
  IGCM_debug_PushStack "IGCM_sys_Tree" $@
  if ( $DEBUG_sys ) ; then
    echo "IGCM_sys_Tree :" $@
  fi

  \ls -lR ${@}

  IGCM_debug_PopStack "IGCM_sys_Tree"
}

#D-#==================================================
#D-function IGCM_sys_Qsub
#D-* Purpose: Qsub new job
#D-*          On success the submitted job number is stored in JobID
#D-* Examples:
#D-
function IGCM_sys_Qsub {
  IGCM_debug_PushStack "IGCM_sys_Qsub" $@
  if ( $DEBUG_sys ) ; then
    echo "IGCM_sys_Qsub :" $@
  fi
  typeset options status

  options="-o ${SUBMIT_DIR}/${Script_Output} -e ${SUBMIT_DIR}/${Script_Output}"
  /usr/bin/time sbatch ${options} $1 > ${OUTCOMMAND_PATH}/out_command_${LOGIN}.$$ 2>&1
  status=$?

  cat ${OUTCOMMAND_PATH}/out_command_${LOGIN}.$$
  if [ ${status} -gt 0 ] ; then
    IGCM_debug_Print 2 "IGCM_sys_Qsub ${options} $1 : error code ${status}"
    IGCM_debug_Exit "IGCM_sys_Qsub"
  else
    # The captured file also holds the /usr/bin/time report, so only the
    # "Submitted batch job <id>" line may contribute to JobID.
    JobID=$( gawk '/Submitted batch job/ {print $4}' ${OUTCOMMAND_PATH}/out_command_${LOGIN}.$$ )
  fi
  IGCM_debug_PopStack "IGCM_sys_Qsub"
}

#D-#==================================================
#D-function IGCM_sys_QsubPost
#D-* Purpose: Qsub new job on scalaire
#D-*          On success the submitted job number is stored in JobID
#D-* Examples:
#D-
function IGCM_sys_QsubPost {
  IGCM_debug_PushStack "IGCM_sys_QsubPost" $@
  # Submission is done from SUBMIT_DIR; the previous directory is restored at the end
  BAK_DIR=$PWD
  cd ${SUBMIT_DIR}
  if ( $DEBUG_sys ) ; then
    echo "IGCM_sys_QsubPost :" $@
  fi
  typeset options status

  options="-o ${POST_DIR}/${Script_Post_Output}.out -e ${POST_DIR}/${Script_Post_Output}.out"
  sbatch ${options} ${libIGCM_POST}/$1.job > ${OUTCOMMAND_PATH}/out_command_${LOGIN}.$$ 2>&1
  status=$?

  cat ${OUTCOMMAND_PATH}/out_command_${LOGIN}.$$
  if [ ${status} -gt 0 ] ; then
    IGCM_debug_Print 2 "IGCM_sys_QsubPost ${options} ${libIGCM_POST}/$1.job : error code ${status}"
    IGCM_debug_Exit "IGCM_sys_QsubPost"
  else
    # Only the "Submitted batch job <id>" line may contribute to JobID
    JobID=$( gawk '/Submitted batch job/ {print $4}' ${OUTCOMMAND_PATH}/out_command_${LOGIN}.$$ )
  fi
  cd ${BAK_DIR}
  IGCM_debug_PopStack "IGCM_sys_QsubPost"
}

#D-*************************
#D- File transfer functions
#D-*************************
#D-

#D-#==================================================
#D-function IGCM_sys_RmRunDir
#D-* Purpose: rm tmpdir (dummy function most of the time batch
#D-                      scheduler will do the job)
#D-* Examples:
#D-
function IGCM_sys_RmRunDir {
  IGCM_debug_PushStack "IGCM_sys_RmRunDir" $@
  if ( $DEBUG_sys ) ; then
    echo "IGCM_sys_RmRunDir :" $@
  fi

  typeset status

  # The command line and its output are kept so they can be shown on failure
  echo rm $@ > ${OUTCOMMAND_PATH}/out_command_${LOGIN}.$$ 2>&1
  \rm $@ >> ${OUTCOMMAND_PATH}/out_command_${LOGIN}.$$ 2>&1
  status=$?

  if [ ${status} -gt 0 ] ; then
    IGCM_debug_Print 1 "IGCM_sys_RmRunDir : rm error code is ${status}."
    cat ${OUTCOMMAND_PATH}/out_command_${LOGIN}.$$
    IGCM_debug_Exit "IGCM_sys_RmRunDir"
  else
    \rm ${OUTCOMMAND_PATH}/out_command_${LOGIN}.$$
  fi
  IGCM_debug_PopStack "IGCM_sys_RmRunDir"
}

#D-#==================================================
#D-function IGCM_sys_Put_Dir
#D-* Purpose: Copy a complete directory on $(ARCHIVE)
#D-*          Nothing is done unless DRYRUN is 0
#D-* Examples:
#D-
function IGCM_sys_Put_Dir {
  IGCM_debug_PushStack "IGCM_sys_Put_Dir" $@
  if ( $DEBUG_sys ) ; then
    echo "IGCM_sys_Put_Dir :" $@
  fi
  if [ $DRYRUN = 0 ]; then
    if [ ! -d ${1} ] ; then
      echo "WARNING : IGCM_sys_Put_Dir ${1} DOES NOT EXIST ."
      IGCM_debug_PopStack "IGCM_sys_Put_Dir"
      return
    fi

    typeset status

    # Only if we use rsync
    #IGCM_sys_TestDirArchive $( dirname $2 )
    #
    #USUAL WAY
    \cp -r $1 $2 > ${OUTCOMMAND_PATH}/out_command_${LOGIN}.$$ 2>&1
    status=$?

    if [ ${status} -gt 0 ] ; then
      IGCM_debug_Print 2 "IGCM_sys_Put_Dir : cp failed error code ${status}"
      cat ${OUTCOMMAND_PATH}/out_command_${LOGIN}.$$
      IGCM_debug_Exit "IGCM_sys_Put_Dir"
    else
      \rm ${OUTCOMMAND_PATH}/out_command_${LOGIN}.$$
    fi
  fi
  IGCM_debug_PopStack "IGCM_sys_Put_Dir"
}

#D-#==================================================
#D-function IGCM_sys_Get_Dir
#D-* Purpose: Copy a complete directory from ${ARCHIVE}
#D-*          The copy is attempted up to 3 times; nothing is done unless DRYRUN is 0
#D-* Examples:
#D-
function IGCM_sys_Get_Dir {
  IGCM_debug_PushStack "IGCM_sys_Get_Dir" $@
  if ( $DEBUG_sys ) ; then
    echo "IGCM_sys_Get_Dir :" $@
  fi
  if [ $DRYRUN = 0 ]; then
    typeset NB_ESSAI DELAI status i
    # number of attempts
    NB_ESSAI=3
    # time delay between attempts (seconds)
    DELAI=2

    #
    # USUAL WAY
    # add here command to demigrate all offline files to reduce time of this command
    # currently on Jean Zay files are not archived on tapes
    i=0
    while [ $i -lt $NB_ESSAI ] ; do
      \cp -ur $1 $2 >> ${OUTCOMMAND_PATH}/out_command_${LOGIN}.$$ 2>&1
      status=$?
      if [ ${status} -gt 0 ] ; then
        IGCM_debug_Print 2 "IGCM_sys_Get_Dir : cp failed error code ${status} ${i}/${NB_ESSAI}"
        IGCM_debug_Print 2 "IGCM_sys_Get_Dir : sleep ${DELAI} seconds and try again."
        sleep $DELAI
      else
        break
      fi
      (( i = i + 1 ))
    done

    if [ ${status} -gt 0 ] ; then
      IGCM_debug_Print 2 "IGCM_sys_Get_Dir : cp failed error code ${status}"
      cat ${OUTCOMMAND_PATH}/out_command_${LOGIN}.$$
      IGCM_debug_Exit "IGCM_sys_Get_Dir"
    else
      \rm ${OUTCOMMAND_PATH}/out_command_${LOGIN}.$$
    fi
  fi
  IGCM_debug_PopStack "IGCM_sys_Get_Dir"
}

#D-#==================================================
#D-function IGCM_sys_Put_Rest
#D-* Purpose: Put computed restarts on ${ARCHIVE}.
#D-           File and target directory must exist.
#D-* Examples:
#D-
function IGCM_sys_Put_Rest {
  IGCM_debug_PushStack "IGCM_sys_Put_Rest" $@
  if ( $DEBUG_sys ) ; then
    echo "IGCM_sys_Put_Rest :" $@
  fi
  if [ $DRYRUN = 0 ]; then
    if [ ! -f ${1} ] ; then
      echo "ERROR : IGCM_sys_Put_Rest ${1} DOES NOT EXIST ."
      IGCM_debug_Exit "IGCM_sys_Put_Rest"
    fi

    typeset status
    #
    # USUAL WAY
    \cp $1 $2 > ${OUTCOMMAND_PATH}/out_command_${LOGIN}.$$ 2>&1
    status=$?

#       #RSYNC WITH NETWORK SSH CALL
#       echo ${RSYNC} ${RSYNC_opt} -e ssh ${RUN_DIR}/$1 ${STOREHOST}:${2} > ${OUTCOMMAND_PATH}/out_command_${LOGIN}.$$ 2>&1
#       ${RSYNC} ${RSYNC_opt} -e ssh ${RUN_DIR}/$1 ${STOREHOST}:${2} >> ${OUTCOMMAND_PATH}/out_command_${LOGIN}.$$ 2>&1

#       #RSYNC WITH NFS USE
#       echo ${RSYNC} ${RSYNC_opt} ${RUN_DIR}/$1 ${2} > ${OUTCOMMAND_PATH}/out_command_${LOGIN}.$$ 2>&1
#       ${RSYNC} ${RSYNC_opt} ${RUN_DIR}/$1 ${2} >> ${OUTCOMMAND_PATH}/out_command_${LOGIN}.$$ 2>&1

#       status=$?
#       IGCM_sys_Rsync_out $status

#       ${libIGCM}/libIGCM_sys/IGCM_analyse_rsync_out.awk ${OUTCOMMAND_PATH}/out_command_${LOGIN}.$$
#       (( status=status+$? ))

    if [ ${status} -gt 0 ] ; then
      IGCM_debug_Print 2 "IGCM_sys_Put_Rest : cp failed error code ${status}"
      [ -f ${1} ] && ls -l ${1}
      [ -f ${2} ] && ls -l ${2}
      [ -f ${2}/${1} ] && ls -l ${2}/${1}
      cat ${OUTCOMMAND_PATH}/out_command_${LOGIN}.$$
      IGCM_debug_Exit "IGCM_sys_Put_Rest"
    else

      # In a RUN job the archived copy is made read-only
      if [ X${JobType} = XRUN ] ; then
        [ -f ${2} ] && IGCM_sys_Chmod 444 ${2}
        [ -f ${2}/${1} ] && IGCM_sys_Chmod 444 ${2}/${1}
      fi

      \rm ${OUTCOMMAND_PATH}/out_command_${LOGIN}.$$
    fi
  fi
  IGCM_debug_PopStack "IGCM_sys_Put_Rest"
}

#D-#==================================================
#D-function IGCM_sys_Put_Out
#D-* Purpose: Copy a file on ${ARCHIVE} after having chmod it in readonly
#D-*          $1 source file, $2 destination, $3 optional: NOMOVE forces a copy
#D-*          Returns 1 when the source file does not exist, 0 otherwise
#D-* Examples:
#D-
function IGCM_sys_Put_Out {
  IGCM_debug_PushStack "IGCM_sys_Put_Out" $@
  if ( $DEBUG_sys ) ; then
    echo "IGCM_sys_Put_Out :" $@
  fi

  typeset NB_ESSAI DELAI status i exist skip
  typeset fileDeviceNumberInHex directoryDeviceNumberInHex

  # number of attempts
  NB_ESSAI=3
  # time delay between attempts (seconds)
  DELAI=2
  # status must be defined even when the copy is skipped, because it is
  # tested unconditionally after the retry loop
  status=0

  if [ $DRYRUN = 0 ]; then
    if [ ! -f ${1} ] ; then
      echo "WARNING : IGCM_sys_Put_Out ${1} DOES NOT EXIST ."
      IGCM_debug_PopStack "IGCM_sys_Put_Out"
      return 1
    fi
    # NOTE(review): taken as an active statement (the stat on the destination
    # directory below needs that directory to exist) -- confirm.
    IGCM_sys_MkdirArchive $( dirname $2 )
    #
    exist=false
    skip=false
    if [ -f $2 ] ; then
      IGCM_debug_Print 1 "$2 already exist"
      # add here command to demigrate all offline files to reduce time of this command
      # currently on Jean Zay files are not archived on tapes
      exist=true
      # cmp -s succeeds only for identical content; a read error counts as
      # "different" so that the copy is attempted
      if cmp -s $1 $2 ; then
        IGCM_debug_Print 2 "$1 and $2 are the same file, we skip the copy"
        skip=true
      else
        IGCM_debug_Print 2 "$1 and $2 are not the same file, we force the copy"
        skip=false
      fi
    fi
    # An existing destination may be read-only: make it writable before overwriting
    if ( [ X${exist} = Xtrue ] && [ X${skip} = Xfalse ] ) ; then
      IGCM_sys_Chmod u+w $2
    fi

    if [ X${skip} = Xfalse ] ; then
      i=0
      while [ $i -lt $NB_ESSAI ] ; do
        # Identify file system
        fileDeviceNumberInHex=$( stat -c %d $1 )
        status=$?
        if [ ${status} -gt 0 ] ; then
          IGCM_debug_Exit "IGCM_sys_Put_Out"
        fi
        # Identify file system
        directoryDeviceNumberInHex=$( stat -c %d $( dirname $2 ) )
        status=$?
        if [ ${status} -gt 0 ] ; then
          IGCM_debug_Exit "IGCM_sys_Put_Out"
        fi

        if ( [ ${fileDeviceNumberInHex} -ne ${directoryDeviceNumberInHex} ] || [ X$3 = XNOMOVE ] ) ; then
          # They are not on the same device. USUAL WAY
          \cp $1 $2 > ${OUTCOMMAND_PATH}/out_command_${LOGIN}.$$ 2>&1
          status=$?
        else
          # They are on the same device. NOT SO USUAL WAY
          \mv $1 $2 > ${OUTCOMMAND_PATH}/out_command_${LOGIN}.$$ 2>&1
          status=$?
        fi
        if [ ${status} -gt 0 ]; then
          IGCM_debug_Print 2 "IGCM_sys_Put_Out : cp failed error code ${status} ${i}/${NB_ESSAI}"
          IGCM_debug_Print 2 "IGCM_sys_Put_Out : sleep ${DELAI} seconds and try again."
          [ -f ${1} ] && ls -l ${1}
          [ -f ${2} ] && ls -l ${2}
          [ -f ${2}/${1} ] && ls -l ${2}/${1}
          sleep $DELAI
        else
          break
        fi
        (( i = i + 1 ))
      done
    fi

#       #RSYNC WITH NETWORK SSH CALL
#       echo ${RSYNC} ${RSYNC_opt} -e ssh ${RUN_DIR}/$1 ${STOREHOST}:${2} > ${OUTCOMMAND_PATH}/out_command_${LOGIN}.$$ 2>&1
#       ${RSYNC} ${RSYNC_opt} -e ssh ${RUN_DIR}/$1 ${STOREHOST}:${2} >> ${OUTCOMMAND_PATH}/out_command_${LOGIN}.$$ 2>&1

#       #RSYNC WITH NFS USE
#       echo ${RSYNC} ${RSYNC_opt} ${RUN_DIR}/$1 ${2} > ${OUTCOMMAND_PATH}/out_command_${LOGIN}.$$ 2>&1
#       ${RSYNC} ${RSYNC_opt} ${RUN_DIR}/$1 ${2} >> ${OUTCOMMAND_PATH}/out_command_${LOGIN}.$$ 2>&1

#       status=$?
#       IGCM_sys_Rsync_out $status

#       ${libIGCM}/libIGCM_sys/IGCM_analyse_rsync_out.awk ${OUTCOMMAND_PATH}/out_command_${LOGIN}.$$
#       (( status=status+$? ))

    if [ ${status} -gt 0 ] ; then
      IGCM_debug_Print 2 "IGCM_sys_Put_Out : cp failed error code ${status}"
      [ -f ${1} ] && ls -l ${1}
      [ -f ${2} ] && ls -l ${2}
      [ -f ${2}/${1} ] && ls -l ${2}/${1}
      cat ${OUTCOMMAND_PATH}/out_command_${LOGIN}.$$
      IGCM_debug_Exit "IGCM_sys_Put_Out"
    else

      # In a RUN job, and when no third argument is given, the archived copy
      # is made read-only
      if [ X${JobType} = XRUN ] ; then
        if [ X${3} = X ] ; then
          [ -f ${2} ] && IGCM_sys_Chmod 444 ${2}
          [ -f ${2}/${1} ] && IGCM_sys_Chmod 444 ${2}/${1}
        fi
      fi

      # -f: the output file does not exist when the copy was skipped
      \rm -f ${OUTCOMMAND_PATH}/out_command_${LOGIN}.$$
    fi
  fi
  IGCM_debug_PopStack "IGCM_sys_Put_Out"
  return 0
}

#D-#==================================================
#D-function IGCM_sys_Get
#D-* Purpose: Get a file from ${ARCHIVE}
#D-* Examples: IGCM_sys_Get myfile /destpath/myfile_with_PREFIX
#D-            IGCM_sys_Get /l Array_contain_myfiles /destpath/
function IGCM_sys_Get {
  IGCM_debug_PushStack "IGCM_sys_Get" $@

  typeset DEST dm_liste target local_file
  typeset NB_ESSAI DELAI status i

  if ( $DEBUG_sys ) ; then
    echo "IGCM_sys_Get :" $@
  fi

  # number of attempts
  NB_ESSAI=3
  # time delay between attempts (seconds)
  DELAI=2

  if [ $DRYRUN -le 2 ]; then
    # "/l" form: $2 is the NAME of a variable holding the list of files
    if [ X${1} = X'/l' ] ; then
      eval set +A dm_liste \${${2}}
    else
      eval set +A dm_liste ${1}
    fi
    # The destination is always the last argument
    eval DEST=\${${#}}

    # add here command to demigrate all offline files to reduce time of this command
    # currently on Jean Zay files are not archived on tapes
    status=$?
    if [ ${status} -gt 0 ] ; then
      echo "WARNING IGCM_sys_Get : error code ${status}"
      cat ${OUTCOMMAND_PATH}/out_command_${LOGIN}.$$
      echo "WARNING IGCM_sys_Get : will stop later if the cp fails."
    fi

    #if [ ${status} -gt 0 ] ; then
    #  if [ ! "X$( grep "Lost dmusrcmd connection" ${OUTCOMMAND_PATH}/out_command_${LOGIN}.$$ )" = "X" ] ; then
    #    cat ${OUTCOMMAND_PATH}/out_command_${LOGIN}.$$
    #    echo "WARNING IGCM_sys_Get : Lost dmusrcmd connection : "
    #    sleep 30
    #    echo "We try another time"
    ##    dmget ${dm_liste[*]} > ${OUTCOMMAND_PATH}/out_command_${LOGIN}.$$ 2>&1
    #    ccc_hsm get ${dm_liste[*]} > ${OUTCOMMAND_PATH}/out_command_${LOGIN}.$$ 2>&1
    #    status=$?
    #    if [ ${status} -gt 0 ] ; then
    #      echo "ERROR IGCM_sys_Get : again demigration error :"
    #      cat ${OUTCOMMAND_PATH}/out_command_${LOGIN}.$$
    #      IGCM_debug_Exit "IGCM_sys_Get"
    #    fi
    #  else
    #    echo "ERROR IGCM_sys_Get : demigration error :"
    #    cat ${OUTCOMMAND_PATH}/out_command_${LOGIN}.$$
    #    IGCM_debug_Exit "IGCM_sys_Get"
    #  fi
    #fi

    #   #RSYNC WITH NETWORK SSH CALL
    #   echo ${RSYNC} ${RSYNC_opt} -e ssh ${STOREHOST}:"${dm_liste}" ${STOREHOST}:${RUN_DIR}/${DEST} > ${OUTCOMMAND_PATH}/out_command_${LOGIN}.$$ 2>&1
    #   ${RSYNC} ${RSYNC_opt} -e ssh ${STOREHOST}:"${dm_liste}" ${STOREHOST}:${RUN_DIR}/${DEST} >> ${OUTCOMMAND_PATH}/out_command_${LOGIN}.$$ 2>&1

    #   #RSYNC WITH NFS USE
    #   echo ${RSYNC} ${RSYNC_opt} ${dm_liste} ${RUN_DIR}/${DEST} > ${OUTCOMMAND_PATH}/out_command_${LOGIN}.$$ 2>&1
    #   ${RSYNC} ${RSYNC_opt} ${dm_liste} ${RUN_DIR}/${DEST} >> ${OUTCOMMAND_PATH}/out_command_${LOGIN}.$$ 2>&1

    #   status=$?
    #   IGCM_sys_Rsync_out $status

    #   ${libIGCM}/libIGCM_sys/IGCM_analyse_rsync_out.awk ${OUTCOMMAND_PATH}/out_command_${LOGIN}.$$
    #   (( status=status+$? ))

    #USUAL WAY
    if [ X${1} = X'/l' ] ; then
      # List form: every file of the list is symlinked into DEST
      for target in ${dm_liste[*]} ; do
        local_file=$( basename ${target} )
        # test if the target file is present before the loop
        IGCM_sys_TestFileArchive ${target}
        status=$?
        if [ ${status} -gt 0 ] ; then
          echo "IGCM_sys_Get, ERROR : regular file ${target} DOES NOT EXIST ."
          IGCM_debug_Exit "IGCM_sys_Get"
        else
          i=0
          while [ $i -lt $NB_ESSAI ] ; do
            #if [ X${DoLink} = Xtrue ] ; then
            #  \ln -s ${target} ${DEST}/${local_file} >> ${OUTCOMMAND_PATH}/out_command_${LOGIN}.$$ 2>&1
            #  status=$?
            #  else
            #  \cp ${target} ${DEST}/${local_file} >> ${OUTCOMMAND_PATH}/out_command_${LOGIN}.$$ 2>&1
            #  status=$?
            #fi
            \ln -s ${target} ${DEST}/${local_file} >> ${OUTCOMMAND_PATH}/out_command_${LOGIN}.$$ 2>&1
            status=$?
            if [ ${status} -gt 0 ]; then
              IGCM_debug_Print 2 "IGCM_sys_Get : ln -s failed error code ${status} ${i}/${NB_ESSAI}"
              IGCM_debug_Print 2 "IGCM_sys_Get : sleep ${DELAI} seconds and try again."
              sleep $DELAI
            else
              break
            fi
            (( i = i + 1 ))
          done
          if [ ${status} -gt 0 ] ; then
            echo "IGCM_sys_Get : error"
            cat ${OUTCOMMAND_PATH}/out_command_${LOGIN}.$$
            \rm ${OUTCOMMAND_PATH}/out_command_${LOGIN}.$$
            IGCM_debug_Exit "IGCM_sys_Get"
          else
            \rm ${OUTCOMMAND_PATH}/out_command_${LOGIN}.$$
          fi
        fi
      done
    else
      # Single-file form: the file is copied to DEST
      i=0
      while [ $i -lt $NB_ESSAI ] ; do
        \cp ${dm_liste} ${DEST} >> ${OUTCOMMAND_PATH}/out_command_${LOGIN}.$$ 2>&1
        status=$?
        if [ ${status} -gt 0 ]; then
          IGCM_debug_Print 2 "IGCM_sys_Get : cp failed error code ${status} ${i}/${NB_ESSAI}"
          IGCM_debug_Print 2 "IGCM_sys_Get : sleep ${DELAI} seconds and try again."
          sleep $DELAI
        else
          break
        fi
        (( i = i + 1 ))
      done
      if [ ${status} -gt 0 ] ; then
        echo "IGCM_sys_Get : error"
        cat ${OUTCOMMAND_PATH}/out_command_${LOGIN}.$$
        \rm ${OUTCOMMAND_PATH}/out_command_${LOGIN}.$$
        IGCM_debug_Exit "IGCM_sys_Get"
      else
        \rm ${OUTCOMMAND_PATH}/out_command_${LOGIN}.$$
      fi
    fi
  fi
  IGCM_debug_PopStack "IGCM_sys_Get"
}

#D-#==================================================
#D-function IGCM_sys_GetDate_Monitoring
#D-* Purpose: get the last year for which the monitoring has been computed
#D-*          $1 file read by cdo, $2 NAME of the variable receiving the year
#D-* Examples:
#D-
function IGCM_sys_GetDate_Monitoring {
  IGCM_debug_PushStack "IGCM_sys_GetDate_Monitoring" $@
  if ( $DEBUG_sys ) ; then
    echo "IGCM_sys_GetDate_Monitoring :" $@
  fi

  # Last field of the "cdo showyear" output
  eval ${2}=$( cdo showyear ${1} 2> /dev/null | gawk '{ print $NF }' )

  IGCM_debug_PopStack "IGCM_sys_GetDate_Monitoring"
}

#D-#==================================================
#D-function IGCM_sys_Dods_Rm
#D-* Purpose: DO NOTHING ! Put ${ARCHIVE} files on DODS internet protocol.
#D-* Examples:
#D-
function IGCM_sys_Dods_Rm {
  if ( $DEBUG_sys ) ; then
    echo "IGCM_sys_Dods_Rm :" $@
  fi
  typeset status
  # Defined up front so that "return $status" is well formed in dry-run mode
  status=0
  if [ $DRYRUN = 0 ]; then

#    if [ ! -d /dmnfs/cont003/dods/public/${LOGIN}/${R_DODS}/${1} ] ; then
#      echo "WARNING : IGCM_sys_Dods_Rm /dmnfs/cont003/dods/public/${LOGIN}/${R_DODS}/${1} DOES NOT EXIST ."
#      echo "Nothing has been done."
#      return
#    fi

    if [ "$#" -eq 1 ]; then
      thredds_rm ${R_DODS}/${1}
      status=$?
    else
      thredds_rm ${R_DODS}
      status=$?
    fi

#    if [ ${status} -gt 0 ] ; then
#      echo "IGCM_sys_Dods_Rm : error."
#      cat out_dods_rm
#      IGCM_debug_Exit "IGCM_sys_Dods_Rm"
#    else
#      rm out_dods_rm
#    fi

  fi
  return $status
}

#D-#==================================================
#D-function IGCM_sys_Dods_Cp
#D-* Purpose: Copy $(ARCHIVE) files on DODS internet protocol.
#D-* Examples:
#D-
function IGCM_sys_Dods_Cp {
  if ( $DEBUG_sys ) ; then
    echo "IGCM_sys_Dods_Cp :" $@
  fi
  typeset status
  # Defined up front so that "return $status" is well formed in dry-run mode
  status=0
  if [ $DRYRUN = 0 ]; then

#    if [ ! -d ${R_SAVE}/${1} ] ; then
#      echo "WARNING : IGCM_sys_Dods_Cp ${R_SAVE}/${1} DOES NOT EXIST ."
#      echo "Nothing has been done."
#      return
#    fi

    thredds_cp ${1} ./${R_DODS}/.
    status=$?

#       if [ ${status} -gt 0 ] ; then
#           echo "IGCM_sys_Dods_Cp : error."
#           cat out_dods_cp
#           IGCM_debug_Exit "IGCM_sys_Dods_Cp"
#       else
#           rm out_dods_cp
#       fi

  fi
  return $status
}

#D-#==================================================
#D-function IGCM_sys_Put_Dods
#D-* Purpose: Put ${ARCHIVE} files on DODS internet protocol.
#D-* Examples:
#D-
function IGCM_sys_Put_Dods {
  IGCM_debug_PushStack "IGCM_sys_Put_Dods" $@
  if ( $DEBUG_sys ) ; then
    echo "IGCM_sys_Put_Dods :" $@
  fi
  typeset status
  if [ $DRYRUN = 0 ]; then
    # Exactly one of ${R_SAVE}/${1} and ${R_FIGR}/${1} is expected to exist
    if ( [ ! -d ${R_SAVE}/${1} ] && [ ! -d ${R_FIGR}/${1} ] ) ; then
      echo "WARNING IGCM_sys_Put_Dods : None of the following directories exist. Exactly one should."
      echo "WARNING IGCM_sys_Put_Dods : ${R_SAVE}/${1} DOES NOT EXIST."
      echo "WARNING IGCM_sys_Put_Dods : ${R_FIGR}/${1} DOES NOT EXIST."
      IGCM_debug_PopStack "IGCM_sys_Put_Dods"
      return
    fi

    if ( [ -d ${R_SAVE}/${1} ] && [ -d ${R_FIGR}/${1} ] ) ; then
      echo "WARNING IGCM_sys_Put_Dods : Both of the following directories exist. Exactly one should."
      echo "WARNING IGCM_sys_Put_Dods : ${R_SAVE}/${1} EXISTS."
      echo "WARNING IGCM_sys_Put_Dods : ${R_FIGR}/${1} EXISTS."
      IGCM_debug_PopStack "IGCM_sys_Put_Dods"
      return
    fi

    #
    if [ -d ${R_SAVE}/${1} ] ; then
      cd ${R_SAVE}
    elif [ -d ${R_FIGR}/${1} ] ; then
      cd ${R_FIGR}
    fi

    IGCM_sys_Dods_Rm ${1}
    IGCM_sys_Dods_Cp ${1}
    # NOTE(review): status is forced to 0, so the error branch below can never
    # run and failures of the two calls above are ignored -- presumably
    # deliberate best-effort; confirm.
    status=0

    if [ ${status} -gt 0 ] ; then
      echo "IGCM_sys_Put_Dods : error."
      IGCM_debug_Exit "IGCM_sys_Put_Dods"
    fi
  fi
  IGCM_debug_PopStack "IGCM_sys_Put_Dods"
}

##############################################################
# REBUILD OPERATOR

#D-#==================================================
#D-function IGCM_sys_sync
#D-* Purpose: flush buffer on disk
#D-* Examples:
#D-
function IGCM_sys_sync {
  IGCM_debug_PushStack "IGCM_sys_sync" $@
  if ( $DEBUG_sys ) ; then
    echo "IGCM_sys_sync :" $@
  fi

  /bin/sync

  IGCM_debug_PopStack "IGCM_sys_sync"
}

############################################################
# Activate Running Environment Variables

#D-#==================================================
#D-function IGCM_sys_activ_variables
#D-* Purpose: set environment variables prior to execution
#D-* Examples:
#D-
function IGCM_sys_activ_variables {
  IGCM_debug_PushStack "IGCM_sys_activ_variables"
  if ( $DEBUG_sys ) ; then
    echo "IGCM_sys_activ_variables"
  fi

  # --------------------------------------------------------------------
  #D- MODULE specifications
  # --------------------------------------------------------------------
  # Source the file EnvFile containing all module load needed to run the model.
  # EnvFile can be specified in config.card. If this is the case and if the file exists,
  # it will be used. Otherwise a default file will be used.
  if [ ! X${config_UserChoices_EnvFile} = X ] && [ -f ${config_UserChoices_EnvFile} ] ; then
    # EnvFile is set in config.card and the file exists
    IGCM_debug_Print 1 "EnvFile set in config.card will be used"
    EnvFile=${config_UserChoices_EnvFile}
  else
    IGCM_debug_Print 1 "IGCM_sys_active_variables : Default modules will be used"
    module purge > /dev/null 2>&1
    EnvFile=/gpfswork/rech/psl/commun/MachineEnvironment/jeanzay/env_jeanzay
  fi
  IGCM_debug_Print 1 "IGCM_sys_active_variables : Following EnvFile will be used :"
  IGCM_debug_Print 1 ${EnvFile}
  . ${EnvFile}

  IGCM_debug_Print 1 "IGCM_sys_active_variables : Now loaded modules for Jean-Zay. "
  module list

  # --------------------------------------------------------------------
  #D- MPI specifications
  # --------------------------------------------------------------------

  # --------------------------------------------------------------------
  #D- Other specifications
  # --------------------------------------------------------------------

  ulimit -s unlimited

  # executionType 2 (labelled MPMD + MPI + OMP in IGCM_sys_updateHeaders)
  # uses an explicit host list
  if [ ${executionType} -eq 2 ] ; then
    export SLURM_HOSTFILE=./hostlist
  fi

  IGCM_debug_PopStack "IGCM_sys_activ_variables"
}

############################################################
# Deactivate Running Environment Variables

#D-#==================================================
#D-function IGCM_sys_desactiv_variables
#D-* Purpose: unset environment variables after execution
#D-* Examples:
#D-
function IGCM_sys_desactiv_variables {
  IGCM_debug_PushStack "IGCM_sys_desactiv_variables"
  if ( $DEBUG_sys ) ; then
    echo "IGCM_sys_desactiv_variables"
  fi
  # --------------------------------------------------------------------
  #D- MPI specifications
  # --------------------------------------------------------------------
  # Undo the export done by IGCM_sys_activ_variables
  if [ ${executionType} -eq 2 ] ; then
    unset SLURM_HOSTFILE
  fi

  # --------------------------------------------------------------------
  #D- Other specifications
  # --------------------------------------------------------------------

  IGCM_debug_PopStack "IGCM_sys_desactiv_variables"
}

############################################################
# Update job headers to be used by the scheduler

#D-#==================================================
#D-function IGCM_sys_updateHeaders
#D-* Purpose: Update job headers to be used by the scheduler
#D-* Examples: IGCM_sys_updateHeaders /path/to/Job_MYEXP
#D-
# Arguments: $1 - path of the job file, rewritten in place
# Reads:     executionType, coreNumber, mpiTasks, openMPthreads,
#            NB_CORE_PER_NODE
# Writes:    nodeNumber (global, executionType 2 only)
# Returns:   1 when executionType is not one of 1..6 (job file untouched)
function IGCM_sys_updateHeaders {
  IGCM_debug_PushStack "IGCM_sys_updateHeaders"
  if ( $DEBUG_sys ) ; then
    echo "IGCM_sys_updateHeaders"
  fi
  typeset file
  file=$1

  if [ ${executionType} -eq 1 ] ; then
    # MPMD + MPI
    sed -e "/::openMPthreads::/d"                  \
        -e "s/::JobNumProcTot::/${coreNumber}/"    \
      "${file}" > "${file}.tmp"

  elif [ ${executionType} -eq 2 ] ; then
    # MPMD + MPI + OMP
    # Number of nodes = coreNumber / NB_CORE_PER_NODE, rounded up.
    (( nodeNumber = ( coreNumber + NB_CORE_PER_NODE - 1 ) / NB_CORE_PER_NODE ))
    sed -e "/::openMPthreads::/d"                       \
        -e "s/::JobNumProcTot::/${mpiTasks}/"           \
        -e "/ntasks/i\#SBATCH --nodes=${nodeNumber}"    \
        -e "/ntasks/i\#SBATCH --exclusive"              \
      "${file}" > "${file}.tmp"

  elif [ ${executionType} -eq 3 ] ; then
    # SPMD + MPI/OMP
    sed -e "s/::openMPthreads::/${openMPthreads}/" \
        -e "s/::JobNumProcTot::/${mpiTasks}/"      \
      "${file}" > "${file}.tmp"

  elif [ ${executionType} -eq 4 ] ; then
    # SPMD + MPI only
    sed -e "s/::JobNumProcTot::/${mpiTasks}/"      \
        -e "/::openMPthreads::/d"                  \
      "${file}" > "${file}.tmp"

  elif [ ${executionType} -eq 5 ] ; then
    # SPMD + OMP only
    sed -e "s/::openMPthreads::/${openMPthreads}/" \
        -e "/::JobNumProcTot::/d"                  \
      "${file}" > "${file}.tmp"

  elif [ ${executionType} -eq 6 ] ; then
    # SEQUENTIAL
    sed -e "s/::JobNumProcTot::/1/"                \
        -e "/::openMPthreads::/d"                  \
      "${file}" > "${file}.tmp"

  else
    # No temporary file was produced : do not try to move it over the job.
    IGCM_debug_Exit "IGCM_sys_updateHeaders : unsupported executionType ${executionType}"
    IGCM_debug_PopStack "IGCM_sys_updateHeaders"
    return 1
  fi

  IGCM_sys_Mv "${file}.tmp" "${file}"

  IGCM_debug_PopStack "IGCM_sys_updateHeaders"
}

############################################################
# Build MPI/OMP scripts run file (dummy function)

#D-#==================================================
#D-function IGCM_sys_build_run_file
#D-* Purpose: build run file (deprecated)
#D-* Examples:
#D-
# Only prints a level-3 debug message.
function IGCM_sys_build_run_file {

  IGCM_debug_Print 3 " dummy function : IGCM_sys_build_run_file "

}

############################################################
# Build MPI/OMP scripts

#D-#==================================================
#D-function IGCM_sys_build_execution_scripts
#D-* Purpose: build execution scripts to be launch by ${HOST_MPIRUN_COMMAND}
#D-* Examples:
#D-
# Reads:  OK_PARA_MPMD, OK_PARA_OMP, HOST_MPIRUN_COMMAND, executionType,
#         config_ListOfComponents, config_Executable_<comp>[0|1],
#         <comp>_PROC_MPI, <comp>_PROC_OMP, <comp>_PROC_NOD,
#         NB_CORE_PER_NODE, SLURM_JOB_NODELIST
# Writes: EXECUTION (command line to run), and in the current directory
#         run_file, hostlist, script_<exe>.ksh and prog_<exe>.sh as needed.
#         Exports SLURM_WAIT in the MPMD MPI/OpenMP case.
# A component without executable has the literal value "" (two quote
# characters) as executable name, hence the X\"\" comparisons.
function IGCM_sys_build_execution_scripts
{
  IGCM_debug_PushStack "IGCM_sys_build_execution_scripts" $@
  if ( $DEBUG_sys ) ; then
    echo "IGCM_sys_build_execution_scripts " $@
  fi
  IGCM_debug_Print 1 "executionType= ${executionType}"

  EXECUTION=${HOST_MPIRUN_COMMAND}

  if ( ${OK_PARA_MPMD} ) ; then

    # MPMD mode
    # 1 MPI only : executionType=1
    # 2 MPI/OpenMP : executionType=2

    if [ -f run_file ] ; then
      IGCM_sys_Rm -f run_file
    fi
    touch run_file

    # case 1 : Only MPI (MPMD)
    if ( ! ${OK_PARA_OMP} ) ; then

      # Build run_file
      current_core=0
      # First loop on the components for the coupler ie oasis (only if oasis3)
      # the coupler ie oasis3 must be the first one
      for comp in ${config_ListOfComponents[*]} ; do

        eval ExeNameIn=\${config_Executable_${comp}[0]}
        eval ExeNameOut=\${config_Executable_${comp}[1]}

        # Only if the component has an executable
        if [ "X${ExeNameOut}" != X\"\" ] ; then

          # One run_file line per executable : "<first rank>-<last rank> ./exe"
          eval comp_proc_mpi_loc=\${${comp}_PROC_MPI}
          (( end_core = ${current_core} + ${comp_proc_mpi_loc} - 1 ))
          echo "${current_core}-${end_core} ./${ExeNameOut}" >> run_file
          (( current_core = ${end_core} + 1 ))
        fi
      done

      EXECUTION="${HOST_MPIRUN_COMMAND} --multi-prog ./run_file"

      IGCM_sys_Chmod u+x run_file
      if ( $DEBUG_sys ) ; then
        echo "run_file contains : "
        cat run_file
      fi

    else

      # 2 MPI/OpenMP : executionType=2
      # MPI-OpenMP (MPMD)
      # export SLURM_HOSTFILE=./hostlist
      # srun --cpu-bind=none --distribution=arbitrary --multi-prog ./run_file
      # example of hostlist file :
      # r3i3n33
      # r3i3n33
      # ...
      # example of run_file :
      # 0-70 ./prog_lmdz.x.sh %o %t
      # 71-430 ./prog_opa.xx.sh %o %t
      # 431-431 ./prog_xios.x.sh %o %t
      # examples of prog_file :
      # prog_lmdz.x.sh :
      # (( init = 0 + $1 ))
      # (( index = init * 10 ))
      # (( slot = index % 40 ))
      # taskset -c $slot-$((slot + 10 - 1)) ./script_lmdz.x.ksh
      # that will become
      # taskset -c 0-9 ./script_lmdz.x.ksh
      # ...
      # with script_lmdz.x.ksh
      # export OMP_STACKSIZE=3g
      # export OMP_PLACES=cores
      # export OMP_NUM_THREADS=10
      # ./lmdz.x > out_lmdz.x.out.${SLURM_PROCID} 2>out_lmdz.x.err.${SLURM_PROCID}

      # Hosts treatment : one array element per allocated host name.
      # Word splitting is restricted to newlines and globbing is disabled
      # while the scontrol output is read.
      _bkIFS=$IFS
      IFS=$'\n'; set -f
      listnodes=($(< <( scontrol show hostnames $SLURM_JOB_NODELIST )))
      IFS=$_bkIFS; set +f
      rm -f hostlist

      # Loop on the components to build run_file and script_exec files
      rank=0
      current_core=0
      current_core_mpi=0

      for comp in ${config_ListOfComponents[*]} ; do

        eval ExeNameIn=\${config_Executable_${comp}[0]}
        eval ExeNameOut=\${config_Executable_${comp}[1]}

        # Not possible if oasis has an executable (i.e old version of oasis3)
        if [ "X${ExeNameOut}" != X\"\" ] && [ "X${comp}" = "XCPL" ] ; then
          IGCM_debug_Exit "ERROR MPMD with hybrid MPI-OpenMP is not available with oasis3 version"
          IGCM_debug_Print 2 "Only available with oasis3-MCT version coupler"
          IGCM_debug_Verif_Exit
        fi

        # Only if we really have an executable for the component :
        if [ "X${ExeNameOut}" != X\"\" ] ; then

          eval comp_proc_mpi_loc=\${${comp}_PROC_MPI}
          eval comp_proc_omp_loc=\${${comp}_PROC_OMP}
          eval comp_proc_nod_loc=\${${comp}_PROC_NOD}

          # Build script files
          echo "#!/bin/ksh" > script_${ExeNameOut}.ksh
          echo "" >> script_${ExeNameOut}.ksh
          if [ ${comp_proc_omp_loc} -gt 1 ] ; then

            # Check if the number of threads is correct
            case ${comp_proc_omp_loc} in
            2|4|5|10|20)
              IGCM_debug_Print 1 "You run ${ExeNameOut} on ${comp_proc_omp_loc} OMP threads"
              ;;
            *)
              IGCM_debug_Exit "ERROR with OMP parameters !"
              IGCM_debug_Print 2 "${comp_proc_omp_loc} is not possible as number of OMP threads"
              IGCM_debug_Print 2 "Only 2,4,5,10,20 as number of OMP threads are possible "
              IGCM_debug_Verif_Exit
              ;;
            esac
            echo "export OMP_STACKSIZE=3g" >> script_${ExeNameOut}.ksh
            echo "export OMP_PLACES=cores" >> script_${ExeNameOut}.ksh
            echo "export OMP_NUM_THREADS=${comp_proc_omp_loc}" >> script_${ExeNameOut}.ksh
          fi

          # to have out/err per process on different files
          echo "./${ExeNameOut} > out_${ExeNameOut}.out.\${SLURM_PROCID} 2>out_${ExeNameOut}.err.\${SLURM_PROCID}" >> script_${ExeNameOut}.ksh

          IGCM_sys_Chmod u+x script_${ExeNameOut}.ksh

          # Build run_file : "<first rank>-<last rank> ./prog_<exe>.sh %o %t"
          (( end_core = ${current_core_mpi} + ${comp_proc_mpi_loc} - 1 ))
          echo "${current_core_mpi}-${end_core} ./prog_${ExeNameOut}.sh %o %t" >> run_file
          (( current_core_mpi = ${end_core} + 1 ))

          # Core stride between two consecutive MPI processes of the component
          if [ ${comp_proc_nod_loc} -gt 1 ] ; then
            (( offset_comp_proc_loc = NB_CORE_PER_NODE / (comp_proc_mpi_loc / comp_proc_nod_loc) ))
          else
            (( offset_comp_proc_loc = comp_proc_omp_loc ))
          fi

          # Build configuration file
          # NOTE(review): the modulus 40 written below is hard-coded;
          # presumably the number of cores per node - confirm against
          # NB_CORE_PER_NODE.
          echo "#!/bin/sh" > prog_${ExeNameOut}.sh
          echo "(( init = $current_core + \$1 ))" >> prog_${ExeNameOut}.sh
          echo "(( index = init * $comp_proc_omp_loc ))" >> prog_${ExeNameOut}.sh
          echo "(( slot = index % 40 ))" >> prog_${ExeNameOut}.sh
          echo "echo ${ExeNameOut} taskset -c \$slot"-"\$((slot + $comp_proc_omp_loc - 1))" >> prog_${ExeNameOut}.sh
          echo "taskset -c \$slot"-"\$((slot + $comp_proc_omp_loc - 1)) ./script_${ExeNameOut}.ksh" >> prog_${ExeNameOut}.sh

          IGCM_sys_Chmod u+x prog_${ExeNameOut}.sh

          # Build hostlist file : one host name per MPI process
          for nb_proc_mpi in $( seq 0 $(( comp_proc_mpi_loc - 1 )) ) ; do
            (( index_host = current_core / NB_CORE_PER_NODE ))
            host_value=${listnodes[${index_host}]}
            echo "$host_value" >> hostlist
            (( current_core = current_core + offset_comp_proc_loc ))
          done
        fi
      done

      ## variable added to stop after 60s instead of 600s by default.
      ## This is used when no error comes from executables and when something stopped an executable without notice.
      export SLURM_WAIT=60

      EXECUTION="${HOST_MPIRUN_COMMAND} --cpu-bind=none --distribution=arbitrary --multi-prog ./run_file"

      IGCM_sys_Chmod u+x run_file
      if ( $DEBUG_sys ) ; then
        echo "run_file contains : "
        cat run_file
      fi

    fi # ! ${OK_PARA_OMP}

  else

    # Only one executable (SPMD mode): executionType=3, 4, 5 and 6

    for comp in ${config_ListOfComponents[*]} ; do

      # Only if we really have an executable for the component :
      eval ExeNameOut=\${config_Executable_${comp}[1]}
      if [ "X${ExeNameOut}" != X\"\" ] && [ "X${ExeNameOut}" != "Xinca.dat" ] ; then

        # Build script files
        echo "#!/bin/ksh" > script_${ExeNameOut}.ksh
        echo "" >> script_${ExeNameOut}.ksh
        IGCM_sys_Chmod u+x script_${ExeNameOut}.ksh

        if ( ${OK_PARA_OMP} ) ; then
          eval comp_proc_omp_loc=\${${comp}_PROC_OMP}

          # Check if the number of threads is correct
          case ${comp_proc_omp_loc} in
          2|4|5|10|20)
            IGCM_debug_Print 1 "You run ${ExeNameOut} on ${comp_proc_omp_loc} OMP threads"
            ;;
          *)
            IGCM_debug_Exit "ERROR with OMP parameters !"
            IGCM_debug_Print 2 "${comp_proc_omp_loc} is not possible as number of OMP threads"
            IGCM_debug_Print 2 "Only 2,4,5,10,20 as number of OMP threads are possible "
            IGCM_debug_Verif_Exit
            ;;
          esac
          echo "" >> script_${ExeNameOut}.ksh
          echo "export OMP_STACKSIZE=3g" >> script_${ExeNameOut}.ksh
          echo "export OMP_PLACES=cores" >> script_${ExeNameOut}.ksh
          # Exported like in the MPMD case : a plain assignment in the
          # generated script would not reach the executable it launches.
          echo "export OMP_NUM_THREADS=${comp_proc_omp_loc}" >> script_${ExeNameOut}.ksh
        fi

        eval comp_proc_mpi_loc=\${${comp}_PROC_MPI}

        # To have out/err per process on different files
        echo "./${ExeNameOut} > out_${ExeNameOut}.out.\${SLURM_PROCID} 2>out_${ExeNameOut}.err.\${SLURM_PROCID}" >> script_${ExeNameOut}.ksh
        EXECUTION="${HOST_MPIRUN_COMMAND} ./script_${ExeNameOut}.ksh"

        IGCM_debug_Print 1 "sys Jean-Zay : script_${ExeNameOut}.ksh contains"
        cat script_${ExeNameOut}.ksh

      fi

    done

  fi # ${OK_PARA_MPMD}

  IGCM_debug_Print 1 "sys Jean-Zay : execution command is "
  IGCM_debug_Print 1 "$EXECUTION"

  IGCM_debug_PopStack "IGCM_sys_build_execution_scripts"
}

#D-#==================================================
#D-function IGCM_sys_check_path
#D-* Purpose: check that RUN_DIR_PATH that will be removed on some machine
#D-* do not point to an important use directory. Stop immediately in that case.
#D-* Examples:
#D-
# Reads: RUN_DIR_PATH, HOME, WORK, SCRATCH, STORE, DataProject, LOGIN,
#        config_UserChoices_JobName
# Calls IGCM_debug_Exit when RUN_DIR_PATH is one of the protected
# directories (the user's home, work, scratch or store root).
function IGCM_sys_check_path {
  IGCM_debug_PushStack "IGCM_sys_check_path"
  if ( $DEBUG_sys ) ; then
    echo "IGCM_sys_check_path"
  fi

  typeset runDir protectedDir isProtected

  # Compare without a trailing slash so that "${HOME}/" is caught like "${HOME}".
  runDir=${RUN_DIR_PATH%/}
  isProtected=false
  for protectedDir in "${HOME}" "${WORK}" "${SCRATCH}" "${STORE}" \
                      "/gpfswork/rech/${DataProject}/${LOGIN}" \
                      "/gpfsscratch/rech/${DataProject}/${LOGIN}" \
                      "/gpfsstore/rech/${DataProject}/${LOGIN}" ; do
    if [ "X${runDir}" = "X${protectedDir%/}" ] ; then
      isProtected=true
      break
    fi
  done

  if ( ${isProtected} ) ; then
    IGCM_debug_Print 1 "Variable RUN_DIR_PATH is pointing to an important directory : ${RUN_DIR_PATH}"
    IGCM_debug_Print 1 "Please check the RUN_DIR_PATH definition in your Job : Job_${config_UserChoices_JobName}"
    IGCM_debug_Exit "This will stop the job"
  fi

  IGCM_debug_PopStack "IGCM_sys_check_path"
}

#D-#==================================================
#D-function IGCM_sys_check_quota
#D-* Purpose: check user quota. Stop the simulation if quota above 90%
#D-* Examples:
#D-
# Currently only prints a notice : the whole check is commented out below.
function IGCM_sys_check_quota {
  IGCM_debug_PushStack "IGCM_sys_check_quota"
  echo "IGCM_sys_check_quota is desactivated on JeanZay because there is actually no way to check scratch quota on this computing center (2019.11.19)"
#  if ( $DEBUG_sys ) ; then
#    echo "IGCM_sys_check_quota"
#  fi
#  # Limit of quota (in %)
#  limit_quota=90
#
#  # Check of the volume
#  volume_quota=$(ccc_quota | grep ' scratch' | gawk '{print $2}')
#  volume_avail=$(ccc_quota | grep ' scratch' | gawk '{print $3}')
#
#  if ( [ ! X${volume_quota} = X ] && [ ! ${volume_quota} = "-" ] ) ; then
#
#    unit_avail=${volume_avail: -1}
#    unit_quota=${volume_quota: -1}
#
#    if [ "${unit_quota}" = "*" ] ; then
#      IGCM_debug_Print 1 "Please, check your quota of volume on scratch"
#      IGCM_debug_Print 1 "More than 100% of your quota is used"
#      IGCM_debug_Print 1 "Use the ccc_quota command to check"
#      IGCM_debug_Print 1 "You must have more than 10% available to run"
#      IGCM_debug_Exit "Not enough space to run ! STOP HERE"
#      IGCM_debug_Verif_Exit
#    fi
#
#    temp_avail=${volume_avail%%${volume_avail: -1}*}
#    temp_quota=${volume_quota%%${volume_quota: -1}*}
#
#    if [ ! ${unit_avail} = ${unit_quota} ] ; then
#
#      # Conversion
#      if [ ${volume_avail: -1} = "T" ] ; then
#        (( temp_avail = temp_avail * 1000000000000 ))
#      elif [ ${volume_avail: -1} = "G" ] ; then
#        (( temp_avail = temp_avail * 1000000000 ))
#      elif [ ${volume_avail: -1} = "M" ] ; then
#        (( temp_avail = temp_avail * 1000000 ))
#      elif [ ${volume_avail: -1} = "k" ] ; then
#        (( temp_avail = temp_avail * 1000 ))
#      else
#        (( temp_avail = volume_avail ))
#      fi
#      if [ ${volume_quota: -1} = "T" ] ; then
#        (( temp_quota = temp_quota * 1000000000000 ))
#      elif [ ${volume_quota: -1} = "G" ] ; then
#        (( temp_quota = temp_quota * 1000000000 ))
#      elif [ ${volume_quota: -1} = "M" ] ; then
#        (( temp_quota = temp_quota * 1000000 ))
#      elif [ ${volume_quota: -1} = "k" ] ; then
#        (( temp_quota = temp_quota * 1000 ))
#      else
#        (( temp_quota = volume_quota ))
#      fi
#    fi
#
#    quota_volume=$(echo "scale=2 ; $temp_quota/$temp_avail*100" | bc)
##    echo "volume ratio is " $quota_volume
#
#    if [ ${quota_volume} -ge ${limit_quota} ] ; then
#      IGCM_debug_Print 1 "Please, check your quota of volume on scratch"
#      IGCM_debug_Print 1 "${quota_volume}% of your quota is used"
#      IGCM_debug_Print 1 "Use the ccc_quota command to check"
#      IGCM_debug_Print 1 "You must have more than 10% available to run"
#      IGCM_debug_Exit "Not enough space to run ! STOP HERE"
#      IGCM_debug_Verif_Exit
#    fi
#
#  fi
#
## Check of the number of inodes
#
#  inode_quota=$(ccc_quota | grep ' scratch' | gawk '{print $6}')
#  inode_avail=$(ccc_quota | grep ' scratch' | gawk '{print $7}')
#
#  if ( [ ! X${inode_quota} = X ] && [ ! ${inode_quota} = "-" ] ) ; then
#
#    unit_avail=${inode_avail: -1}
#    unit_quota=${inode_quota: -1}
#
#    if [ "${unit_quota}" = "*" ] ; then
#      IGCM_debug_Print 1 "Please, check your quota of inode on scratch"
#      IGCM_debug_Print 1 "More than 100% of your quota is used"
#      IGCM_debug_Print 1 "Use the ccc_quota command to check"
#      IGCM_debug_Print 1 "You must have more than 10% available to run"
#      IGCM_debug_Exit "Not enough space to run ! STOP HERE"
#      IGCM_debug_Verif_Exit
#    fi
#
#    temp_avail=${inode_avail%%${inode_avail: -1}*}
#    temp_quota=${inode_quota%%${inode_quota: -1}*}
#
#    if [ ! ${unit_avail} = ${unit_quota} ] ; then
#
#      # Conversion
#      if [ ${inode_avail: -1} = "T" ] ; then
#        (( temp_avail = temp_avail * 1000000000000 ))
#      elif [ ${inode_avail: -1} = "G" ] ; then
#        (( temp_avail = temp_avail * 1000000000 ))
#      elif [ ${inode_avail: -1} = "M" ] ; then
#        (( temp_avail = temp_avail * 1000000 ))
#      elif [ ${inode_avail: -1} = "k" ] ; then
#        (( temp_avail = temp_avail * 1000 ))
#      else
#        (( temp_avail = inode_avail ))
#      fi
#
#      if [ ${inode_quota: -1} = "T" ] ; then
#        (( temp_quota = temp_quota * 1000000000000 ))
#      elif [ ${inode_quota: -1} = "G" ] ; then
#        (( temp_quota = temp_quota * 1000000000 ))
#      elif [ ${inode_quota: -1} = "M" ] ; then
#        (( temp_quota = temp_quota * 1000000 ))
#      elif [ ${inode_quota: -1} = "k" ] ; then
#        (( temp_quota = temp_quota * 1000 ))
#      else
#        (( temp_quota = inode_quota ))
#      fi
#    fi
#    quota_inode=$(echo "scale=2 ; $temp_quota/$temp_avail*100" | bc)
##    echo "inode ratio is " $quota_inode
#
#    if [ ${quota_inode} -ge ${limit_quota} ] ; then
#      IGCM_debug_Print 1 "Please, check your quota of inode on scratch"
#      IGCM_debug_Print 1 "${quota_inode}% of your quota is used"
#      IGCM_debug_Print 1 "Use the ccc_quota command to check"
#      IGCM_debug_Print 1 "You must have more than 10% available to run"
#      IGCM_debug_Exit "Not enough space to run ! STOP HERE"
#      IGCM_debug_Verif_Exit
#    fi
#  fi
  IGCM_debug_PopStack "IGCM_sys_check_quota"
}

#D-#==================================================
#D-function IGCM_sys_projectAccounting
#D-* Purpose: store project accounting information in a file
#D-* Examples:
#D-
# Arguments: $1 - file that should receive the accounting information.
# Not implemented on Jean-Zay yet : the file is only created/touched.
function IGCM_sys_projectAccounting {
  IGCM_debug_PushStack "IGCM_sys_projectAccounting"
  if ( $DEBUG_sys ) ; then
    echo "IGCM_sys_projectAccounting"
  fi

  # needs to be implemented on Jean Zay
  touch "${1}"
  # ssh irene191 /usr/bin/ccc_myproject > $1

  IGCM_debug_PopStack "IGCM_sys_projectAccounting"
}

#D-#==================================================
#D-function IGCM_sys_getJobSchedulerID
#D-* Purpose: Get the job ID during execution
#D-* Examples: IGCM_sys_getJobSchedulerID jobSchedulerID
#D-
# Arguments: $1 - name of the variable that receives ${SLURM_JOBID}
function IGCM_sys_getJobSchedulerID {
  IGCM_debug_PushStack "IGCM_sys_getJobSchedulerID"
  if ( $DEBUG_sys ) ; then
    echo "IGCM_sys_getJobSchedulerID"
  fi

  eval ${1}=${SLURM_JOBID}

  IGCM_debug_PopStack "IGCM_sys_getJobSchedulerID"
}

#D-#==================================================
#D-function IGCM_sys_GetJobID
#D-* Purpose: Get the job ID from the JobName
#D-* Examples: IGCM_sys_GetJobID ${JobName} ${TargetUsr} JobID
#D-
# Arguments: $1 - job name (unused : the jobs are matched against
#                 Job_${config_UserChoices_JobName})
#            $2 - user owning the jobs; falls back to ${user}, then ${LOGIN}
#            $3 - name of the variable that receives the job ID(s)
# Writes:    ID (global)
function IGCM_sys_GetJobID {
  IGCM_debug_PushStack "IGCM_sys_GetJobID"
  if ( $DEBUG_sys ) ; then
    echo "IGCM_sys_GetJobID"
  fi

  # "%i %j" prints the job id and the untruncated job name, without header
  # (the default squeue layout cuts the name after 8 characters).
  ID=$( squeue -h -u "${2:-${user:-${LOGIN}}}" -o "%i %j" | \
    grep Job_${config_UserChoices_JobName} | gawk ' { print $1}' )

  # Deferred expansion : the value is assigned whole even if several jobs match.
  eval "${3}=\${ID}"

  IGCM_debug_PopStack "IGCM_sys_GetJobID"
}

#D-#==================================================
#D-function IGCM_sys_CountJobInQueue
#D-* Purpose: Count number of users job
#D-* Examples: IGCM_sys_CountJobInQueue ${JobName} NbRun
#D-
# Arguments: $1 - job name, used as a regular expression on the job names
#            $2 - name of the variable that receives the count
# Reads:     user (falls back to LOGIN)
# Writes:    NbRun (global)
function IGCM_sys_CountJobInQueue {
  IGCM_debug_PushStack "IGCM_sys_CountJobInQueue"
  if ( $DEBUG_sys ) ; then
    echo "IGCM_sys_CountJobInQueue"
  fi

  # One full job name per line, no header. The name to look for has to be
  # handed to gawk with -v, otherwise JobName is empty and every job matches.
  NbRun=$( squeue -h -u "${user:-${LOGIN}}" -o "%j" | \
    gawk -v JobName="${1}" 'BEGIN { x=0 } ( $NF ~ JobName ) { x=x+1 } END { print x }' )

  eval ${2}=${NbRun}

  IGCM_debug_PopStack "IGCM_sys_CountJobInQueue"
}

#D-#==================================================
#D-function IGCM_sys_ListJobInQueue
#D-* Purpose: Produce a list of users computing jobs (excluding post-processing)
#D-* Examples: IGCM_sys_ListJobInQueue ${User} JobNameList
#D-
# Arguments: $1 - user owning the jobs; falls back to ${user}, then ${LOGIN}
#            $2 - name of the array that receives the job names
# Writes:    JobList (global)
# NOTE(review): post-processing jobs are not filtered out yet, see the
# commented implementation kept below.
function IGCM_sys_ListJobInQueue {
  IGCM_debug_PushStack "IGCM_sys_ListJobInQueue"
  if ( $DEBUG_sys ) ; then
    echo "IGCM_sys_ListJobInQueue"
  fi

  # to be implemented on Jean Zay
  # Full job names, one per line, without header line.
  set -A JobList $( squeue -h -u "${1:-${user:-${LOGIN}}}" -o "%j" )

#  # With -f option, the full job name is given in the last column
#  set -A JobList $( ccc_mstat -f | gawk -v User=$1             \
#                                        '( $2  == User      && \
#                                           $NF != /TS/      && \
#                                           $NF !~ /PACK/    && \
#                                           $NF !~ /REBUILD/ && \
#                                           $NF !~ /pack/ )     \
#                                         { print $NF }' | sed -e "s/\(.*\)\.[0-9]*/\1/" )
#
  eval set -A ${2} ${JobList[*]}

  IGCM_debug_PopStack "IGCM_sys_ListJobInQueue"
}

#D-#==================================================
#D-function IGCM_sys_atlas
#D-* Purpose: encapsulate atlas call so as to manage error code and curie specificity
#D-* Examples:
#D-
# Not implemented on Jean-Zay : apart from the stack push/pop and the
# optional debug echo, the body is entirely commented out.
function IGCM_sys_atlas {
  IGCM_debug_PushStack "IGCM_sys_atlas" $@
  if ( $DEBUG_sys ) ; then
    echo "IGCM_sys_atlas :" $@
  fi

  typeset status

  # To be implemented on Jean Zay
  #
  #  \ccc_mprun atlas $@ > ${OUTCOMMAND_PATH}/out_command_${LOGIN}.$$ 2>&1
  #  status=$?
  #  if [ ${status} -gt 0 ] ; then
  #    echo "IGCM_sys_atlas : error code ${status}"
  #    cat ${OUTCOMMAND_PATH}/out_command_${LOGIN}.$$
  #    \rm ${OUTCOMMAND_PATH}/out_command_${LOGIN}.$$
  #    IGCM_debug_PopStack "IGCM_sys_atlas"
  #    return 1
  #  else
  #    IGCM_debug_PopStack "IGCM_sys_atlas"
  #    return 0
  #  fi

  IGCM_debug_PopStack "IGCM_sys_atlas"
}

#D-#==================================================
#D-function IGCM_sys_rebuild_nemo
#D-* Purpose: rebuild nemo parallel files with nemo specific rebuild on curie
#D-* Examples:
#D-
# Arguments: $1, $2 - passed unchanged to the rebuild_nemo tool
function IGCM_sys_rebuild_nemo {
  IGCM_debug_PushStack "IGCM_sys_rebuild_nemo" $@
  if ( $DEBUG_sys ) ; then
    echo "IGCM_sys_rebuild_nemo :" $@
  fi

  /gpfswork/rech/psl/commun/Tools/rebuild_nemo/bin/rebuild_nemo ${1} ${2}

  IGCM_debug_PopStack "IGCM_sys_rebuild_nemo"
}