#!/usr/bin/env bash

#SBATCH -J sette
#SBATCH -o sette.%j.out
#SBATCH -e sette.%j.err
#SBATCH --export=ALL
#SBATCH --parsable
#SBATCH --exclusive
#SBATCH -N 1

#SBATCH -p normal256
#SBATCH --time=01:00:00
##SBATCH --time=00:15:00

#SBATCH -A smer
##SBATCH -A cmems

#SBATCH --qos=normal
##SBATCH --qos=coper


# Test specific settings. Do not hand edit these lines; the fcm_job.sh script will set these
# (via sed operating on this template job file).
#
echo " ";
OCORES=NPROCS
XCORES=NXIOPROCS
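# Default per-node process counts for the ocean model and the XIOS servers; the two
# tests below reduce them when fewer processes than a full node's worth are requested.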
O_PER_NODE=32
X_PER_NODE=8
if [ $XCORES -le $X_PER_NODE ]; then X_PER_NODE=$XCORES; fi
if [ $OCORES -le $O_PER_NODE ]; then O_PER_NODE=$OCORES; fi
export SETTE_DIR=DEF_SETTE_DIR
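# For illustration only (hypothetical values and paths; fcm_job.sh performs the real
# substitution when it generates the runnable batch file from this template), the sed
# step is expected to look something like:
#   sed -e "s/NPROCS/32/" -e "s/NXIOPROCS/4/" -e "s:DEF_SETTE_DIR:/path/to/sette:" ...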

###############################################################
#
#
# load sette functions (only post_test_tidyup needed)
#
. ${SETTE_DIR}/all_functions.sh
###############################################################
#
# modules to load
module purge
module load intel/2018.5.274
module load intelmpi/2018.5.274
module load phdf5/1.8.18
module load netcdf_par/4.7.1_V2
module load xios-2.5_rev1903
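# The xios-2.5 module is assumed to define XIOS_DIR; expose it as XIO_HOME for the
# xios_server.exe checks further down.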
export XIO_HOME=${XIOS_DIR}

# Do not remove or change the following line
# BODY
#
# Test specific settings. Do not hand edit these lines; the fcm_job.sh script will set these
# (via sed operating on this template job file). Note that the number of compute nodes required
# is also set by fcm_job.sh on the Slurm node request (#SBATCH -N) header line above.
#
# These variables are needed by the post_test_tidyup function in all_functions.sh
#
export INPUT_DIR=DEF_INPUT_DIR
export CONFIG_DIR=DEF_CONFIG_DIR
export TOOLS_DIR=DEF_TOOLS_DIR
export NEMO_VALIDATION_DIR=DEF_NEMO_VALIDATION
export NEW_CONF=DEF_NEW_CONF
export CMP_NAM=DEF_CMP_NAM
export TEST_NAME=DEF_TEST_NAME
export EXE_DIR=DEF_EXE_DIR
ulimit -a
ulimit -s unlimited
#
# end of set up
###############################################################
#
# change to the working directory
#
cd $EXE_DIR

echo Running on host `hostname`
echo Time is `date`
echo Directory is `pwd`
#
# Run the parallel MPI executable
#

# Comm/Fabric
# -----------
export DAPL_ACK_RETRY=7
export DAPL_ACK_TIMER=20
export DAPL_IB_SL=0
export DAPL_UCM_CQ_SIZE=8192
export DAPL_UCM_DREQ_RETRY=4
export DAPL_UCM_QP_SIZE=8192
export DAPL_UCM_REP_TIME=8000
export DAPL_UCM_RTU_TIME=8000
export DAPL_UCM_WAIT_TIME=10000
export I_MPI_CHECK_DAPL_PROVIDER_COMPATIBILITY=0
export I_MPI_CHECK_DAPL_PROVIDER_MISMATCH=none
export I_MPI_DAPL_RDMA_MIXED=enable
export I_MPI_DAPL_SCALABLE_PROGRESS=1
export I_MPI_DAPL_TRANSLATION_CACHE=1
export I_MPI_DAPL_UD_DIRECT_COPY_THRESHOLD=65536
export I_MPI_DAPL_UD=on
export I_MPI_FABRICS=shm:dapl
export I_MPI_DAPL_PROVIDER=ofa-v2-mlx5_0-1u
export I_MPI_FALLBACK=disable
export I_MPI_FALLBACK_DEVICE=disable
export I_MPI_DYNAMIC_CONNECTION=1
export I_MPI_FAST_COLLECTIVES=1
export I_MPI_LARGE_SCALE_THRESHOLD=8192
# File system
# -----------
export I_MPI_EXTRA_FILESYSTEM_LIST=lustre
export I_MPI_EXTRA_FILESYSTEM=on
# Slurm
# -----
export I_MPI_HYDRA_BOOTSTRAP=slurm
export I_MPI_SLURM_EXT=0
# Force kill job
# --------------
export I_MPI_JOB_SIGNAL_PROPAGATION=on
export I_MPI_JOB_ABORT_SIGNAL=9
# Extra
# -----
export I_MPI_LIBRARY_KIND=release_mt
export EC_MPI_ATEXIT=0
export EC_PROFILE_HEAP=0
# Process placement (cyclic)
# --------------------------
export I_MPI_JOB_RESPECT_PROCESS_PLACEMENT=off
export I_MPI_PERHOST=1
# Process pinning
# ---------------
export I_MPI_PIN=enable
export I_MPI_PIN_PROCESSOR_LIST="allcores:map=scatter" # map=spread
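# Roughly: "allcores" pins over all physical cores of the node, and "map=scatter"
# places consecutive ranks as far apart as possible (avoiding shared caches/sockets),
# whereas "map=spread" keeps them consecutive while still avoiding shared resources.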

if [ $XCORES -gt 0 ]; then
  #
  # Run MPMD case
  #
  # XIOS will run on a separate node, so it will run in the parallel queue
  if [ ! -f ./xios_server.exe ] && [ -f ${XIO_HOME}/bin/xios_server.exe ]; then
    cp ${XIO_HOME}/bin/xios_server.exe .
  fi
  if [ ! -f ./xios_server.exe ]; then
    echo "./xios_server.exe not found"
    echo "run aborted"
    exit 1
  fi

  # cat > mpmd.conf <<EOF
  #0-$((OCORES-1)) ./nemo
  #${OCORES}-39 ./xios_server.exe
  #EOF
  cat > mpmd.conf <<EOF
-n ${OCORES} ./nemo
-n ${XCORES} ./xios_server.exe
EOF
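  # For illustration only (hypothetical counts; NPROCS/NXIOPROCS are substituted by
  # fcm_job.sh): with OCORES=28 and XCORES=4 the generated mpmd.conf would contain
  #   -n 28 ./nemo
  #   -n 4 ./xios_server.exe
  # i.e. one "-n <count> <executable>" entry per line for mpiexec.hydra -configfile.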

  # echo time srun --cpu_bind=cores --mpi=pmi2 -m cyclic -n 40 --multi-prog ./mpmd.conf
  # time srun --cpu_bind=cores --mpi=pmi2 -m cyclic -n 40 --multi-prog ./mpmd.conf
  echo time mpiexec.hydra -configfile ./mpmd.conf
  time mpiexec.hydra -configfile ./mpmd.conf
  #
else
  #
  # Run SPMD case
  #
  # echo time srun --cpu_bind=cores --mpi=pmi2 -m cyclic -n ${OCORES} ./nemo
  # time srun --cpu_bind=cores --mpi=pmi2 -m cyclic -n ${OCORES} ./nemo
  echo time mpiexec.hydra -n ${OCORES} ./nemo
  time mpiexec.hydra -n ${OCORES} ./nemo
fi
#

#
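# post_test_tidyup (sourced from all_functions.sh above) is expected to archive the
# run output (e.g. ocean.output, run.stat) under NEMO_VALIDATION_DIR for later checks.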
post_test_tidyup
# END_BODY
# Do not remove or change the previous line
exit