#!/usr/bin/env bash
# Slurm submission header for a SETTE test job.
# Active directives are single-hash #SBATCH; retained alternatives for other
# partitions/accounts are double-hash (##SBATCH) and therefore ignored.

#SBATCH -J sette
#SBATCH -o sette.%j.out
#SBATCH -e sette.%j.err
#SBATCH --export=ALL
#SBATCH --parsable
#SBATCH --exclusive
#SBATCH -N 1

##SBATCH --time=00:30:00
#SBATCH --time=00:15:00

##SBATCH -p debug64
##SBATCH -A smer
##SBATCH --qos=debug

#SBATCH -p normal256
##SBATCH -A cmems
##SBATCH --qos=coper

# Test specific settings. Do not hand edit these lines; the fcm_job.sh script will set these
# (via sed operating on this template job file).
#
echo " ";
export XIO_HOME="/home/ext/mr/smer/samsong/SRC/XIOS/xios-2.5/BEL_INTEL18_r1900"
# Default per-node process counts. OCORES/XCORES hold the sed placeholders
# NPROCS/NXIOPROCS until fcm_job.sh substitutes real integers.
O_PER_NODE=32
X_PER_NODE=8
OCORES=NPROCS
XCORES=NXIOPROCS
# Cap O_PER_NODE at the requested ocean core count — but only when OCORES is
# actually numeric, so the unexpanded template no longer triggers a
# "integer expression expected" error from test(1).
case "$OCORES" in
   ''|*[!0-9]*) : ;;   # placeholder not yet substituted: keep the default
   *) if [ "$OCORES" -le 32 ] ; then O_PER_NODE=$OCORES ; fi ;;
esac
export SETTE_DIR=DEF_SETTE_DIR

###############################################################
#
# Set-up: helper functions and environment modules
#
# load sette functions (only post_test_tidyup needed)
#
. ${SETTE_DIR}/all_functions.sh
###############################################################
#
# modules to load: Intel 2018 toolchain with parallel HDF5/netCDF
# (presumably matching the INTEL18 XIOS build in XIO_HOME — verify on
# toolchain upgrades)
module purge
module load intel/2018.5.274
module load intelmpi/2018.5.274
module load phdf5/1.10.5
module load netcdf_par/4.7.1

# Do not remove or change the following line
# BODY
#
# Test-specific settings. Do not hand edit these lines: fcm_job.sh rewrites
# the DEF_* placeholders below (via sed operating on this template job file).
# Note that the number of compute nodes required is also set by fcm_job.sh
# on the select header line above.
#
# The variables below are required by the post_test_tidyup function sourced
# from all_functions.sh.
#
export INPUT_DIR=DEF_INPUT_DIR
export CONFIG_DIR=DEF_CONFIG_DIR
export TOOLS_DIR=DEF_TOOLS_DIR
export NEMO_VALIDATION_DIR=DEF_NEMO_VALIDATION
export NEW_CONF=DEF_NEW_CONF
export CMP_NAM=DEF_CMP_NAM
export TEST_NAME=DEF_TEST_NAME
export EXE_DIR=DEF_EXE_DIR
# Report the current limits, then lift the stack-size cap for the model run
ulimit -a
ulimit -s unlimited
#
# end of set up
###############################################################
#
# change to the working directory; abort if it is missing, otherwise the
# MPI launch below would silently run in the wrong directory
#
cd "$EXE_DIR" || { echo "cd to $EXE_DIR failed; run aborted" >&2 ; exit 1 ; }

echo "Running on host $(hostname)"
echo "Time is $(date)"
echo "Directory is $(pwd)"
#
# Run the parallel MPI executable
#
if [ "$XCORES" -gt 0 ]; then
   #
   # Run MPMD case: NEMO plus detached xios_server.exe
   #
   # XIOS will run on a separate node so will run in parallel queue
   if [ ! -f ./xios_server.exe ] && [ -f "${XIO_HOME}/bin/xios_server.exe" ]; then
      cp "${XIO_HOME}/bin/xios_server.exe" .
   fi
   # Abort with a non-zero status (a bare 'exit' here returned 0 and hid the
   # failure from the submitting script); diagnostics go to stderr.
   if [ ! -f ./xios_server.exe ]; then
      echo "./xios_server.exe not found" >&2
      echo "run aborted" >&2
      exit 1
   fi

#   cat > mpmd.conf <<EOF
#0-$((OCORES-1)) ./nemo
#${OCORES}-39 ./xios_server.exe
#EOF
   # MPMD layout consumed by "mpiexec.hydra -configfile" below
   cat > mpmd.conf <<EOF
-n ${OCORES} ./nemo
-n ${XCORES} ./xios_server.exe
EOF

#   echo time srun --cpu_bind=cores --mpi=pmi2 -m cyclic -n 40 --multi-prog ./mpmd.conf
#   time srun --cpu_bind=cores --mpi=pmi2 -m cyclic -n 40 --multi-prog ./mpmd.conf

   # Comm/Fabric
   # -----------
   export DAPL_ACK_RETRY=7
   export DAPL_ACK_TIMER=20
   export DAPL_IB_SL=0
   export DAPL_UCM_CQ_SIZE=8192
   export DAPL_UCM_DREQ_RETRY=4
   export DAPL_UCM_QP_SIZE=8192
   export DAPL_UCM_REP_TIME=8000
   export DAPL_UCM_RTU_TIME=8000
   export DAPL_UCM_WAIT_TIME=10000
   export I_MPI_CHECK_DAPL_PROVIDER_COMPATIBILITY=0
   export I_MPI_CHECK_DAPL_PROVIDER_MISMATCH=none
   export I_MPI_DAPL_RDMA_MIXED=enable
   export I_MPI_DAPL_SCALABLE_PROGRESS=1
   export I_MPI_DAPL_TRANSLATION_CACHE=1
   export I_MPI_DAPL_UD_DIRECT_COPY_THRESHOLD=65536
   export I_MPI_DAPL_UD=on
   export I_MPI_FABRICS=shm:dapl
   export I_MPI_DAPL_PROVIDER=ofa-v2-mlx5_0-1u
   export I_MPI_FALLBACK=disable
   export I_MPI_FALLBACK_DEVICE=disable
   export I_MPI_DYNAMIC_CONNECTION=1
   export I_MPI_FAST_COLLECTIVES=1
   export I_MPI_LARGE_SCALE_THRESHOLD=8192
   # File system
   # -----------
   export I_MPI_EXTRA_FILESYSTEM_LIST=lustre
   export I_MPI_EXTRA_FILESYSTEM=on
   # Slurm
   # -----
   export I_MPI_HYDRA_BOOTSTRAP=slurm
   export I_MPI_SLURM_EXT=0
   # Force kill job
   # --------------
   export I_MPI_JOB_SIGNAL_PROPAGATION=on
   export I_MPI_JOB_ABORT_SIGNAL=9
   # Extra
   # -----
   export I_MPI_LIBRARY_KIND=release_mt
   export EC_MPI_ATEXIT=0
   export EC_PROFILE_HEAP=0
   # Process placement (cyclic)
   # --------------------------
   export I_MPI_JOB_RESPECT_PROCESS_PLACEMENT=off
   export I_MPI_PERHOST=1
   # Process pinning
   # ---------------
   export I_MPI_PIN=enable
   export I_MPI_PIN_PROCESSOR_LIST="allcores:map=scatter" # map=spread

   echo time mpiexec.hydra -configfile ./mpmd.conf
   time mpiexec.hydra -configfile ./mpmd.conf
#
else
   #
   # Run SPMD case: NEMO only (XIOS attached, no separate servers)
   #
   echo time srun --cpu_bind=cores --mpi=pmi2 -m cyclic -n ${OCORES} ./nemo
   time srun --cpu_bind=cores --mpi=pmi2 -m cyclic -n ${OCORES} ./nemo
fi
#
#

#
# Archive/clean up the run via the function sourced from all_functions.sh
post_test_tidyup
# END_BODY
# Do not remove or change the previous line
exit