Changeset 1287
- Timestamp:
- 10/04/17 11:45:14 (5 years ago)
- Location:
- XIOS/dev/branch_openmp
- Files:
-
- 5 added
- 1 deleted
- 48 edited
Legend:
- Unmodified
- Added
- Removed
-
XIOS/dev/branch_openmp/arch/arch-X64_ADA.fcm
r1134 r1287 10 10 %PROD_CFLAGS -O3 -D BOOST_DISABLE_ASSERTS 11 11 %DEV_CFLAGS -g -traceback 12 %DEBUG_CFLAGS -DBZ_DEBUG -g -fno-inline 12 %DEBUG_CFLAGS -DBZ_DEBUG -g -fno-inline -std=c++11 13 13 14 14 %BASE_FFLAGS -D__NONE__ -
XIOS/dev/branch_openmp/bld.cfg
r1209 r1287 42 42 bld::target test_remap_omp.exe 43 43 bld::target test_unstruct_omp.exe 44 bld::target test_netcdf_omp.exe44 #bld::target test_netcdf_omp.exe 45 45 #bld::target test_client.exe 46 46 #bld::target test_complete.exe -
XIOS/dev/branch_openmp/extern/src_ep_dev/ep_alltoall.cpp
r1147 r1287 1 1 #include "ep_lib.hpp" 2 2 #include <mpi.h> 3 //#include "ep_declaration.hpp" 3 #include "ep_mpi.hpp" 4 4 5 5 6 namespace ep_lib 6 7 { 7 8 9 int MPI_Alltoall(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPI_Comm comm) 10 { 11 if(!comm.is_ep) 12 { 13 return ::MPI_Alltoall(sendbuf, sendcount, to_mpi_type(sendtype), recvbuf, recvcount, to_mpi_type(recvtype), to_mpi_comm(comm.mpi_comm)); 14 } 8 15 9 16 10 int MPI_Alltoall(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPI_Comm comm) 11 { 12 assert(static_cast< ::MPI_Datatype>(sendtype) == static_cast< ::MPI_Datatype>(recvtype)); 13 ::MPI_Aint typesize, llb; 14 ::MPI_Type_get_extent(static_cast< ::MPI_Datatype>(sendtype), &llb, &typesize); 17 assert(valid_type(sendtype) && valid_type(recvtype)); 18 assert(sendcount == recvcount); 19 20 ::MPI_Aint datasize, llb; 21 ::MPI_Type_get_extent(static_cast< ::MPI_Datatype>(sendtype), &llb, &datasize); 22 23 int count = sendcount; 15 24 16 int ep_size; 17 MPI_Comm_size(comm, &ep_size); 25 int ep_rank = comm.ep_comm_ptr->size_rank_info[0].first; 26 int ep_rank_loc = comm.ep_comm_ptr->size_rank_info[1].first; 27 int mpi_rank = comm.ep_comm_ptr->size_rank_info[2].first; 28 int ep_size = comm.ep_comm_ptr->size_rank_info[0].second; 29 int num_ep = comm.ep_comm_ptr->size_rank_info[1].second; 30 int mpi_size = comm.ep_comm_ptr->size_rank_info[2].second; 31 32 void* tmp_recvbuf; 33 if(ep_rank == 0) tmp_recvbuf = new void*[count * ep_size * ep_size * datasize]; 34 35 MPI_Gather(sendbuf, count*ep_size, sendtype, tmp_recvbuf, count*ep_size, recvtype, 0, comm); 36 18 37 38 39 // reorder tmp_buf 40 void* tmp_sendbuf; 41 if(ep_rank == 0) tmp_sendbuf = new void*[count * ep_size * ep_size * datasize]; 19 42 43 if(ep_rank == 0) 20 44 for(int i=0; i<ep_size; i++) 21 45 { 22 ep_lib::MPI_Gather(sendbuf+i*sendcount*typesize, sendcount, sendtype, recvbuf, recvcount, recvtype, i, comm); 46 for(int j=0; j<ep_size; j++) 47 { 48 //printf("tmp_recv[%d] = tmp_send[%d]\n", i*ep_size*count + j*count, j*ep_size*count + i*count); 49 50 memcpy(tmp_sendbuf + j*ep_size*count*datasize + i*count*datasize, tmp_recvbuf + i*ep_size*count*datasize + j*count*datasize, count*datasize); 51 } 23 52 } 24 53 54 MPI_Scatter(tmp_sendbuf, ep_size*count, sendtype, recvbuf, ep_size*recvcount, recvtype, 0, comm); 55 56 if(ep_rank == 0) 57 { 58 delete[] tmp_recvbuf; 59 delete[] tmp_sendbuf; 60 } 25 61 26 62 return 0; -
XIOS/dev/branch_openmp/extern/src_ep_dev/ep_barrier.cpp
r1185 r1287 27 27 return 0; 28 28 } 29 else if(comm.mpi_comm != MPI_COMM_NULL_STD)29 else if(comm.mpi_comm != static_cast< ::MPI_Comm>(MPI_COMM_NULL.mpi_comm)) 30 30 { 31 31 ::MPI_Comm mpi_comm = static_cast< ::MPI_Comm> (comm.mpi_comm); -
XIOS/dev/branch_openmp/extern/src_ep_dev/ep_bcast.cpp
r1134 r1287 9 9 #include <mpi.h> 10 10 #include "ep_declaration.hpp" 11 #include "ep_mpi.hpp" 11 12 12 13 using namespace std; 13 14 14 15 15 namespace ep_lib 16 16 { 17 int MPI_Bcast_local(void *buffer, int count, MPI_Datatype datatype, MPI_Comm comm)18 {19 if(datatype == MPI_INT)20 {21 return MPI_Bcast_local_int(buffer, count, comm);22 }23 else if(datatype == MPI_FLOAT)24 {25 return MPI_Bcast_local_float(buffer, count, comm);26 }27 else if(datatype == MPI_DOUBLE)28 {29 return MPI_Bcast_local_double(buffer, count, comm);30 }31 else if(datatype == MPI_CHAR)32 {33 return MPI_Bcast_local_char(buffer, count, comm);34 }35 else if(datatype == MPI_LONG)36 {37 return MPI_Bcast_local_long(buffer, count, comm);38 }39 else if(datatype == MPI_UNSIGNED_LONG)40 {41 return MPI_Bcast_local_char(buffer, count, comm);42 }43 else44 {45 printf("MPI_Bcast Datatype not supported!\n");46 exit(0);47 }48 }49 50 int MPI_Bcast_local_int(void *buf, int count, MPI_Comm comm)51 {52 int my_rank = comm.ep_comm_ptr->size_rank_info[1].first;53 int num_ep = comm.ep_comm_ptr->size_rank_info[1].second;54 55 int *buffer = comm.my_buffer->buf_int;56 int *tmp = static_cast<int*>(buf);57 58 for(int j=0; j<count; j+=BUFFER_SIZE)59 {60 if(my_rank == 0)61 {62 #pragma omp critical (write_to_buffer)63 {64 copy(tmp+j, tmp+j+min(BUFFER_SIZE, count-j), buffer);65 }66 #pragma omp flush67 }68 69 MPI_Barrier_local(comm);70 71 72 73 if(my_rank != 0)74 {75 #pragma omp flush76 #pragma omp critical (read_from_buffer)77 {78 copy(buffer, buffer+min(BUFFER_SIZE, count-j), tmp+j);79 }80 }81 82 MPI_Barrier_local(comm);83 }84 }85 86 int MPI_Bcast_local_float(void *buf, int count, MPI_Comm comm)87 {88 int my_rank = comm.ep_comm_ptr->size_rank_info[1].first;89 int num_ep = comm.ep_comm_ptr->size_rank_info[1].second;90 91 float *buffer = comm.my_buffer->buf_float;92 float *tmp = static_cast<float*>(buf);93 94 for(int j=0; j<count; j+=BUFFER_SIZE)95 {96 if(my_rank == 0)97 {98 #pragma omp critical (write_to_buffer)99 {100 copy(tmp+j, tmp+j+min(BUFFER_SIZE, count-j), buffer);101 }102 #pragma omp flush103 }104 105 MPI_Barrier_local(comm);106 107 108 if(my_rank != 0)109 {110 #pragma omp flush111 #pragma omp critical (read_from_buffer)112 {113 copy(buffer, buffer+min(BUFFER_SIZE, count-j), tmp+j);114 }115 }116 117 MPI_Barrier_local(comm);118 }119 }120 121 int MPI_Bcast_local_double(void *buf, int count, MPI_Comm comm)122 {123 int my_rank = comm.ep_comm_ptr->size_rank_info[1].first;124 int num_ep = comm.ep_comm_ptr->size_rank_info[1].second;125 126 double *buffer = comm.my_buffer->buf_double;127 double *tmp = static_cast<double*>(buf);128 129 for(int j=0; j<count; j+=BUFFER_SIZE)130 {131 if(my_rank == 0)132 {133 #pragma omp critical (write_to_buffer)134 {135 copy(tmp+j, tmp+j+min(BUFFER_SIZE, count-j), buffer);136 }137 #pragma omp flush138 }139 140 MPI_Barrier_local(comm);141 142 143 if(my_rank != 0)144 {145 #pragma omp flush146 #pragma omp critical (read_from_buffer)147 {148 copy(buffer, buffer+min(BUFFER_SIZE, count-j), tmp+j);149 }150 }151 152 MPI_Barrier_local(comm);153 }154 }155 156 157 int MPI_Bcast_local_char(void *buf, int count, MPI_Comm comm)158 {159 int my_rank = comm.ep_comm_ptr->size_rank_info[1].first;160 int num_ep = comm.ep_comm_ptr->size_rank_info[1].second;161 162 char *buffer = comm.my_buffer->buf_char;163 char *tmp = static_cast<char*>(buf);164 165 for(int j=0; j<count; j+=BUFFER_SIZE)166 {167 if(my_rank == 0)168 {169 #pragma omp critical (write_to_buffer)170 {171 copy(tmp+j, tmp+j+min(BUFFER_SIZE, count-j), buffer);172 }173 #pragma omp flush174 }175 176 MPI_Barrier_local(comm);177 178 179 if(my_rank != 0)180 {181 #pragma omp flush182 #pragma omp critical (read_from_buffer)183 {184 copy(buffer, buffer+min(BUFFER_SIZE, count-j), tmp+j);185 }186 }187 188 MPI_Barrier_local(comm);189 }190 }191 192 int MPI_Bcast_local_long(void *buf, int count, MPI_Comm comm)193 {194 int my_rank = comm.ep_comm_ptr->size_rank_info[1].first;195 int num_ep = comm.ep_comm_ptr->size_rank_info[1].second;196 197 long *buffer = comm.my_buffer->buf_long;198 long *tmp = static_cast<long*>(buf);199 200 for(int j=0; j<count; j+=BUFFER_SIZE)201 {202 if(my_rank == 0)203 {204 #pragma omp critical (write_to_buffer)205 {206 copy(tmp+j, tmp+j+min(BUFFER_SIZE, count-j), buffer);207 }208 #pragma omp flush209 }210 211 MPI_Barrier_local(comm);212 213 214 if(my_rank != 0)215 {216 #pragma omp flush217 #pragma omp critical (read_from_buffer)218 {219 copy(buffer, buffer+min(BUFFER_SIZE, count-j), tmp+j);220 }221 }222 223 MPI_Barrier_local(comm);224 }225 }226 227 int MPI_Bcast_local_ulong(void *buf, int count, MPI_Comm comm)228 {229 int my_rank = comm.ep_comm_ptr->size_rank_info[1].first;230 int num_ep = comm.ep_comm_ptr->size_rank_info[1].second;231 232 unsigned long *buffer = comm.my_buffer->buf_ulong;233 unsigned long *tmp = static_cast<unsigned long*>(buf);234 235 for(int j=0; j<count; j+=BUFFER_SIZE)236 {237 if(my_rank == 0)238 {239 #pragma omp critical (write_to_buffer)240 {241 copy(tmp+j, tmp+j+min(BUFFER_SIZE, count-j), buffer);242 }243 #pragma omp flush244 }245 246 MPI_Barrier_local(comm);247 248 249 if(my_rank != 0)250 {251 #pragma omp flush252 #pragma omp critical (read_from_buffer)253 {254 copy(buffer, buffer+min(BUFFER_SIZE, count-j), tmp+j);255 }256 }257 258 MPI_Barrier_local(comm);259 }260 }261 262 17 263 18 int MPI_Bcast(void *buffer, int count, MPI_Datatype datatype, int root, MPI_Comm comm) … … 266 21 if(!comm.is_ep) 267 22 { 23 #pragma omp single nowait 268 24 ::MPI_Bcast(buffer, count, static_cast< ::MPI_Datatype>(datatype), root, static_cast< ::MPI_Comm>(comm.mpi_comm)); 269 25 return 0; … … 271 27 272 28 273 int ep_rank, ep_rank_loc, mpi_rank; 274 int ep_size, num_ep, mpi_size; 275 276 ep_rank = comm.ep_comm_ptr->size_rank_info[0].first; 277 ep_rank_loc = comm.ep_comm_ptr->size_rank_info[1].first; 278 mpi_rank = comm.ep_comm_ptr->size_rank_info[2].first; 279 ep_size = comm.ep_comm_ptr->size_rank_info[0].second; 280 num_ep = comm.ep_comm_ptr->size_rank_info[1].second; 281 mpi_size = comm.ep_comm_ptr->size_rank_info[2].second; 282 283 29 int ep_rank = comm.ep_comm_ptr->size_rank_info[0].first; 30 int ep_rank_loc = comm.ep_comm_ptr->size_rank_info[1].first; 31 int mpi_rank = comm.ep_comm_ptr->size_rank_info[2].first; 284 32 285 33 int root_mpi_rank = comm.rank_map->at(root).second; 286 34 int root_ep_rank_loc = comm.rank_map->at(root).first; 287 35 36 // printf("root_mpi_rank = %d\n", root_mpi_rank); 288 37 289 // if root is not master thread, send first to master 290 if(root_ep_rank_loc != 0 && mpi_rank == root_mpi_rank) 38 if((ep_rank_loc==0 && mpi_rank != root_mpi_rank ) || ep_rank == root) 291 39 { 292 innode_memcpy(root_ep_rank_loc, buffer, 0, buffer, count, datatype, comm);40 ::MPI_Bcast(buffer, count, to_mpi_type(datatype), root_mpi_rank, to_mpi_comm(comm.mpi_comm)); 293 41 } 294 42 295 296 if(ep_rank_loc==0) 297 { 298 ::MPI_Bcast(buffer, count, static_cast< ::MPI_Datatype>(datatype), root_mpi_rank, static_cast< ::MPI_Comm>(comm.mpi_comm)); 299 } 300 301 MPI_Bcast_local(buffer, count, datatype, comm); 43 if(mpi_rank == root_mpi_rank) MPI_Bcast_local(buffer, count, datatype, root_ep_rank_loc, comm); 44 else MPI_Bcast_local(buffer, count, datatype, 0, comm); 302 45 303 46 return 0; … … 305 48 306 49 50 51 int MPI_Bcast_local(void *buffer, int count, MPI_Datatype datatype, int local_root, MPI_Comm comm) 52 { 53 assert(valid_type(datatype)); 54 55 int ep_rank_loc = comm.ep_comm_ptr->size_rank_info[1].first; 56 57 ::MPI_Aint datasize, lb; 58 ::MPI_Type_get_extent(to_mpi_type(datatype), &lb, &datasize); 59 60 61 if(ep_rank_loc == local_root) 62 { 63 comm.my_buffer->void_buffer[local_root] = buffer; 64 } 65 66 // #pragma omp flush 67 MPI_Barrier_local(comm); 68 // #pragma omp flush 69 70 if(ep_rank_loc != local_root) 71 { 72 #pragma omp critical (_bcast) 73 memcpy(buffer, comm.my_buffer->void_buffer[local_root], datasize * count); 74 } 75 76 MPI_Barrier_local(comm); 77 } 78 307 79 } -
XIOS/dev/branch_openmp/extern/src_ep_dev/ep_create.cpp
r1134 r1287 52 52 } 53 53 54 ::MPI_Allgather(&num_ep, 1, MPI_INT _STD, &recv_num_ep[0], 1, MPI_INT_STD, mpi_base_comm);54 ::MPI_Allgather(&num_ep, 1, MPI_INT, &recv_num_ep[0], 1, MPI_INT, mpi_base_comm); 55 55 56 56 … … 63 63 64 64 out_comm_hdls[0].my_buffer = new BUFFER; 65 out_comm_hdls[0].my_buffer->buf_double = new double[BUFFER_SIZE];66 out_comm_hdls[0].my_buffer->buf_float = new float[BUFFER_SIZE];67 out_comm_hdls[0].my_buffer->buf_int = new int[BUFFER_SIZE];68 out_comm_hdls[0].my_buffer->buf_long = new long[BUFFER_SIZE];69 out_comm_hdls[0].my_buffer->buf_ulong = new unsigned long[BUFFER_SIZE];70 out_comm_hdls[0].my_buffer->buf_char = new char[BUFFER_SIZE];65 // out_comm_hdls[0].my_buffer->buf_double = new double[BUFFER_SIZE]; 66 // out_comm_hdls[0].my_buffer->buf_float = new float[BUFFER_SIZE]; 67 // out_comm_hdls[0].my_buffer->buf_int = new int[BUFFER_SIZE]; 68 // out_comm_hdls[0].my_buffer->buf_long = new long[BUFFER_SIZE]; 69 // out_comm_hdls[0].my_buffer->buf_ulong = new unsigned long[BUFFER_SIZE]; 70 // out_comm_hdls[0].my_buffer->buf_char = new char[BUFFER_SIZE]; 71 71 72 72 out_comm_hdls[0].rank_map = new RANK_MAP; … … 135 135 } 136 136 137 ::MPI_Allgather(&num_ep, 1, MPI_INT _STD, &recv_num_ep[0], 1, MPI_INT_STD, mpi_base_comm);137 ::MPI_Allgather(&num_ep, 1, MPI_INT, &recv_num_ep[0], 1, MPI_INT, mpi_base_comm); 138 138 139 139 int sum = 0; // representing total ep number of process with smaller rank … … 145 145 146 146 out_comm_hdls[0].my_buffer = new BUFFER; 147 out_comm_hdls[0].my_buffer->buf_double = new double[BUFFER_SIZE];148 out_comm_hdls[0].my_buffer->buf_float = new float[BUFFER_SIZE];149 out_comm_hdls[0].my_buffer->buf_int = new int[BUFFER_SIZE];150 out_comm_hdls[0].my_buffer->buf_long = new long[BUFFER_SIZE];151 out_comm_hdls[0].my_buffer->buf_ulong = new unsigned long[BUFFER_SIZE];152 out_comm_hdls[0].my_buffer->buf_char = new char[BUFFER_SIZE];147 // out_comm_hdls[0].my_buffer->buf_double = new double[BUFFER_SIZE]; 148 // out_comm_hdls[0].my_buffer->buf_float = new float[BUFFER_SIZE]; 149 // out_comm_hdls[0].my_buffer->buf_int = new int[BUFFER_SIZE]; 150 // out_comm_hdls[0].my_buffer->buf_long = new long[BUFFER_SIZE]; 151 // out_comm_hdls[0].my_buffer->buf_ulong = new unsigned long[BUFFER_SIZE]; 152 // out_comm_hdls[0].my_buffer->buf_char = new char[BUFFER_SIZE]; 153 153 154 154 out_comm_hdls[0].rank_map = new RANK_MAP; … … 222 222 } 223 223 224 ::MPI_Allgather(&num_ep, 1, MPI_INT _STD, &recv_num_ep[0], 1, MPI_INT_STD, mpi_base_comm);224 ::MPI_Allgather(&num_ep, 1, MPI_INT, &recv_num_ep[0], 1, MPI_INT, mpi_base_comm); 225 225 226 226 … … 233 233 234 234 out_comm_hdls[0].my_buffer = new BUFFER; 235 out_comm_hdls[0].my_buffer->buf_double = new double[BUFFER_SIZE];236 out_comm_hdls[0].my_buffer->buf_float = new float[BUFFER_SIZE];237 out_comm_hdls[0].my_buffer->buf_int = new int[BUFFER_SIZE];238 out_comm_hdls[0].my_buffer->buf_long = new long[BUFFER_SIZE];239 out_comm_hdls[0].my_buffer->buf_ulong = new unsigned long[BUFFER_SIZE];240 out_comm_hdls[0].my_buffer->buf_char = new char[BUFFER_SIZE];235 // out_comm_hdls[0].my_buffer->buf_double = new double[BUFFER_SIZE]; 236 // out_comm_hdls[0].my_buffer->buf_float = new float[BUFFER_SIZE]; 237 // out_comm_hdls[0].my_buffer->buf_int = new int[BUFFER_SIZE]; 238 // out_comm_hdls[0].my_buffer->buf_long = new long[BUFFER_SIZE]; 239 // out_comm_hdls[0].my_buffer->buf_ulong = new unsigned long[BUFFER_SIZE]; 240 // out_comm_hdls[0].my_buffer->buf_char = new char[BUFFER_SIZE]; 241 241 242 242 out_comm_hdls[0].rank_map = new RANK_MAP; -
XIOS/dev/branch_openmp/extern/src_ep_dev/ep_declaration.cpp
r1134 r1287 4 4 5 5 #include <mpi.h> 6 7 ::MPI_Comm MPI_COMM_WORLD_STD = MPI_COMM_WORLD; 8 #undef MPI_COMM_WORLD 9 10 11 ::MPI_Comm MPI_COMM_NULL_STD = MPI_COMM_NULL; 12 #undef MPI_COMM_NULL 13 14 15 ::MPI_Request MPI_REQUEST_NULL_STD = MPI_REQUEST_NULL; 16 #undef MPI_REQUEST_NULL 17 18 ::MPI_Info MPI_INFO_NULL_STD = MPI_INFO_NULL; 19 #undef MPI_INFO_NULL 20 21 ::MPI_Datatype MPI_INT_STD = MPI_INT; 22 ::MPI_Datatype MPI_FLOAT_STD = MPI_FLOAT; 23 ::MPI_Datatype MPI_DOUBLE_STD = MPI_DOUBLE; 24 ::MPI_Datatype MPI_LONG_STD = MPI_LONG; 25 ::MPI_Datatype MPI_CHAR_STD = MPI_CHAR; 26 ::MPI_Datatype MPI_UNSIGNED_LONG_STD = MPI_UNSIGNED_LONG; 27 ::MPI_Datatype MPI_UNSIGNED_CHAR_STD = MPI_UNSIGNED_CHAR; 28 29 #undef MPI_INT 30 #undef MPI_FLOAT 31 #undef MPI_DOUBLE 32 #undef MPI_LONG 33 #undef MPI_CHAR 34 #undef MPI_UNSIGNED_LONG 35 #undef MPI_UNSIGNED_CHAR 36 37 38 ::MPI_Op MPI_SUM_STD = MPI_SUM; 39 ::MPI_Op MPI_MAX_STD = MPI_MAX; 40 ::MPI_Op MPI_MIN_STD = MPI_MIN; 41 42 #undef MPI_SUM 43 #undef MPI_MAX 44 #undef MPI_MIN 45 6 46 7 47 #undef MPI_INT -
XIOS/dev/branch_openmp/extern/src_ep_dev/ep_declaration.hpp
r1160 r1287 1 1 #ifndef EP_DECLARATION_HPP_INCLUDED 2 2 #define EP_DECLARATION_HPP_INCLUDED 3 3 /* 4 4 extern ::MPI_Datatype MPI_INT_STD; 5 5 extern ::MPI_Datatype MPI_FLOAT_STD; … … 20 20 extern ::MPI_Request MPI_REQUEST_NULL_STD; 21 21 extern ::MPI_Info MPI_INFO_NULL_STD; 22 22 */ 23 23 #undef MPI_INT 24 24 #undef MPI_FLOAT … … 37 37 38 38 #undef MPI_REQUEST_NULL 39 40 41 42 39 #undef MPI_STATUS_IGNORE 40 #undef MPI_INFO_NULL 43 41 44 42 extern ep_lib::MPI_Datatype MPI_INT; … … 59 57 extern ep_lib::MPI_Status MPI_STATUS_IGNORE; 60 58 extern ep_lib::MPI_Request MPI_REQUEST_NULL; 61 //extern ep_lib::MPI_Info MPI_INFO_NULL; 59 extern ep_lib::MPI_Info MPI_INFO_NULL; 60 62 61 63 62 #endif // EP_DECLARATION_HPP_INCLUDED -
XIOS/dev/branch_openmp/extern/src_ep_dev/ep_dup.cpp
r1134 r1287 28 28 29 29 // for intracomm 30 if(comm.mpi_comm == MPI_COMM_NULL_STD) return 0;30 if(comm.mpi_comm == static_cast< ::MPI_Comm >(MPI_COMM_NULL.mpi_comm)) return 0; 31 31 32 32 … … 59 59 { 60 60 61 if(comm.mpi_comm == MPI_COMM_NULL_STD) return 0;61 if(comm.mpi_comm == static_cast< ::MPI_Comm >(MPI_COMM_NULL.mpi_comm)) return 0; 62 62 63 63 int my_rank = comm.ep_comm_ptr->size_rank_info[1].first; -
XIOS/dev/branch_openmp/extern/src_ep_dev/ep_exscan.cpp
r1134 r1287 9 9 #include <mpi.h> 10 10 #include "ep_declaration.hpp" 11 #include "ep_mpi.hpp" 11 12 12 13 using namespace std; … … 26 27 } 27 28 29 template<typename T> 30 void reduce_max(const T * buffer, T* recvbuf, int count) 31 { 32 transform(buffer, buffer+count, recvbuf, recvbuf, max_op<T>); 33 } 34 35 template<typename T> 36 void reduce_min(const T * buffer, T* recvbuf, int count) 37 { 38 transform(buffer, buffer+count, recvbuf, recvbuf, min_op<T>); 39 } 40 41 template<typename T> 42 void reduce_sum(const T * buffer, T* recvbuf, int count) 43 { 44 transform(buffer, buffer+count, recvbuf, recvbuf, std::plus<T>()); 45 } 46 47 28 48 int MPI_Exscan_local(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm) 29 49 { 30 if(datatype == MPI_INT) 31 { 32 return MPI_Exscan_local_int(sendbuf, recvbuf, count, op, comm); 33 } 34 else if(datatype == MPI_FLOAT) 35 { 36 return MPI_Exscan_local_float(sendbuf, recvbuf, count, op, comm); 37 } 38 else if(datatype == MPI_DOUBLE) 39 { 40 return MPI_Exscan_local_double(sendbuf, recvbuf, count, op, comm); 41 } 42 else if(datatype == MPI_LONG) 43 { 44 return MPI_Exscan_local_long(sendbuf, recvbuf, count, op, comm); 45 } 46 else if(datatype == MPI_UNSIGNED_LONG) 47 { 48 return MPI_Exscan_local_ulong(sendbuf, recvbuf, count, op, comm); 49 } 50 else if(datatype == MPI_CHAR) 51 { 52 return MPI_Exscan_local_char(sendbuf, recvbuf, count, op, comm); 53 } 54 else 55 { 56 printf("MPI_Exscan Datatype not supported!\n"); 57 exit(0); 58 } 59 } 60 61 62 63 64 int MPI_Exscan_local_int(const void *sendbuf, void *recvbuf, int count, MPI_Op op, MPI_Comm comm) 65 { 66 int my_rank = comm.ep_comm_ptr->size_rank_info[1].first; 67 int num_ep = comm.ep_comm_ptr->size_rank_info[1].second; 68 69 int *buffer = comm.ep_comm_ptr->comm_list->my_buffer->buf_int; 70 int *send_buf = static_cast<int*>(const_cast<void*>(sendbuf)); 71 int *recv_buf = static_cast<int*>(recvbuf); 72 73 for(int j=0; j<count; j+=BUFFER_SIZE) 74 { 75 76 if(my_rank == 0) 77 { 78 79 #pragma omp critical (write_to_buffer) 80 { 81 copy(send_buf+j, send_buf+j+min(BUFFER_SIZE, count-j), buffer); 82 fill(recv_buf+j, recv_buf+j+min(BUFFER_SIZE, count-j), MPI_UNDEFINED); 83 #pragma omp flush 84 } 85 } 86 87 MPI_Barrier_local(comm); 88 89 for(int k=1; k<num_ep; k++) 90 { 91 #pragma omp critical (write_to_buffer) 92 { 93 if(my_rank == k) 94 { 95 #pragma omp flush 96 if(op == MPI_SUM) 97 { 98 copy(buffer, buffer+min(BUFFER_SIZE, count-j), recv_buf+j); 99 transform(buffer, buffer+min(BUFFER_SIZE, count-j), send_buf+j, buffer, std::plus<int>()); 100 101 } 102 else if(op == MPI_MAX) 103 { 104 copy(buffer, buffer+min(BUFFER_SIZE, count-j), recv_buf+j); 105 transform(buffer, buffer+min(BUFFER_SIZE, count-j), send_buf+j, buffer, max_op<int>); 106 } 107 else if(op == MPI_MIN) 108 { 109 copy(buffer, buffer+min(BUFFER_SIZE, count-j), recv_buf+j); 110 transform(buffer, buffer+min(BUFFER_SIZE, count-j), send_buf+j, buffer, min_op<int>); 111 } 112 else 113 { 114 printf("Supported operation: MPI_SUM, MPI_MAX, MPI_MIN\n"); 115 exit(1); 116 } 117 #pragma omp flush 118 } 119 } 120 121 MPI_Barrier_local(comm); 122 } 123 } 124 125 } 126 127 int MPI_Exscan_local_float(const void *sendbuf, void *recvbuf, int count, MPI_Op op, MPI_Comm comm) 128 { 129 int my_rank = comm.ep_comm_ptr->size_rank_info[1].first; 130 int num_ep = comm.ep_comm_ptr->size_rank_info[1].second; 131 132 float *buffer = comm.ep_comm_ptr->comm_list->my_buffer->buf_float; 133 float *send_buf = static_cast<float*>(const_cast<void*>(sendbuf)); 134 float *recv_buf = static_cast<float*>(recvbuf); 135 136 for(int j=0; j<count; j+=BUFFER_SIZE) 137 { 138 if(my_rank == 0) 139 { 140 141 #pragma omp critical (write_to_buffer) 142 { 143 copy(send_buf+j, send_buf+j+min(BUFFER_SIZE, count-j), buffer); 144 fill(recv_buf+j, recv_buf+j+min(BUFFER_SIZE, count-j), MPI_UNDEFINED); 145 #pragma omp flush 146 } 147 } 148 149 MPI_Barrier_local(comm); 150 151 for(int k=1; k<num_ep; k++) 152 { 153 #pragma omp critical (write_to_buffer) 154 { 155 if(my_rank == k) 156 { 157 #pragma omp flush 158 if(op == MPI_SUM) 159 { 160 copy(buffer, buffer+min(BUFFER_SIZE, count-j), recv_buf+j); 161 transform(buffer, buffer+min(BUFFER_SIZE, count-j), send_buf+j, buffer, std::plus<float>()); 162 } 163 else if(op == MPI_MAX) 164 { 165 copy(buffer, buffer+min(BUFFER_SIZE, count-j), recv_buf+j); 166 transform(buffer, buffer+min(BUFFER_SIZE, count-j), send_buf+j, buffer, max_op<float>); 167 } 168 else if(op == MPI_MIN) 169 { 170 copy(buffer, buffer+min(BUFFER_SIZE, count-j), recv_buf+j); 171 transform(buffer, buffer+min(BUFFER_SIZE, count-j), send_buf+j, buffer, min_op<float>); 172 } 173 else 174 { 175 printf("Supported operation: MPI_SUM, MPI_MAX, MPI_MIN\n"); 176 exit(1); 177 } 178 #pragma omp flush 179 } 180 } 181 182 MPI_Barrier_local(comm); 183 } 184 } 185 } 186 187 int MPI_Exscan_local_double(const void *sendbuf, void *recvbuf, int count, MPI_Op op, MPI_Comm comm) 188 { 189 190 int my_rank = comm.ep_comm_ptr->size_rank_info[1].first; 191 int num_ep = comm.ep_comm_ptr->size_rank_info[1].second; 192 193 double *buffer = comm.ep_comm_ptr->comm_list->my_buffer->buf_double; 194 double *send_buf = static_cast<double*>(const_cast<void*>(sendbuf)); 195 double *recv_buf = static_cast<double*>(recvbuf); 196 197 for(int j=0; j<count; j+=BUFFER_SIZE) 198 { 199 if(my_rank == 0) 200 { 201 202 #pragma omp critical (write_to_buffer) 203 { 204 copy(send_buf+j, send_buf+j+min(BUFFER_SIZE, count-j), buffer); 205 fill(recv_buf+j, recv_buf+j+min(BUFFER_SIZE, count-j), MPI_UNDEFINED); 206 #pragma omp flush 207 } 208 } 209 210 MPI_Barrier_local(comm); 211 212 for(int k=1; k<num_ep; k++) 213 { 214 #pragma omp critical (write_to_buffer) 215 { 216 if(my_rank == k) 217 { 218 #pragma omp flush 219 if(op == MPI_SUM) 220 { 221 copy(buffer, buffer+min(BUFFER_SIZE, count-j), recv_buf+j); 222 transform(buffer, buffer+min(BUFFER_SIZE, count-j), send_buf+j, buffer, std::plus<double>()); 223 } 224 else if(op == MPI_MAX) 225 { 226 copy(buffer, buffer+min(BUFFER_SIZE, count-j), recv_buf+j); 227 transform(buffer, buffer+min(BUFFER_SIZE, count-j), send_buf+j, buffer, max_op<double>); 228 } 229 else if(op == MPI_MIN) 230 { 231 copy(buffer, buffer+min(BUFFER_SIZE, count-j), recv_buf+j); 232 transform(buffer, buffer+min(BUFFER_SIZE, count-j), send_buf+j, buffer, min_op<double>); 233 } 234 else 235 { 236 printf("Supported operation: MPI_SUM, MPI_MAX, MPI_MIN\n"); 237 exit(1); 238 } 239 #pragma omp flush 240 } 241 } 242 243 MPI_Barrier_local(comm); 244 } 245 } 246 } 247 248 int MPI_Exscan_local_long(const void *sendbuf, void *recvbuf, int count, MPI_Op op, MPI_Comm comm) 249 { 250 251 int my_rank = comm.ep_comm_ptr->size_rank_info[1].first; 252 int num_ep = comm.ep_comm_ptr->size_rank_info[1].second; 253 254 long *buffer = comm.ep_comm_ptr->comm_list->my_buffer->buf_long; 255 long *send_buf = static_cast<long*>(const_cast<void*>(sendbuf)); 256 long *recv_buf = static_cast<long*>(recvbuf); 257 258 for(int j=0; j<count; j+=BUFFER_SIZE) 259 { 260 if(my_rank == 0) 261 { 262 263 #pragma omp critical (write_to_buffer) 264 { 265 copy(send_buf+j, send_buf+j+min(BUFFER_SIZE, count-j), buffer); 266 fill(recv_buf+j, recv_buf+j+min(BUFFER_SIZE, count-j), MPI_UNDEFINED); 267 #pragma omp flush 268 } 269 } 270 271 MPI_Barrier_local(comm); 272 273 for(int k=1; k<num_ep; k++) 274 { 275 #pragma omp critical (write_to_buffer) 276 { 277 if(my_rank == k) 278 { 279 #pragma omp flush 280 if(op == MPI_SUM) 281 { 282 copy(buffer, buffer+min(BUFFER_SIZE, count-j), recv_buf+j); 283 transform(buffer, buffer+min(BUFFER_SIZE, count-j), send_buf+j, buffer, std::plus<long>()); 284 } 285 else if(op == MPI_MAX) 286 { 287 copy(buffer, buffer+min(BUFFER_SIZE, count-j), recv_buf+j); 288 transform(buffer, buffer+min(BUFFER_SIZE, count-j), send_buf+j, buffer, max_op<long>); 289 } 290 else if(op == MPI_MIN) 291 { 292 copy(buffer, buffer+min(BUFFER_SIZE, count-j), recv_buf+j); 293 transform(buffer, buffer+min(BUFFER_SIZE, count-j), send_buf+j, buffer, min_op<long>); 294 } 295 else 296 { 297 printf("Supported operation: MPI_SUM, MPI_MAX, MPI_MIN\n"); 298 exit(1); 299 } 300 #pragma omp flush 301 } 302 } 303 304 MPI_Barrier_local(comm); 305 } 306 } 307 } 308 309 int MPI_Exscan_local_ulong(const void *sendbuf, void *recvbuf, int count, MPI_Op op, MPI_Comm comm) 310 { 311 312 int my_rank = comm.ep_comm_ptr->size_rank_info[1].first; 313 int num_ep = comm.ep_comm_ptr->size_rank_info[1].second; 314 315 unsigned long *buffer = comm.ep_comm_ptr->comm_list->my_buffer->buf_ulong; 316 unsigned long *send_buf = static_cast<unsigned long*>(const_cast<void*>(sendbuf)); 317 unsigned long *recv_buf = static_cast<unsigned long*>(recvbuf); 318 319 for(int j=0; j<count; j+=BUFFER_SIZE) 320 { 321 if(my_rank == 0) 322 { 323 324 #pragma omp critical (write_to_buffer) 325 { 326 copy(send_buf+j, send_buf+j+min(BUFFER_SIZE, count-j), buffer); 327 fill(recv_buf+j, recv_buf+j+min(BUFFER_SIZE, count-j), MPI_UNDEFINED); 328 #pragma omp flush 329 } 330 } 331 332 MPI_Barrier_local(comm); 333 334 for(int k=1; k<num_ep; k++) 335 { 336 #pragma omp critical (write_to_buffer) 337 { 338 if(my_rank == k) 339 { 340 #pragma omp flush 341 if(op == MPI_SUM) 342 { 343 copy(buffer, buffer+min(BUFFER_SIZE, count-j), recv_buf+j); 344 transform(buffer, buffer+min(BUFFER_SIZE, count-j), send_buf+j, buffer, std::plus<unsigned long>()); 345 } 346 else if(op == MPI_MAX) 347 { 348 copy(buffer, buffer+min(BUFFER_SIZE, count-j), recv_buf+j); 349 transform(buffer, buffer+min(BUFFER_SIZE, count-j), send_buf+j, buffer, max_op<unsigned long>); 350 } 351 else if(op == MPI_MIN) 352 { 353 copy(buffer, buffer+min(BUFFER_SIZE, count-j), recv_buf+j); 354 transform(buffer, buffer+min(BUFFER_SIZE, count-j), send_buf+j, buffer, min_op<unsigned long>); 355 } 356 else 357 { 358 printf("Supported operation: MPI_SUM, MPI_MAX, MPI_MIN\n"); 359 exit(1); 360 } 361 #pragma omp flush 362 } 363 } 364 365 MPI_Barrier_local(comm); 366 } 367 } 368 } 369 370 int MPI_Exscan_local_char(const void *sendbuf, void *recvbuf, int count, MPI_Op op, MPI_Comm comm) 371 { 372 373 int my_rank = comm.ep_comm_ptr->size_rank_info[1].first; 374 int num_ep = comm.ep_comm_ptr->size_rank_info[1].second; 375 376 char *buffer = comm.ep_comm_ptr->comm_list->my_buffer->buf_char; 377 char *send_buf = static_cast<char*>(const_cast<void*>(sendbuf)); 378 char *recv_buf = static_cast<char*>(recvbuf); 379 380 for(int j=0; j<count; j+=BUFFER_SIZE) 381 { 382 if(my_rank == 0) 383 { 384 385 #pragma omp critical (write_to_buffer) 386 { 387 copy(send_buf+j, send_buf+j+min(BUFFER_SIZE, count-j), buffer); 388 fill(recv_buf+j, recv_buf+j+min(BUFFER_SIZE, count-j), MPI_UNDEFINED); 389 #pragma omp flush 390 } 391 } 392 393 MPI_Barrier_local(comm); 394 395 for(int k=1; k<num_ep; k++) 396 { 397 #pragma omp critical (write_to_buffer) 398 { 399 if(my_rank == k) 400 { 401 #pragma omp flush 402 if(op == MPI_SUM) 403 { 404 copy(buffer, buffer+min(BUFFER_SIZE, count-j), recv_buf+j); 405 transform(buffer, buffer+min(BUFFER_SIZE, count-j), send_buf+j, buffer, std::plus<char>()); 406 } 407 else if(op == MPI_MAX) 408 { 409 copy(buffer, buffer+min(BUFFER_SIZE, count-j), recv_buf+j); 410 transform(buffer, buffer+min(BUFFER_SIZE, count-j), send_buf+j, buffer, max_op<char>); 411 } 412 else if(op == MPI_MIN) 413 { 414 copy(buffer, buffer+min(BUFFER_SIZE, count-j), recv_buf+j); 415 transform(buffer, buffer+min(BUFFER_SIZE, count-j), send_buf+j, buffer, min_op<char>); 416 } 417 else 418 { 419 printf("Supported operation: MPI_SUM, MPI_MAX, MPI_MIN\n"); 420 exit(1); 421 } 422 #pragma omp flush 423 } 424 } 425 426 MPI_Barrier_local(comm); 427 } 428 } 429 } 430 50 valid_op(op); 51 52 int ep_rank_loc = comm.ep_comm_ptr->size_rank_info[1].first; 53 int num_ep = comm.ep_comm_ptr->size_rank_info[1].second; 54 int mpi_rank = comm.ep_comm_ptr->size_rank_info[2].first; 55 56 57 ::MPI_Aint datasize, lb; 58 ::MPI_Type_get_extent(to_mpi_type(datatype), &lb, &datasize); 59 60 if(ep_rank_loc == 0 && mpi_rank != 0) 61 { 62 comm.my_buffer->void_buffer[0] = recvbuf; 63 } 64 if(ep_rank_loc == 0 && mpi_rank == 0) 65 { 66 comm.my_buffer->void_buffer[0] = const_cast<void*>(sendbuf); 67 } 68 69 70 MPI_Barrier_local(comm); 71 72 memcpy(recvbuf, comm.my_buffer->void_buffer[0], datasize*count); 73 74 MPI_Barrier_local(comm); 75 76 comm.my_buffer->void_buffer[ep_rank_loc] = const_cast<void*>(sendbuf); 77 78 MPI_Barrier_local(comm); 79 80 if(op == MPI_SUM) 81 { 82 if(datatype == MPI_INT && datasize == sizeof(int)) 83 { 84 for(int i=0; i<ep_rank_loc; i++) 85 reduce_sum<int>(static_cast<int*>(comm.my_buffer->void_buffer[i]), static_cast<int*>(recvbuf), count); 86 } 87 88 else if(datatype == MPI_FLOAT && datasize == sizeof(float)) 89 { 90 for(int i=0; i<ep_rank_loc; i++) 91 reduce_sum<float>(static_cast<float*>(comm.my_buffer->void_buffer[i]), static_cast<float*>(recvbuf), count); 92 } 93 94 95 else if(datatype == MPI_DOUBLE && datasize == sizeof(double)) 96 { 97 for(int i=0; i<ep_rank_loc; i++) 98 reduce_sum<double>(static_cast<double*>(comm.my_buffer->void_buffer[i]), static_cast<double*>(recvbuf), count); 99 } 100 101 else if(datatype == MPI_CHAR && datasize == sizeof(char)) 102 { 103 for(int i=0; i<ep_rank_loc; i++) 104 reduce_sum<char>(static_cast<char*>(comm.my_buffer->void_buffer[i]), static_cast<char*>(recvbuf), count); 105 } 106 107 else if(datatype == MPI_LONG && datasize == sizeof(long)) 108 { 109 for(int i=0; i<ep_rank_loc; i++) 110 reduce_sum<long>(static_cast<long*>(comm.my_buffer->void_buffer[i]), static_cast<long*>(recvbuf), count); 111 } 112 113 else if(datatype == MPI_UNSIGNED_LONG && datasize == sizeof(unsigned long)) 114 { 115 for(int i=0; i<ep_rank_loc; i++) 116 reduce_sum<unsigned long>(static_cast<unsigned long*>(comm.my_buffer->void_buffer[i]), static_cast<unsigned long*>(recvbuf), count); 117 } 118 119 else printf("datatype Error\n"); 120 121 122 } 123 124 else if(op == MPI_MAX) 125 { 126 if(datatype == MPI_INT && datasize == sizeof(int)) 127 for(int i=0; i<ep_rank_loc; i++) 128 reduce_max<int>(static_cast<int*>(comm.my_buffer->void_buffer[i]), static_cast<int*>(recvbuf), count); 129 130 else if(datatype == MPI_FLOAT && datasize == sizeof(float)) 131 for(int i=0; i<ep_rank_loc; i++) 132 reduce_max<float>(static_cast<float*>(comm.my_buffer->void_buffer[i]), static_cast<float*>(recvbuf), count); 133 134 else if(datatype == MPI_DOUBLE && datasize == sizeof(double)) 135 for(int i=0; i<ep_rank_loc; i++) 136 reduce_max<double>(static_cast<double*>(comm.my_buffer->void_buffer[i]), static_cast<double*>(recvbuf), count); 137 138 else if(datatype == MPI_CHAR && datasize == sizeof(char)) 139 for(int i=0; i<ep_rank_loc; i++) 140 reduce_max<char>(static_cast<char*>(comm.my_buffer->void_buffer[i]), static_cast<char*>(recvbuf), count); 141 142 else if(datatype == MPI_LONG && datasize == sizeof(long)) 143 for(int i=0; i<ep_rank_loc; i++) 144 reduce_max<long>(static_cast<long*>(comm.my_buffer->void_buffer[i]), static_cast<long*>(recvbuf), count); 145 146 else if(datatype == MPI_UNSIGNED_LONG && datasize == sizeof(unsigned long)) 147 for(int i=0; i<ep_rank_loc; i++) 148 reduce_max<unsigned long>(static_cast<unsigned long*>(comm.my_buffer->void_buffer[i]), static_cast<unsigned long*>(recvbuf), count); 149 150 else printf("datatype Error\n"); 151 } 152 153 else //if(op == MPI_MIN) 154 { 155 if(datatype == MPI_INT && datasize == sizeof(int)) 156 for(int i=0; i<ep_rank_loc; i++) 157 reduce_min<int>(static_cast<int*>(comm.my_buffer->void_buffer[i]), static_cast<int*>(recvbuf), count); 158 159 else if(datatype == MPI_FLOAT && datasize == sizeof(float)) 160 for(int i=0; i<ep_rank_loc; i++) 161 reduce_min<float>(static_cast<float*>(comm.my_buffer->void_buffer[i]), static_cast<float*>(recvbuf), count); 162 163 else if(datatype == MPI_DOUBLE && datasize == sizeof(double)) 164 for(int i=0; i<ep_rank_loc; i++) 165 reduce_min<double>(static_cast<double*>(comm.my_buffer->void_buffer[i]), static_cast<double*>(recvbuf), count); 166 167 else if(datatype == MPI_CHAR && datasize == sizeof(char)) 168 for(int i=0; i<ep_rank_loc; i++) 169 reduce_min<char>(static_cast<char*>(comm.my_buffer->void_buffer[i]), static_cast<char*>(recvbuf), count); 170 171 else if(datatype == MPI_LONG && datasize == sizeof(long)) 172 for(int i=0; i<ep_rank_loc; i++) 173 reduce_min<long>(static_cast<long*>(comm.my_buffer->void_buffer[i]), static_cast<long*>(recvbuf), count); 174 175 else if(datatype == MPI_UNSIGNED_LONG && datasize == sizeof(unsigned long)) 176 for(int i=0; i<ep_rank_loc; i++) 177 reduce_min<unsigned long>(static_cast<unsigned long*>(comm.my_buffer->void_buffer[i]), static_cast<unsigned long*>(recvbuf), count); 178 179 else printf("datatype Error\n"); 180 } 181 182 MPI_Barrier_local(comm); 183 184 } 431 185 432 186 int MPI_Exscan(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm) 433 187 { 434 435 188 if(!comm.is_ep) 436 189 { 437 ::MPI_Exscan(const_cast<void*>(sendbuf), recvbuf, count, static_cast< ::MPI_Datatype>(datatype), 438 static_cast< ::MPI_Op>(op), static_cast< ::MPI_Comm>(comm.mpi_comm)); 439 return 0; 440 } 441 if(!comm.mpi_comm) return 0; 442 443 int ep_rank, ep_rank_loc, mpi_rank; 444 int ep_size, num_ep, mpi_size; 445 446 ep_rank = comm.ep_comm_ptr->size_rank_info[0].first; 447 ep_rank_loc = comm.ep_comm_ptr->size_rank_info[1].first; 448 mpi_rank = comm.ep_comm_ptr->size_rank_info[2].first; 449 ep_size = comm.ep_comm_ptr->size_rank_info[0].second; 450 num_ep = comm.ep_comm_ptr->size_rank_info[1].second; 451 mpi_size = comm.ep_comm_ptr->size_rank_info[2].second; 452 453 190 return ::MPI_Scan(sendbuf, recvbuf, count, to_mpi_type(datatype), to_mpi_op(op), to_mpi_comm(comm.mpi_comm)); 191 } 192 193 valid_type(datatype); 194 195 int ep_rank = comm.ep_comm_ptr->size_rank_info[0].first; 196 int ep_rank_loc = comm.ep_comm_ptr->size_rank_info[1].first; 197 int mpi_rank = comm.ep_comm_ptr->size_rank_info[2].first; 198 int ep_size = comm.ep_comm_ptr->size_rank_info[0].second; 199 int num_ep = comm.ep_comm_ptr->size_rank_info[1].second; 200 int mpi_size = comm.ep_comm_ptr->size_rank_info[2].second; 454 201 455 202 ::MPI_Aint datasize, lb; 456 457 ::MPI_Type_get_extent(static_cast< ::MPI_Datatype>(datatype), &lb, &datasize); 458 459 void* local_scan_recvbuf; 460 local_scan_recvbuf = new void*[datasize * count]; 461 462 463 // local scan 464 MPI_Exscan_local(sendbuf, recvbuf, count, datatype, op, comm); 465 466 // MPI_scan 467 void* local_sum; 468 void* mpi_scan_recvbuf; 469 470 471 mpi_scan_recvbuf = new void*[datasize*count]; 203 ::MPI_Type_get_extent(to_mpi_type(datatype), &lb, &datasize); 204 205 void* tmp_sendbuf; 206 tmp_sendbuf = new void*[datasize * count]; 207 208 int my_src = 0; 209 int my_dst = ep_rank; 210 211 std::vector<int> my_map(mpi_size, 0); 212 213 for(int i=0; i<comm.rank_map->size(); i++) my_map[comm.rank_map->at(i).second]++; 214 215 for(int i=0; i<mpi_rank; i++) my_src += my_map[i]; 216 my_src += ep_rank_loc; 217 218 219 for(int i=0; i<mpi_size; i++) 220 { 221 if(my_dst < my_map[i]) 222 { 223 my_dst = get_ep_rank(comm, my_dst, i); 224 break; 225 } 226 else 227 my_dst -= my_map[i]; 228 } 229 230 if(ep_rank != my_dst) 231 { 232 MPI_Request request[2]; 233 MPI_Status status[2]; 234 235 MPI_Isend(sendbuf, count, datatype, my_dst, my_dst, comm, &request[0]); 236 237 MPI_Irecv(tmp_sendbuf, count, datatype, my_src, ep_rank, comm, &request[1]); 238 239 MPI_Waitall(2, request, status); 240 } 241 242 else memcpy(tmp_sendbuf, sendbuf, datasize*count); 243 244 245 void* tmp_recvbuf; 246 tmp_recvbuf = new void*[datasize * count]; 247 248 MPI_Reduce_local(tmp_sendbuf, tmp_recvbuf, count, datatype, op, 0, comm); 472 249 473 250 if(ep_rank_loc == 0) 474 { 475 local_sum = new void*[datasize*count]; 476 } 477 478 479 MPI_Reduce_local(sendbuf, local_sum, count, datatype, op, comm); 480 481 if(ep_rank_loc == 0) 482 { 483 ::MPI_Exscan(local_sum, mpi_scan_recvbuf, count, static_cast< ::MPI_Datatype>(datatype), static_cast< ::MPI_Op>(op), static_cast< ::MPI_Comm>(comm.mpi_comm)); 484 } 485 486 487 if(mpi_rank > 0) 488 { 489 MPI_Bcast_local(mpi_scan_recvbuf, count, datatype, comm); 490 } 491 492 493 if(datatype == MPI_DOUBLE) 494 { 495 double* sum_buf = static_cast<double*>(mpi_scan_recvbuf); 496 double* recv_buf = static_cast<double*>(recvbuf); 497 498 if(mpi_rank != 0) 499 { 500 if(op == MPI_SUM) 501 { 502 if(ep_rank_loc == 0) 503 { 504 copy(sum_buf, sum_buf+count, recv_buf); 505 } 506 else 507 { 508 for(int i=0; i<count; i++) 509 { 510 recv_buf[i] += sum_buf[i]; 511 } 512 } 513 } 514 else if (op == MPI_MAX) 515 { 516 if(ep_rank_loc == 0) 517 { 518 copy(sum_buf, sum_buf+count, recv_buf); 519 } 520 else 521 { 522 for(int i=0; i<count; i++) 523 { 524 recv_buf[i] = max(recv_buf[i], sum_buf[i]); 525 } 526 } 527 } 528 else if(op == MPI_MIN) 529 { 530 if(ep_rank_loc == 0) 531 { 532 copy(sum_buf, sum_buf+count, recv_buf); 533 } 534 else 535 { 536 for(int i=0; i<count; i++) 537 { 538 recv_buf[i] = min(recv_buf[i], sum_buf[i]); 539 } 540 } 541 } 542 else 543 { 544 printf("Support operator for MPI_Scan is MPI_SUM, MPI_MAX, and MPI_MIN\n"); 545 exit(1); 546 } 547 } 548 549 delete[] static_cast<double*>(mpi_scan_recvbuf); 550 if(ep_rank_loc == 0) 551 { 552 delete[] static_cast<double*>(local_sum); 553 } 554 } 555 556 else if(datatype == MPI_FLOAT) 557 { 558 float* sum_buf = static_cast<float*>(mpi_scan_recvbuf); 559 float* recv_buf = static_cast<float*>(recvbuf); 560 561 if(mpi_rank != 0) 562 { 563 if(op == MPI_SUM) 564 { 565 if(ep_rank_loc == 0) 566 { 567 copy(sum_buf, sum_buf+count, recv_buf); 568 } 569 else 570 { 571 for(int i=0; i<count; i++) 572 { 573 recv_buf[i] += sum_buf[i]; 574 } 575 } 576 } 577 else if (op == MPI_MAX) 578 { 579 if(ep_rank_loc == 0) 580 { 581 copy(sum_buf, sum_buf+count, recv_buf); 582 } 583 else 584 { 585 for(int i=0; i<count; i++) 586 { 587 recv_buf[i] = max(recv_buf[i], sum_buf[i]); 588 } 589 } 590 } 591 else if(op == MPI_MIN) 592 { 593 if(ep_rank_loc == 0) 594 { 595 copy(sum_buf, sum_buf+count, recv_buf); 596 } 597 else 598 { 599 for(int i=0; i<count; i++) 600 { 601 recv_buf[i] = min(recv_buf[i], sum_buf[i]); 602 } 603 } 604 } 605 else 606 { 607 printf("Support operator for MPI_Scan is MPI_SUM, MPI_MAX, and MPI_MIN\n"); 608 exit(1); 609 } 610 } 611 612 delete[] static_cast<float*>(mpi_scan_recvbuf); 613 if(ep_rank_loc == 0) 614 { 615 delete[] static_cast<float*>(local_sum); 616 } 617 } 618 619 else if(datatype == MPI_INT) 620 { 621 int* sum_buf = static_cast<int*>(mpi_scan_recvbuf); 622 int* recv_buf = static_cast<int*>(recvbuf); 623 624 if(mpi_rank != 0) 625 { 626 if(op == MPI_SUM) 627 { 628 if(ep_rank_loc == 0) 629 { 630 copy(sum_buf, sum_buf+count, recv_buf); 631 } 632 else 633 { 634 for(int i=0; i<count; i++) 635 { 636 recv_buf[i] += sum_buf[i]; 637 } 638 } 639 } 640 else if (op == MPI_MAX) 641 { 642 if(ep_rank_loc == 0) 643 { 644 copy(sum_buf, sum_buf+count, recv_buf); 645 } 646 else 647 { 648 for(int i=0; i<count; i++) 649 { 650 recv_buf[i] = max(recv_buf[i], sum_buf[i]); 651 } 652 } 653 } 654 else if(op == MPI_MIN) 655 { 656 if(ep_rank_loc == 0) 657 { 658 copy(sum_buf, sum_buf+count, recv_buf); 659 } 660 else 661 { 662 for(int i=0; i<count; i++) 663 { 664 recv_buf[i] = min(recv_buf[i], sum_buf[i]); 665 } 666 } 667 } 668 else 669 { 670 printf("Support operator for MPI_Scan is MPI_SUM, MPI_MAX, and MPI_MIN\n"); 671 exit(1); 672 } 673 } 674 675 delete[] static_cast<int*>(mpi_scan_recvbuf); 676 if(ep_rank_loc == 0) 677 { 678 delete[] static_cast<int*>(local_sum); 679 } 680 } 681 682 else if(datatype == MPI_CHAR) 683 { 684 char* sum_buf = static_cast<char*>(mpi_scan_recvbuf); 685 char* recv_buf = static_cast<char*>(recvbuf); 686 687 if(mpi_rank != 0) 688 { 689 if(op == MPI_SUM) 690 { 691 if(ep_rank_loc == 0) 692 { 693 copy(sum_buf, sum_buf+count, recv_buf); 694 } 695 else 696 { 697 for(int i=0; i<count; i++) 698 { 699 recv_buf[i] += sum_buf[i]; 700 } 701 } 702 } 703 else if (op == MPI_MAX) 704 { 705 if(ep_rank_loc == 0) 706 { 707 copy(sum_buf, sum_buf+count, recv_buf); 708 } 709 else 710 { 711 for(int i=0; i<count; i++) 712 { 713 recv_buf[i] = max(recv_buf[i], sum_buf[i]); 714 } 715 } 716 } 717 else if(op == MPI_MIN) 718 { 719 if(ep_rank_loc == 0) 720 { 721 copy(sum_buf, sum_buf+count, recv_buf); 722 } 723 else 724 { 725 for(int i=0; i<count; i++) 726 { 727 recv_buf[i] = min(recv_buf[i], sum_buf[i]); 728 } 729 } 730 } 731 else 732 { 733 printf("Support operator for MPI_Scan is MPI_SUM, MPI_MAX, and MPI_MIN\n"); 734 exit(1); 735 } 736 } 737 738 delete[] static_cast<char*>(mpi_scan_recvbuf); 739 if(ep_rank_loc == 0) 740 { 741 delete[] static_cast<char*>(local_sum); 742 } 743 } 744 745 else if(datatype == MPI_LONG) 746 { 747 long* sum_buf = static_cast<long*>(mpi_scan_recvbuf); 748 long* recv_buf = static_cast<long*>(recvbuf); 749 750 if(mpi_rank != 0) 751 { 752 if(op == MPI_SUM) 753 { 754 if(ep_rank_loc == 0) 755 { 756 copy(sum_buf, sum_buf+count, recv_buf); 757 } 758 else 759 { 760 for(int i=0; i<count; i++) 761 { 762 recv_buf[i] += sum_buf[i]; 763 } 764 } 765 } 766 else if (op == MPI_MAX) 767 { 768 if(ep_rank_loc == 0) 769 { 770 copy(sum_buf, sum_buf+count, recv_buf); 771 } 772 else 773 { 774 for(int i=0; i<count; i++) 775 { 776 recv_buf[i] = max(recv_buf[i], sum_buf[i]); 777 } 778 } 779 } 780 else if(op == MPI_MIN) 781 { 782 if(ep_rank_loc == 0) 783 { 784 copy(sum_buf, sum_buf+count, recv_buf); 785 } 786 else 787 { 788 for(int i=0; i<count; i++) 789 { 790 recv_buf[i] = min(recv_buf[i], sum_buf[i]); 791 } 792 } 793 } 794 else 795 { 796 printf("Support operator for MPI_Scan is MPI_SUM, MPI_MAX, and MPI_MIN\n"); 797 exit(1); 798 } 799 } 800 801 delete[] static_cast<long*>(mpi_scan_recvbuf); 802 if(ep_rank_loc == 0) 803 { 804 delete[] static_cast<long*>(local_sum); 805 } 806 } 807 808 else if(datatype == MPI_UNSIGNED_LONG) 809 { 810 unsigned long* sum_buf = static_cast<unsigned long*>(mpi_scan_recvbuf); 811 unsigned long* recv_buf = static_cast<unsigned long*>(recvbuf); 812 813 if(mpi_rank != 0) 814 { 815 if(op == MPI_SUM) 816 { 817 if(ep_rank_loc == 0) 818 { 819 copy(sum_buf, sum_buf+count, recv_buf); 820 } 821 else 822 { 823 for(int i=0; i<count; i++) 824 { 825 recv_buf[i] += sum_buf[i]; 826 } 827 } 828 } 829 else if (op == MPI_MAX) 830 { 831 if(ep_rank_loc == 0) 832 { 833 copy(sum_buf, sum_buf+count, recv_buf); 834 } 835 else 836 { 837 for(int i=0; i<count; i++) 838 { 839 recv_buf[i] = max(recv_buf[i], sum_buf[i]); 840 } 841 } 842 } 843 else if(op == MPI_MIN) 844 { 845 if(ep_rank_loc == 0) 846 { 847 copy(sum_buf, sum_buf+count, recv_buf); 848 } 849 else 850 { 851 for(int i=0; i<count; i++) 852 { 853 recv_buf[i] = min(recv_buf[i], sum_buf[i]); 854 } 855 } 856 } 857 else 858 { 859 printf("Support operator for MPI_Scan is MPI_SUM, MPI_MAX, and MPI_MIN\n"); 860 exit(1); 861 } 862 } 863 864 delete[] static_cast<unsigned long*>(mpi_scan_recvbuf); 865 if(ep_rank_loc == 0) 866 { 867 delete[] static_cast<unsigned long*>(local_sum); 868 } 869 } 870 871 872 } 873 874 251 ::MPI_Exscan(MPI_IN_PLACE, tmp_recvbuf, count, to_mpi_type(datatype), to_mpi_op(op), to_mpi_comm(comm.mpi_comm)); 252 253 // printf(" ID=%d : %d %d \n", ep_rank, static_cast<int*>(tmp_recvbuf)[0], static_cast<int*>(tmp_recvbuf)[1]); 254 255 MPI_Exscan_local(tmp_sendbuf, tmp_recvbuf, count, datatype, op, comm); 256 257 // printf(" ID=%d : after local tmp_sendbuf = %d %d ; tmp_recvbuf = %d %d \n", ep_rank, static_cast<int*>(tmp_sendbuf)[0], static_cast<int*>(tmp_sendbuf)[1], static_cast<int*>(tmp_recvbuf)[0], static_cast<int*>(tmp_recvbuf)[1]); 258 259 260 261 if(ep_rank != my_src) 262 { 263 MPI_Request request[2]; 264 MPI_Status status[2]; 265 266 MPI_Isend(tmp_recvbuf, count, datatype, my_src, my_src, comm, &request[0]); 267 268 MPI_Irecv(recvbuf, count, datatype, my_dst, ep_rank, comm, &request[1]); 269 270 MPI_Waitall(2, request, status); 271 } 272 273 else memcpy(recvbuf, tmp_recvbuf, datasize*count); 274 275 276 277 278 delete[] tmp_sendbuf; 279 delete[] tmp_recvbuf; 280 281 } 875 282 876 283 } -
XIOS/dev/branch_openmp/extern/src_ep_dev/ep_fortran.cpp
r1196 r1287 65 65 #endif 66 66 67 if(base_comm != MPI_COMM_NULL_STD)67 if(base_comm != static_cast< ::MPI_Comm>(MPI_COMM_NULL.mpi_comm)) 68 68 { 69 69 if(omp_get_thread_num() == 0) -
XIOS/dev/branch_openmp/extern/src_ep_dev/ep_free.cpp
r1134 r1287 6 6 { 7 7 8 9 10 8 int MPI_Comm_free(MPI_Comm *comm) 11 9 { … … 13 11 if(! comm->is_ep) 14 12 { 15 if(comm->mpi_comm != MPI_COMM_NULL_STD)13 if(comm->mpi_comm != static_cast< ::MPI_Comm>(MPI_COMM_NULL.mpi_comm)) 16 14 { 17 15 ::MPI_Comm mpi_comm = static_cast< ::MPI_Comm>(comm->mpi_comm); … … 41 39 Debug("comm is EP, mpi_comm_ptr != NULL\n"); 42 40 43 if(comm->my_buffer != NULL)44 {45 if(comm->my_buffer->buf_int != NULL) delete[] comm->my_buffer->buf_int; Debug("buf_int freed\n");46 if(comm->my_buffer->buf_float != NULL) delete[] comm->my_buffer->buf_float; Debug("buf_float freed\n");47 if(comm->my_buffer->buf_double != NULL) delete[] comm->my_buffer->buf_double; Debug("buf_double freed\n");48 if(comm->my_buffer->buf_long != NULL) delete[] comm->my_buffer->buf_long; Debug("buf_long freed\n");49 if(comm->my_buffer->buf_ulong != NULL) delete[] comm->my_buffer->buf_ulong; Debug("buf_ulong freed\n");50 if(comm->my_buffer->buf_char != NULL) delete[] comm->my_buffer->buf_char; Debug("buf_char freed\n");51 }52 41 53 42 if(comm->ep_barrier != NULL) … … 77 66 } 78 67 79 if(comm->mpi_comm != MPI_COMM_NULL_STD) 68 if( comm->mpi_comm != static_cast< ::MPI_Comm>(MPI_COMM_NULL.mpi_comm) 69 && comm->mpi_comm != static_cast< ::MPI_Comm>(MPI_COMM_WORLD.mpi_comm)) 80 70 { 81 71 ::MPI_Comm mpi_comm = static_cast< ::MPI_Comm>(comm->mpi_comm); … … 107 97 Debug("comm is EP, mpi_comm_ptr != NULL\n"); 108 98 109 if(comm->my_buffer != NULL)110 {111 if(comm->my_buffer->buf_int != NULL) delete[] comm->my_buffer->buf_int; Debug("buf_int freed\n");112 if(comm->my_buffer->buf_float != NULL) delete[] comm->my_buffer->buf_float; Debug("buf_float freed\n");113 if(comm->my_buffer->buf_double != NULL) delete[] comm->my_buffer->buf_double; Debug("buf_double freed\n");114 if(comm->my_buffer->buf_long != NULL) delete[] comm->my_buffer->buf_long; Debug("buf_long freed\n");115 if(comm->my_buffer->buf_ulong != NULL) delete[] comm->my_buffer->buf_ulong; Debug("buf_ulong freed\n");116 if(comm->my_buffer->buf_char != NULL) delete[] comm->my_buffer->buf_char; Debug("buf_char freed\n");117 }118 99 119 100 if(comm->ep_barrier != NULL) … … 151 132 } 152 133 153 if(comm->mpi_comm != MPI_COMM_NULL_STD)134 if(comm->mpi_comm != static_cast< ::MPI_Comm>(MPI_COMM_NULL.mpi_comm)) 154 135 { 155 136 ::MPI_Comm mpi_comm = static_cast< ::MPI_Comm>(comm->mpi_comm); -
XIOS/dev/branch_openmp/extern/src_ep_dev/ep_gather.cpp
r1164 r1287 9 9 #include <mpi.h> 10 10 #include "ep_declaration.hpp" 11 11 #include "ep_mpi.hpp" 12 12 13 13 using namespace std; … … 16 16 { 17 17 18 int MPI_Gather_local(const void *sendbuf, int count, MPI_Datatype datatype, void *recvbuf, MPI_Comm comm)18 int MPI_Gather_local(const void *sendbuf, int count, MPI_Datatype datatype, void *recvbuf, int local_root, MPI_Comm comm) 19 19 { 20 if(datatype == MPI_INT) 20 assert(valid_type(datatype)); 21 22 ::MPI_Aint datasize, lb; 23 ::MPI_Type_get_extent(to_mpi_type(datatype), &lb, &datasize); 24 25 int ep_rank_loc = comm.ep_comm_ptr->size_rank_info[1].first; 26 int num_ep = comm.ep_comm_ptr->size_rank_info[1].second; 27 28 #pragma omp critical (_gather) 29 comm.my_buffer->void_buffer[ep_rank_loc] = const_cast< void* >(sendbuf); 30 31 MPI_Barrier_local(comm); 32 33 if(ep_rank_loc == local_root) 21 34 { 22 Debug("datatype is INT\n"); 23 return MPI_Gather_local_int(sendbuf, count, recvbuf, comm); 24 } 25 else if(datatype == MPI_FLOAT) 26 { 27 Debug("datatype is FLOAT\n"); 28 return MPI_Gather_local_float(sendbuf, count, recvbuf, comm); 29 } 30 else if(datatype == MPI_DOUBLE) 31 { 32 Debug("datatype is DOUBLE\n"); 33 return MPI_Gather_local_double(sendbuf, count, recvbuf, comm); 34 } 35 else if(datatype == MPI_LONG) 36 { 37 Debug("datatype is LONG\n"); 38 return MPI_Gather_local_long(sendbuf, count, recvbuf, comm); 39 } 40 else if(datatype == MPI_UNSIGNED_LONG) 41 { 42 Debug("datatype is uLONG\n"); 43 return MPI_Gather_local_ulong(sendbuf, count, recvbuf, comm); 44 } 45 else if(datatype == MPI_CHAR) 46 { 47 Debug("datatype is CHAR\n"); 48 return MPI_Gather_local_char(sendbuf, count, recvbuf, comm); 49 } 50 else 51 { 52 printf("MPI_Gather Datatype not supported!\n"); 53 exit(0); 54 } 55 } 35 for(int i=0; i<num_ep; i++) 36 memcpy(recvbuf + datasize * i * count, comm.my_buffer->void_buffer[i], datasize * count); 56 37 57 int MPI_Gather_local_int(const void *sendbuf, int count, void *recvbuf, MPI_Comm comm) 58 { 59 int my_rank = comm.ep_comm_ptr->size_rank_info[1].first; 60 int num_ep = comm.ep_comm_ptr->size_rank_info[1].second; 61 62 int *buffer = comm.my_buffer->buf_int; 63 int *send_buf = static_cast<int*>(const_cast<void*>(sendbuf)); 64 int *recv_buf = static_cast<int*>(recvbuf); 65 66 if(my_rank == 0) 67 { 68 copy(send_buf, send_buf+count, recv_buf); 38 //printf("local_recvbuf = %d %d \n", static_cast<int*>(recvbuf)[0], static_cast<int*>(recvbuf)[1] ); 69 39 } 70 40 71 for(int j=0; j<count; j+=BUFFER_SIZE) 72 { 73 for(int k=1; k<num_ep; k++) 74 { 75 if(my_rank == k) 76 { 77 #pragma omp critical (write_to_buffer) 78 { 79 copy(send_buf+j, send_buf+j+min(BUFFER_SIZE, count-j), buffer); 80 #pragma omp flush 81 } 82 } 83 84 MPI_Barrier_local(comm); 85 86 if(my_rank == 0) 87 { 88 #pragma omp flush 89 #pragma omp critical (read_from_buffer) 90 { 91 copy(buffer, buffer+min(BUFFER_SIZE, count-j), recv_buf+j+k*count); 92 } 93 } 94 95 MPI_Barrier_local(comm); 96 } 97 } 41 MPI_Barrier_local(comm); 98 42 } 99 100 int MPI_Gather_local_float(const void *sendbuf, int count, void *recvbuf, MPI_Comm comm)101 {102 int my_rank = comm.ep_comm_ptr->size_rank_info[1].first;103 int num_ep = comm.ep_comm_ptr->size_rank_info[1].second;104 105 float *buffer = comm.my_buffer->buf_float;106 float *send_buf = static_cast<float*>(const_cast<void*>(sendbuf));107 float *recv_buf = static_cast<float*>(recvbuf);108 109 if(my_rank == 0)110 {111 copy(send_buf, send_buf+count, recv_buf);112 }113 114 for(int j=0; j<count; j+=BUFFER_SIZE)115 {116 for(int k=1; k<num_ep; k++)117 {118 if(my_rank == k)119 {120 #pragma omp critical (write_to_buffer)121 {122 copy(send_buf+j, send_buf+j+min(BUFFER_SIZE, count-j), buffer);123 #pragma omp flush124 }125 }126 127 MPI_Barrier_local(comm);128 129 if(my_rank == 0)130 {131 #pragma omp flush132 #pragma omp critical (read_from_buffer)133 {134 copy(buffer, buffer+min(BUFFER_SIZE, count-j), recv_buf+j+k*count);135 }136 }137 138 MPI_Barrier_local(comm);139 }140 }141 }142 143 int MPI_Gather_local_double(const void *sendbuf, int count, void *recvbuf, MPI_Comm comm)144 {145 int my_rank = comm.ep_comm_ptr->size_rank_info[1].first;146 int num_ep = comm.ep_comm_ptr->size_rank_info[1].second;147 148 double *buffer = comm.my_buffer->buf_double;149 double *send_buf = static_cast<double*>(const_cast<void*>(sendbuf));150 double *recv_buf = static_cast<double*>(recvbuf);151 152 if(my_rank == 0)153 {154 copy(send_buf, send_buf+count, recv_buf);155 }156 157 for(int j=0; j<count; j+=BUFFER_SIZE)158 {159 for(int k=1; k<num_ep; k++)160 {161 if(my_rank == k)162 {163 #pragma omp critical (write_to_buffer)164 {165 copy(send_buf+j, send_buf+j+min(BUFFER_SIZE, count-j), buffer);166 #pragma omp flush167 }168 }169 170 MPI_Barrier_local(comm);171 172 if(my_rank == 0)173 {174 #pragma omp flush175 #pragma omp critical (read_from_buffer)176 {177 copy(buffer, buffer+min(BUFFER_SIZE, count-j), recv_buf+j+k*count);178 }179 }180 181 MPI_Barrier_local(comm);182 }183 }184 }185 186 int MPI_Gather_local_long(const void *sendbuf, int count, void *recvbuf, MPI_Comm comm)187 {188 int my_rank = comm.ep_comm_ptr->size_rank_info[1].first;189 int num_ep = comm.ep_comm_ptr->size_rank_info[1].second;190 191 long *buffer = comm.my_buffer->buf_long;192 long *send_buf = static_cast<long*>(const_cast<void*>(sendbuf));193 long *recv_buf = static_cast<long*>(recvbuf);194 195 if(my_rank == 0)196 {197 copy(send_buf, send_buf+count, recv_buf);198 }199 200 for(int j=0; j<count; j+=BUFFER_SIZE)201 {202 for(int k=1; k<num_ep; k++)203 {204 if(my_rank == k)205 {206 #pragma omp critical (write_to_buffer)207 {208 copy(send_buf+j, send_buf+j+min(BUFFER_SIZE, count-j), buffer);209 #pragma omp flush210 }211 }212 213 MPI_Barrier_local(comm);214 215 if(my_rank == 0)216 {217 #pragma omp flush218 #pragma omp critical (read_from_buffer)219 {220 copy(buffer, buffer+min(BUFFER_SIZE, count-j), recv_buf+j+k*count);221 }222 }223 224 MPI_Barrier_local(comm);225 }226 }227 }228 229 int MPI_Gather_local_ulong(const void *sendbuf, int count, void *recvbuf, MPI_Comm comm)230 {231 int my_rank = comm.ep_comm_ptr->size_rank_info[1].first;232 int num_ep = comm.ep_comm_ptr->size_rank_info[1].second;233 234 unsigned long *buffer = comm.my_buffer->buf_ulong;235 unsigned long *send_buf = static_cast<unsigned long*>(const_cast<void*>(sendbuf));236 unsigned long *recv_buf = static_cast<unsigned long*>(recvbuf);237 238 if(my_rank == 0)239 {240 copy(send_buf, send_buf+count, recv_buf);241 }242 243 for(int j=0; j<count; j+=BUFFER_SIZE)244 {245 for(int k=1; k<num_ep; k++)246 {247 if(my_rank == k)248 {249 #pragma omp critical (write_to_buffer)250 {251 copy(send_buf+j, send_buf+j+min(BUFFER_SIZE, count-j), buffer);252 #pragma omp flush253 }254 }255 256 MPI_Barrier_local(comm);257 258 if(my_rank == 0)259 {260 #pragma omp flush261 #pragma omp critical (read_from_buffer)262 {263 copy(buffer, buffer+min(BUFFER_SIZE, count-j), recv_buf+j+k*count);264 }265 }266 267 MPI_Barrier_local(comm);268 }269 }270 }271 272 273 int MPI_Gather_local_char(const void *sendbuf, int count, void *recvbuf, MPI_Comm comm)274 {275 int my_rank = comm.ep_comm_ptr->size_rank_info[1].first;276 int num_ep = comm.ep_comm_ptr->size_rank_info[1].second;277 278 char *buffer = comm.my_buffer->buf_char;279 char *send_buf = static_cast<char*>(const_cast<void*>(sendbuf));280 char *recv_buf = static_cast<char*>(recvbuf);281 282 if(my_rank == 0)283 {284 copy(send_buf, send_buf+count, recv_buf);285 }286 287 for(int j=0; j<count; j+=BUFFER_SIZE)288 {289 for(int k=1; k<num_ep; k++)290 {291 if(my_rank == k)292 {293 #pragma omp critical (write_to_buffer)294 {295 copy(send_buf+j, send_buf+j+min(BUFFER_SIZE, count-j), buffer);296 #pragma omp flush297 }298 }299 300 MPI_Barrier_local(comm);301 302 if(my_rank == 0)303 {304 #pragma omp flush305 #pragma omp critical (read_from_buffer)306 {307 copy(buffer, buffer+min(BUFFER_SIZE, count-j), recv_buf+j+k*count);308 }309 }310 311 MPI_Barrier_local(comm);312 }313 }314 }315 316 317 43 318 44 int MPI_Gather(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPI_Comm comm) 319 45 { 320 if(!comm.is_ep && comm.mpi_comm)46 if(!comm.is_ep) 321 47 { 322 ::MPI_Gather(const_cast<void*>(sendbuf), sendcount, static_cast< ::MPI_Datatype>(sendtype), recvbuf, recvcount, static_cast< ::MPI_Datatype>(recvtype), 323 root, static_cast< ::MPI_Comm>(comm.mpi_comm)); 324 return 0; 48 return ::MPI_Gather(const_cast<void*>(sendbuf), sendcount, to_mpi_type(sendtype), recvbuf, recvcount, to_mpi_type(recvtype), 49 root, to_mpi_comm(comm.mpi_comm)); 325 50 } 326 51 327 if(!comm.mpi_comm) return 0; 328 329 MPI_Bcast(&recvcount, 1, MPI_INT, root, comm); 52 assert(sendcount == recvcount && sendtype == recvtype); 330 53 331 assert(static_cast< ::MPI_Datatype>(sendtype) == static_cast< ::MPI_Datatype>(recvtype) && sendcount == recvcount); 332 333 MPI_Datatype datatype = sendtype; 334 int count = sendcount; 335 336 int ep_rank, ep_rank_loc, mpi_rank; 337 int ep_size, num_ep, mpi_size; 338 339 ep_rank = comm.ep_comm_ptr->size_rank_info[0].first; 340 ep_rank_loc = comm.ep_comm_ptr->size_rank_info[1].first; 341 mpi_rank = comm.ep_comm_ptr->size_rank_info[2].first; 342 ep_size = comm.ep_comm_ptr->size_rank_info[0].second; 343 num_ep = comm.ep_comm_ptr->size_rank_info[1].second; 344 mpi_size = comm.ep_comm_ptr->size_rank_info[2].second; 345 54 int ep_rank = comm.ep_comm_ptr->size_rank_info[0].first; 55 int ep_rank_loc = comm.ep_comm_ptr->size_rank_info[1].first; 56 int mpi_rank = comm.ep_comm_ptr->size_rank_info[2].first; 57 int ep_size = comm.ep_comm_ptr->size_rank_info[0].second; 58 int num_ep = comm.ep_comm_ptr->size_rank_info[1].second; 59 int mpi_size = comm.ep_comm_ptr->size_rank_info[2].second; 346 60 347 61 int root_mpi_rank = comm.rank_map->at(root).second; 348 62 int root_ep_loc = comm.rank_map->at(root).first; 349 63 64 ::MPI_Aint datasize, lb; 65 ::MPI_Type_get_extent(to_mpi_type(sendtype), &lb, &datasize); 350 66 351 ::MPI_Aint datasize, lb; 67 bool is_master = (ep_rank_loc==0 && mpi_rank != root_mpi_rank ) || ep_rank == root; 68 bool is_root = ep_rank == root; 352 69 353 ::MPI_Type_get_extent(static_cast< ::MPI_Datatype>(datatype), &lb, &datasize);70 void* local_recvbuf; 354 71 355 void *local_gather_recvbuf; 356 void *master_recvbuf; 357 if(ep_rank_loc == 0 && mpi_rank == root_mpi_rank && root_ep_loc != 0) 72 if(is_master) 358 73 { 359 master_recvbuf = new void*[datasize*ep_size*count];74 local_recvbuf = new void*[datasize * num_ep * sendcount]; 360 75 } 361 76 362 if(ep_rank_loc==0) 363 { 364 local_gather_recvbuf = new void*[datasize*num_ep*count]; 365 } 366 367 // local gather to master 368 MPI_Gather_local(sendbuf, count, datatype, local_gather_recvbuf, comm); 369 370 //MPI_Gather 371 372 if(ep_rank_loc == 0) 373 { 374 int *gatherv_recvcnt; 375 int *gatherv_displs; 376 int gatherv_cnt = count*num_ep; 377 378 gatherv_recvcnt = new int[mpi_size]; 379 gatherv_displs = new int[mpi_size]; 77 void* tmp_recvbuf; 78 if(is_root) tmp_recvbuf = new void*[datasize * recvcount * ep_size]; 380 79 381 80 382 ::MPI_Allgather(&gatherv_cnt, 1, MPI_INT_STD, gatherv_recvcnt, 1, MPI_INT_STD, static_cast< ::MPI_Comm>(comm.mpi_comm)); 81 if(mpi_rank == root_mpi_rank) MPI_Gather_local(sendbuf, sendcount, sendtype, local_recvbuf, root_ep_loc, comm); 82 else MPI_Gather_local(sendbuf, sendcount, sendtype, local_recvbuf, 0, comm); 383 83 384 gatherv_displs[0] = 0; 385 for(int i=1; i<mpi_size; i++) 84 std::vector<int> recvcounts(mpi_size, 0); 85 std::vector<int> displs(mpi_size, 0); 86 87 88 if(is_master) 89 { 90 for(int i=0; i<ep_size; i++) 386 91 { 387 gatherv_displs[i] = gatherv_recvcnt[i-1] + gatherv_displs[i-1];92 recvcounts[comm.rank_map->at(i).second]+=sendcount; 388 93 } 389 94 390 if(root_ep_loc != 0) // gather to root_master 95 for(int i=1; i<mpi_size; i++) 96 displs[i] = displs[i-1] + recvcounts[i-1]; 97 98 ::MPI_Gatherv(local_recvbuf, sendcount*num_ep, sendtype, tmp_recvbuf, recvcounts.data(), displs.data(), recvtype, root_mpi_rank, to_mpi_comm(comm.mpi_comm)); 99 } 100 101 102 // reorder data 103 if(is_root) 104 { 105 // printf("tmp_recvbuf = %d %d %d %d %d %d %d %d\n", static_cast<int*>(tmp_recvbuf)[0], static_cast<int*>(tmp_recvbuf)[1], 106 // static_cast<int*>(tmp_recvbuf)[2], static_cast<int*>(tmp_recvbuf)[3], 107 // static_cast<int*>(tmp_recvbuf)[4], static_cast<int*>(tmp_recvbuf)[5], 108 // static_cast<int*>(tmp_recvbuf)[6], static_cast<int*>(tmp_recvbuf)[7] ); 109 110 int offset; 111 for(int i=0; i<ep_size; i++) 391 112 { 392 ::MPI_Gatherv(local_gather_recvbuf, count*num_ep, static_cast< ::MPI_Datatype>(datatype), master_recvbuf, gatherv_recvcnt, 393 gatherv_displs, static_cast< ::MPI_Datatype>(datatype), root_mpi_rank, static_cast< ::MPI_Comm>(comm.mpi_comm)); 394 } 395 else 396 { 397 ::MPI_Gatherv(local_gather_recvbuf, count*num_ep, static_cast< ::MPI_Datatype>(datatype), recvbuf, gatherv_recvcnt, 398 gatherv_displs, static_cast< ::MPI_Datatype>(datatype), root_mpi_rank, static_cast< ::MPI_Comm>(comm.mpi_comm)); 113 offset = displs[comm.rank_map->at(i).second] + comm.rank_map->at(i).first * sendcount; 114 memcpy(recvbuf + i*sendcount*datasize, tmp_recvbuf+offset*datasize, sendcount*datasize); 115 116 399 117 } 400 118 401 delete[] gatherv_recvcnt;402 delete[] gatherv_displs;403 119 } 404 120 405 121 406 if( root_ep_loc != 0 && mpi_rank == root_mpi_rank) // root is not master, master send to root and root receive from master122 if(is_master) 407 123 { 408 innode_memcpy(0, master_recvbuf, root_ep_loc, recvbuf, count*ep_size, datatype, comm);124 delete[] local_recvbuf; 409 125 } 410 411 412 413 if(ep_rank_loc==0) 414 { 415 if(datatype == MPI_INT) 416 { 417 delete[] static_cast<int*>(local_gather_recvbuf); 418 } 419 else if(datatype == MPI_FLOAT) 420 { 421 delete[] static_cast<float*>(local_gather_recvbuf); 422 } 423 else if(datatype == MPI_DOUBLE) 424 { 425 delete[] static_cast<double*>(local_gather_recvbuf); 426 } 427 else if(datatype == MPI_CHAR) 428 { 429 delete[] static_cast<char*>(local_gather_recvbuf); 430 } 431 else if(datatype == MPI_LONG) 432 { 433 delete[] static_cast<long*>(local_gather_recvbuf); 434 } 435 else// if(datatype == MPI_UNSIGNED_LONG) 436 { 437 delete[] static_cast<unsigned long*>(local_gather_recvbuf); 438 } 439 440 if(root_ep_loc != 0 && mpi_rank == root_mpi_rank) delete[] master_recvbuf; 441 } 126 if(is_root) delete[] tmp_recvbuf; 127 442 128 } 443 129 444 445 int MPI_Allgather(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPI_Comm comm)446 {447 if(!comm.is_ep && comm.mpi_comm)448 {449 ::MPI_Allgather(const_cast<void*>(sendbuf), sendcount, static_cast< ::MPI_Datatype>(sendtype), recvbuf, recvcount, static_cast< ::MPI_Datatype>(recvtype),450 static_cast< ::MPI_Comm>(comm.mpi_comm));451 return 0;452 }453 454 if(!comm.mpi_comm) return 0;455 456 assert(static_cast< ::MPI_Datatype>(sendtype) == static_cast< ::MPI_Datatype>(recvtype) && sendcount == recvcount);457 458 MPI_Datatype datatype = sendtype;459 int count = sendcount;460 461 int ep_rank, ep_rank_loc, mpi_rank;462 int ep_size, num_ep, mpi_size;463 464 ep_rank = comm.ep_comm_ptr->size_rank_info[0].first;465 ep_rank_loc = comm.ep_comm_ptr->size_rank_info[1].first;466 mpi_rank = comm.ep_comm_ptr->size_rank_info[2].first;467 ep_size = comm.ep_comm_ptr->size_rank_info[0].second;468 num_ep = comm.ep_comm_ptr->size_rank_info[1].second;469 mpi_size = comm.ep_comm_ptr->size_rank_info[2].second;470 471 472 ::MPI_Aint datasize, lb;473 474 ::MPI_Type_get_extent(static_cast< ::MPI_Datatype>(datatype), &lb, &datasize);475 476 void *local_gather_recvbuf;477 478 if(ep_rank_loc==0)479 {480 local_gather_recvbuf = new void*[datasize*num_ep*count];481 }482 483 // local gather to master484 MPI_Gather_local(sendbuf, count, datatype, local_gather_recvbuf, comm);485 486 //MPI_Gather487 488 if(ep_rank_loc == 0)489 {490 int *gatherv_recvcnt;491 int *gatherv_displs;492 int gatherv_cnt = count*num_ep;493 494 gatherv_recvcnt = new int[mpi_size];495 gatherv_displs = new int[mpi_size];496 497 ::MPI_Allgather(&gatherv_cnt, 1, MPI_INT_STD, gatherv_recvcnt, 1, MPI_INT_STD, static_cast< ::MPI_Comm>(comm.mpi_comm));498 499 gatherv_displs[0] = 0;500 for(int i=1; i<mpi_size; i++)501 {502 gatherv_displs[i] = gatherv_recvcnt[i-1] + gatherv_displs[i-1];503 }504 505 ::MPI_Allgatherv(local_gather_recvbuf, count*num_ep, static_cast< ::MPI_Datatype>(datatype), recvbuf, gatherv_recvcnt,506 gatherv_displs, static_cast< ::MPI_Datatype>(datatype), static_cast< ::MPI_Comm>(comm.mpi_comm));507 508 delete[] gatherv_recvcnt;509 delete[] gatherv_displs;510 }511 512 MPI_Bcast_local(recvbuf, count*ep_size, datatype, comm);513 514 515 if(ep_rank_loc==0)516 {517 if(datatype == MPI_INT)518 {519 delete[] static_cast<int*>(local_gather_recvbuf);520 }521 else if(datatype == MPI_FLOAT)522 {523 delete[] static_cast<float*>(local_gather_recvbuf);524 }525 else if(datatype == MPI_DOUBLE)526 {527 delete[] static_cast<double*>(local_gather_recvbuf);528 }529 else if(datatype == MPI_CHAR)530 {531 delete[] static_cast<char*>(local_gather_recvbuf);532 }533 else if(datatype == MPI_LONG)534 {535 delete[] static_cast<long*>(local_gather_recvbuf);536 }537 else// if(datatype == MPI_UNSIGNED_LONG)538 {539 delete[] static_cast<unsigned long*>(local_gather_recvbuf);540 }541 }542 }543 544 545 130 } -
XIOS/dev/branch_openmp/extern/src_ep_dev/ep_gatherv.cpp
r1218 r1287 9 9 #include <mpi.h> 10 10 #include "ep_declaration.hpp" 11 11 #include "ep_mpi.hpp" 12 12 13 13 using namespace std; … … 16 16 { 17 17 18 int MPI_Gatherv_local(const void *sendbuf, int count, MPI_Datatype datatype, void *recvbuf, const int recvcounts[], const int displs[], MPI_Comm comm)18 int MPI_Gatherv_local(const void *sendbuf, int count, MPI_Datatype datatype, void *recvbuf, const int recvcounts[], const int displs[], int local_root, MPI_Comm comm) 19 19 { 20 if(datatype == MPI_INT) 20 assert(valid_type(datatype)); 21 22 ::MPI_Aint datasize, lb; 23 ::MPI_Type_get_extent(to_mpi_type(datatype), &lb, &datasize); 24 25 int ep_rank_loc = comm.ep_comm_ptr->size_rank_info[1].first; 26 int num_ep = comm.ep_comm_ptr->size_rank_info[1].second; 27 28 //if(ep_rank_loc == local_root) printf("local_gatherv : recvcounts = %d %d\n\n", recvcounts[0], recvcounts[1]); 29 //if(ep_rank_loc == local_root) printf("local_gatherv : displs = %d %d\n\n", displs[0], displs[1]); 30 31 #pragma omp critical (_gatherv) 32 comm.my_buffer->void_buffer[ep_rank_loc] = const_cast< void* >(sendbuf); 33 34 MPI_Barrier_local(comm); 35 36 if(ep_rank_loc == local_root) 21 37 { 22 Debug("datatype is INT\n"); 23 return MPI_Gatherv_local_int(sendbuf, count, recvbuf, recvcounts, displs, comm); 38 for(int i=0; i<num_ep; i++) 39 memcpy(recvbuf + datasize*displs[i], comm.my_buffer->void_buffer[i], datasize*recvcounts[i]); 40 24 41 } 25 else if(datatype == MPI_FLOAT) 26 { 27 Debug("datatype is FLOAT\n"); 28 return MPI_Gatherv_local_float(sendbuf, count, recvbuf, recvcounts, displs, comm); 29 } 30 else if(datatype == MPI_DOUBLE) 31 { 32 Debug("datatype is DOUBLE\n"); 33 return MPI_Gatherv_local_double(sendbuf, count, recvbuf, recvcounts, displs, comm); 34 } 35 else if(datatype == MPI_LONG) 36 { 37 Debug("datatype is LONG\n"); 38 return MPI_Gatherv_local_long(sendbuf, count, recvbuf, recvcounts, displs, comm); 39 } 40 else if(datatype == MPI_UNSIGNED_LONG) 41 { 42 Debug("datatype is uLONG\n"); 43 return MPI_Gatherv_local_ulong(sendbuf, count, recvbuf, recvcounts, displs, comm); 44 } 45 else if(datatype == MPI_CHAR) 46 { 47 Debug("datatype is CHAR\n"); 48 return MPI_Gatherv_local_char(sendbuf, count, recvbuf, recvcounts, displs, comm); 49 } 50 else 51 { 52 printf("MPI_Gatherv Datatype not supported!\n"); 53 exit(0); 54 } 42 43 MPI_Barrier_local(comm); 55 44 } 56 45 57 int MPI_Gatherv_local_int(const void *sendbuf, int count, void *recvbuf, const int recvcounts[], const int displs[], MPI_Comm comm) 58 { 59 int my_rank = comm.ep_comm_ptr->size_rank_info[1].first; 60 int num_ep = comm.ep_comm_ptr->size_rank_info[1].second; 61 62 int *buffer = comm.my_buffer->buf_int; 63 int *send_buf = static_cast<int*>(const_cast<void*>(sendbuf)); 64 int *recv_buf = static_cast<int*>(recvbuf); 65 66 if(my_rank == 0) 67 { 68 assert(count == recvcounts[0]); 69 copy(send_buf, send_buf+count, recv_buf + displs[0]); 70 } 71 72 for(int j=0; count!=0? j<count: j<count+1; j+=BUFFER_SIZE) 73 { 74 for(int k=1; k<num_ep; k++) 75 { 76 if(my_rank == k) 77 { 78 #pragma omp critical (write_to_buffer) 79 { 80 if(count!=0) copy(send_buf+j, send_buf + min(BUFFER_SIZE, count-j) , buffer); 81 #pragma omp flush 82 } 83 } 84 85 MPI_Barrier_local(comm); 86 87 if(my_rank == 0) 88 { 89 #pragma omp flush 90 #pragma omp critical (read_from_buffer) 91 { 92 copy(buffer, buffer+min(BUFFER_SIZE, recvcounts[k]-j), recv_buf+j+displs[k]); 93 } 94 } 95 96 MPI_Barrier_local(comm); 97 } 98 } 99 } 100 101 int MPI_Gatherv_local_float(const void *sendbuf, int count, void *recvbuf, const int recvcounts[], const int displs[], MPI_Comm comm) 102 { 103 int my_rank = comm.ep_comm_ptr->size_rank_info[1].first; 104 int num_ep = comm.ep_comm_ptr->size_rank_info[1].second; 105 106 float *buffer = comm.my_buffer->buf_float; 107 float *send_buf = static_cast<float*>(const_cast<void*>(sendbuf)); 108 float *recv_buf = static_cast<float*>(recvbuf); 109 110 if(my_rank == 0) 111 { 112 assert(count == recvcounts[0]); 113 copy(send_buf, send_buf+count, recv_buf + displs[0]); 114 } 115 116 for(int j=0; count!=0? j<count: j<count+1; j+=BUFFER_SIZE) 117 { 118 for(int k=1; k<num_ep; k++) 119 { 120 if(my_rank == k) 121 { 122 #pragma omp critical (write_to_buffer) 123 { 124 if(count!=0) copy(send_buf+j, send_buf + min(BUFFER_SIZE, count-j) , buffer); 125 #pragma omp flush 126 } 127 } 128 129 MPI_Barrier_local(comm); 130 131 if(my_rank == 0) 132 { 133 #pragma omp flush 134 #pragma omp critical (read_from_buffer) 135 { 136 copy(buffer, buffer+min(BUFFER_SIZE, recvcounts[k]-j), recv_buf+j+displs[k]); 137 } 138 } 139 140 MPI_Barrier_local(comm); 141 } 142 } 143 } 144 145 int MPI_Gatherv_local_double(const void *sendbuf, int count, void *recvbuf, const int recvcounts[], const int displs[], MPI_Comm comm) 146 { 147 int my_rank = comm.ep_comm_ptr->size_rank_info[1].first; 148 int num_ep = comm.ep_comm_ptr->size_rank_info[1].second; 149 150 double *buffer = comm.my_buffer->buf_double; 151 double *send_buf = static_cast<double*>(const_cast<void*>(sendbuf)); 152 double *recv_buf = static_cast<double*>(recvbuf); 153 154 if(my_rank == 0) 155 { 156 assert(count == recvcounts[0]); 157 copy(send_buf, send_buf+count, recv_buf + displs[0]); 158 } 159 160 for(int j=0; count!=0? j<count: j<count+1; j+=BUFFER_SIZE) 161 { 162 for(int k=1; k<num_ep; k++) 163 { 164 if(my_rank == k) 165 { 166 #pragma omp critical (write_to_buffer) 167 { 168 if(count!=0) copy(send_buf+j, send_buf + min(BUFFER_SIZE, count-j) , buffer); 169 #pragma omp flush 170 } 171 } 172 173 MPI_Barrier_local(comm); 174 175 if(my_rank == 0) 176 { 177 #pragma omp flush 178 #pragma omp critical (read_from_buffer) 179 { 180 copy(buffer, buffer+min(BUFFER_SIZE, recvcounts[k]-j), recv_buf+j+displs[k]); 181 } 182 } 183 184 MPI_Barrier_local(comm); 185 } 186 } 187 } 188 189 int MPI_Gatherv_local_long(const void *sendbuf, int count, void *recvbuf, const int recvcounts[], const int displs[], MPI_Comm comm) 190 { 191 int my_rank = comm.ep_comm_ptr->size_rank_info[1].first; 192 int num_ep = comm.ep_comm_ptr->size_rank_info[1].second; 193 194 long *buffer = comm.my_buffer->buf_long; 195 long *send_buf = static_cast<long*>(const_cast<void*>(sendbuf)); 196 long *recv_buf = static_cast<long*>(recvbuf); 197 198 if(my_rank == 0) 199 { 200 assert(count == recvcounts[0]); 201 copy(send_buf, send_buf+count, recv_buf + displs[0]); 202 } 203 204 for(int j=0; count!=0? j<count: j<count+1; j+=BUFFER_SIZE) 205 { 206 for(int k=1; k<num_ep; k++) 207 { 208 if(my_rank == k) 209 { 210 #pragma omp critical (write_to_buffer) 211 { 212 if(count!=0)copy(send_buf+j, send_buf + min(BUFFER_SIZE, count-j) , buffer); 213 #pragma omp flush 214 } 215 } 216 217 MPI_Barrier_local(comm); 218 219 if(my_rank == 0) 220 { 221 #pragma omp flush 222 #pragma omp critical (read_from_buffer) 223 { 224 copy(buffer, buffer+min(BUFFER_SIZE, recvcounts[k]-j), recv_buf+j+displs[k]); 225 } 226 } 227 228 MPI_Barrier_local(comm); 229 } 230 } 231 } 232 233 int MPI_Gatherv_local_ulong(const void *sendbuf, int count, void *recvbuf, const int recvcounts[], const int displs[], MPI_Comm comm) 234 { 235 int my_rank = comm.ep_comm_ptr->size_rank_info[1].first; 236 int num_ep = comm.ep_comm_ptr->size_rank_info[1].second; 237 238 unsigned long *buffer = comm.my_buffer->buf_ulong; 239 unsigned long *send_buf = static_cast<unsigned long*>(const_cast<void*>(sendbuf)); 240 unsigned long *recv_buf = static_cast<unsigned long*>(recvbuf); 241 242 if(my_rank == 0) 243 { 244 assert(count == recvcounts[0]); 245 copy(send_buf, send_buf+count, recv_buf + displs[0]); 246 } 247 248 for(int j=0; count!=0? j<count: j<count+1; j+=BUFFER_SIZE) 249 { 250 for(int k=1; k<num_ep; k++) 251 { 252 if(my_rank == k) 253 { 254 #pragma omp critical (write_to_buffer) 255 { 256 if(count!=0) copy(send_buf+j, send_buf + min(BUFFER_SIZE, count-j) , buffer); 257 #pragma omp flush 258 } 259 } 260 261 MPI_Barrier_local(comm); 262 263 if(my_rank == 0) 264 { 265 #pragma omp flush 266 #pragma omp critical (read_from_buffer) 267 { 268 copy(buffer, buffer+min(BUFFER_SIZE, recvcounts[k]-j), recv_buf+j+displs[k]); 269 } 270 } 271 272 MPI_Barrier_local(comm); 273 } 274 } 275 } 276 277 int MPI_Gatherv_local_char(const void *sendbuf, int count, void *recvbuf, const int recvcounts[], const int displs[], MPI_Comm comm) 278 { 279 int my_rank = comm.ep_comm_ptr->size_rank_info[1].first; 280 int num_ep = comm.ep_comm_ptr->size_rank_info[1].second; 281 282 char *buffer = comm.my_buffer->buf_char; 283 char *send_buf = static_cast<char*>(const_cast<void*>(sendbuf)); 284 char *recv_buf = static_cast<char*>(recvbuf); 285 286 if(my_rank == 0) 287 { 288 assert(count == recvcounts[0]); 289 copy(send_buf, send_buf+count, recv_buf + displs[0]); 290 } 291 292 for(int j=0; count!=0? j<count: j<count+1; j+=BUFFER_SIZE) 293 { 294 for(int k=1; k<num_ep; k++) 295 { 296 if(my_rank == k) 297 { 298 #pragma omp critical (write_to_buffer) 299 { 300 if(count!=0) copy(send_buf+j, send_buf + min(BUFFER_SIZE, count-j) , buffer); 301 #pragma omp flush 302 } 303 } 304 305 MPI_Barrier_local(comm); 306 307 if(my_rank == 0) 308 { 309 #pragma omp flush 310 #pragma omp critical (read_from_buffer) 311 { 312 copy(buffer, buffer+min(BUFFER_SIZE, recvcounts[k]-j), recv_buf+j+displs[k]); 313 } 314 } 315 316 MPI_Barrier_local(comm); 317 } 318 } 319 } 320 321 322 int MPI_Gatherv(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, const int recvcounts[], const int displs[], 46 int MPI_Gatherv(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, const int input_recvcounts[], const int input_displs[], 323 47 MPI_Datatype recvtype, int root, MPI_Comm comm) 324 48 { 325 49 326 if(!comm.is_ep && comm.mpi_comm)50 if(!comm.is_ep) 327 51 { 328 ::MPI_Gatherv(const_cast<void*>(sendbuf), sendcount, static_cast< ::MPI_Datatype>(sendtype), recvbuf, const_cast<int*>( recvcounts), const_cast<int*>(displs),52 ::MPI_Gatherv(const_cast<void*>(sendbuf), sendcount, static_cast< ::MPI_Datatype>(sendtype), recvbuf, const_cast<int*>(input_recvcounts), const_cast<int*>(input_displs), 329 53 static_cast< ::MPI_Datatype>(recvtype), root, static_cast< ::MPI_Comm>(comm.mpi_comm)); 330 54 return 0; 331 55 } 332 56 333 if(!comm.mpi_comm) return 0;334 57 335 assert(s tatic_cast< ::MPI_Datatype>(sendtype) == static_cast< ::MPI_Datatype>(recvtype));58 assert(sendtype == recvtype); 336 59 337 MPI_Datatype datatype = sendtype;338 int count = sendcount;339 340 int ep_rank, ep_rank_loc, mpi_rank;341 int ep_size, num_ep, mpi_size;342 343 ep_rank = comm.ep_comm_ptr->size_rank_info[0].first;344 ep_rank_loc = comm.ep_comm_ptr->size_rank_info[1].first;345 mpi_rank = comm.ep_comm_ptr->size_rank_info[2].first;346 ep_size = comm.ep_comm_ptr->size_rank_info[0].second;347 num_ep = comm.ep_comm_ptr->size_rank_info[1].second;348 mpi_size = comm.ep_comm_ptr->size_rank_info[2].second;349 60 350 351 352 if(ep_size == mpi_size) 353 return ::MPI_Gatherv(sendbuf, sendcount, static_cast< ::MPI_Datatype>(datatype), recvbuf, recvcounts, displs, 354 static_cast< ::MPI_Datatype>(datatype), root, static_cast< ::MPI_Comm>(comm.mpi_comm)); 355 356 if(ep_rank != root) 357 { 358 recvcounts = new int[ep_size]; 359 displs = new int[ep_size]; 360 } 361 362 MPI_Bcast(const_cast< int* >(displs), ep_size, MPI_INT, root, comm); 363 MPI_Bcast(const_cast< int* >(recvcounts), ep_size, MPI_INT, root, comm); 364 365 366 int recv_plus_displs[ep_size]; 367 for(int i=0; i<ep_size; i++) recv_plus_displs[i] = recvcounts[i] + displs[i]; 368 369 for(int j=0; j<mpi_size; j++) 370 { 371 if(recv_plus_displs[j*num_ep] < displs[j*num_ep+1] || 372 recv_plus_displs[j*num_ep + num_ep -1] < displs[j*num_ep + num_ep -2]) 373 { 374 Debug("Call special implementation of mpi_gatherv. 1st condition not OK\n"); 375 return MPI_Allgatherv_special(sendbuf, sendcount, sendtype, recvbuf, recvcounts, displs, recvtype, comm); 376 } 377 378 for(int i=1; i<num_ep-1; i++) 379 { 380 if(recv_plus_displs[j*num_ep+i] < displs[j*num_ep+i+1] || 381 recv_plus_displs[j*num_ep+i] < displs[j*num_ep+i-1]) 382 { 383 Debug("Call special implementation of mpi_gatherv. 2nd condition not OK\n"); 384 return MPI_Allgatherv_special(sendbuf, sendcount, sendtype, recvbuf, recvcounts, displs, recvtype, comm); 385 } 386 } 387 } 388 389 390 int root_mpi_rank = comm.rank_map->at(root).second; 391 int root_ep_loc = comm.rank_map->at(root).first; 392 393 394 ::MPI_Aint datasize, lb; 395 396 ::MPI_Type_get_extent(static_cast< ::MPI_Datatype>(datatype), &lb, &datasize); 397 398 void *local_gather_recvbuf; 399 int buffer_size; 400 void *master_recvbuf; 401 402 if(ep_rank_loc == 0 && mpi_rank == root_mpi_rank && root_ep_loc != 0) 403 { 404 master_recvbuf = new void*[sizeof(recvbuf)]; 405 assert(root_ep_loc == 0); 406 } 407 408 if(ep_rank_loc==0) 409 { 410 buffer_size = *std::max_element(recv_plus_displs+ep_rank, recv_plus_displs+ep_rank+num_ep); 411 412 local_gather_recvbuf = new void*[datasize*buffer_size]; 413 } 414 415 MPI_Gatherv_local(sendbuf, count, datatype, local_gather_recvbuf, recvcounts+ep_rank-ep_rank_loc, displs+ep_rank-ep_rank_loc, comm); 416 417 //MPI_Gather 418 if(ep_rank_loc == 0) 419 { 420 int *mpi_recvcnt= new int[mpi_size]; 421 int *mpi_displs= new int[mpi_size]; 422 423 int buff_start = *std::min_element(displs+ep_rank, displs+ep_rank+num_ep);; 424 int buff_end = buffer_size; 425 426 int mpi_sendcnt = buff_end - buff_start; 427 428 429 ::MPI_Gather(&mpi_sendcnt, 1, MPI_INT_STD, mpi_recvcnt, 1, MPI_INT_STD, root_mpi_rank, static_cast< ::MPI_Comm>(comm.mpi_comm)); 430 ::MPI_Gather(&buff_start, 1, MPI_INT_STD, mpi_displs, 1, MPI_INT_STD, root_mpi_rank, static_cast< ::MPI_Comm>(comm.mpi_comm)); 431 432 if(root_ep_loc == 0) 433 { ::MPI_Gatherv(local_gather_recvbuf + datasize*buff_start, mpi_sendcnt, static_cast< ::MPI_Datatype>(datatype), recvbuf, mpi_recvcnt, 434 mpi_displs, static_cast< ::MPI_Datatype>(datatype), root_mpi_rank, static_cast< ::MPI_Comm>(comm.mpi_comm)); 435 } 436 else // gatherv to master_recvbuf 437 { ::MPI_Gatherv(local_gather_recvbuf + datasize*buff_start, mpi_sendcnt, static_cast< ::MPI_Datatype>(datatype), master_recvbuf, mpi_recvcnt, 438 mpi_displs, static_cast< ::MPI_Datatype>(datatype), root_mpi_rank, static_cast< ::MPI_Comm>(comm.mpi_comm)); 439 } 440 441 delete[] mpi_recvcnt; 442 delete[] mpi_displs; 443 } 444 445 int global_min_displs = *std::min_element(displs, displs+ep_size); 446 int global_recvcnt = *std::max_element(recv_plus_displs, recv_plus_displs+ep_size); 447 448 449 if(root_ep_loc != 0 && mpi_rank == root_mpi_rank) // root is not master, master send to root and root receive from master 450 { 451 innode_memcpy(0, master_recvbuf+datasize*global_min_displs, root_ep_loc, recvbuf+datasize*global_min_displs, global_recvcnt, datatype, comm); 452 if(ep_rank_loc == 0) delete[] master_recvbuf; 453 } 454 455 456 457 if(ep_rank_loc==0) 458 { 459 if(datatype == MPI_INT) 460 { 461 delete[] static_cast<int*>(local_gather_recvbuf); 462 } 463 else if(datatype == MPI_FLOAT) 464 { 465 delete[] static_cast<float*>(local_gather_recvbuf); 466 } 467 else if(datatype == MPI_DOUBLE) 468 { 469 delete[] static_cast<double*>(local_gather_recvbuf); 470 } 471 else if(datatype == MPI_LONG) 472 { 473 delete[] static_cast<long*>(local_gather_recvbuf); 474 } 475 else if(datatype == MPI_UNSIGNED_LONG) 476 { 477 delete[] static_cast<unsigned long*>(local_gather_recvbuf); 478 } 479 else // if(datatype == MPI_CHAR) 480 { 481 delete[] static_cast<char*>(local_gather_recvbuf); 482 } 483 } 484 else 485 { 486 delete[] recvcounts; 487 delete[] displs; 488 } 489 return 0; 490 } 491 492 493 494 int MPI_Allgatherv(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, const int recvcounts[], const int displs[], 495 MPI_Datatype recvtype, MPI_Comm comm) 496 { 497 498 if(!comm.is_ep && comm.mpi_comm) 499 { 500 ::MPI_Allgatherv(sendbuf, sendcount, static_cast< ::MPI_Datatype>(sendtype), recvbuf, recvcounts, displs, 501 static_cast< ::MPI_Datatype>(recvtype), static_cast< ::MPI_Comm>(comm.mpi_comm)); 502 return 0; 503 } 504 505 if(!comm.mpi_comm) return 0; 506 507 assert(static_cast< ::MPI_Datatype>(sendtype) == static_cast< ::MPI_Datatype>(recvtype)); 508 509 510 MPI_Datatype datatype = sendtype; 511 int count = sendcount; 512 513 int ep_rank, ep_rank_loc, mpi_rank; 514 int ep_size, num_ep, mpi_size; 515 516 ep_rank = comm.ep_comm_ptr->size_rank_info[0].first; 517 ep_rank_loc = comm.ep_comm_ptr->size_rank_info[1].first; 518 mpi_rank = comm.ep_comm_ptr->size_rank_info[2].first; 519 ep_size = comm.ep_comm_ptr->size_rank_info[0].second; 520 num_ep = comm.ep_comm_ptr->size_rank_info[1].second; 521 mpi_size = comm.ep_comm_ptr->size_rank_info[2].second; 522 523 if(ep_size == mpi_size) // needed by servers 524 return ::MPI_Allgatherv(sendbuf, sendcount, static_cast< ::MPI_Datatype>(datatype), recvbuf, recvcounts, displs, 525 static_cast< ::MPI_Datatype>(datatype), static_cast< ::MPI_Comm>(comm.mpi_comm)); 526 527 int recv_plus_displs[ep_size]; 528 for(int i=0; i<ep_size; i++) recv_plus_displs[i] = recvcounts[i] + displs[i]; 529 530 531 for(int j=0; j<mpi_size; j++) 532 { 533 if(recv_plus_displs[j*num_ep] < displs[j*num_ep+1] || 534 recv_plus_displs[j*num_ep + num_ep -1] < displs[j*num_ep + num_ep -2]) 535 { 536 printf("proc %d/%d Call special implementation of mpi_allgatherv.\n", ep_rank, ep_size); 537 for(int k=0; k<ep_size; k++) 538 printf("recv_plus_displs[%d] = %d\t displs[%d] = %d\n", k, recv_plus_displs[k], k, displs[k]); 539 540 return MPI_Allgatherv_special(sendbuf, sendcount, sendtype, recvbuf, recvcounts, displs, recvtype, comm); 541 } 542 543 for(int i=1; i<num_ep-1; i++) 544 { 545 if(recv_plus_displs[j*num_ep+i] < displs[j*num_ep+i+1] || 546 recv_plus_displs[j*num_ep+i] < displs[j*num_ep+i-1]) 547 { 548 printf("proc %d/%d Call special implementation of mpi_allgatherv.\n", ep_rank, ep_size); 549 return MPI_Allgatherv_special(sendbuf, sendcount, sendtype, recvbuf, recvcounts, displs, recvtype, comm); 550 } 551 } 552 } 553 554 ::MPI_Aint datasize, lb; 555 556 ::MPI_Type_get_extent(static_cast< ::MPI_Datatype>(datatype), &lb, &datasize); 557 558 void *local_gather_recvbuf; 559 int buffer_size; 560 561 if(ep_rank_loc==0) 562 { 563 buffer_size = *std::max_element(recv_plus_displs+ep_rank, recv_plus_displs+ep_rank+num_ep); 564 565 local_gather_recvbuf = new void*[datasize*buffer_size]; 566 } 567 568 // local gather to master 569 MPI_Gatherv_local(sendbuf, count, datatype, local_gather_recvbuf, recvcounts+ep_rank-ep_rank_loc, displs+ep_rank-ep_rank_loc, comm); 570 571 //MPI_Gather 572 if(ep_rank_loc == 0) 573 { 574 int *mpi_recvcnt= new int[mpi_size]; 575 int *mpi_displs= new int[mpi_size]; 576 577 int buff_start = *std::min_element(displs+ep_rank, displs+ep_rank+num_ep);; 578 int buff_end = buffer_size; 579 580 int mpi_sendcnt = buff_end - buff_start; 581 582 583 ::MPI_Allgather(&mpi_sendcnt, 1, MPI_INT_STD, mpi_recvcnt, 1, MPI_INT_STD, static_cast< ::MPI_Comm>(comm.mpi_comm)); 584 ::MPI_Allgather(&buff_start, 1, MPI_INT_STD, mpi_displs, 1, MPI_INT_STD, static_cast< ::MPI_Comm>(comm.mpi_comm)); 585 586 587 ::MPI_Allgatherv((char*)local_gather_recvbuf + datasize*buff_start, mpi_sendcnt, static_cast< ::MPI_Datatype>(datatype), recvbuf, mpi_recvcnt, 588 mpi_displs, static_cast< ::MPI_Datatype>(datatype), static_cast< ::MPI_Comm>(comm.mpi_comm)); 589 590 delete[] mpi_recvcnt; 591 delete[] mpi_displs; 592 } 593 594 int global_min_displs = *std::min_element(displs, displs+ep_size); 595 int global_recvcnt = *std::max_element(recv_plus_displs, recv_plus_displs+ep_size); 596 597 MPI_Bcast_local(recvbuf+datasize*global_min_displs, global_recvcnt, datatype, comm); 598 599 if(ep_rank_loc==0) 600 { 601 if(datatype == MPI_INT) 602 { 603 delete[] static_cast<int*>(local_gather_recvbuf); 604 } 605 else if(datatype == MPI_FLOAT) 606 { 607 delete[] static_cast<float*>(local_gather_recvbuf); 608 } 609 else if(datatype == MPI_DOUBLE) 610 { 611 delete[] static_cast<double*>(local_gather_recvbuf); 612 } 613 else if(datatype == MPI_LONG) 614 { 615 delete[] static_cast<long*>(local_gather_recvbuf); 616 } 617 else if(datatype == MPI_UNSIGNED_LONG) 618 { 619 delete[] static_cast<unsigned long*>(local_gather_recvbuf); 620 } 621 else // if(datatype == MPI_CHAR) 622 { 623 delete[] static_cast<char*>(local_gather_recvbuf); 624 } 625 } 626 } 627 628 int MPI_Gatherv_special(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, const int recvcounts[], const int displs[], 629 MPI_Datatype recvtype, int root, MPI_Comm comm) 630 { 631 int ep_rank, ep_rank_loc, mpi_rank; 632 int ep_size, num_ep, mpi_size; 633 634 ep_rank = comm.ep_comm_ptr->size_rank_info[0].first; 635 ep_rank_loc = comm.ep_comm_ptr->size_rank_info[1].first; 636 mpi_rank = comm.ep_comm_ptr->size_rank_info[2].first; 637 ep_size = comm.ep_comm_ptr->size_rank_info[0].second; 638 num_ep = comm.ep_comm_ptr->size_rank_info[1].second; 639 mpi_size = comm.ep_comm_ptr->size_rank_info[2].second; 61 int ep_rank = comm.ep_comm_ptr->size_rank_info[0].first; 62 int ep_rank_loc = comm.ep_comm_ptr->size_rank_info[1].first; 63 int mpi_rank = comm.ep_comm_ptr->size_rank_info[2].first; 64 int ep_size = comm.ep_comm_ptr->size_rank_info[0].second; 65 int num_ep = comm.ep_comm_ptr->size_rank_info[1].second; 66 int mpi_size = comm.ep_comm_ptr->size_rank_info[2].second; 640 67 641 68 int root_mpi_rank = comm.rank_map->at(root).second; … … 643 70 644 71 ::MPI_Aint datasize, lb; 645 ::MPI_Type_get_extent( static_cast< ::MPI_Datatype>(sendtype), &lb, &datasize);72 ::MPI_Type_get_extent(to_mpi_type(sendtype), &lb, &datasize); 646 73 647 void *local_gather_recvbuf;648 int buffer_size;74 int *recvcounts; 75 int* displs; 649 76 650 int *local_displs = new int[num_ep]; 651 int *local_rvcnts = new int[num_ep]; 652 for(int i=0; i<num_ep; i++) local_rvcnts[i] = recvcounts[ep_rank-ep_rank_loc + i]; 653 local_displs[0] = 0; 654 for(int i=1; i<num_ep; i++) local_displs[i] = local_displs[i-1] + local_rvcnts[i-1]; 77 recvcounts = new int[ep_size]; 78 displs = new int[ep_size]; 655 79 656 if(ep_rank_loc==0) 657 { 658 buffer_size = local_displs[num_ep-1] + recvcounts[ep_rank+num_ep-1]; 659 local_gather_recvbuf = new void*[datasize*buffer_size]; 80 81 bool is_master = (ep_rank_loc==0 && mpi_rank != root_mpi_rank ) || ep_rank == root; 82 bool is_root = ep_rank == root; 83 84 void* local_recvbuf; 85 std::vector<int>local_recvcounts(num_ep, 0); 86 std::vector<int>local_displs(num_ep, 0); 87 88 89 if(is_root) 90 { 91 copy(input_recvcounts, input_recvcounts+ep_size, recvcounts); 92 copy(input_displs, input_displs+ep_size, displs); 660 93 } 661 94 662 // local gather to master663 MPI_ Gatherv_local(sendbuf, sendcount, sendtype, local_gather_recvbuf, local_rvcnts, local_displs, comm); // all sendbuf gathered to master95 MPI_Bcast(recvcounts, ep_size, MPI_INT, root, comm); 96 MPI_Bcast(displs, ep_size, MPI_INT, root, comm); 664 97 665 int **mpi_recvcnts = new int*[num_ep]; 666 int **mpi_displs = new int*[num_ep]; 667 for(int i=0; i<num_ep; i++) 98 if(mpi_rank == root_mpi_rank) MPI_Gather_local(&sendcount, 1, MPI_INT, local_recvcounts.data(), root_ep_loc, comm); 99 else MPI_Gather_local(&sendcount, 1, MPI_INT, local_recvcounts.data(), 0, comm); 100 101 102 103 if(is_master) 668 104 { 669 mpi_recvcnts[i] = new int[mpi_size]; 670 mpi_displs[i] = new int[mpi_size]; 671 for(int j=0; j<mpi_size; j++) 105 int local_recvbuf_size = std::accumulate(local_recvcounts.begin(), local_recvcounts.end(), 0); 106 107 for(int i=1; i<num_ep; i++) 108 local_displs[i] = local_displs[i-1] + local_recvcounts[i-1]; 109 110 local_recvbuf = new void*[datasize * local_recvbuf_size]; 111 } 112 113 if(mpi_rank == root_mpi_rank) MPI_Gatherv_local(sendbuf, sendcount, sendtype, local_recvbuf, local_recvcounts.data(), local_displs.data(), root_ep_loc, comm); 114 else MPI_Gatherv_local(sendbuf, sendcount, sendtype, local_recvbuf, local_recvcounts.data(), local_displs.data(), 0, comm); 115 116 //if(is_master) printf("local_recvbuf = %d %d %d %d\n", static_cast<int*>(local_recvbuf)[0], static_cast<int*>(local_recvbuf)[1], static_cast<int*>(local_recvbuf)[2], static_cast<int*>(local_recvbuf)[3]); 117 118 void* tmp_recvbuf; 119 int tmp_recvbuf_size = std::accumulate(recvcounts, recvcounts+ep_size, 0); 120 121 if(is_root) tmp_recvbuf = new void*[datasize * tmp_recvbuf_size]; 122 123 124 std::vector<int> mpi_recvcounts(mpi_size, 0); 125 std::vector<int> mpi_displs(mpi_size, 0); 126 127 128 if(is_master) 129 { 130 for(int i=0; i<ep_size; i++) 672 131 { 673 mpi_recvcnts[i][j] = recvcounts[j*num_ep + i]; 674 mpi_displs[i][j] = displs[j*num_ep + i]; 675 } 676 } 677 678 void *master_recvbuf; 679 if(ep_rank_loc == 0 && mpi_rank == root_mpi_rank && root_ep_loc != 0) master_recvbuf = new void*[sizeof(recvbuf)]; 680 681 if(ep_rank_loc == 0 && root_ep_loc == 0) // master in MPI_Allgatherv loop 682 for(int i=0; i<num_ep; i++) 683 { 684 ::MPI_Gatherv(local_gather_recvbuf + datasize*local_displs[i], recvcounts[ep_rank+i], static_cast< ::MPI_Datatype>(sendtype), recvbuf, mpi_recvcnts[i], mpi_displs[i], 685 static_cast< ::MPI_Datatype>(recvtype), root_mpi_rank, static_cast< ::MPI_Comm>(comm.mpi_comm)); 686 } 687 if(ep_rank_loc == 0 && root_ep_loc != 0) 688 for(int i=0; i<num_ep; i++) 689 { 690 ::MPI_Gatherv(local_gather_recvbuf + datasize*local_displs[i], recvcounts[ep_rank+i], static_cast< ::MPI_Datatype>(sendtype), master_recvbuf, mpi_recvcnts[i], mpi_displs[i], 691 static_cast< ::MPI_Datatype>(recvtype), root_mpi_rank, static_cast< ::MPI_Comm>(comm.mpi_comm)); 132 mpi_recvcounts[comm.rank_map->at(i).second]+=recvcounts[i]; 692 133 } 693 134 694 135 695 if(root_ep_loc != 0 && mpi_rank == root_mpi_rank) // root is not master, master send to root and root receive from master 136 137 for(int i=1; i<mpi_size; i++) 138 mpi_displs[i] = mpi_displs[i-1] + mpi_recvcounts[i-1]; 139 140 141 142 ::MPI_Gatherv(local_recvbuf, sendcount*num_ep, sendtype, tmp_recvbuf, mpi_recvcounts.data(), mpi_displs.data(), recvtype, root_mpi_rank, to_mpi_comm(comm.mpi_comm)); 143 } 144 145 146 // reorder data 147 if(is_root) 696 148 { 149 // printf("tmp_recvbuf =\n"); 150 // for(int i=0; i<ep_size*sendcount; i++) printf("%d\t", static_cast<int*>(tmp_recvbuf)[i]); 151 // printf("\n"); 152 153 int offset; 697 154 for(int i=0; i<ep_size; i++) 698 innode_memcpy(0, master_recvbuf + datasize*displs[i], root_ep_loc, recvbuf + datasize*displs[i], recvcounts[i], sendtype, comm); 155 { 156 int extra = 0; 157 for(int j=0, k=0; j<ep_size, k<comm.rank_map->at(i).first; j++) 158 if(comm.rank_map->at(i).second == comm.rank_map->at(j).second) 159 { 160 extra += recvcounts[j]; 161 k++; 162 } 699 163 700 if(ep_rank_loc == 0) delete[] master_recvbuf; 164 offset = mpi_displs[comm.rank_map->at(i).second] + extra; 165 166 memcpy(recvbuf+displs[i]*datasize, tmp_recvbuf+offset*datasize, recvcounts[i]*datasize); 167 168 //printf("recvbuf[%d] = tmp_recvbuf[%d] \n", i, offset); 169 170 } 171 172 // printf("recvbuf =\n"); 173 // for(int i=0; i<ep_size*sendcount; i++) printf("%d\t", static_cast<int*>(recvbuf)[i]); 174 // printf("\n"); 175 701 176 } 702 177 703 704 delete[] local_displs; 705 delete[] local_rvcnts; 706 for(int i=0; i<num_ep; i++) { delete[] mpi_recvcnts[i]; 707 delete[] mpi_displs[i]; } 708 delete[] mpi_recvcnts; 709 delete[] mpi_displs; 710 if(ep_rank_loc==0) 178 delete[] recvcounts; 179 delete[] displs; 180 181 if(is_master) 711 182 { 712 if(sendtype == MPI_INT) 713 { 714 delete[] static_cast<int*>(local_gather_recvbuf); 715 } 716 else if(sendtype == MPI_FLOAT) 717 { 718 delete[] static_cast<float*>(local_gather_recvbuf); 719 } 720 else if(sendtype == MPI_DOUBLE) 721 { 722 delete[] static_cast<double*>(local_gather_recvbuf); 723 } 724 else if(sendtype == MPI_LONG) 725 { 726 delete[] static_cast<long*>(local_gather_recvbuf); 727 } 728 else if(sendtype == MPI_UNSIGNED_LONG) 729 { 730 delete[] static_cast<unsigned long*>(local_gather_recvbuf); 731 } 732 else // if(sendtype == MPI_CHAR) 733 { 734 delete[] static_cast<char*>(local_gather_recvbuf); 735 } 183 delete[] local_recvbuf; 736 184 } 185 if(is_root) delete[] tmp_recvbuf; 737 186 } 738 187 739 int MPI_Allgatherv_special(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, const int recvcounts[], const int displs[],740 MPI_Datatype recvtype, MPI_Comm comm)741 {742 int ep_rank, ep_rank_loc, mpi_rank;743 int ep_size, num_ep, mpi_size;744 745 ep_rank = comm.ep_comm_ptr->size_rank_info[0].first;746 ep_rank_loc = comm.ep_comm_ptr->size_rank_info[1].first;747 mpi_rank = comm.ep_comm_ptr->size_rank_info[2].first;748 ep_size = comm.ep_comm_ptr->size_rank_info[0].second;749 num_ep = comm.ep_comm_ptr->size_rank_info[1].second;750 mpi_size = comm.ep_comm_ptr->size_rank_info[2].second;751 752 753 ::MPI_Aint datasize, lb;754 ::MPI_Type_get_extent(static_cast< ::MPI_Datatype>(sendtype), &lb, &datasize);755 756 void *local_gather_recvbuf;757 int buffer_size;758 759 int *local_displs = new int[num_ep];760 int *local_rvcnts = new int[num_ep];761 for(int i=0; i<num_ep; i++) local_rvcnts[i] = recvcounts[ep_rank-ep_rank_loc + i];762 local_displs[0] = 0;763 for(int i=1; i<num_ep; i++) local_displs[i] = local_displs[i-1] + local_rvcnts[i-1];764 765 if(ep_rank_loc==0)766 {767 buffer_size = local_displs[num_ep-1] + recvcounts[ep_rank+num_ep-1];768 local_gather_recvbuf = new void*[datasize*buffer_size];769 }770 771 // local gather to master772 MPI_Gatherv_local(sendbuf, sendcount, sendtype, local_gather_recvbuf, local_rvcnts, local_displs, comm); // all sendbuf gathered to master773 774 int **mpi_recvcnts = new int*[num_ep];775 int **mpi_displs = new int*[num_ep];776 for(int i=0; i<num_ep; i++)777 {778 mpi_recvcnts[i] = new int[mpi_size];779 mpi_displs[i] = new int[mpi_size];780 for(int j=0; j<mpi_size; j++)781 {782 mpi_recvcnts[i][j] = recvcounts[j*num_ep + i];783 mpi_displs[i][j] = displs[j*num_ep + i];784 }785 }786 787 if(ep_rank_loc == 0) // master in MPI_Allgatherv loop788 for(int i=0; i<num_ep; i++)789 {790 ::MPI_Allgatherv(local_gather_recvbuf + datasize*local_displs[i], recvcounts[ep_rank+i], static_cast< ::MPI_Datatype>(sendtype), recvbuf, mpi_recvcnts[i], mpi_displs[i],791 static_cast< ::MPI_Datatype>(recvtype), static_cast< ::MPI_Comm>(comm.mpi_comm));792 }793 794 for(int i=0; i<ep_size; i++)795 MPI_Bcast_local(recvbuf + datasize*displs[i], recvcounts[i], recvtype, comm);796 797 798 delete[] local_displs;799 delete[] local_rvcnts;800 for(int i=0; i<num_ep; i++) { delete[] mpi_recvcnts[i];801 delete[] mpi_displs[i]; }802 delete[] mpi_recvcnts;803 delete[] mpi_displs;804 if(ep_rank_loc==0)805 {806 if(sendtype == MPI_INT)807 {808 delete[] static_cast<int*>(local_gather_recvbuf);809 }810 else if(sendtype == MPI_FLOAT)811 {812 delete[] static_cast<float*>(local_gather_recvbuf);813 }814 else if(sendtype == MPI_DOUBLE)815 {816 delete[] static_cast<double*>(local_gather_recvbuf);817 }818 else if(sendtype == MPI_LONG)819 {820 delete[] static_cast<long*>(local_gather_recvbuf);821 }822 else if(sendtype == MPI_UNSIGNED_LONG)823 {824 delete[] static_cast<unsigned long*>(local_gather_recvbuf);825 }826 else // if(sendtype == MPI_CHAR)827 {828 delete[] static_cast<char*>(local_gather_recvbuf);829 }830 }831 }832 833 834 188 } -
XIOS/dev/branch_openmp/extern/src_ep_dev/ep_intercomm.cpp
r1196 r1287 32 32 33 33 34 35 34 if(ep_rank == local_leader) 36 35 { 37 ::MPI_Comm_rank(MPI_COMM_WORLD_STD, &leader_ranks[0]);36 MPI_Comm_rank(MPI_COMM_WORLD, &leader_ranks[0]); 38 37 39 38 leader_ranks[1] = mpi_size; 40 39 MPI_Comm_rank(peer_comm, &leader_ranks[2]); 41 40 42 MPI_Request req_s, req_r; 43 MPI_Isend(&leader_ranks[0], 3, MPI_INT_STD, remote_leader, tag, peer_comm, &req_s); 44 MPI_Status status; 45 MPI_Wait(&req_s, &status); 46 47 MPI_Irecv(&leader_ranks[3], 3, MPI_INT_STD, remote_leader, tag, peer_comm, &req_r); 48 MPI_Wait(&req_r, &status); 41 //printf("leader_ranks = %d, %d, %d\n", leader_ranks[0], leader_ranks[1], leader_ranks[2]); 42 MPI_Request request[2]; 43 MPI_Status status[2]; 44 45 MPI_Isend(&leader_ranks[0], 3, static_cast< ::MPI_Datatype>(MPI_INT), remote_leader, tag, peer_comm, &request[0]); 46 MPI_Irecv(&leader_ranks[3], 3, static_cast< ::MPI_Datatype>(MPI_INT), remote_leader, tag, peer_comm, &request[1]); 47 48 MPI_Waitall(2, request, status); 49 49 } 50 50 51 MPI_Bcast(leader_ranks, 6, MPI_INT_STD, local_leader, local_comm); 52 51 52 MPI_Bcast(leader_ranks, 6, static_cast< ::MPI_Datatype>(MPI_INT), local_leader, local_comm); 53 54 53 55 MPI_Barrier(local_comm); 56 54 57 55 58 if(leader_ranks[0] == leader_ranks[3]) … … 65 68 { 66 69 // change leader 67 if(ep_rank == local_leader) Debug("calling MPI_Intercomm_create_from_world\n");68 69 70 int new_local_leader; 70 71 … … 128 129 new_tag_in_world = TAG++; 129 130 } 130 MPI_Bcast(&new_tag_in_world, 1, MPI_INT_STD, new_local_leader, local_comm);131 if(ep_rank == local_leader) MPI_Send(&new_tag_in_world, 1, MPI_INT_STD, remote_leader, tag, peer_comm);131 MPI_Bcast(&new_tag_in_world, 1, static_cast< ::MPI_Datatype> (MPI_INT), new_local_leader, local_comm); 132 if(ep_rank == local_leader) MPI_Send(&new_tag_in_world, 1, static_cast< ::MPI_Datatype> (MPI_INT), remote_leader, tag, peer_comm); 132 133 } 133 134 else … … 136 137 { 137 138 MPI_Status status; 138 MPI_Recv(&new_tag_in_world, 1, MPI_INT_STD, remote_leader, tag, peer_comm, &status);139 } 140 MPI_Bcast(&new_tag_in_world, 1, MPI_INT_STD, new_local_leader, local_comm);139 MPI_Recv(&new_tag_in_world, 1, static_cast< ::MPI_Datatype> (MPI_INT), remote_leader, tag, peer_comm, &status); 140 } 141 MPI_Bcast(&new_tag_in_world, 1, static_cast< ::MPI_Datatype> (MPI_INT), new_local_leader, local_comm); 141 142 } 142 143 … … 144 145 if(ep_rank == new_local_leader) 145 146 { 146 ::MPI_Comm_rank( MPI_COMM_WORLD_STD, &leader_in_world[0]);147 } 148 149 MPI_Bcast(&leader_in_world[0], 1, MPI_INT_STD, new_local_leader, local_comm);147 ::MPI_Comm_rank(static_cast< ::MPI_Comm >(MPI_COMM_WORLD.mpi_comm), &leader_in_world[0]); 148 } 149 150 MPI_Bcast(&leader_in_world[0], 1, static_cast< ::MPI_Datatype> (MPI_INT), new_local_leader, local_comm); 150 151 151 152 152 153 if(ep_rank == local_leader) 153 154 { 154 MPI_Request req_s, req_r; 155 156 MPI_Isend(&leader_in_world[0], 1, MPI_INT_STD, remote_leader, tag, peer_comm, &req_s); 157 MPI_Irecv(&leader_in_world[1], 1, MPI_INT_STD, remote_leader, tag, peer_comm, &req_r); 158 159 MPI_Status status; 160 MPI_Wait(&req_s, &status); 161 MPI_Wait(&req_r, &status); 162 163 /* 164 MPI_Send(&leader_in_world[0], 1, MPI_INT_STD, remote_leader, tag, peer_comm); 165 MPI_Status status; 166 MPI_Recv(&leader_in_world[1], 1, MPI_INT_STD, remote_leader, tag, peer_comm, &status); 167 */ 168 } 169 170 155 MPI_Request request[2]; 156 MPI_Status status[2]; 157 158 MPI_Isend(&leader_in_world[0], 1, static_cast< ::MPI_Datatype> (MPI_INT), remote_leader, tag, peer_comm, &request[0]); 159 MPI_Irecv(&leader_in_world[1], 1, static_cast< ::MPI_Datatype> (MPI_INT), remote_leader, tag, peer_comm, &request[1]); 160 161 MPI_Waitall(2, request, status); 162 } 171 163 172 164 MPI_Bcast(&leader_in_world[1], 1, MPI_INT, local_leader, local_comm); 173 165 174 175 176 166 local_comm.ep_comm_ptr->comm_label = tag; 177 167 178 return MPI_Intercomm_create_from_world(local_comm, new_local_leader, MPI_COMM_WORLD_STD, leader_in_world[1], new_tag_in_world, newintercomm); 168 if(ep_rank == local_leader) Debug("calling MPI_Intercomm_create_from_world\n"); 169 170 return MPI_Intercomm_create_from_world(local_comm, new_local_leader, static_cast< ::MPI_Comm >(MPI_COMM_WORLD.mpi_comm), leader_in_world[1], new_tag_in_world, newintercomm); 171 179 172 } 180 173 } … … 194 187 return 0; 195 188 } 196 else if(comm.mpi_comm != MPI_COMM_NULL_STD)189 else if(comm.mpi_comm != static_cast< ::MPI_Comm>(MPI_COMM_NULL.mpi_comm)) 197 190 { 198 191 ::MPI_Comm mpi_comm = static_cast< ::MPI_Comm> (comm.mpi_comm); -
XIOS/dev/branch_openmp/extern/src_ep_dev/ep_intercomm_kernel.cpp
r1185 r1287 10 10 int MPI_Intercomm_create_kernel(MPI_Comm local_comm, int local_leader, MPI_Comm peer_comm, int remote_leader, int tag, MPI_Comm *newintercomm) 11 11 { 12 13 12 int ep_rank, ep_rank_loc, mpi_rank; 14 13 int ep_size, num_ep, mpi_size; … … 36 35 37 36 38 ::MPI_Comm_rank( MPI_COMM_WORLD_STD, &rank_in_world);37 ::MPI_Comm_rank(static_cast< ::MPI_Comm>(MPI_COMM_WORLD.mpi_comm), &rank_in_world); 39 38 ::MPI_Comm_rank(static_cast< ::MPI_Comm>(local_comm.mpi_comm), &rank_in_local_parent); 40 39 … … 80 79 send_buf[2] = num_ep; 81 80 82 ::MPI_Allgather(send_buf.data(), 3, MPI_INT_STD, recv_buf.data(), 3, MPI_INT_STD, local_mpi_comm);81 ::MPI_Allgather(send_buf.data(), 3, static_cast< ::MPI_Datatype> (MPI_INT), recv_buf.data(), 3, static_cast< ::MPI_Datatype> (MPI_INT), local_mpi_comm); 83 82 84 83 for(int i=0; i<size_info[0]; i++) … … 105 104 MPI_Status sta_send, sta_recv; 106 105 107 MPI_Isend(send_buf.data(), 3, MPI_INT_STD, remote_leader, tag, peer_comm, &req_send);108 MPI_Irecv(recv_buf.data(), 3, MPI_INT_STD, remote_leader, tag, peer_comm, &req_recv);106 MPI_Isend(send_buf.data(), 3, static_cast< ::MPI_Datatype> (MPI_INT), remote_leader, tag, peer_comm, &req_send); 107 MPI_Irecv(recv_buf.data(), 3, static_cast< ::MPI_Datatype> (MPI_INT), remote_leader, tag, peer_comm, &req_recv); 109 108 110 109 … … 118 117 } 119 118 119 120 120 121 send_buf[0] = size_info[1]; 121 122 send_buf[1] = leader_info[0]; … … 124 125 send_buf[4] = rank_in_peer_mpi[1]; 125 126 126 ::MPI_Bcast(send_buf.data(), 5, MPI_INT_STD, local_comm.rank_map->at(local_leader).second, local_mpi_comm);127 ::MPI_Bcast(send_buf.data(), 5, static_cast< ::MPI_Datatype> (MPI_INT), local_comm.rank_map->at(local_leader).second, local_mpi_comm); 127 128 128 129 size_info[1] = send_buf[0]; … … 149 150 std::copy ( ep_info[0].data(), ep_info[0].data() + size_info[0], send_buf.begin() + 2*size_info[0] ); 150 151 151 MPI_Send(send_buf.data(), 3*size_info[0], MPI_INT_STD, remote_leader, tag+1, peer_comm);152 MPI_Recv(recv_buf.data(), 3*size_info[1], MPI_INT_STD, remote_leader, tag+1, peer_comm, &status);153 } 154 155 ::MPI_Bcast(recv_buf.data(), 3*size_info[1], MPI_INT_STD, local_comm.rank_map->at(local_leader).second, local_mpi_comm);152 MPI_Send(send_buf.data(), 3*size_info[0], static_cast< ::MPI_Datatype> (MPI_INT), remote_leader, tag+1, peer_comm); 153 MPI_Recv(recv_buf.data(), 3*size_info[1], static_cast< ::MPI_Datatype> (MPI_INT), remote_leader, tag+1, peer_comm, &status); 154 } 155 156 ::MPI_Bcast(recv_buf.data(), 3*size_info[1], static_cast< ::MPI_Datatype> (MPI_INT), local_comm.rank_map->at(local_leader).second, local_mpi_comm); 156 157 157 158 std::copy ( recv_buf.data(), recv_buf.data() + size_info[1], rank_info[2].begin() ); … … 270 271 size_info[2] = new_ep_info[0].size(); 271 272 MPI_Status status; 272 MPI_Send(&size_info[2], 1, MPI_INT_STD, remote_leader, tag+2, peer_comm);273 MPI_Recv(&size_info[3], 1, MPI_INT_STD, remote_leader, tag+2, peer_comm, &status);274 } 275 276 ::MPI_Bcast(&size_info[2], 2, MPI_INT_STD, local_comm.rank_map->at(local_leader).second, local_mpi_comm);273 MPI_Send(&size_info[2], 1, static_cast< ::MPI_Datatype> (MPI_INT), remote_leader, tag+2, peer_comm); 274 MPI_Recv(&size_info[3], 1, static_cast< ::MPI_Datatype> (MPI_INT), remote_leader, tag+2, peer_comm, &status); 275 } 276 277 ::MPI_Bcast(&size_info[2], 2, static_cast< ::MPI_Datatype> (MPI_INT), local_comm.rank_map->at(local_leader).second, local_mpi_comm); 277 278 278 279 new_rank_info[2].resize(size_info[3]); … … 291 292 std::copy ( new_ep_info[0].data(), new_ep_info[0].data() + size_info[0], send_buf.begin() + 2*size_info[2] ); 292 293 293 MPI_Send(send_buf.data(), 3*size_info[2], MPI_INT_STD, remote_leader, tag+3, peer_comm);294 MPI_Recv(recv_buf.data(), 3*size_info[3], MPI_INT_STD, remote_leader, tag+3, peer_comm, &status);295 } 296 297 ::MPI_Bcast(recv_buf.data(), 3*size_info[3], MPI_INT_STD, local_comm.rank_map->at(local_leader).second, local_mpi_comm);294 MPI_Send(send_buf.data(), 3*size_info[2], static_cast< ::MPI_Datatype> (MPI_INT), remote_leader, tag+3, peer_comm); 295 MPI_Recv(recv_buf.data(), 3*size_info[3], static_cast< ::MPI_Datatype> (MPI_INT), remote_leader, tag+3, peer_comm, &status); 296 } 297 298 ::MPI_Bcast(recv_buf.data(), 3*size_info[3], static_cast< ::MPI_Datatype> (MPI_INT), local_comm.rank_map->at(local_leader).second, local_mpi_comm); 298 299 299 300 std::copy ( recv_buf.data(), recv_buf.data() + size_info[3], new_rank_info[2].begin() ); … … 303 304 } 304 305 305 306 306 307 307 308 if(is_proc_master) … … 327 328 } 328 329 329 ::MPI_Bcast(&leader_info[2], 1, MPI_INT_STD, local_comm.rank_map->at(local_leader).second, local_mpi_comm);330 331 if(new_comm != MPI_COMM_NULL_STD)330 ::MPI_Bcast(&leader_info[2], 1, static_cast< ::MPI_Datatype> (MPI_INT), local_comm.rank_map->at(local_leader).second, local_mpi_comm); 331 332 if(new_comm != static_cast< ::MPI_Comm>(MPI_COMM_NULL.mpi_comm)) 332 333 { 333 334 … … 360 361 //printf("tag_list size = %lu\n", tag_list.size()); 361 362 } 362 363 364 } 365 366 367 MPI_Barrier_local(local_comm); 363 } 368 364 369 365 vector<int> bcast_buf(8); … … 374 370 } 375 371 376 MPI_Bcast(bcast_buf.data(), 8, MPI_INT_STD, local_leader, local_comm);372 MPI_Bcast(bcast_buf.data(), 8, static_cast< ::MPI_Datatype> (MPI_INT), local_leader, local_comm); 377 373 378 374 if(!is_local_leader) … … 399 395 } 400 396 401 MPI_Bcast(bcast_buf.data(), size_info[2]+size_info[1]+size_info[0]+1, MPI_INT_STD, local_leader, local_comm);397 MPI_Bcast(bcast_buf.data(), size_info[2]+size_info[1]+size_info[0]+1, static_cast< ::MPI_Datatype> (MPI_INT), local_leader, local_comm); 402 398 403 399 if(!is_local_leader) … … 410 406 411 407 int my_position = offset[rank_in_local_parent]+ep_rank_loc; 412 413 408 414 409 MPI_Barrier_local(local_comm); 415 410 #pragma omp flush … … 425 420 if((*iter).first == make_pair(tag, min(leader_info[0], leader_info[1]))) 426 421 { 427 *newintercomm = 422 *newintercomm = iter->second[my_position]; 428 423 found = true; 429 424 break; … … 433 428 } 434 429 435 MPI_Barrier_local(local_comm); 430 MPI_Barrier(local_comm); 431 432 if(is_local_leader) 433 { 434 int local_flag = true; 435 int remote_flag = false; 436 MPI_Status mpi_status; 437 438 MPI_Send(&local_flag, 1, MPI_INT, remote_leader, tag, peer_comm); 439 440 MPI_Recv(&remote_flag, 1, MPI_INT, remote_leader, tag, peer_comm, &mpi_status); 441 } 442 443 444 MPI_Barrier(local_comm); 445 436 446 if(is_proc_master) 437 447 { … … 456 466 intercomm_mpi_size = newintercomm->ep_comm_ptr->size_rank_info[2].second; 457 467 458 MPI_Bcast(&remote_ep_size, 1, MPI_INT_STD, local_leader, local_comm);468 MPI_Bcast(&remote_ep_size, 1, static_cast< ::MPI_Datatype> (MPI_INT), local_leader, local_comm); 459 469 460 470 int my_rank_map_elem[2]; … … 470 480 (*newintercomm).ep_comm_ptr->intercomm->local_rank_map->resize(local_ep_size); 471 481 472 MPI_Allgather(my_rank_map_elem, 2, MPI_INT_STD, (*newintercomm).ep_comm_ptr->intercomm->local_rank_map->data(), 2, MPI_INT_STD, local_comm); 482 MPI_Allgather(my_rank_map_elem, 2, static_cast< ::MPI_Datatype> (MPI_INT), 483 (*newintercomm).ep_comm_ptr->intercomm->local_rank_map->data(), 2, static_cast< ::MPI_Datatype> (MPI_INT), local_comm); 473 484 474 485 (*newintercomm).ep_comm_ptr->intercomm->remote_rank_map = new RANK_MAP; … … 489 500 { 490 501 MPI_Status status; 491 MPI_Send((*newintercomm).ep_comm_ptr->intercomm->local_rank_map->data(), 2*local_ep_size, MPI_INT_STD, remote_leader, tag+4, peer_comm);492 MPI_Recv((*newintercomm).ep_comm_ptr->intercomm->remote_rank_map->data(), 2*remote_ep_size, MPI_INT_STD, remote_leader, tag+4, peer_comm, &status);493 494 MPI_Send(&local_intercomm_size, 1, MPI_INT_STD, remote_leader, tag+5, peer_comm);495 MPI_Recv(&remote_intercomm_size, 1, MPI_INT_STD, remote_leader, tag+5, peer_comm, &status);502 MPI_Send((*newintercomm).ep_comm_ptr->intercomm->local_rank_map->data(), 2*local_ep_size, static_cast< ::MPI_Datatype> (MPI_INT), remote_leader, tag+4, peer_comm); 503 MPI_Recv((*newintercomm).ep_comm_ptr->intercomm->remote_rank_map->data(), 2*remote_ep_size, static_cast< ::MPI_Datatype> (MPI_INT), remote_leader, tag+4, peer_comm, &status); 504 505 MPI_Send(&local_intercomm_size, 1, static_cast< ::MPI_Datatype> (MPI_INT), remote_leader, tag+5, peer_comm); 506 MPI_Recv(&remote_intercomm_size, 1, static_cast< ::MPI_Datatype> (MPI_INT), remote_leader, tag+5, peer_comm, &status); 496 507 497 508 new_bcast_root_0 = intercomm_ep_rank; 498 509 } 499 510 500 MPI_Allreduce(&new_bcast_root_0, &new_bcast_root, 1, MPI_INT_STD, MPI_SUM_STD, *newintercomm);501 502 503 MPI_Bcast((*newintercomm).ep_comm_ptr->intercomm->remote_rank_map->data(), 2*remote_ep_size, MPI_INT_STD, local_leader, local_comm);504 MPI_Bcast(&remote_intercomm_size, 1, MPI_INT_STD, new_bcast_root, *newintercomm);511 MPI_Allreduce(&new_bcast_root_0, &new_bcast_root, 1, static_cast< ::MPI_Datatype> (MPI_INT), static_cast< ::MPI_Op>(MPI_SUM), *newintercomm); 512 513 514 MPI_Bcast((*newintercomm).ep_comm_ptr->intercomm->remote_rank_map->data(), 2*remote_ep_size, static_cast< ::MPI_Datatype> (MPI_INT), local_leader, local_comm); 515 MPI_Bcast(&remote_intercomm_size, 1, static_cast< ::MPI_Datatype> (MPI_INT), new_bcast_root, *newintercomm); 505 516 506 517 … … 514 525 { 515 526 MPI_Status status; 516 MPI_Send((*newintercomm).rank_map->data(), 2*local_intercomm_size, MPI_INT_STD, remote_leader, tag+6, peer_comm);517 MPI_Recv((*newintercomm).ep_comm_ptr->intercomm->intercomm_rank_map->data(), 2*remote_intercomm_size, MPI_INT_STD, remote_leader, tag+6, peer_comm, &status);518 } 519 520 MPI_Bcast((*newintercomm).ep_comm_ptr->intercomm->intercomm_rank_map->data(), 2*remote_intercomm_size, MPI_INT_STD, new_bcast_root, *newintercomm);527 MPI_Send((*newintercomm).rank_map->data(), 2*local_intercomm_size, static_cast< ::MPI_Datatype> (MPI_INT), remote_leader, tag+6, peer_comm); 528 MPI_Recv((*newintercomm).ep_comm_ptr->intercomm->intercomm_rank_map->data(), 2*remote_intercomm_size, static_cast< ::MPI_Datatype> (MPI_INT), remote_leader, tag+6, peer_comm, &status); 529 } 530 531 MPI_Bcast((*newintercomm).ep_comm_ptr->intercomm->intercomm_rank_map->data(), 2*remote_intercomm_size, static_cast< ::MPI_Datatype> (MPI_INT), new_bcast_root, *newintercomm); 521 532 522 533 (*newintercomm).ep_comm_ptr->intercomm->local_comm = &(local_comm.ep_comm_ptr->comm_list[ep_rank_loc]); … … 579 590 int rank_in_peer_mpi[2]; 580 591 581 ::MPI_Comm_rank( MPI_COMM_WORLD_STD, &rank_in_world);592 ::MPI_Comm_rank(static_cast< ::MPI_Comm >(MPI_COMM_WORLD.mpi_comm), &rank_in_world); 582 593 583 594 … … 608 619 MPI_Request req_s, req_r; 609 620 610 MPI_Isend(send_buf.data(), 2, MPI_INT_STD, remote_leader, tag, peer_comm, &req_s);611 MPI_Irecv(recv_buf.data(), 2, MPI_INT_STD, remote_leader, tag, peer_comm, &req_r);621 MPI_Isend(send_buf.data(), 2, static_cast< ::MPI_Datatype> (MPI_INT), remote_leader, tag, peer_comm, &req_s); 622 MPI_Irecv(recv_buf.data(), 2, static_cast< ::MPI_Datatype> (MPI_INT), remote_leader, tag, peer_comm, &req_r); 612 623 613 624 … … 619 630 } 620 631 621 MPI_Bcast(recv_buf.data(), 3, MPI_INT_STD, local_leader, local_comm);632 MPI_Bcast(recv_buf.data(), 3, static_cast< ::MPI_Datatype> (MPI_INT), local_leader, local_comm); 622 633 623 634 remote_num_ep = recv_buf[0]; … … 660 671 MPI_Request req_s; 661 672 MPI_Status sta_s; 662 MPI_Isend(tag_label, 2, MPI_INT_STD, remote_leader, tag, peer_comm, &req_s);673 MPI_Isend(tag_label, 2, static_cast< ::MPI_Datatype> (MPI_INT), remote_leader, tag, peer_comm, &req_s); 663 674 664 675 MPI_Wait(&req_s, &sta_s); … … 674 685 MPI_Status status; 675 686 MPI_Request req_r; 676 MPI_Irecv(tag_label, 2, MPI_INT_STD, remote_leader, tag, peer_comm, &req_r);687 MPI_Irecv(tag_label, 2, static_cast< ::MPI_Datatype> (MPI_INT), remote_leader, tag, peer_comm, &req_r); 677 688 MPI_Wait(&req_r, &status); 678 689 } 679 690 } 680 691 681 MPI_Bcast(tag_label, 2, MPI_INT_STD, local_leader, local_comm);692 MPI_Bcast(tag_label, 2, static_cast< ::MPI_Datatype> (MPI_INT), local_leader, local_comm); 682 693 683 694 … … 745 756 local_rank_map_ele[1] = (*newintercomm).ep_comm_ptr->comm_label; 746 757 747 MPI_Allgather(local_rank_map_ele, 2, MPI_INT_STD, (*newintercomm).ep_comm_ptr->intercomm->local_rank_map->data(), 2, MPI_INT_STD, local_comm); 758 MPI_Allgather(local_rank_map_ele, 2, static_cast< ::MPI_Datatype> (MPI_INT), 759 (*newintercomm).ep_comm_ptr->intercomm->local_rank_map->data(), 2, static_cast< ::MPI_Datatype> (MPI_INT), local_comm); 748 760 749 761 if(ep_rank == local_leader) … … 752 764 MPI_Request req_s, req_r; 753 765 754 MPI_Isend((*newintercomm).ep_comm_ptr->intercomm->local_rank_map->data(), 2*local_num_ep, MPI_INT_STD, remote_leader, tag, peer_comm, &req_s);755 MPI_Irecv((*newintercomm).ep_comm_ptr->intercomm->remote_rank_map->data(), 2*remote_num_ep, MPI_INT_STD, remote_leader, tag, peer_comm, &req_r);766 MPI_Isend((*newintercomm).ep_comm_ptr->intercomm->local_rank_map->data(), 2*local_num_ep, static_cast< ::MPI_Datatype> (MPI_INT), remote_leader, tag, peer_comm, &req_s); 767 MPI_Irecv((*newintercomm).ep_comm_ptr->intercomm->remote_rank_map->data(), 2*remote_num_ep, static_cast< ::MPI_Datatype> (MPI_INT), remote_leader, tag, peer_comm, &req_r); 756 768 757 769 … … 761 773 } 762 774 763 MPI_Bcast((*newintercomm).ep_comm_ptr->intercomm->remote_rank_map->data(), 2*remote_num_ep, MPI_INT_STD, local_leader, local_comm);775 MPI_Bcast((*newintercomm).ep_comm_ptr->intercomm->remote_rank_map->data(), 2*remote_num_ep, static_cast< ::MPI_Datatype> (MPI_INT), local_leader, local_comm); 764 776 (*newintercomm).ep_comm_ptr->intercomm->local_comm = &(local_comm.ep_comm_ptr->comm_list[ep_rank_loc]); 765 777 (*newintercomm).ep_comm_ptr->intercomm->intercomm_tag = tag; -
XIOS/dev/branch_openmp/extern/src_ep_dev/ep_intercomm_world.cpp
r1134 r1287 80 80 send_buf[2] = num_ep; 81 81 82 ::MPI_Allgather(send_buf.data(), 3, MPI_INT_STD, recv_buf.data(), 3, MPI_INT_STD, local_mpi_comm);82 ::MPI_Allgather(send_buf.data(), 3, static_cast< ::MPI_Datatype> (MPI_INT), recv_buf.data(), 3, static_cast< ::MPI_Datatype> (MPI_INT), local_mpi_comm); 83 83 84 84 for(int i=0; i<size_info[0]; i++) … … 99 99 send_buf[1] = local_ep_size; 100 100 101 ::MPI_Send(send_buf.data(), 2, MPI_INT_STD, mpi_remote_leader, tag, peer_comm);102 103 ::MPI_Recv(recv_buf.data(), 2, MPI_INT_STD, mpi_remote_leader, tag, peer_comm, &mpi_status);101 ::MPI_Send(send_buf.data(), 2, static_cast< ::MPI_Datatype> (MPI_INT), mpi_remote_leader, tag, peer_comm); 102 103 ::MPI_Recv(recv_buf.data(), 2, static_cast< ::MPI_Datatype> (MPI_INT), mpi_remote_leader, tag, peer_comm, &mpi_status); 104 104 105 105 recv_buf[2] = rank_in_world; … … 108 108 } 109 109 110 ::MPI_Bcast(recv_buf.data(), 4, MPI_INT_STD, local_comm.rank_map->at(local_leader).second, local_mpi_comm);110 ::MPI_Bcast(recv_buf.data(), 4, static_cast< ::MPI_Datatype> (MPI_INT), local_comm.rank_map->at(local_leader).second, local_mpi_comm); 111 111 112 112 size_info[1] = recv_buf[0]; … … 132 132 std::copy ( ep_info[0].data(), ep_info[0].data() + size_info[0], send_buf.begin() + 2*size_info[0] ); 133 133 134 ::MPI_Send(send_buf.data(), 3*size_info[0], MPI_INT_STD, mpi_remote_leader, tag, peer_comm);135 136 ::MPI_Recv(recv_buf.data(), 3*size_info[1], MPI_INT_STD, mpi_remote_leader, tag, peer_comm, &mpi_status);137 138 } 139 140 ::MPI_Bcast(recv_buf.data(), 3*size_info[1], MPI_INT_STD, local_comm.rank_map->at(local_leader).second, local_mpi_comm);134 ::MPI_Send(send_buf.data(), 3*size_info[0], static_cast< ::MPI_Datatype> (MPI_INT), mpi_remote_leader, tag, peer_comm); 135 136 ::MPI_Recv(recv_buf.data(), 3*size_info[1], static_cast< ::MPI_Datatype> (MPI_INT), mpi_remote_leader, tag, peer_comm, &mpi_status); 137 138 } 139 140 ::MPI_Bcast(recv_buf.data(), 3*size_info[1], static_cast< ::MPI_Datatype> (MPI_INT), local_comm.rank_map->at(local_leader).second, local_mpi_comm); 141 141 142 142 std::copy ( recv_buf.data(), recv_buf.data() + size_info[1], rank_info[2].begin() ); … … 259 259 ::MPI_Status mpi_status; 260 260 261 ::MPI_Send(&size_info[2], 1, MPI_INT_STD, mpi_remote_leader, tag, peer_comm);262 263 ::MPI_Recv(&size_info[3], 1, MPI_INT_STD, mpi_remote_leader, tag, peer_comm, &mpi_status);264 } 265 266 ::MPI_Bcast(&size_info[2], 2, MPI_INT_STD, local_comm.rank_map->at(local_leader).second, local_mpi_comm);261 ::MPI_Send(&size_info[2], 1, static_cast< ::MPI_Datatype> (MPI_INT), mpi_remote_leader, tag, peer_comm); 262 263 ::MPI_Recv(&size_info[3], 1, static_cast< ::MPI_Datatype> (MPI_INT), mpi_remote_leader, tag, peer_comm, &mpi_status); 264 } 265 266 ::MPI_Bcast(&size_info[2], 2, static_cast< ::MPI_Datatype> (MPI_INT), local_comm.rank_map->at(local_leader).second, local_mpi_comm); 267 267 268 268 new_rank_info[2].resize(size_info[3]); … … 281 281 std::copy ( new_ep_info[0].data(), new_ep_info[0].data() + size_info[0], send_buf.begin() + 2*size_info[2] ); 282 282 283 ::MPI_Send(send_buf.data(), 3*size_info[2], MPI_INT_STD, mpi_remote_leader, tag, peer_comm);284 ::MPI_Recv(recv_buf.data(), 3*size_info[3], MPI_INT_STD, mpi_remote_leader, tag, peer_comm, &mpi_status);285 286 } 287 288 ::MPI_Bcast(recv_buf.data(), 3*size_info[3], MPI_INT_STD, local_comm.rank_map->at(local_leader).second, local_mpi_comm);283 ::MPI_Send(send_buf.data(), 3*size_info[2], static_cast< ::MPI_Datatype> (MPI_INT), mpi_remote_leader, tag, peer_comm); 284 ::MPI_Recv(recv_buf.data(), 3*size_info[3], static_cast< ::MPI_Datatype> (MPI_INT), mpi_remote_leader, tag, peer_comm, &mpi_status); 285 286 } 287 288 ::MPI_Bcast(recv_buf.data(), 3*size_info[3], static_cast< ::MPI_Datatype> (MPI_INT), local_comm.rank_map->at(local_leader).second, local_mpi_comm); 289 289 290 290 std::copy ( recv_buf.data(), recv_buf.data() + size_info[3], new_rank_info[2].begin() ); … … 318 318 } 319 319 320 ::MPI_Bcast(&leader_info[2], 1, MPI_INT_STD, local_comm.rank_map->at(local_leader).second, local_mpi_comm);321 322 if(new_comm != MPI_COMM_NULL_STD)320 ::MPI_Bcast(&leader_info[2], 1, static_cast< ::MPI_Datatype> (MPI_INT), local_comm.rank_map->at(local_leader).second, local_mpi_comm); 321 322 if(new_comm != static_cast< ::MPI_Comm>(MPI_COMM_NULL.mpi_comm)) 323 323 { 324 324 ::MPI_Barrier(new_comm); … … 362 362 } 363 363 364 MPI_Bcast(bcast_buf.data(), 8, MPI_INT_STD, local_leader, local_comm);364 MPI_Bcast(bcast_buf.data(), 8, static_cast< ::MPI_Datatype> (MPI_INT), local_leader, local_comm); 365 365 366 366 if(!is_local_leader) … … 387 387 } 388 388 389 MPI_Bcast(bcast_buf.data(), size_info[2]+size_info[1]+size_info[0]+1, MPI_INT_STD, local_leader, local_comm);389 MPI_Bcast(bcast_buf.data(), size_info[2]+size_info[1]+size_info[0]+1, static_cast< ::MPI_Datatype> (MPI_INT), local_leader, local_comm); 390 390 391 391 if(!is_local_leader) … … 424 424 } 425 425 426 MPI_Barrier_local(local_comm); 426 MPI_Barrier(local_comm); 427 428 if(is_local_leader) 429 { 430 int local_flag = true; 431 int remote_flag = false; 432 ::MPI_Status mpi_status; 433 434 ::MPI_Send(&local_flag, 1, static_cast< ::MPI_Datatype>(MPI_INT), mpi_remote_leader, tag, peer_comm); 435 436 ::MPI_Recv(&remote_flag, 1, static_cast< ::MPI_Datatype>(MPI_INT), mpi_remote_leader, tag, peer_comm, &mpi_status); 437 } 438 439 MPI_Barrier(local_comm); 427 440 428 441 if(is_proc_master) … … 450 463 intercomm_mpi_size = newintercomm->ep_comm_ptr->size_rank_info[2].second; 451 464 452 MPI_Bcast(&remote_ep_size, 1, MPI_INT_STD, local_leader, local_comm);465 MPI_Bcast(&remote_ep_size, 1, static_cast< ::MPI_Datatype> (MPI_INT), local_leader, local_comm); 453 466 454 467 int my_rank_map_elem[2]; … … 466 479 (*newintercomm).ep_comm_ptr->intercomm->local_rank_map->resize(local_ep_size); 467 480 468 MPI_Allgather (my_rank_map_elem, 2, MPI_INT_STD, (*newintercomm).ep_comm_ptr->intercomm->local_rank_map->data(), 2, MPI_INT_STD, local_comm);481 MPI_Allgather2(my_rank_map_elem, 2, MPI_INT, (*newintercomm).ep_comm_ptr->intercomm->local_rank_map->data(), 2, MPI_INT, local_comm); 469 482 470 483 (*newintercomm).ep_comm_ptr->intercomm->remote_rank_map = new RANK_MAP; … … 481 494 ::MPI_Status status; 482 495 483 ::MPI_Send((*newintercomm).ep_comm_ptr->intercomm->local_rank_map->data(), 2*local_ep_size, MPI_INT_STD, mpi_remote_leader, tag, peer_comm);484 485 ::MPI_Recv((*newintercomm).ep_comm_ptr->intercomm->remote_rank_map->data(), 2*remote_ep_size, MPI_INT_STD, mpi_remote_leader, tag, peer_comm, &status);486 487 ::MPI_Send(&local_intercomm_size, 1, MPI_INT_STD, mpi_remote_leader, tag, peer_comm);488 489 ::MPI_Recv(&remote_intercomm_size, 1, MPI_INT_STD, mpi_remote_leader, tag, peer_comm, &status);490 } 491 492 MPI_Bcast((*newintercomm).ep_comm_ptr->intercomm->remote_rank_map->data(), 2*remote_ep_size, MPI_INT_STD, local_leader, local_comm);493 MPI_Bcast(&remote_intercomm_size, 1, MPI_INT_STD, 0, *newintercomm);496 ::MPI_Send((*newintercomm).ep_comm_ptr->intercomm->local_rank_map->data(), 2*local_ep_size, static_cast< ::MPI_Datatype> (MPI_INT), mpi_remote_leader, tag, peer_comm); 497 498 ::MPI_Recv((*newintercomm).ep_comm_ptr->intercomm->remote_rank_map->data(), 2*remote_ep_size, static_cast< ::MPI_Datatype> (MPI_INT), mpi_remote_leader, tag, peer_comm, &status); 499 500 ::MPI_Send(&local_intercomm_size, 1, static_cast< ::MPI_Datatype> (MPI_INT), mpi_remote_leader, tag, peer_comm); 501 502 ::MPI_Recv(&remote_intercomm_size, 1, static_cast< ::MPI_Datatype> (MPI_INT), mpi_remote_leader, tag, peer_comm, &status); 503 } 504 505 MPI_Bcast((*newintercomm).ep_comm_ptr->intercomm->remote_rank_map->data(), 2*remote_ep_size, static_cast< ::MPI_Datatype> (MPI_INT), local_leader, local_comm); 506 MPI_Bcast(&remote_intercomm_size, 1, static_cast< ::MPI_Datatype> (MPI_INT), 0, *newintercomm); 494 507 495 508 … … 506 519 ::MPI_Status status; 507 520 508 ::MPI_Send((*newintercomm).rank_map->data(), 2*local_intercomm_size, MPI_INT_STD, mpi_remote_leader, tag, peer_comm);509 510 ::MPI_Recv((*newintercomm).ep_comm_ptr->intercomm->intercomm_rank_map->data(), 2*remote_intercomm_size, MPI_INT_STD, mpi_remote_leader, tag, peer_comm, &status);511 } 512 513 MPI_Bcast((*newintercomm).ep_comm_ptr->intercomm->intercomm_rank_map->data(), 2*remote_intercomm_size, MPI_INT_STD, 0, *newintercomm);521 ::MPI_Send((*newintercomm).rank_map->data(), 2*local_intercomm_size, static_cast< ::MPI_Datatype> (MPI_INT), mpi_remote_leader, tag, peer_comm); 522 523 ::MPI_Recv((*newintercomm).ep_comm_ptr->intercomm->intercomm_rank_map->data(), 2*remote_intercomm_size, static_cast< ::MPI_Datatype> (MPI_INT), mpi_remote_leader, tag, peer_comm, &status); 524 } 525 526 MPI_Bcast((*newintercomm).ep_comm_ptr->intercomm->intercomm_rank_map->data(), 2*remote_intercomm_size, static_cast< ::MPI_Datatype> (MPI_INT), 0, *newintercomm); 514 527 515 528 (*newintercomm).ep_comm_ptr->intercomm->local_comm = &(local_comm.ep_comm_ptr->comm_list[ep_rank_loc]); … … 590 603 send_buf[2] = num_ep; 591 604 592 ::MPI_Allgather(send_buf.data(), 3, MPI_INT_STD, recv_buf.data(), 3, MPI_INT_STD, local_mpi_comm);605 ::MPI_Allgather(send_buf.data(), 3, static_cast< ::MPI_Datatype> (MPI_INT), recv_buf.data(), 3, static_cast< ::MPI_Datatype> (MPI_INT), local_mpi_comm); 593 606 594 607 for(int i=0; i<size_info[0]; i++) … … 610 623 send_buf[1] = local_ep_size; 611 624 612 ::MPI_Send(send_buf.data(), 2, MPI_INT_STD, mpi_remote_leader, tag, peer_comm);613 614 ::MPI_Recv(recv_buf.data(), 2, MPI_INT_STD, mpi_remote_leader, tag, peer_comm, &mpi_status);625 ::MPI_Send(send_buf.data(), 2, static_cast< ::MPI_Datatype> (MPI_INT), mpi_remote_leader, tag, peer_comm); 626 627 ::MPI_Recv(recv_buf.data(), 2, static_cast< ::MPI_Datatype> (MPI_INT), mpi_remote_leader, tag, peer_comm, &mpi_status); 615 628 616 629 recv_buf[2] = rank_in_world; … … 619 632 } 620 633 621 ::MPI_Bcast(recv_buf.data(), 4, MPI_INT_STD, local_comm.rank_map->at(local_leader).second, local_mpi_comm);634 ::MPI_Bcast(recv_buf.data(), 4, static_cast< ::MPI_Datatype> (MPI_INT), local_comm.rank_map->at(local_leader).second, local_mpi_comm); 622 635 623 636 size_info[1] = recv_buf[0]; … … 643 656 std::copy ( ep_info[0].data(), ep_info[0].data() + size_info[0], send_buf.begin() + 2*size_info[0] ); 644 657 645 ::MPI_Send(send_buf.data(), 3*size_info[0], MPI_INT_STD, mpi_remote_leader, tag, peer_comm);646 647 ::MPI_Recv(recv_buf.data(), 3*size_info[1], MPI_INT_STD, mpi_remote_leader, tag, peer_comm, &mpi_status);648 649 } 650 651 ::MPI_Bcast(recv_buf.data(), 3*size_info[1], MPI_INT_STD, local_comm.rank_map->at(local_leader).second, local_mpi_comm);658 ::MPI_Send(send_buf.data(), 3*size_info[0], static_cast< ::MPI_Datatype> (MPI_INT), mpi_remote_leader, tag, peer_comm); 659 660 ::MPI_Recv(recv_buf.data(), 3*size_info[1], static_cast< ::MPI_Datatype> (MPI_INT), mpi_remote_leader, tag, peer_comm, &mpi_status); 661 662 } 663 664 ::MPI_Bcast(recv_buf.data(), 3*size_info[1], static_cast< ::MPI_Datatype> (MPI_INT), local_comm.rank_map->at(local_leader).second, local_mpi_comm); 652 665 653 666 std::copy ( recv_buf.data(), recv_buf.data() + size_info[1], rank_info[2].begin() ); … … 770 783 ::MPI_Status mpi_status; 771 784 772 ::MPI_Send(&size_info[2], 1, MPI_INT_STD, mpi_remote_leader, tag, peer_comm);773 774 ::MPI_Recv(&size_info[3], 1, MPI_INT_STD, mpi_remote_leader, tag, peer_comm, &mpi_status);775 } 776 777 ::MPI_Bcast(&size_info[2], 2, MPI_INT_STD, local_comm.rank_map->at(local_leader).second, local_mpi_comm);785 ::MPI_Send(&size_info[2], 1, static_cast< ::MPI_Datatype> (MPI_INT), mpi_remote_leader, tag, peer_comm); 786 787 ::MPI_Recv(&size_info[3], 1, static_cast< ::MPI_Datatype> (MPI_INT), mpi_remote_leader, tag, peer_comm, &mpi_status); 788 } 789 790 ::MPI_Bcast(&size_info[2], 2, static_cast< ::MPI_Datatype> (MPI_INT), local_comm.rank_map->at(local_leader).second, local_mpi_comm); 778 791 779 792 new_rank_info[2].resize(size_info[3]); … … 792 805 std::copy ( new_ep_info[0].data(), new_ep_info[0].data() + size_info[0], send_buf.begin() + 2*size_info[2] ); 793 806 794 ::MPI_Send(send_buf.data(), 3*size_info[2], MPI_INT_STD, mpi_remote_leader, tag, peer_comm);795 796 ::MPI_Recv(recv_buf.data(), 3*size_info[3], MPI_INT_STD, mpi_remote_leader, tag, peer_comm, &mpi_status);797 } 798 799 ::MPI_Bcast(recv_buf.data(), 3*size_info[3], MPI_INT_STD, local_comm.rank_map->at(local_leader).second, local_mpi_comm);807 ::MPI_Send(send_buf.data(), 3*size_info[2], static_cast< ::MPI_Datatype> (MPI_INT), mpi_remote_leader, tag, peer_comm); 808 809 ::MPI_Recv(recv_buf.data(), 3*size_info[3], static_cast< ::MPI_Datatype> (MPI_INT), mpi_remote_leader, tag, peer_comm, &mpi_status); 810 } 811 812 ::MPI_Bcast(recv_buf.data(), 3*size_info[3], static_cast< ::MPI_Datatype> (MPI_INT), local_comm.rank_map->at(local_leader).second, local_mpi_comm); 800 813 801 814 std::copy ( recv_buf.data(), recv_buf.data() + size_info[3], new_rank_info[2].begin() ); … … 829 842 } 830 843 831 ::MPI_Bcast(&leader_info[2], 1, MPI_INT_STD, local_comm.rank_map->at(local_leader).second, local_mpi_comm);832 833 if(new_comm != MPI_COMM_NULL_STD)844 ::MPI_Bcast(&leader_info[2], 1, static_cast< ::MPI_Datatype> (MPI_INT), local_comm.rank_map->at(local_leader).second, local_mpi_comm); 845 846 if(new_comm != static_cast< ::MPI_Comm >(MPI_COMM_NULL.mpi_comm)) 834 847 { 835 848 ::MPI_Barrier(new_comm); … … 857 870 #pragma omp critical (write_to_tag_list) 858 871 tag_list.push_back(make_pair( make_pair(tag, min(leader_info[0], leader_info[1])) , ep_intercomm)); 859 860 } 861 862 863 } 864 865 866 MPI_Barrier_local(local_comm); 872 873 874 } 875 876 877 } 867 878 868 879 … … 876 887 877 888 878 MPI_Bcast(bcast_buf.data(), 8, MPI_INT_STD, local_leader, local_comm);889 MPI_Bcast(bcast_buf.data(), 8, static_cast< ::MPI_Datatype> (MPI_INT), local_leader, local_comm); 879 890 880 891 … … 904 915 } 905 916 906 MPI_Bcast(bcast_buf.data(), size_info[2]+size_info[1]+size_info[0]+1, MPI_INT_STD, local_leader, local_comm);917 MPI_Bcast(bcast_buf.data(), size_info[2]+size_info[1]+size_info[0]+1, static_cast< ::MPI_Datatype> (MPI_INT), local_leader, local_comm); 907 918 908 919 … … 919 930 920 931 921 MPI_Barrier_local(local_comm);922 #pragma omp flush923 924 925 932 #pragma omp critical (read_from_tag_list) 926 933 { … … 935 942 936 943 found = true; 937 //tag_list.erase(iter);938 944 break; 939 945 } … … 942 948 } 943 949 944 MPI_Barrier_local(local_comm); 950 MPI_Barrier(local_comm); 951 952 if(is_local_leader) 953 { 954 int local_flag = true; 955 int remote_flag = false; 956 ::MPI_Status mpi_status; 957 958 ::MPI_Send(&local_flag, 1, static_cast< ::MPI_Datatype>(MPI_INT), mpi_remote_leader, tag, peer_comm); 959 960 ::MPI_Recv(&remote_flag, 1, static_cast< ::MPI_Datatype>(MPI_INT), mpi_remote_leader, tag, peer_comm, &mpi_status); 961 } 962 963 MPI_Barrier(local_comm); 945 964 946 965 if(is_proc_master) … … 967 986 968 987 969 MPI_Bcast(&remote_ep_size, 1, MPI_INT_STD, local_leader, local_comm);988 MPI_Bcast(&remote_ep_size, 1, static_cast< ::MPI_Datatype> (MPI_INT), local_leader, local_comm); 970 989 971 990 int my_rank_map_elem[2]; … … 983 1002 (*newintercomm).ep_comm_ptr->intercomm->local_rank_map->resize(local_ep_size); 984 1003 985 MPI_Allgather(my_rank_map_elem, 2, MPI_INT _STD, (*newintercomm).ep_comm_ptr->intercomm->local_rank_map->data(), 2, MPI_INT_STD, local_comm);986 1004 MPI_Allgather(my_rank_map_elem, 2, MPI_INT, (*newintercomm).ep_comm_ptr->intercomm->local_rank_map->data(), 2, MPI_INT, local_comm); 1005 987 1006 (*newintercomm).ep_comm_ptr->intercomm->remote_rank_map = new RANK_MAP; 988 1007 (*newintercomm).ep_comm_ptr->intercomm->remote_rank_map->resize(remote_ep_size); … … 996 1015 ::MPI_Status status; 997 1016 998 ::MPI_Send((*newintercomm).ep_comm_ptr->intercomm->local_rank_map->data(), 2*local_ep_size, MPI_INT_STD, mpi_remote_leader, tag, peer_comm);999 1000 ::MPI_Recv((*newintercomm).ep_comm_ptr->intercomm->remote_rank_map->data(), 2*remote_ep_size, MPI_INT_STD, mpi_remote_leader, tag, peer_comm, &status);1001 1002 ::MPI_Send(&local_intercomm_size, 1, MPI_INT_STD, mpi_remote_leader, tag, peer_comm);1003 1004 ::MPI_Recv(&remote_intercomm_size, 1, MPI_INT_STD, mpi_remote_leader, tag, peer_comm, &status);1005 } 1006 1007 MPI_Bcast((*newintercomm).ep_comm_ptr->intercomm->remote_rank_map->data(), 2*remote_ep_size, MPI_INT_STD, local_leader, local_comm);1008 MPI_Bcast(&remote_intercomm_size, 1, MPI_INT_STD, 0, *newintercomm);1017 ::MPI_Send((*newintercomm).ep_comm_ptr->intercomm->local_rank_map->data(), 2*local_ep_size, static_cast< ::MPI_Datatype> (MPI_INT), mpi_remote_leader, tag, peer_comm); 1018 1019 ::MPI_Recv((*newintercomm).ep_comm_ptr->intercomm->remote_rank_map->data(), 2*remote_ep_size, static_cast< ::MPI_Datatype> (MPI_INT), mpi_remote_leader, tag, peer_comm, &status); 1020 1021 ::MPI_Send(&local_intercomm_size, 1, static_cast< ::MPI_Datatype> (MPI_INT), mpi_remote_leader, tag, peer_comm); 1022 1023 ::MPI_Recv(&remote_intercomm_size, 1, static_cast< ::MPI_Datatype> (MPI_INT), mpi_remote_leader, tag, peer_comm, &status); 1024 } 1025 1026 MPI_Bcast((*newintercomm).ep_comm_ptr->intercomm->remote_rank_map->data(), 2*remote_ep_size, static_cast< ::MPI_Datatype> (MPI_INT), local_leader, local_comm); 1027 MPI_Bcast(&remote_intercomm_size, 1, static_cast< ::MPI_Datatype> (MPI_INT), 0, *newintercomm); 1009 1028 1010 1029 … … 1021 1040 ::MPI_Status status; 1022 1041 1023 ::MPI_Send((*newintercomm).rank_map->data(), 2*local_intercomm_size, MPI_INT_STD, mpi_remote_leader, tag, peer_comm);1024 1025 ::MPI_Recv((*newintercomm).ep_comm_ptr->intercomm->intercomm_rank_map->data(), 2*remote_intercomm_size, MPI_INT_STD, mpi_remote_leader, tag, peer_comm, &status);1026 } 1027 1028 MPI_Bcast((*newintercomm).ep_comm_ptr->intercomm->intercomm_rank_map->data(), 2*remote_intercomm_size, MPI_INT_STD, 0, *newintercomm);1042 ::MPI_Send((*newintercomm).rank_map->data(), 2*local_intercomm_size, static_cast< ::MPI_Datatype> (MPI_INT), mpi_remote_leader, tag, peer_comm); 1043 1044 ::MPI_Recv((*newintercomm).ep_comm_ptr->intercomm->intercomm_rank_map->data(), 2*remote_intercomm_size, static_cast< ::MPI_Datatype> (MPI_INT), mpi_remote_leader, tag, peer_comm, &status); 1045 } 1046 1047 MPI_Bcast((*newintercomm).ep_comm_ptr->intercomm->intercomm_rank_map->data(), 2*remote_intercomm_size, static_cast< ::MPI_Datatype> (MPI_INT), 0, *newintercomm); 1029 1048 1030 1049 (*newintercomm).ep_comm_ptr->intercomm->local_comm = &(local_comm.ep_comm_ptr->comm_list[ep_rank_loc]); 1031 1050 (*newintercomm).ep_comm_ptr->intercomm->intercomm_tag = local_comm.ep_comm_ptr->comm_label; 1032 1051 1033 1034 // for(int i=0; i<local_ep_size; i++)1035 // if(local_comm.ep_comm_ptr->comm_label != 99) printf("ep_rank = %d, remote_rank_map[%d] = (%d,%d)\n", intercomm_ep_rank, i,1036 // (*newintercomm).ep_comm_ptr->intercomm->local_rank_map->at(i).first, (*newintercomm).ep_comm_ptr->intercomm->local_rank_map->at(i).second);1037 1038 // for(int i=0; i<remote_intercomm_size; i++)1039 // if(local_comm.ep_comm_ptr->comm_label != 99) printf("ep_rank = %d, intercomm_rank_map[%d] = (%d,%d)\n", intercomm_ep_rank, i,1040 // (*newintercomm).ep_comm_ptr->intercomm->intercomm_rank_map->at(i).first, (*newintercomm).ep_comm_ptr->intercomm->intercomm_rank_map->at(i).second);1041 1042 // for(int i=0; i<(*newintercomm).rank_map->size(); i++)1043 // if(local_comm.ep_comm_ptr->comm_label != 99) printf("ep_rank = %d, rank_map[%d] = (%d,%d)\n", intercomm_ep_rank, i,1044 // (*newintercomm).rank_map->at(i).first, (*newintercomm).rank_map->at(i).second);1045 1046 1047 1048 1049 1050 1052 return MPI_SUCCESS; 1051 1053 -
XIOS/dev/branch_openmp/extern/src_ep_dev/ep_lib.cpp
r1220 r1287 63 63 64 64 65 int innode_memcpy(int sender, const void* sendbuf, int receiver, void* recvbuf, int count, MPI_Datatype datatype, MPI_Comm comm)66 {67 int ep_rank, ep_rank_loc, mpi_rank;68 int ep_size, num_ep, mpi_size;69 70 ep_rank = comm.ep_comm_ptr->size_rank_info[0].first;71 ep_rank_loc = comm.ep_comm_ptr->size_rank_info[1].first;72 mpi_rank = comm.ep_comm_ptr->size_rank_info[2].first;73 ep_size = comm.ep_comm_ptr->size_rank_info[0].second;74 num_ep = comm.ep_comm_ptr->size_rank_info[1].second;75 mpi_size = comm.ep_comm_ptr->size_rank_info[2].second;76 77 78 79 if(datatype == MPI_INT)80 {81 82 int* send_buf = static_cast<int*>(const_cast<void*>(sendbuf));83 int* recv_buf = static_cast<int*>(recvbuf);84 int* buffer = comm.my_buffer->buf_int;85 86 for(int j=0; j<count; j+=BUFFER_SIZE)87 {88 if(ep_rank_loc == sender)89 {90 #pragma omp critical (write_to_buffer)91 {92 copy(send_buf+j, send_buf+j+min(BUFFER_SIZE, count-j), buffer);93 }94 #pragma omp flush95 }96 97 MPI_Barrier_local(comm);98 99 100 if(ep_rank_loc == receiver)101 {102 #pragma omp flush103 #pragma omp critical (read_from_buffer)104 {105 copy(buffer, buffer+min(BUFFER_SIZE, count-j), recv_buf+j);106 }107 }108 109 MPI_Barrier_local(comm);110 }111 }112 else if(datatype == MPI_FLOAT)113 {114 115 float* send_buf = static_cast<float*>(const_cast<void*>(sendbuf));116 float* recv_buf = static_cast<float*>(recvbuf);117 float* buffer = comm.my_buffer->buf_float;118 119 for(int j=0; j<count; j+=BUFFER_SIZE)120 {121 if(ep_rank_loc == sender)122 {123 #pragma omp critical (write_to_buffer)124 {125 copy(send_buf+j, send_buf+j+min(BUFFER_SIZE, count-j), buffer);126 }127 #pragma omp flush128 }129 130 MPI_Barrier_local(comm);131 132 133 if(ep_rank_loc == receiver)134 {135 #pragma omp flush136 #pragma omp critical (read_from_buffer)137 {138 copy(buffer, buffer+min(BUFFER_SIZE, count-j), recv_buf+j);139 }140 }141 142 MPI_Barrier_local(comm);143 }144 }145 else if(datatype == MPI_DOUBLE)146 {147 148 149 double* send_buf = static_cast<double*>(const_cast<void*>(sendbuf));150 double* recv_buf = static_cast<double*>(recvbuf);151 double* buffer = comm.my_buffer->buf_double;152 153 for(int j=0; j<count; j+=BUFFER_SIZE)154 {155 if(ep_rank_loc == sender)156 {157 #pragma omp critical (write_to_buffer)158 {159 copy(send_buf+j, send_buf+j+min(BUFFER_SIZE, count-j), buffer);160 }161 #pragma omp flush162 }163 164 MPI_Barrier_local(comm);165 166 167 if(ep_rank_loc == receiver)168 {169 #pragma omp flush170 #pragma omp critical (read_from_buffer)171 {172 copy(buffer, buffer+min(BUFFER_SIZE, count-j), recv_buf+j);173 }174 }175 176 MPI_Barrier_local(comm);177 }178 }179 else if(datatype == MPI_LONG)180 {181 long* send_buf = static_cast<long*>(const_cast<void*>(sendbuf));182 long* recv_buf = static_cast<long*>(recvbuf);183 long* buffer = comm.my_buffer->buf_long;184 185 for(int j=0; j<count; j+=BUFFER_SIZE)186 {187 if(ep_rank_loc == sender)188 {189 #pragma omp critical (write_to_buffer)190 {191 copy(send_buf+j, send_buf+j+min(BUFFER_SIZE, count-j), buffer);192 }193 #pragma omp flush194 }195 196 MPI_Barrier_local(comm);197 198 199 if(ep_rank_loc == receiver)200 {201 #pragma omp flush202 #pragma omp critical (read_from_buffer)203 {204 copy(buffer, buffer+min(BUFFER_SIZE, count-j), recv_buf+j);205 }206 }207 208 MPI_Barrier_local(comm);209 }210 }211 else if(datatype == MPI_UNSIGNED_LONG)212 {213 unsigned long* send_buf = static_cast<unsigned long*>(const_cast<void*>(sendbuf));214 unsigned long* recv_buf = static_cast<unsigned long*>(recvbuf);215 unsigned long* buffer = comm.my_buffer->buf_ulong;216 217 for(int j=0; j<count; j+=BUFFER_SIZE)218 {219 if(ep_rank_loc == sender)220 {221 #pragma omp critical (write_to_buffer)222 {223 copy(send_buf+j, send_buf+j+min(BUFFER_SIZE, count-j), buffer);224 }225 #pragma omp flush226 }227 228 MPI_Barrier_local(comm);229 230 231 if(ep_rank_loc == receiver)232 {233 #pragma omp flush234 #pragma omp critical (read_from_buffer)235 {236 copy(buffer, buffer+min(BUFFER_SIZE, count-j), recv_buf+j);237 }238 }239 240 MPI_Barrier_local(comm);241 }242 }243 else if(datatype == MPI_CHAR)244 {245 char* send_buf = static_cast<char*>(const_cast<void*>(sendbuf));246 char* recv_buf = static_cast<char*>(recvbuf);247 char* buffer = comm.my_buffer->buf_char;248 249 for(int j=0; j<count; j+=BUFFER_SIZE)250 {251 if(ep_rank_loc == sender)252 {253 #pragma omp critical (write_to_buffer)254 {255 copy(send_buf+j, send_buf+j+min(BUFFER_SIZE, count-j), buffer);256 }257 #pragma omp flush258 }259 260 MPI_Barrier_local(comm);261 262 263 if(ep_rank_loc == receiver)264 {265 #pragma omp flush266 #pragma omp critical (read_from_buffer)267 {268 copy(buffer, buffer+min(BUFFER_SIZE, count-j), recv_buf+j);269 }270 }271 272 MPI_Barrier_local(comm);273 }274 }275 else276 {277 printf("datatype not supported!!\n");278 exit(1);279 }280 return 0;281 }282 283 284 65 int MPI_Get_count(const MPI_Status *status, MPI_Datatype datatype, int *count) 285 66 { 286 /* 287 ::MPI_Aint datasize, char_size, lb; 288 289 ::MPI_Type_get_extent(static_cast< ::MPI_Datatype>(datatype), &lb, &datasize); 290 ::MPI_Type_get_extent(MPI_CHAR, &lb, &char_size); 291 292 *count = status->char_count / ( datasize/ char_size); 293 294 //printf("MPI_Get_count, status_count = %d\n", *count); 295 return 0; 296 */ 67 297 68 ::MPI_Status *mpi_status = static_cast< ::MPI_Status* >(status->mpi_status); 298 69 ::MPI_Datatype mpi_datatype = static_cast< ::MPI_Datatype >(datatype); … … 314 85 315 86 ::MPI_Type_get_extent(static_cast< ::MPI_Datatype>(datatype), &lb, &datasize); 316 ::MPI_Type_get_extent( MPI_CHAR_STD, &lb, &intsize);87 ::MPI_Type_get_extent(static_cast< ::MPI_Datatype> (MPI_CHAR), &lb, &intsize); 317 88 318 89 int_count = count * datasize / intsize ; … … 346 117 347 118 ::MPI_Type_get_extent(static_cast< ::MPI_Datatype>(datatype), &lb, &datasize); 348 ::MPI_Type_get_extent( MPI_CHAR_STD, &lb, &intsize);119 ::MPI_Type_get_extent(static_cast< ::MPI_Datatype> (MPI_CHAR), &lb, &intsize); 349 120 350 121 int_count = count * datasize / intsize ; … … 424 195 } 425 196 197 bool valid_type(MPI_Datatype datatype) 198 { 199 if( datatype == MPI_INT 200 || datatype == MPI_FLOAT 201 || datatype == MPI_DOUBLE 202 || datatype == MPI_CHAR 203 || datatype == MPI_LONG 204 || datatype == MPI_UNSIGNED_LONG) 205 { 206 return true; 207 } 208 209 return false; 210 } 211 212 213 bool valid_op(MPI_Op op) 214 { 215 if( op == MPI_MAX 216 || op == MPI_MIN 217 || op == MPI_SUM) 218 { 219 return true; 220 } 221 222 return false; 223 } 224 225 426 226 } 427 227 428 228 429 430 431 432 433 229 MPI_Datatype to_mpi_type(ep_lib::MPI_Datatype type) 230 { 231 return static_cast< MPI_Datatype >(type); 232 } 233 234 MPI_Op to_mpi_op(ep_lib::MPI_Op op) 235 { 236 return static_cast< MPI_Op >(op); 237 } 238 239 MPI_Comm to_mpi_comm(int comm) 240 { 241 return static_cast< MPI_Comm >(comm); 242 } 243 244 245 246 247 248 -
XIOS/dev/branch_openmp/extern/src_ep_dev/ep_lib.hpp
r1196 r1287 8 8 #include "ep_tag.hpp" 9 9 #include "ep_lib_fortran.hpp" 10 11 10 12 11 namespace ep_lib … … 87 86 void check_sum_send(const void *buf, int count, MPI_Datatype datatype, int dest, int tag, MPI_Comm comm, int type); 88 87 void check_sum_recv(void *buf, int count, MPI_Datatype datatype, int src, int tag, MPI_Comm comm, int type); 88 89 bool valid_type(MPI_Datatype type); 90 bool valid_op(MPI_Op op); 91 89 92 } 90 93 94 //MPI_Datatype to_mpi(ep_lib::MPI_Datatype type); 91 95 92 96 -
XIOS/dev/branch_openmp/extern/src_ep_dev/ep_lib_collective.hpp
r1164 r1287 17 17 18 18 int MPI_Reduce(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, MPI_Comm comm); 19 19 20 int MPI_Allreduce(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm); 20 21 … … 22 23 23 24 int MPI_Scan(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm); 25 24 26 int MPI_Exscan(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm); 25 27 … … 31 33 int MPI_Gatherv(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, const int recvcounts[], const int displs[], 32 34 MPI_Datatype recvtype, int root, MPI_Comm comm); 33 int MPI_Gatherv_special(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, const int recvcounts[], const int displs[],34 MPI_Datatype recvtype, int root, MPI_Comm comm);35 35 int MPI_Allgatherv(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, const int recvcounts[], const int displs[], 36 36 MPI_Datatype recvtype, MPI_Comm comm); 37 int MPI_Allgatherv_special(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, const int recvcounts[], const int displs[],38 MPI_Datatype recvtype, MPI_Comm comm);39 37 40 38 41 39 int MPI_Scatter(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPI_Comm comm); 40 42 41 int MPI_Scatterv(const void *sendbuf, const int sendcounts[], const int displs[], MPI_Datatype sendtype, void *recvbuf, int recvcount, 43 42 MPI_Datatype recvtype, int root, MPI_Comm comm); -
XIOS/dev/branch_openmp/extern/src_ep_dev/ep_lib_fortran.hpp
r1134 r1287 6 6 namespace ep_lib 7 7 { 8 9 // #ifdef _intelmpi10 11 // MPI_Fint MPI_Comm_c2f(MPI_Comm comm);12 // MPI_Comm MPI_Comm_f2c(MPI_Fint comm);13 14 // #elif _openmpi15 16 // int MPI_Comm_c2f(MPI_Comm comm);17 // ep_lib::MPI_Comm MPI_Comm_f2c(MPI_Fint comm);18 19 // #endif20 8 21 9 int EP_Comm_c2f(MPI_Comm comm); -
XIOS/dev/branch_openmp/extern/src_ep_dev/ep_lib_local.hpp
r1134 r1287 12 12 #endif 13 13 14 15 int MPI_Reduce_local (const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm); 16 int MPI_Reduce_local_int (const void *sendbuf, void *recvbuf, int count, MPI_Op op, MPI_Comm comm); 17 int MPI_Reduce_local_float (const void *sendbuf, void *recvbuf, int count, MPI_Op op, MPI_Comm comm); 18 int MPI_Reduce_local_double(const void *sendbuf, void *recvbuf, int count, MPI_Op op, MPI_Comm comm); 19 int MPI_Reduce_local_long (const void *sendbuf, void *recvbuf, int count, MPI_Op op, MPI_Comm comm); 20 int MPI_Reduce_local_ulong (const void *sendbuf, void *recvbuf, int count, MPI_Op op, MPI_Comm comm); 21 int MPI_Reduce_local_char (const void *sendbuf, void *recvbuf, int count, MPI_Op op, MPI_Comm comm); 22 14 int MPI_Reduce_local (const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int local_root, MPI_Comm comm); 23 15 24 16 int MPI_Scan_local (const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm); 25 int MPI_Scan_local_int (const void *sendbuf, void *recvbuf, int count, MPI_Op op, MPI_Comm comm);26 int MPI_Scan_local_float (const void *sendbuf, void *recvbuf, int count, MPI_Op op, MPI_Comm comm);27 int MPI_Scan_local_double(const void *sendbuf, void *recvbuf, int count, MPI_Op op, MPI_Comm comm);28 int MPI_Scan_local_long (const void *sendbuf, void *recvbuf, int count, MPI_Op op, MPI_Comm comm);29 int MPI_Scan_local_ulong (const void *sendbuf, void *recvbuf, int count, MPI_Op op, MPI_Comm comm);30 int MPI_Scan_local_char (const void *sendbuf, void *recvbuf, int count, MPI_Op op, MPI_Comm comm);31 17 32 18 int MPI_Exscan_local (const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm); 33 int MPI_Exscan_local_int (const void *sendbuf, void *recvbuf, int count, MPI_Op op, MPI_Comm comm);34 int MPI_Exscan_local_float (const void *sendbuf, void *recvbuf, int count, MPI_Op op, MPI_Comm comm);35 int MPI_Exscan_local_double(const void *sendbuf, void *recvbuf, int count, MPI_Op op, MPI_Comm comm);36 int MPI_Exscan_local_long (const void *sendbuf, void *recvbuf, int count, MPI_Op op, MPI_Comm comm);37 int MPI_Exscan_local_ulong (const void *sendbuf, void *recvbuf, int count, MPI_Op op, MPI_Comm comm);38 int MPI_Exscan_local_char (const void *sendbuf, void *recvbuf, int count, MPI_Op op, MPI_Comm comm);39 19 40 int MPI_Bcast_local (void *buffer, int count, MPI_Datatype datatype, MPI_Comm comm); 41 int MPI_Bcast_local_int (void *buffer, int count, MPI_Comm comm); 42 int MPI_Bcast_local_float (void *buffer, int count, MPI_Comm comm); 43 int MPI_Bcast_local_double(void *buffer, int count, MPI_Comm comm); 44 int MPI_Bcast_local_long (void *buffer, int count, MPI_Comm comm); 45 int MPI_Bcast_local_ulong (void *buffer, int count, MPI_Comm comm); 46 int MPI_Bcast_local_char (void *buffer, int count, MPI_Comm comm); 47 48 int MPI_Gather_local (const void *sendbuf, int count, MPI_Datatype datatype, void *recvbuf, MPI_Comm comm); 49 int MPI_Gather_local_int (const void *sendbuf, int count, void *recvbuf, MPI_Comm comm); 50 int MPI_Gather_local_float (const void *sendbuf, int count, void *recvbuf, MPI_Comm comm); 51 int MPI_Gather_local_double(const void *sendbuf, int count, void *recvbuf, MPI_Comm comm); 52 int MPI_Gather_local_long (const void *sendbuf, int count, void *recvbuf, MPI_Comm comm); 53 int MPI_Gather_local_ulong (const void *sendbuf, int count, void *recvbuf, MPI_Comm comm); 54 int MPI_Gather_local_char (const void *sendbuf, int count, void *recvbuf, MPI_Comm comm); 55 20 int MPI_Bcast_local (void *buffer, int count, MPI_Datatype datatype, int local_root, MPI_Comm comm); 21 22 int MPI_Gather_local (const void *sendbuf, int count, MPI_Datatype datatype, void *recvbuf, int local_root, MPI_Comm comm); 56 23 57 24 int MPI_Gatherv_local (const void *sendbuf, int count, MPI_Datatype datatype, void *recvbuf, 58 const int recvcounts[], const int displs[], MPI_Comm comm); 59 int MPI_Gatherv_local_int (const void *sendbuf, int count, void *recvbuf, const int recvcounts[], const int displs[], MPI_Comm comm); 60 int MPI_Gatherv_local_float (const void *sendbuf, int count, void *recvbuf, const int recvcounts[], const int displs[], MPI_Comm comm); 61 int MPI_Gatherv_local_double(const void *sendbuf, int count, void *recvbuf, const int recvcounts[], const int displs[], MPI_Comm comm); 62 int MPI_Gatherv_local_long (const void *sendbuf, int count, void *recvbuf, const int recvcounts[], const int displs[], MPI_Comm comm); 63 int MPI_Gatherv_local_ulong (const void *sendbuf, int count, void *recvbuf, const int recvcounts[], const int displs[], MPI_Comm comm); 64 int MPI_Gatherv_local_char (const void *sendbuf, int count, void *recvbuf, const int recvcounts[], const int displs[], MPI_Comm comm); 25 const int recvcounts[], const int displs[], int local_root, MPI_Comm comm); 65 26 66 int MPI_Scatter_local (const void *sendbuf, int count, MPI_Datatype datatype, void *recvbuf, MPI_Comm comm); 67 int MPI_Scatter_local_int (const void *sendbuf, int count, void *recvbuf, MPI_Comm comm); 68 int MPI_Scatter_local_float (const void *sendbuf, int count, void *recvbuf, MPI_Comm comm); 69 int MPI_Scatter_local_double(const void *sendbuf, int count, void *recvbuf, MPI_Comm comm); 70 int MPI_Scatter_local_long (const void *sendbuf, int count, void *recvbuf, MPI_Comm comm); 71 int MPI_Scatter_local_ulong (const void *sendbuf, int count, void *recvbuf, MPI_Comm comm); 72 int MPI_Scatter_local_char (const void *sendbuf, int count, void *recvbuf, MPI_Comm comm); 27 int MPI_Scatter_local(void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, int local_root, MPI_Comm comm); 73 28 74 int MPI_Scatterv_local (const void *sendbuf, const int sendcounts[], const int displs[], MPI_Datatype datatype, void *recvbuf, MPI_Comm comm); 75 int MPI_Scatterv_local_int (const void *sendbuf, const int sendcounts[], const int displs[], void *recvbuf, MPI_Comm comm); 76 int MPI_Scatterv_local_float (const void *sendbuf, const int sendcounts[], const int displs[], void *recvbuf, MPI_Comm comm); 77 int MPI_Scatterv_local_double(const void *sendbuf, const int sendcounts[], const int displs[], void *recvbuf, MPI_Comm comm); 78 int MPI_Scatterv_local_long (const void *sendbuf, const int sendcounts[], const int displs[], void *recvbuf, MPI_Comm comm); 79 int MPI_Scatterv_local_ulong (const void *sendbuf, const int sendcounts[], const int displs[], void *recvbuf, MPI_Comm comm); 80 int MPI_Scatterv_local_char (const void *sendbuf, const int sendcounts[], const int displs[], void *recvbuf, MPI_Comm comm); 81 82 int innode_memcpy(int sender, const void* sendbuf, int receiver, void* recvbuf, int count, MPI_Datatype datatype, MPI_Comm comm); 29 int MPI_Scatterv_local(const void *sendbuf, const int sendcounts[], const int displs[], MPI_Datatype sendtype, void *recvbuf, int recvcount, 30 MPI_Datatype recvtype, int local_root, MPI_Comm comm); 83 31 84 32 int MPI_Barrier_local(MPI_Comm comm); -
XIOS/dev/branch_openmp/extern/src_ep_dev/ep_memory.cpp
r1138 r1287 10 10 int MPI_Alloc_mem(MPI_Aint size, MPI_Info info, void *baseptr) 11 11 { 12 //::MPI_Alloc_mem(size.mpi_aint, static_cast< ::MPI_Info>(info.mpi_info), baseptr);13 ::MPI_Alloc_mem(size.mpi_aint, MPI_INFO_NULL_STD, baseptr);12 ::MPI_Alloc_mem(size.mpi_aint, static_cast< ::MPI_Info>(info.mpi_info), baseptr); 13 //::MPI_Alloc_mem(size.mpi_aint, MPI_INFO_NULL_STD, baseptr); 14 14 return 0; 15 15 } … … 17 17 int MPI_Alloc_mem(unsigned long size, MPI_Info info, void *baseptr) 18 18 { 19 //::MPI_Alloc_mem(size, static_cast< ::MPI_Info>(info.mpi_info), baseptr);20 ::MPI_Alloc_mem(size, MPI_INFO_NULL_STD, baseptr);19 ::MPI_Alloc_mem(size, static_cast< ::MPI_Info>(info.mpi_info), baseptr); 20 //::MPI_Alloc_mem(size, MPI_INFO_NULL_STD, baseptr); 21 21 return 0; 22 22 } -
XIOS/dev/branch_openmp/extern/src_ep_dev/ep_merge.cpp
r1134 r1287 42 42 if(local_ep_rank == 0) 43 43 { 44 MPI_Status status; 45 MPI_Request req_s, req_r; 46 MPI_Isend(&local_high, 1, MPI_INT, 0, inter_comm.ep_comm_ptr->intercomm->intercomm_tag, inter_comm, &req_s); 47 MPI_Irecv(&remote_high, 1, MPI_INT, 0, inter_comm.ep_comm_ptr->intercomm->intercomm_tag, inter_comm, &req_r); 48 49 MPI_Wait(&req_s, &status); 50 MPI_Wait(&req_r, &status); 44 MPI_Status status[2]; 45 MPI_Request request[2]; 46 MPI_Isend(&local_high, 1, MPI_INT, 0, inter_comm.ep_comm_ptr->intercomm->intercomm_tag, inter_comm, &request[0]); 47 MPI_Irecv(&remote_high, 1, MPI_INT, 0, inter_comm.ep_comm_ptr->intercomm->intercomm_tag, inter_comm, &request[1]); 48 49 MPI_Waitall(2, request, status); 51 50 } 52 51 … … 54 53 MPI_Bcast(&remote_high, 1, MPI_INT, 0, *(inter_comm.ep_comm_ptr->intercomm->local_comm)); 55 54 56 // printf("%d, %d, %d, %d\n", local_ep_size, remote_ep_size, local_high, remote_high);57 55 58 56 … … 86 84 if(intra_ep_rank_loc == 0) 87 85 { 88 ::MPI_Bcast(reorder, intra_ep_size, MPI_INT_STD, 0, static_cast< ::MPI_Comm>(newintracomm->mpi_comm));86 ::MPI_Bcast(reorder, intra_ep_size, static_cast< ::MPI_Datatype> (MPI_INT), 0, static_cast< ::MPI_Comm>(newintracomm->mpi_comm)); 89 87 90 88 vector< pair<int, int> > tmp_rank_map(intra_ep_size); … … 130 128 Debug("intercomm_merge kernel\n"); 131 129 132 int ep_rank, ep_rank_loc, mpi_rank; 133 int ep_size, num_ep, mpi_size; 134 135 ep_rank = inter_comm.ep_comm_ptr->size_rank_info[0].first; 130 int ep_rank_loc; 131 int num_ep; 132 136 133 ep_rank_loc = inter_comm.ep_comm_ptr->size_rank_info[1].first; 137 mpi_rank = inter_comm.ep_comm_ptr->size_rank_info[2].first;138 ep_size = inter_comm.ep_comm_ptr->size_rank_info[0].second;139 134 num_ep = inter_comm.ep_comm_ptr->size_rank_info[1].second; 140 mpi_size = inter_comm.ep_comm_ptr->size_rank_info[2].second; 141 142 143 int local_ep_rank, local_ep_rank_loc, local_mpi_rank; 144 int local_ep_size, local_num_ep, local_mpi_size; 145 146 147 local_ep_rank = inter_comm.ep_comm_ptr->intercomm->local_comm->ep_comm_ptr->size_rank_info[0].first; 148 local_ep_rank_loc = inter_comm.ep_comm_ptr->intercomm->local_comm->ep_comm_ptr->size_rank_info[1].first; 149 local_mpi_rank = inter_comm.ep_comm_ptr->intercomm->local_comm->ep_comm_ptr->size_rank_info[2].first; 150 local_ep_size = inter_comm.ep_comm_ptr->intercomm->local_comm->ep_comm_ptr->size_rank_info[0].second; 151 local_num_ep = inter_comm.ep_comm_ptr->intercomm->local_comm->ep_comm_ptr->size_rank_info[1].second; 152 local_mpi_size = inter_comm.ep_comm_ptr->intercomm->local_comm->ep_comm_ptr->size_rank_info[2].second; 135 136 153 137 154 138 int remote_ep_size = inter_comm.ep_comm_ptr->intercomm->remote_rank_map->size(); 155 139 156 int local_high = high;157 int remote_high;158 140 159 141 MPI_Barrier(inter_comm); 160 161 // if(local_ep_rank == 0 && high == false)162 // {163 // MPI_Status status;164 // MPI_Send(&local_high, 1, MPI_INT, 0, inter_comm.ep_comm_ptr->intercomm->intercomm_tag, inter_comm);165 // MPI_Recv(&remote_high, 1, MPI_INT, 0, inter_comm.ep_comm_ptr->intercomm->intercomm_tag, inter_comm, &status);166 // }167 //168 // if(local_ep_rank == 0 && high == true)169 // {170 // MPI_Status status;171 // MPI_Recv(&remote_high, 1, MPI_INT, 0, inter_comm.ep_comm_ptr->intercomm->intercomm_tag, inter_comm, &status);172 // MPI_Send(&local_high, 1, MPI_INT, 0, inter_comm.ep_comm_ptr->intercomm->intercomm_tag, inter_comm);173 // }174 175 if(local_ep_rank == 0)176 {177 MPI_Status status;178 MPI_Request req_s, req_r;179 MPI_Isend(&local_high, 1, MPI_INT, 0, inter_comm.ep_comm_ptr->intercomm->intercomm_tag, inter_comm, &req_s);180 MPI_Irecv(&remote_high, 1, MPI_INT, 0, inter_comm.ep_comm_ptr->intercomm->intercomm_tag, inter_comm, &req_r);181 182 MPI_Wait(&req_s, &status);183 MPI_Wait(&req_r, &status);184 }185 186 MPI_Bcast(&remote_high, 1, MPI_INT, 0, *(inter_comm.ep_comm_ptr->intercomm->local_comm));187 188 int intercomm_high;189 if(ep_rank == 0) intercomm_high = local_high;190 MPI_Bcast(&intercomm_high, 1, MPI_INT, 0, inter_comm);191 192 //printf("remote_ep_size = %d, local_high = %d, remote_high = %d, intercomm_high = %d\n", remote_ep_size, local_high, remote_high, intercomm_high);193 142 194 143 … … 201 150 ::MPI_Comm mpi_comm = static_cast< ::MPI_Comm>(inter_comm.ep_comm_ptr->intercomm->mpi_inter_comm); 202 151 203 ::MPI_Intercomm_merge(mpi_comm, intercomm_high, &mpi_intracomm);152 ::MPI_Intercomm_merge(mpi_comm, high, &mpi_intracomm); 204 153 MPI_Info info; 205 154 MPI_Comm_create_endpoints(mpi_intracomm, num_ep, info, ep_intracomm); … … 209 158 } 210 159 211 212 213 160 MPI_Barrier_local(inter_comm); 214 161 215 *newintracomm = inter_comm.ep_comm_ptr->comm_list->mem_bridge[ep_rank_loc]; 216 217 int my_ep_rank = local_high<remote_high? local_ep_rank: local_ep_rank+remote_ep_size; 218 219 int intra_ep_rank, intra_ep_rank_loc, intra_mpi_rank; 220 int intra_ep_size, intra_num_ep, intra_mpi_size; 221 222 intra_ep_rank = newintracomm->ep_comm_ptr->size_rank_info[0].first; 223 intra_ep_rank_loc = newintracomm->ep_comm_ptr->size_rank_info[1].first; 224 intra_mpi_rank = newintracomm->ep_comm_ptr->size_rank_info[2].first; 225 intra_ep_size = newintracomm->ep_comm_ptr->size_rank_info[0].second; 226 intra_num_ep = newintracomm->ep_comm_ptr->size_rank_info[1].second; 227 intra_mpi_size = newintracomm->ep_comm_ptr->size_rank_info[2].second; 228 229 230 231 MPI_Barrier_local(*newintracomm); 232 233 234 int *reorder; 235 if(intra_ep_rank_loc == 0) 236 { 237 reorder = new int[intra_ep_size]; 238 } 239 240 241 242 MPI_Gather(&my_ep_rank, 1, MPI_INT, reorder, 1, MPI_INT, 0, *newintracomm); 243 if(intra_ep_rank_loc == 0) 244 { 245 246 ::MPI_Bcast(reorder, intra_ep_size, MPI_INT_STD, 0, static_cast< ::MPI_Comm>(newintracomm->mpi_comm)); 247 248 vector< pair<int, int> > tmp_rank_map(intra_ep_size); 249 250 251 for(int i=0; i<intra_ep_size; i++) 252 { 253 tmp_rank_map[reorder[i]] = newintracomm->rank_map->at(i) ; 254 } 255 256 newintracomm->rank_map->swap(tmp_rank_map); 257 258 tmp_rank_map.clear(); 259 } 260 261 MPI_Barrier_local(*newintracomm); 162 int inter_rank; 163 MPI_Comm_rank(inter_comm, &inter_rank); 164 165 int my_ep_rank = high? inter_rank+remote_ep_size : inter_rank; 166 int my_ep_rank_loc = inter_comm.ep_comm_ptr->intercomm->local_comm->ep_comm_ptr->size_rank_info[1].first; 167 int my_num_ep_loc = inter_comm.ep_comm_ptr->intercomm->local_comm->ep_comm_ptr->size_rank_info[1].second; 168 int my_num_ep_total = inter_comm.ep_comm_ptr->comm_list->mem_bridge[0].ep_comm_ptr->size_rank_info[1].second; 169 int my_ep_size = inter_comm.ep_comm_ptr->comm_list->mem_bridge[0].ep_comm_ptr->size_rank_info[0].second; 170 171 int tmp_intra_ep_rank_loc = high?my_ep_rank_loc+my_num_ep_total-my_num_ep_loc: my_ep_rank_loc; 172 173 174 *newintracomm = inter_comm.ep_comm_ptr->comm_list->mem_bridge[tmp_intra_ep_rank_loc]; 175 176 int newintracomm_ep_rank = (*newintracomm).ep_comm_ptr->size_rank_info[0].first; 177 int newintracomm_ep_rank_loc = (*newintracomm).ep_comm_ptr->size_rank_info[1].first; 178 int newintracomm_mpi_rank = (*newintracomm).ep_comm_ptr->size_rank_info[2].first; 179 int newintracomm_ep_size = (*newintracomm).ep_comm_ptr->size_rank_info[0].second; 180 int newintracomm_num_ep = (*newintracomm).ep_comm_ptr->size_rank_info[1].second; 181 int newintracomm_mpi_size = (*newintracomm).ep_comm_ptr->size_rank_info[2].second; 182 183 184 int buf[3]; 185 buf[0] = my_ep_rank; 186 buf[1] = tmp_intra_ep_rank_loc; 187 buf[2] = newintracomm->ep_comm_ptr->size_rank_info[2].first; 188 189 // printf("my_ep_rank = %d, tmp_intra_ep_rank_loc = %d, mpi_rank = %d\n", my_ep_rank, tmp_intra_ep_rank_loc, newintracomm->ep_comm_ptr->size_rank_info[2].first); 190 191 int *rankmap_buf; 192 rankmap_buf = new int [3*my_ep_size]; 193 194 MPI_Allgather(buf, 3, MPI_INT, rankmap_buf, 3, MPI_INT, *newintracomm); 195 196 197 // printf(" ID = %d : rankmap_buf = (%d %d %d), (%d %d %d), (%d %d %d), (%d %d %d), (%d %d %d), (%d %d %d), (%d %d %d), (%d %d %d), (%d %d %d), (%d %d %d), (%d %d %d), (%d %d %d), (%d %d %d), (%d %d %d), (%d %d %d), (%d %d %d)\n", newintracomm_ep_rank, 198 // rankmap_buf[0], rankmap_buf[1], rankmap_buf[2], rankmap_buf[3], rankmap_buf[4], rankmap_buf[5], rankmap_buf[6], rankmap_buf[7], rankmap_buf[8], rankmap_buf[9], 199 // rankmap_buf[10], rankmap_buf[11], rankmap_buf[12], rankmap_buf[13], rankmap_buf[14], rankmap_buf[15], rankmap_buf[16], rankmap_buf[17], rankmap_buf[18], rankmap_buf[19], 200 // rankmap_buf[20], rankmap_buf[21], rankmap_buf[22], rankmap_buf[23], rankmap_buf[24], rankmap_buf[25], rankmap_buf[26], rankmap_buf[27], rankmap_buf[28], rankmap_buf[29], 201 // rankmap_buf[30], rankmap_buf[31], rankmap_buf[32], rankmap_buf[33], rankmap_buf[34], rankmap_buf[35], rankmap_buf[36], rankmap_buf[37], rankmap_buf[38], rankmap_buf[39], 202 // rankmap_buf[40], rankmap_buf[41], rankmap_buf[42], rankmap_buf[43], rankmap_buf[44], rankmap_buf[45], rankmap_buf[46], rankmap_buf[47]); 203 204 205 for(int i=0; i<newintracomm_ep_size; i++) 206 { 207 (*newintracomm).rank_map->at(rankmap_buf[3*i]).first = rankmap_buf[3*i+1]; 208 (*newintracomm).rank_map->at(rankmap_buf[3*i]).second = rankmap_buf[3*i+2]; 209 } 210 262 211 263 212 (*newintracomm).ep_comm_ptr->size_rank_info[0].first = my_ep_rank; 264 265 266 if(intra_ep_rank_loc == 0) 267 { 268 delete[] reorder; 269 270 } 271 272 /* 273 if(intra_ep_rank == 0) 274 { 275 for(int i=0; i<intra_ep_size; i++) 276 { 277 printf("intra rank_map[%d] = (%d, %d)\n", i, newintracomm->rank_map->at(i).first, newintracomm->rank_map->at(i).second); 278 } 279 } 280 */ 213 (*newintracomm).ep_comm_ptr->size_rank_info[1].first = tmp_intra_ep_rank_loc; 214 215 216 delete[] rankmap_buf; 217 218 281 219 return MPI_SUCCESS; 282 220 -
XIOS/dev/branch_openmp/extern/src_ep_dev/ep_message.cpp
r1220 r1287 9 9 #include <mpi.h> 10 10 #include "ep_declaration.hpp" 11 #include "ep_mpi.hpp" 11 12 12 13 using namespace std; … … 20 21 int Message_Check(MPI_Comm comm) 21 22 { 22 int myRank;23 MPI_Comm_rank(comm, &myRank);24 25 23 if(!comm.is_ep) return 0; 26 24 … … 38 36 { 39 37 Debug("Message probing for intracomm\n"); 40 ::MPI_Comm mpi_comm = static_cast< ::MPI_Comm> (comm.mpi_comm); 38 39 41 40 #ifdef _openmpi 42 41 #pragma omp critical (_mpi_call) 43 42 { 44 ::MPI_Iprobe(MPI_ANY_SOURCE, MPI_ANY_TAG, mpi_comm, &flag, &status);43 ::MPI_Iprobe(MPI_ANY_SOURCE, MPI_ANY_TAG, to_mpi_comm(comm.mpi_comm), &flag, &status); 45 44 if(flag) 46 45 { … … 48 47 mpi_source = status.MPI_SOURCE; 49 48 int tag = status.MPI_TAG; 50 ::MPI_Mprobe(mpi_source, tag, mpi_comm, &message, &status);49 ::MPI_Mprobe(mpi_source, tag, to_mpi_comm(comm.mpi_comm), &message, &status); 51 50 52 51 } 53 52 } 54 53 #elif _intelmpi 55 #pragma omp critical (_mpi_call) 56 ::MPI_Improbe(MPI_ANY_SOURCE, MPI_ANY_TAG, mpi_comm, &flag, &message, &status); 54 ::MPI_Improbe(MPI_ANY_SOURCE, MPI_ANY_TAG, to_mpi_comm(comm.mpi_comm), &flag, &message, &status); 57 55 #endif 58 56 … … 68 66 69 67 msg_block->ep_src = get_ep_rank(comm, src_loc, src_mpi); 70 int dest_mpi = comm.ep_comm_ptr->size_rank_info[2].first;71 int ep_dest = get_ep_rank(comm, dest_loc, dest_mpi);72 68 msg_block->mpi_status = new ::MPI_Status(status); 73 69 … … 79 75 { 80 76 #pragma omp flush 81 ptr_comm_target->ep_comm_ptr->message_queue->push_back(*msg_block); 82 77 comm.ep_comm_ptr->comm_list[dest_loc].ep_comm_ptr->message_queue->push_back(*msg_block); 83 78 #pragma omp flush 84 79 } … … 110 105 { 111 106 Debug("Message probing for intracomm\n"); 112 ::MPI_Comm mpi_comm = static_cast< ::MPI_Comm> (comm.ep_comm_ptr->intercomm->mpi_inter_comm); // => mpi_intercomm 113 107 114 108 #ifdef _openmpi 115 109 #pragma omp critical (_mpi_call) 116 110 { 117 ::MPI_Iprobe(MPI_ANY_SOURCE, MPI_ANY_TAG, mpi_comm, &flag, &status);111 ::MPI_Iprobe(MPI_ANY_SOURCE, MPI_ANY_TAG, to_mpi_comm(comm.ep_comm_ptr->intercomm->mpi_inter_comm), &flag, &status); 118 112 if(flag) 119 113 { … … 121 115 mpi_source = status.MPI_SOURCE; 122 116 int tag = status.MPI_TAG; 123 ::MPI_Mprobe(mpi_source, tag, mpi_comm, &message, &status);117 ::MPI_Mprobe(mpi_source, tag, to_mpi_comm(comm.ep_comm_ptr->intercomm->mpi_inter_comm), &message, &status); 124 118 125 119 } 126 120 } 127 121 #elif _intelmpi 128 #pragma omp critical (_mpi_call) 129 ::MPI_Improbe(MPI_ANY_SOURCE, MPI_ANY_TAG, mpi_comm, &flag, &message, &status); 122 ::MPI_Improbe(MPI_ANY_SOURCE, MPI_ANY_TAG, to_mpi_comm(comm.ep_comm_ptr->intercomm->mpi_inter_comm), &flag, &message, &status); 130 123 #endif 131 124 … … 153 146 { 154 147 #pragma omp flush 155 ptr_comm_target->ep_comm_ptr->message_queue->push_back(*msg_block);148 comm.ep_comm_ptr->comm_list[dest_loc].ep_comm_ptr->message_queue->push_back(*msg_block); 156 149 #pragma omp flush 157 150 } … … 167 160 { 168 161 Debug("Message probing for intracomm\n"); 169 ::MPI_Comm mpi_comm = static_cast< ::MPI_Comm> (comm.mpi_comm);162 170 163 #ifdef _openmpi 171 164 #pragma omp critical (_mpi_call) 172 165 { 173 ::MPI_Iprobe(MPI_ANY_SOURCE, MPI_ANY_TAG, mpi_comm, &flag, &status);166 ::MPI_Iprobe(MPI_ANY_SOURCE, MPI_ANY_TAG, to_mpi_comm(comm.mpi_comm), &flag, &status); 174 167 if(flag) 175 168 { … … 177 170 mpi_source = status.MPI_SOURCE; 178 171 int tag = status.MPI_TAG; 179 ::MPI_Mprobe(mpi_source, tag, mpi_comm, &message, &status);172 ::MPI_Mprobe(mpi_source, tag, to_mpi_comm(comm.mpi_comm), &message, &status); 180 173 181 174 } 182 175 } 183 176 #elif _intelmpi 184 #pragma omp critical (_mpi_call) 185 ::MPI_Improbe(MPI_ANY_SOURCE, MPI_ANY_TAG, mpi_comm, &flag, &message, &status); 177 ::MPI_Improbe(MPI_ANY_SOURCE, MPI_ANY_TAG, to_mpi_comm(comm.mpi_comm), &flag, &message, &status); 186 178 #endif 187 179 … … 196 188 int dest_loc = bitset<8> (status.MPI_TAG) .to_ulong(); 197 189 int src_mpi = status.MPI_SOURCE; 198 int current_inter = comm.ep_comm_ptr->intercomm->local_rank_map->at(current_ep_rank).first;199 190 200 191 msg_block->ep_src = get_ep_rank_intercomm(comm, src_loc, src_mpi); … … 209 200 { 210 201 #pragma omp flush 211 ptr_comm_target->ep_comm_ptr->message_queue->push_back(*msg_block);202 comm.ep_comm_ptr->comm_list[dest_loc].ep_comm_ptr->message_queue->push_back(*msg_block); 212 203 #pragma omp flush 213 204 } -
XIOS/dev/branch_openmp/extern/src_ep_dev/ep_recv.cpp
r1220 r1287 56 56 int dest_rank; 57 57 MPI_Comm_rank(comm, &dest_rank); 58 int world_rank; 59 MPI_Comm_rank(MPI_COMM_WORLD_STD, &world_rank); 58 60 59 61 60 if(!comm.is_ep) … … 71 70 } 72 71 73 request->mpi_request = MPI_REQUEST_NULL _STD;72 request->mpi_request = MPI_REQUEST_NULL.mpi_request; 74 73 request->buf = buf; 75 74 request->comm = comm; -
XIOS/dev/branch_openmp/extern/src_ep_dev/ep_reduce.cpp
r1134 r1287 9 9 #include <mpi.h> 10 10 #include "ep_declaration.hpp" 11 #include "ep_mpi.hpp" 11 12 12 13 using namespace std; … … 27 28 } 28 29 29 30 int MPI_Reduce_local(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm) 31 { 32 if(datatype == MPI_INT) 33 { 34 Debug("datatype is INT\n"); 35 return MPI_Reduce_local_int(sendbuf, recvbuf, count, op, comm); 36 } 37 else if(datatype == MPI_FLOAT) 38 { 39 Debug("datatype is FLOAT\n"); 40 return MPI_Reduce_local_float(sendbuf, recvbuf, count, op, comm); 41 } 42 else if(datatype == MPI_DOUBLE) 43 { 44 Debug("datatype is DOUBLE\n"); 45 return MPI_Reduce_local_double(sendbuf, recvbuf, count, op, comm); 46 } 47 else if(datatype == MPI_LONG) 48 { 49 Debug("datatype is DOUBLE\n"); 50 return MPI_Reduce_local_long(sendbuf, recvbuf, count, op, comm); 51 } 52 else if(datatype == MPI_UNSIGNED_LONG) 53 { 54 Debug("datatype is DOUBLE\n"); 55 return MPI_Reduce_local_ulong(sendbuf, recvbuf, count, op, comm); 56 } 57 else if(datatype == MPI_CHAR) 58 { 59 Debug("datatype is DOUBLE\n"); 60 return MPI_Reduce_local_char(sendbuf, recvbuf, count, op, comm); 61 } 62 else 63 { 64 printf("MPI_Reduce Datatype not supported!\n"); 65 exit(0); 66 } 67 } 68 69 70 int MPI_Reduce_local_int(const void *sendbuf, void *recvbuf, int count, MPI_Op op, MPI_Comm comm) 71 { 72 int my_rank = comm.ep_comm_ptr->size_rank_info[1].first; 73 int num_ep = comm.ep_comm_ptr->size_rank_info[1].second; 74 75 int *buffer = comm.my_buffer->buf_int; 76 int *send_buf = static_cast<int*>(const_cast<void*>(sendbuf)); 77 int *recv_buf = static_cast<int*>(const_cast<void*>(recvbuf)); 78 79 for(int j=0; j<count; j+=BUFFER_SIZE) 80 { 81 if( 0 == my_rank ) 30 template<typename T> 31 void reduce_max(const T * buffer, T* recvbuf, int count) 32 { 33 transform(buffer, buffer+count, recvbuf, recvbuf, max_op<T>); 34 } 35 36 template<typename T> 37 void reduce_min(const T * buffer, T* recvbuf, int count) 38 { 39 transform(buffer, buffer+count, recvbuf, recvbuf, min_op<T>); 40 } 41 42 template<typename T> 43 void reduce_sum(const T * buffer, T* recvbuf, int count) 44 { 45 transform(buffer, buffer+count, recvbuf, recvbuf, std::plus<T>()); 46 } 47 48 int MPI_Reduce_local(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int local_root, MPI_Comm comm) 49 { 50 assert(valid_type(datatype)); 51 assert(valid_op(op)); 52 53 ::MPI_Aint datasize, lb; 54 ::MPI_Type_get_extent(to_mpi_type(datatype), &lb, &datasize); 55 56 int ep_rank_loc = comm.ep_comm_ptr->size_rank_info[1].first; 57 int num_ep = comm.ep_comm_ptr->size_rank_info[1].second; 58 59 #pragma omp critical (_reduce) 60 comm.my_buffer->void_buffer[ep_rank_loc] = const_cast< void* >(sendbuf); 61 62 MPI_Barrier_local(comm); 63 64 if(ep_rank_loc == local_root) 65 { 66 memcpy(recvbuf, comm.my_buffer->void_buffer[0], datasize * count); 67 68 if(op == MPI_MAX) 82 69 { 83 #pragma omp critical (write_to_buffer) 84 copy(send_buf+j, send_buf+j + min(BUFFER_SIZE, count-j), buffer); 85 #pragma omp flush 70 if(datasize == sizeof(int)) 71 { 72 for(int i=1; i<num_ep; i++) 73 reduce_max<int>(static_cast<int*>(comm.my_buffer->void_buffer[i]), static_cast<int*>(recvbuf), count); 74 } 75 76 else if(datasize == sizeof(float)) 77 { 78 for(int i=1; i<num_ep; i++) 79 reduce_max<float>(static_cast<float*>(comm.my_buffer->void_buffer[i]), static_cast<float*>(recvbuf), count); 80 } 81 82 else if(datasize == sizeof(double)) 83 { 84 for(int i=1; i<num_ep; i++) 85 reduce_max<double>(static_cast<double*>(comm.my_buffer->void_buffer[i]), static_cast<double*>(recvbuf), count); 86 } 87 88 else if(datasize == sizeof(char)) 89 { 90 for(int i=1; i<num_ep; i++) 91 reduce_max<char>(static_cast<char*>(comm.my_buffer->void_buffer[i]), static_cast<char*>(recvbuf), count); 92 } 93 94 else if(datasize == sizeof(long)) 95 { 96 for(int i=1; i<num_ep; i++) 97 reduce_max<long>(static_cast<long*>(comm.my_buffer->void_buffer[i]), static_cast<long*>(recvbuf), count); 98 } 99 100 else if(datasize == sizeof(unsigned long)) 101 { 102 for(int i=1; i<num_ep; i++) 103 reduce_max<unsigned long>(static_cast<unsigned long*>(comm.my_buffer->void_buffer[i]), static_cast<unsigned long*>(recvbuf), count); 104 } 105 106 else printf("datatype Error\n"); 107 86 108 } 87 109 88 MPI_Barrier_local(comm); 89 90 if(my_rank !=0 ) 110 if(op == MPI_MIN) 91 111 { 92 #pragma omp critical (write_to_buffer) 93 { 94 #pragma omp flush 95 if(op == MPI_SUM) 96 { 97 transform(buffer, buffer+min(BUFFER_SIZE, count-j), send_buf+j, buffer, std::plus<int>()); 98 } 99 100 else if (op == MPI_MAX) 101 { 102 transform(buffer, buffer+min(BUFFER_SIZE, count-j), send_buf+j, buffer, max_op<int>); 103 } 104 105 else if (op == MPI_MIN) 106 { 107 transform(buffer, buffer+min(BUFFER_SIZE, count-j), send_buf+j, buffer, min_op<int>); 108 } 109 110 else 111 { 112 printf("Supported operation: MPI_SUM, MPI_MAX, MPI_MIN\n"); 113 exit(1); 114 } 115 #pragma omp flush 116 } 112 if(datasize == sizeof(int)) 113 { 114 for(int i=1; i<num_ep; i++) 115 reduce_min<int>(static_cast<int*>(comm.my_buffer->void_buffer[i]), static_cast<int*>(recvbuf), count); 116 } 117 118 else if(datasize == sizeof(float)) 119 { 120 for(int i=1; i<num_ep; i++) 121 reduce_min<float>(static_cast<float*>(comm.my_buffer->void_buffer[i]), static_cast<float*>(recvbuf), count); 122 } 123 124 else if(datasize == sizeof(double)) 125 { 126 for(int i=1; i<num_ep; i++) 127 reduce_min<double>(static_cast<double*>(comm.my_buffer->void_buffer[i]), static_cast<double*>(recvbuf), count); 128 } 129 130 else if(datasize == sizeof(char)) 131 { 132 for(int i=1; i<num_ep; i++) 133 reduce_min<char>(static_cast<char*>(comm.my_buffer->void_buffer[i]), static_cast<char*>(recvbuf), count); 134 } 135 136 else if(datasize == sizeof(long)) 137 { 138 for(int i=1; i<num_ep; i++) 139 reduce_min<long>(static_cast<long*>(comm.my_buffer->void_buffer[i]), static_cast<long*>(recvbuf), count); 140 } 141 142 else if(datasize == sizeof(unsigned long)) 143 { 144 for(int i=1; i<num_ep; i++) 145 reduce_min<unsigned long>(static_cast<unsigned long*>(comm.my_buffer->void_buffer[i]), static_cast<unsigned long*>(recvbuf), count); 146 } 147 148 else printf("datatype Error\n"); 149 117 150 } 118 151 119 MPI_Barrier_local(comm); 120 121 if(my_rank == 0) 152 153 if(op == MPI_SUM) 122 154 { 123 #pragma omp flush 124 copy(buffer, buffer+min(BUFFER_SIZE, count-j), recv_buf+j); 155 if(datasize == sizeof(int)) 156 { 157 for(int i=1; i<num_ep; i++) 158 reduce_sum<int>(static_cast<int*>(comm.my_buffer->void_buffer[i]), static_cast<int*>(recvbuf), count); 159 } 160 161 else if(datasize == sizeof(float)) 162 { 163 for(int i=1; i<num_ep; i++) 164 reduce_sum<float>(static_cast<float*>(comm.my_buffer->void_buffer[i]), static_cast<float*>(recvbuf), count); 165 } 166 167 else if(datasize == sizeof(double)) 168 { 169 for(int i=1; i<num_ep; i++) 170 reduce_sum<double>(static_cast<double*>(comm.my_buffer->void_buffer[i]), static_cast<double*>(recvbuf), count); 171 } 172 173 else if(datasize == sizeof(char)) 174 { 175 for(int i=1; i<num_ep; i++) 176 reduce_sum<char>(static_cast<char*>(comm.my_buffer->void_buffer[i]), static_cast<char*>(recvbuf), count); 177 } 178 179 else if(datasize == sizeof(long)) 180 { 181 for(int i=1; i<num_ep; i++) 182 reduce_sum<long>(static_cast<long*>(comm.my_buffer->void_buffer[i]), static_cast<long*>(recvbuf), count); 183 } 184 185 else if(datasize == sizeof(unsigned long)) 186 { 187 for(int i=1; i<num_ep; i++) 188 reduce_sum<unsigned long>(static_cast<unsigned long*>(comm.my_buffer->void_buffer[i]), static_cast<unsigned long*>(recvbuf), count); 189 } 190 191 else printf("datatype Error\n"); 192 125 193 } 126 MPI_Barrier_local(comm); 127 } 128 } 129 130 131 int MPI_Reduce_local_float(const void *sendbuf, void *recvbuf, int count, MPI_Op op, MPI_Comm comm) 132 { 133 int my_rank = comm.ep_comm_ptr->size_rank_info[1].first; 134 int num_ep = comm.ep_comm_ptr->size_rank_info[1].second; 135 136 float *buffer = comm.my_buffer->buf_float; 137 float *send_buf = static_cast<float*>(const_cast<void*>(sendbuf)); 138 float *recv_buf = static_cast<float*>(const_cast<void*>(recvbuf)); 139 140 for(int j=0; j<count; j+=BUFFER_SIZE) 141 { 142 if( 0 == my_rank ) 143 { 144 #pragma omp critical (write_to_buffer) 145 copy(send_buf+j, send_buf+j + min(BUFFER_SIZE, count-j), buffer); 146 #pragma omp flush 147 } 148 149 MPI_Barrier_local(comm); 150 151 if(my_rank !=0 ) 152 { 153 #pragma omp critical (write_to_buffer) 154 { 155 #pragma omp flush 156 157 if(op == MPI_SUM) 158 { 159 transform(buffer, buffer+min(BUFFER_SIZE, count-j), send_buf+j, buffer, std::plus<float>()); 160 } 161 162 else if (op == MPI_MAX) 163 { 164 transform(buffer, buffer+min(BUFFER_SIZE, count-j), send_buf+j, buffer, max_op<float>); 165 } 166 167 else if (op == MPI_MIN) 168 { 169 transform(buffer, buffer+min(BUFFER_SIZE, count-j), send_buf+j, buffer, min_op<float>); 170 } 171 172 else 173 { 174 printf("Supported operation: MPI_SUM, MPI_MAX, MPI_MIN\n"); 175 exit(1); 176 } 177 #pragma omp flush 178 } 179 } 180 181 MPI_Barrier_local(comm); 182 183 if(my_rank == 0) 184 { 185 #pragma omp flush 186 copy(buffer, buffer+min(BUFFER_SIZE, count-j), recv_buf+j); 187 } 188 MPI_Barrier_local(comm); 189 } 190 } 191 192 int MPI_Reduce_local_double(const void *sendbuf, void *recvbuf, int count, MPI_Op op, MPI_Comm comm) 193 { 194 int my_rank = comm.ep_comm_ptr->size_rank_info[1].first; 195 int num_ep = comm.ep_comm_ptr->size_rank_info[1].second; 196 197 double *buffer = comm.my_buffer->buf_double; 198 double *send_buf = static_cast<double*>(const_cast<void*>(sendbuf)); 199 double *recv_buf = static_cast<double*>(const_cast<void*>(recvbuf)); 200 201 for(int j=0; j<count; j+=BUFFER_SIZE) 202 { 203 if( 0 == my_rank ) 204 { 205 #pragma omp critical (write_to_buffer) 206 copy(send_buf+j, send_buf+j + min(BUFFER_SIZE, count-j), buffer); 207 #pragma omp flush 208 } 209 210 MPI_Barrier_local(comm); 211 212 if(my_rank !=0 ) 213 { 214 #pragma omp critical (write_to_buffer) 215 { 216 #pragma omp flush 217 218 219 if(op == MPI_SUM) 220 { 221 transform(buffer, buffer+min(BUFFER_SIZE, count-j), send_buf+j, buffer, std::plus<double>()); 222 } 223 224 else if (op == MPI_MAX) 225 { 226 transform(buffer, buffer+min(BUFFER_SIZE, count-j), send_buf+j, buffer, max_op<double>); 227 } 228 229 230 else if (op == MPI_MIN) 231 { 232 transform(buffer, buffer+min(BUFFER_SIZE, count-j), send_buf+j, buffer, min_op<double>); 233 } 234 235 else 236 { 237 printf("Supported operation: MPI_SUM, MPI_MAX, MPI_MIN\n"); 238 exit(1); 239 } 240 #pragma omp flush 241 } 242 } 243 244 MPI_Barrier_local(comm); 245 246 if(my_rank == 0) 247 { 248 #pragma omp flush 249 copy(buffer, buffer+min(BUFFER_SIZE, count-j), recv_buf+j); 250 } 251 MPI_Barrier_local(comm); 252 } 253 } 254 255 int MPI_Reduce_local_long(const void *sendbuf, void *recvbuf, int count, MPI_Op op, MPI_Comm comm) 256 { 257 int my_rank = comm.ep_comm_ptr->size_rank_info[1].first; 258 int num_ep = comm.ep_comm_ptr->size_rank_info[1].second; 259 260 long *buffer = comm.my_buffer->buf_long; 261 long *send_buf = static_cast<long*>(const_cast<void*>(sendbuf)); 262 long *recv_buf = static_cast<long*>(const_cast<void*>(recvbuf)); 263 264 for(int j=0; j<count; j+=BUFFER_SIZE) 265 { 266 if( 0 == my_rank ) 267 { 268 #pragma omp critical (write_to_buffer) 269 copy(send_buf+j, send_buf+j + min(BUFFER_SIZE, count-j), buffer); 270 #pragma omp flush 271 } 272 273 MPI_Barrier_local(comm); 274 275 if(my_rank !=0 ) 276 { 277 #pragma omp critical (write_to_buffer) 278 { 279 #pragma omp flush 280 281 282 if(op == MPI_SUM) 283 { 284 transform(buffer, buffer+min(BUFFER_SIZE, count-j), send_buf+j, buffer, std::plus<long>()); 285 } 286 287 else if (op == MPI_MAX) 288 { 289 transform(buffer, buffer+min(BUFFER_SIZE, count-j), send_buf+j, buffer, max_op<long>); 290 } 291 292 293 else if (op == MPI_MIN) 294 { 295 transform(buffer, buffer+min(BUFFER_SIZE, count-j), send_buf+j, buffer, min_op<long>); 296 } 297 298 else 299 { 300 printf("Supported operation: MPI_SUM, MPI_MAX, MPI_MIN\n"); 301 exit(1); 302 } 303 #pragma omp flush 304 } 305 } 306 307 MPI_Barrier_local(comm); 308 309 if(my_rank == 0) 310 { 311 #pragma omp flush 312 copy(buffer, buffer+min(BUFFER_SIZE, count-j), recv_buf+j); 313 } 314 MPI_Barrier_local(comm); 315 } 316 } 317 318 int MPI_Reduce_local_ulong(const void *sendbuf, void *recvbuf, int count, MPI_Op op, MPI_Comm comm) 319 { 320 int my_rank = comm.ep_comm_ptr->size_rank_info[1].first; 321 int num_ep = comm.ep_comm_ptr->size_rank_info[1].second; 322 323 unsigned long *buffer = comm.my_buffer->buf_ulong; 324 unsigned long *send_buf = static_cast<unsigned long*>(const_cast<void*>(sendbuf)); 325 unsigned long *recv_buf = static_cast<unsigned long*>(const_cast<void*>(recvbuf)); 326 327 for(int j=0; j<count; j+=BUFFER_SIZE) 328 { 329 if( 0 == my_rank ) 330 { 331 #pragma omp critical (write_to_buffer) 332 copy(send_buf+j, send_buf+j + min(BUFFER_SIZE, count-j), buffer); 333 #pragma omp flush 334 } 335 336 MPI_Barrier_local(comm); 337 338 if(my_rank !=0 ) 339 { 340 #pragma omp critical (write_to_buffer) 341 { 342 #pragma omp flush 343 344 345 if(op == MPI_SUM) 346 { 347 transform(buffer, buffer+min(BUFFER_SIZE, count-j), send_buf+j, buffer, std::plus<unsigned long>()); 348 } 349 350 else if (op == MPI_MAX) 351 { 352 transform(buffer, buffer+min(BUFFER_SIZE, count-j), send_buf+j, buffer, max_op<unsigned long>); 353 } 354 355 356 else if (op == MPI_MIN) 357 { 358 transform(buffer, buffer+min(BUFFER_SIZE, count-j), send_buf+j, buffer, min_op<unsigned long>); 359 } 360 361 else 362 { 363 printf("Supported operation: MPI_SUM, MPI_MAX, MPI_MIN\n"); 364 exit(1); 365 } 366 #pragma omp flush 367 } 368 } 369 370 MPI_Barrier_local(comm); 371 372 if(my_rank == 0) 373 { 374 #pragma omp flush 375 copy(buffer, buffer+min(BUFFER_SIZE, count-j), recv_buf+j); 376 } 377 MPI_Barrier_local(comm); 378 } 379 } 380 381 int MPI_Reduce_local_char(const void *sendbuf, void *recvbuf, int count, MPI_Op op, MPI_Comm comm) 382 { 383 int my_rank = comm.ep_comm_ptr->size_rank_info[1].first; 384 int num_ep = comm.ep_comm_ptr->size_rank_info[1].second; 385 386 char *buffer = comm.my_buffer->buf_char; 387 char *send_buf = static_cast<char*>(const_cast<void*>(sendbuf)); 388 char *recv_buf = static_cast<char*>(const_cast<void*>(recvbuf)); 389 390 for(int j=0; j<count; j+=BUFFER_SIZE) 391 { 392 if( 0 == my_rank ) 393 { 394 #pragma omp critical (write_to_buffer) 395 copy(send_buf+j, send_buf+j + min(BUFFER_SIZE, count-j), buffer); 396 #pragma omp flush 397 } 398 399 MPI_Barrier_local(comm); 400 401 if(my_rank !=0 ) 402 { 403 #pragma omp critical (write_to_buffer) 404 { 405 #pragma omp flush 406 407 408 if(op == MPI_SUM) 409 { 410 transform(buffer, buffer+min(BUFFER_SIZE, count-j), send_buf+j, buffer, std::plus<char>()); 411 } 412 413 else if (op == MPI_MAX) 414 { 415 transform(buffer, buffer+min(BUFFER_SIZE, count-j), send_buf+j, buffer, max_op<char>); 416 } 417 418 419 else if (op == MPI_MIN) 420 { 421 transform(buffer, buffer+min(BUFFER_SIZE, count-j), send_buf+j, buffer, min_op<char>); 422 } 423 424 else 425 { 426 printf("Supported operation: MPI_SUM, MPI_MAX, MPI_MIN\n"); 427 exit(1); 428 } 429 #pragma omp flush 430 } 431 } 432 433 MPI_Barrier_local(comm); 434 435 if(my_rank == 0) 436 { 437 #pragma omp flush 438 copy(buffer, buffer+min(BUFFER_SIZE, count-j), recv_buf+j); 439 } 440 MPI_Barrier_local(comm); 441 } 194 } 195 196 MPI_Barrier_local(comm); 197 442 198 } 443 199 … … 445 201 int MPI_Reduce(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, MPI_Comm comm) 446 202 { 203 447 204 if(!comm.is_ep && comm.mpi_comm) 448 205 { 449 ::MPI_Reduce(sendbuf, recvbuf, count, static_cast< ::MPI_Datatype>(datatype), static_cast< ::MPI_Op>(op), root, 450 static_cast< ::MPI_Comm>(comm.mpi_comm)); 451 return 0; 452 } 453 454 455 if(!comm.mpi_comm) return 0; 206 return ::MPI_Reduce(sendbuf, recvbuf, count, to_mpi_type(datatype), to_mpi_op(op), root, to_mpi_comm(comm.mpi_comm)); 207 } 208 209 210 211 int ep_rank = comm.ep_comm_ptr->size_rank_info[0].first; 212 int ep_rank_loc = comm.ep_comm_ptr->size_rank_info[1].first; 213 int mpi_rank = comm.ep_comm_ptr->size_rank_info[2].first; 214 int ep_size = comm.ep_comm_ptr->size_rank_info[0].second; 215 int num_ep = comm.ep_comm_ptr->size_rank_info[1].second; 216 int mpi_size = comm.ep_comm_ptr->size_rank_info[2].second; 456 217 457 218 int root_mpi_rank = comm.rank_map->at(root).second; 458 219 int root_ep_loc = comm.rank_map->at(root).first; 459 220 460 int ep_rank, ep_rank_loc, mpi_rank;461 int ep_size, num_ep, mpi_size;462 463 ep_rank = comm.ep_comm_ptr->size_rank_info[0].first;464 ep_rank_loc = comm.ep_comm_ptr->size_rank_info[1].first;465 mpi_rank = comm.ep_comm_ptr->size_rank_info[2].first;466 ep_size = comm.ep_comm_ptr->size_rank_info[0].second;467 num_ep = comm.ep_comm_ptr->size_rank_info[1].second;468 mpi_size = comm.ep_comm_ptr->size_rank_info[2].second;469 470 471 ::MPI_Aint recvsize, lb;472 473 ::MPI_Type_get_extent(static_cast< ::MPI_Datatype>(datatype), &lb, &recvsize);474 475 void *local_recvbuf;476 if(ep_rank_loc==0)477 {478 local_recvbuf = new void*[recvsize*count];479 }480 481 MPI_Reduce_local(sendbuf, local_recvbuf, count, datatype, op, comm);482 483 484 if(ep_rank_loc==0)485 {486 ::MPI_Reduce(local_recvbuf, recvbuf, count, static_cast< ::MPI_Datatype>(datatype), static_cast< ::MPI_Op>(op), root_mpi_rank, static_cast< ::MPI_Comm>(comm.mpi_comm));487 }488 489 if(root_ep_loc != 0 && mpi_rank == root_mpi_rank) // root is not master, master send to root and root receive from master490 {491 innode_memcpy(0, recvbuf, root_ep_loc, re