Changeset 1287
- Timestamp:
- 10/04/17 11:45:14 (7 years ago)
- Location:
- XIOS/dev/branch_openmp
- Files:
-
- 5 added
- 1 deleted
- 48 edited
Legend:
- Unmodified
- Added
- Removed
-
XIOS/dev/branch_openmp/arch/arch-X64_ADA.fcm
r1134 r1287 10 10 %PROD_CFLAGS -O3 -D BOOST_DISABLE_ASSERTS 11 11 %DEV_CFLAGS -g -traceback 12 %DEBUG_CFLAGS -DBZ_DEBUG -g -fno-inline 12 %DEBUG_CFLAGS -DBZ_DEBUG -g -fno-inline -std=c++11 13 13 14 14 %BASE_FFLAGS -D__NONE__ -
XIOS/dev/branch_openmp/bld.cfg
r1209 r1287 42 42 bld::target test_remap_omp.exe 43 43 bld::target test_unstruct_omp.exe 44 bld::target test_netcdf_omp.exe44 #bld::target test_netcdf_omp.exe 45 45 #bld::target test_client.exe 46 46 #bld::target test_complete.exe -
XIOS/dev/branch_openmp/extern/src_ep_dev/ep_alltoall.cpp
r1147 r1287 1 1 #include "ep_lib.hpp" 2 2 #include <mpi.h> 3 //#include "ep_declaration.hpp" 3 #include "ep_mpi.hpp" 4 4 5 5 6 namespace ep_lib 6 7 { 7 8 9 int MPI_Alltoall(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPI_Comm comm) 10 { 11 if(!comm.is_ep) 12 { 13 return ::MPI_Alltoall(sendbuf, sendcount, to_mpi_type(sendtype), recvbuf, recvcount, to_mpi_type(recvtype), to_mpi_comm(comm.mpi_comm)); 14 } 8 15 9 16 10 int MPI_Alltoall(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPI_Comm comm) 11 { 12 assert(static_cast< ::MPI_Datatype>(sendtype) == static_cast< ::MPI_Datatype>(recvtype)); 13 ::MPI_Aint typesize, llb; 14 ::MPI_Type_get_extent(static_cast< ::MPI_Datatype>(sendtype), &llb, &typesize); 17 assert(valid_type(sendtype) && valid_type(recvtype)); 18 assert(sendcount == recvcount); 19 20 ::MPI_Aint datasize, llb; 21 ::MPI_Type_get_extent(static_cast< ::MPI_Datatype>(sendtype), &llb, &datasize); 22 23 int count = sendcount; 15 24 16 int ep_size; 17 MPI_Comm_size(comm, &ep_size); 25 int ep_rank = comm.ep_comm_ptr->size_rank_info[0].first; 26 int ep_rank_loc = comm.ep_comm_ptr->size_rank_info[1].first; 27 int mpi_rank = comm.ep_comm_ptr->size_rank_info[2].first; 28 int ep_size = comm.ep_comm_ptr->size_rank_info[0].second; 29 int num_ep = comm.ep_comm_ptr->size_rank_info[1].second; 30 int mpi_size = comm.ep_comm_ptr->size_rank_info[2].second; 31 32 void* tmp_recvbuf; 33 if(ep_rank == 0) tmp_recvbuf = new void*[count * ep_size * ep_size * datasize]; 34 35 MPI_Gather(sendbuf, count*ep_size, sendtype, tmp_recvbuf, count*ep_size, recvtype, 0, comm); 36 18 37 38 39 // reorder tmp_buf 40 void* tmp_sendbuf; 41 if(ep_rank == 0) tmp_sendbuf = new void*[count * ep_size * ep_size * datasize]; 19 42 43 if(ep_rank == 0) 20 44 for(int i=0; i<ep_size; i++) 21 45 { 22 ep_lib::MPI_Gather(sendbuf+i*sendcount*typesize, sendcount, sendtype, recvbuf, recvcount, recvtype, i, comm); 46 for(int j=0; j<ep_size; j++) 47 { 48 //printf("tmp_recv[%d] = tmp_send[%d]\n", i*ep_size*count + j*count, j*ep_size*count + i*count); 49 50 memcpy(tmp_sendbuf + j*ep_size*count*datasize + i*count*datasize, tmp_recvbuf + i*ep_size*count*datasize + j*count*datasize, count*datasize); 51 } 23 52 } 24 53 54 MPI_Scatter(tmp_sendbuf, ep_size*count, sendtype, recvbuf, ep_size*recvcount, recvtype, 0, comm); 55 56 if(ep_rank == 0) 57 { 58 delete[] tmp_recvbuf; 59 delete[] tmp_sendbuf; 60 } 25 61 26 62 return 0; -
XIOS/dev/branch_openmp/extern/src_ep_dev/ep_barrier.cpp
r1185 r1287 27 27 return 0; 28 28 } 29 else if(comm.mpi_comm != MPI_COMM_NULL_STD)29 else if(comm.mpi_comm != static_cast< ::MPI_Comm>(MPI_COMM_NULL.mpi_comm)) 30 30 { 31 31 ::MPI_Comm mpi_comm = static_cast< ::MPI_Comm> (comm.mpi_comm); -
XIOS/dev/branch_openmp/extern/src_ep_dev/ep_bcast.cpp
r1134 r1287 9 9 #include <mpi.h> 10 10 #include "ep_declaration.hpp" 11 #include "ep_mpi.hpp" 11 12 12 13 using namespace std; 13 14 14 15 15 namespace ep_lib 16 16 { 17 int MPI_Bcast_local(void *buffer, int count, MPI_Datatype datatype, MPI_Comm comm)18 {19 if(datatype == MPI_INT)20 {21 return MPI_Bcast_local_int(buffer, count, comm);22 }23 else if(datatype == MPI_FLOAT)24 {25 return MPI_Bcast_local_float(buffer, count, comm);26 }27 else if(datatype == MPI_DOUBLE)28 {29 return MPI_Bcast_local_double(buffer, count, comm);30 }31 else if(datatype == MPI_CHAR)32 {33 return MPI_Bcast_local_char(buffer, count, comm);34 }35 else if(datatype == MPI_LONG)36 {37 return MPI_Bcast_local_long(buffer, count, comm);38 }39 else if(datatype == MPI_UNSIGNED_LONG)40 {41 return MPI_Bcast_local_char(buffer, count, comm);42 }43 else44 {45 printf("MPI_Bcast Datatype not supported!\n");46 exit(0);47 }48 }49 50 int MPI_Bcast_local_int(void *buf, int count, MPI_Comm comm)51 {52 int my_rank = comm.ep_comm_ptr->size_rank_info[1].first;53 int num_ep = comm.ep_comm_ptr->size_rank_info[1].second;54 55 int *buffer = comm.my_buffer->buf_int;56 int *tmp = static_cast<int*>(buf);57 58 for(int j=0; j<count; j+=BUFFER_SIZE)59 {60 if(my_rank == 0)61 {62 #pragma omp critical (write_to_buffer)63 {64 copy(tmp+j, tmp+j+min(BUFFER_SIZE, count-j), buffer);65 }66 #pragma omp flush67 }68 69 MPI_Barrier_local(comm);70 71 72 73 if(my_rank != 0)74 {75 #pragma omp flush76 #pragma omp critical (read_from_buffer)77 {78 copy(buffer, buffer+min(BUFFER_SIZE, count-j), tmp+j);79 }80 }81 82 MPI_Barrier_local(comm);83 }84 }85 86 int MPI_Bcast_local_float(void *buf, int count, MPI_Comm comm)87 {88 int my_rank = comm.ep_comm_ptr->size_rank_info[1].first;89 int num_ep = comm.ep_comm_ptr->size_rank_info[1].second;90 91 float *buffer = comm.my_buffer->buf_float;92 float *tmp = static_cast<float*>(buf);93 94 for(int j=0; j<count; j+=BUFFER_SIZE)95 {96 if(my_rank == 0)97 {98 #pragma omp critical (write_to_buffer)99 {100 copy(tmp+j, tmp+j+min(BUFFER_SIZE, count-j), buffer);101 }102 #pragma omp flush103 }104 105 MPI_Barrier_local(comm);106 107 108 if(my_rank != 0)109 {110 #pragma omp flush111 #pragma omp critical (read_from_buffer)112 {113 copy(buffer, buffer+min(BUFFER_SIZE, count-j), tmp+j);114 }115 }116 117 MPI_Barrier_local(comm);118 }119 }120 121 int MPI_Bcast_local_double(void *buf, int count, MPI_Comm comm)122 {123 int my_rank = comm.ep_comm_ptr->size_rank_info[1].first;124 int num_ep = comm.ep_comm_ptr->size_rank_info[1].second;125 126 double *buffer = comm.my_buffer->buf_double;127 double *tmp = static_cast<double*>(buf);128 129 for(int j=0; j<count; j+=BUFFER_SIZE)130 {131 if(my_rank == 0)132 {133 #pragma omp critical (write_to_buffer)134 {135 copy(tmp+j, tmp+j+min(BUFFER_SIZE, count-j), buffer);136 }137 #pragma omp flush138 }139 140 MPI_Barrier_local(comm);141 142 143 if(my_rank != 0)144 {145 #pragma omp flush146 #pragma omp critical (read_from_buffer)147 {148 copy(buffer, buffer+min(BUFFER_SIZE, count-j), tmp+j);149 }150 }151 152 MPI_Barrier_local(comm);153 }154 }155 156 157 int MPI_Bcast_local_char(void *buf, int count, MPI_Comm comm)158 {159 int my_rank = comm.ep_comm_ptr->size_rank_info[1].first;160 int num_ep = comm.ep_comm_ptr->size_rank_info[1].second;161 162 char *buffer = comm.my_buffer->buf_char;163 char *tmp = static_cast<char*>(buf);164 165 for(int j=0; j<count; j+=BUFFER_SIZE)166 {167 if(my_rank == 0)168 {169 #pragma omp critical (write_to_buffer)170 {171 copy(tmp+j, tmp+j+min(BUFFER_SIZE, count-j), buffer);172 }173 #pragma omp flush174 }175 176 MPI_Barrier_local(comm);177 178 179 if(my_rank != 0)180 {181 #pragma omp flush182 #pragma omp critical (read_from_buffer)183 {184 copy(buffer, buffer+min(BUFFER_SIZE, count-j), tmp+j);185 }186 }187 188 MPI_Barrier_local(comm);189 }190 }191 192 int MPI_Bcast_local_long(void *buf, int count, MPI_Comm comm)193 {194 int my_rank = comm.ep_comm_ptr->size_rank_info[1].first;195 int num_ep = comm.ep_comm_ptr->size_rank_info[1].second;196 197 long *buffer = comm.my_buffer->buf_long;198 long *tmp = static_cast<long*>(buf);199 200 for(int j=0; j<count; j+=BUFFER_SIZE)201 {202 if(my_rank == 0)203 {204 #pragma omp critical (write_to_buffer)205 {206 copy(tmp+j, tmp+j+min(BUFFER_SIZE, count-j), buffer);207 }208 #pragma omp flush209 }210 211 MPI_Barrier_local(comm);212 213 214 if(my_rank != 0)215 {216 #pragma omp flush217 #pragma omp critical (read_from_buffer)218 {219 copy(buffer, buffer+min(BUFFER_SIZE, count-j), tmp+j);220 }221 }222 223 MPI_Barrier_local(comm);224 }225 }226 227 int MPI_Bcast_local_ulong(void *buf, int count, MPI_Comm comm)228 {229 int my_rank = comm.ep_comm_ptr->size_rank_info[1].first;230 int num_ep = comm.ep_comm_ptr->size_rank_info[1].second;231 232 unsigned long *buffer = comm.my_buffer->buf_ulong;233 unsigned long *tmp = static_cast<unsigned long*>(buf);234 235 for(int j=0; j<count; j+=BUFFER_SIZE)236 {237 if(my_rank == 0)238 {239 #pragma omp critical (write_to_buffer)240 {241 copy(tmp+j, tmp+j+min(BUFFER_SIZE, count-j), buffer);242 }243 #pragma omp flush244 }245 246 MPI_Barrier_local(comm);247 248 249 if(my_rank != 0)250 {251 #pragma omp flush252 #pragma omp critical (read_from_buffer)253 {254 copy(buffer, buffer+min(BUFFER_SIZE, count-j), tmp+j);255 }256 }257 258 MPI_Barrier_local(comm);259 }260 }261 262 17 263 18 int MPI_Bcast(void *buffer, int count, MPI_Datatype datatype, int root, MPI_Comm comm) … … 266 21 if(!comm.is_ep) 267 22 { 23 #pragma omp single nowait 268 24 ::MPI_Bcast(buffer, count, static_cast< ::MPI_Datatype>(datatype), root, static_cast< ::MPI_Comm>(comm.mpi_comm)); 269 25 return 0; … … 271 27 272 28 273 int ep_rank, ep_rank_loc, mpi_rank; 274 int ep_size, num_ep, mpi_size; 275 276 ep_rank = comm.ep_comm_ptr->size_rank_info[0].first; 277 ep_rank_loc = comm.ep_comm_ptr->size_rank_info[1].first; 278 mpi_rank = comm.ep_comm_ptr->size_rank_info[2].first; 279 ep_size = comm.ep_comm_ptr->size_rank_info[0].second; 280 num_ep = comm.ep_comm_ptr->size_rank_info[1].second; 281 mpi_size = comm.ep_comm_ptr->size_rank_info[2].second; 282 283 29 int ep_rank = comm.ep_comm_ptr->size_rank_info[0].first; 30 int ep_rank_loc = comm.ep_comm_ptr->size_rank_info[1].first; 31 int mpi_rank = comm.ep_comm_ptr->size_rank_info[2].first; 284 32 285 33 int root_mpi_rank = comm.rank_map->at(root).second; 286 34 int root_ep_rank_loc = comm.rank_map->at(root).first; 287 35 36 // printf("root_mpi_rank = %d\n", root_mpi_rank); 288 37 289 // if root is not master thread, send first to master 290 if(root_ep_rank_loc != 0 && mpi_rank == root_mpi_rank) 38 if((ep_rank_loc==0 && mpi_rank != root_mpi_rank ) || ep_rank == root) 291 39 { 292 innode_memcpy(root_ep_rank_loc, buffer, 0, buffer, count, datatype, comm);40 ::MPI_Bcast(buffer, count, to_mpi_type(datatype), root_mpi_rank, to_mpi_comm(comm.mpi_comm)); 293 41 } 294 42 295 296 if(ep_rank_loc==0) 297 { 298 ::MPI_Bcast(buffer, count, static_cast< ::MPI_Datatype>(datatype), root_mpi_rank, static_cast< ::MPI_Comm>(comm.mpi_comm)); 299 } 300 301 MPI_Bcast_local(buffer, count, datatype, comm); 43 if(mpi_rank == root_mpi_rank) MPI_Bcast_local(buffer, count, datatype, root_ep_rank_loc, comm); 44 else MPI_Bcast_local(buffer, count, datatype, 0, comm); 302 45 303 46 return 0; … … 305 48 306 49 50 51 int MPI_Bcast_local(void *buffer, int count, MPI_Datatype datatype, int local_root, MPI_Comm comm) 52 { 53 assert(valid_type(datatype)); 54 55 int ep_rank_loc = comm.ep_comm_ptr->size_rank_info[1].first; 56 57 ::MPI_Aint datasize, lb; 58 ::MPI_Type_get_extent(to_mpi_type(datatype), &lb, &datasize); 59 60 61 if(ep_rank_loc == local_root) 62 { 63 comm.my_buffer->void_buffer[local_root] = buffer; 64 } 65 66 // #pragma omp flush 67 MPI_Barrier_local(comm); 68 // #pragma omp flush 69 70 if(ep_rank_loc != local_root) 71 { 72 #pragma omp critical (_bcast) 73 memcpy(buffer, comm.my_buffer->void_buffer[local_root], datasize * count); 74 } 75 76 MPI_Barrier_local(comm); 77 } 78 307 79 } -
XIOS/dev/branch_openmp/extern/src_ep_dev/ep_create.cpp
r1134 r1287 52 52 } 53 53 54 ::MPI_Allgather(&num_ep, 1, MPI_INT _STD, &recv_num_ep[0], 1, MPI_INT_STD, mpi_base_comm);54 ::MPI_Allgather(&num_ep, 1, MPI_INT, &recv_num_ep[0], 1, MPI_INT, mpi_base_comm); 55 55 56 56 … … 63 63 64 64 out_comm_hdls[0].my_buffer = new BUFFER; 65 out_comm_hdls[0].my_buffer->buf_double = new double[BUFFER_SIZE];66 out_comm_hdls[0].my_buffer->buf_float = new float[BUFFER_SIZE];67 out_comm_hdls[0].my_buffer->buf_int = new int[BUFFER_SIZE];68 out_comm_hdls[0].my_buffer->buf_long = new long[BUFFER_SIZE];69 out_comm_hdls[0].my_buffer->buf_ulong = new unsigned long[BUFFER_SIZE];70 out_comm_hdls[0].my_buffer->buf_char = new char[BUFFER_SIZE];65 // out_comm_hdls[0].my_buffer->buf_double = new double[BUFFER_SIZE]; 66 // out_comm_hdls[0].my_buffer->buf_float = new float[BUFFER_SIZE]; 67 // out_comm_hdls[0].my_buffer->buf_int = new int[BUFFER_SIZE]; 68 // out_comm_hdls[0].my_buffer->buf_long = new long[BUFFER_SIZE]; 69 // out_comm_hdls[0].my_buffer->buf_ulong = new unsigned long[BUFFER_SIZE]; 70 // out_comm_hdls[0].my_buffer->buf_char = new char[BUFFER_SIZE]; 71 71 72 72 out_comm_hdls[0].rank_map = new RANK_MAP; … … 135 135 } 136 136 137 ::MPI_Allgather(&num_ep, 1, MPI_INT _STD, &recv_num_ep[0], 1, MPI_INT_STD, mpi_base_comm);137 ::MPI_Allgather(&num_ep, 1, MPI_INT, &recv_num_ep[0], 1, MPI_INT, mpi_base_comm); 138 138 139 139 int sum = 0; // representing total ep number of process with smaller rank … … 145 145 146 146 out_comm_hdls[0].my_buffer = new BUFFER; 147 out_comm_hdls[0].my_buffer->buf_double = new double[BUFFER_SIZE];148 out_comm_hdls[0].my_buffer->buf_float = new float[BUFFER_SIZE];149 out_comm_hdls[0].my_buffer->buf_int = new int[BUFFER_SIZE];150 out_comm_hdls[0].my_buffer->buf_long = new long[BUFFER_SIZE];151 out_comm_hdls[0].my_buffer->buf_ulong = new unsigned long[BUFFER_SIZE];152 out_comm_hdls[0].my_buffer->buf_char = new char[BUFFER_SIZE];147 // out_comm_hdls[0].my_buffer->buf_double = new double[BUFFER_SIZE]; 148 // out_comm_hdls[0].my_buffer->buf_float = new float[BUFFER_SIZE]; 149 // out_comm_hdls[0].my_buffer->buf_int = new int[BUFFER_SIZE]; 150 // out_comm_hdls[0].my_buffer->buf_long = new long[BUFFER_SIZE]; 151 // out_comm_hdls[0].my_buffer->buf_ulong = new unsigned long[BUFFER_SIZE]; 152 // out_comm_hdls[0].my_buffer->buf_char = new char[BUFFER_SIZE]; 153 153 154 154 out_comm_hdls[0].rank_map = new RANK_MAP; … … 222 222 } 223 223 224 ::MPI_Allgather(&num_ep, 1, MPI_INT _STD, &recv_num_ep[0], 1, MPI_INT_STD, mpi_base_comm);224 ::MPI_Allgather(&num_ep, 1, MPI_INT, &recv_num_ep[0], 1, MPI_INT, mpi_base_comm); 225 225 226 226 … … 233 233 234 234 out_comm_hdls[0].my_buffer = new BUFFER; 235 out_comm_hdls[0].my_buffer->buf_double = new double[BUFFER_SIZE];236 out_comm_hdls[0].my_buffer->buf_float = new float[BUFFER_SIZE];237 out_comm_hdls[0].my_buffer->buf_int = new int[BUFFER_SIZE];238 out_comm_hdls[0].my_buffer->buf_long = new long[BUFFER_SIZE];239 out_comm_hdls[0].my_buffer->buf_ulong = new unsigned long[BUFFER_SIZE];240 out_comm_hdls[0].my_buffer->buf_char = new char[BUFFER_SIZE];235 // out_comm_hdls[0].my_buffer->buf_double = new double[BUFFER_SIZE]; 236 // out_comm_hdls[0].my_buffer->buf_float = new float[BUFFER_SIZE]; 237 // out_comm_hdls[0].my_buffer->buf_int = new int[BUFFER_SIZE]; 238 // out_comm_hdls[0].my_buffer->buf_long = new long[BUFFER_SIZE]; 239 // out_comm_hdls[0].my_buffer->buf_ulong = new unsigned long[BUFFER_SIZE]; 240 // out_comm_hdls[0].my_buffer->buf_char = new char[BUFFER_SIZE]; 241 241 242 242 out_comm_hdls[0].rank_map = new RANK_MAP; -
XIOS/dev/branch_openmp/extern/src_ep_dev/ep_declaration.cpp
r1134 r1287 4 4 5 5 #include <mpi.h> 6 7 ::MPI_Comm MPI_COMM_WORLD_STD = MPI_COMM_WORLD; 8 #undef MPI_COMM_WORLD 9 10 11 ::MPI_Comm MPI_COMM_NULL_STD = MPI_COMM_NULL; 12 #undef MPI_COMM_NULL 13 14 15 ::MPI_Request MPI_REQUEST_NULL_STD = MPI_REQUEST_NULL; 16 #undef MPI_REQUEST_NULL 17 18 ::MPI_Info MPI_INFO_NULL_STD = MPI_INFO_NULL; 19 #undef MPI_INFO_NULL 20 21 ::MPI_Datatype MPI_INT_STD = MPI_INT; 22 ::MPI_Datatype MPI_FLOAT_STD = MPI_FLOAT; 23 ::MPI_Datatype MPI_DOUBLE_STD = MPI_DOUBLE; 24 ::MPI_Datatype MPI_LONG_STD = MPI_LONG; 25 ::MPI_Datatype MPI_CHAR_STD = MPI_CHAR; 26 ::MPI_Datatype MPI_UNSIGNED_LONG_STD = MPI_UNSIGNED_LONG; 27 ::MPI_Datatype MPI_UNSIGNED_CHAR_STD = MPI_UNSIGNED_CHAR; 28 29 #undef MPI_INT 30 #undef MPI_FLOAT 31 #undef MPI_DOUBLE 32 #undef MPI_LONG 33 #undef MPI_CHAR 34 #undef MPI_UNSIGNED_LONG 35 #undef MPI_UNSIGNED_CHAR 36 37 38 ::MPI_Op MPI_SUM_STD = MPI_SUM; 39 ::MPI_Op MPI_MAX_STD = MPI_MAX; 40 ::MPI_Op MPI_MIN_STD = MPI_MIN; 41 42 #undef MPI_SUM 43 #undef MPI_MAX 44 #undef MPI_MIN 45 6 46 7 47 #undef MPI_INT -
XIOS/dev/branch_openmp/extern/src_ep_dev/ep_declaration.hpp
r1160 r1287 1 1 #ifndef EP_DECLARATION_HPP_INCLUDED 2 2 #define EP_DECLARATION_HPP_INCLUDED 3 3 /* 4 4 extern ::MPI_Datatype MPI_INT_STD; 5 5 extern ::MPI_Datatype MPI_FLOAT_STD; … … 20 20 extern ::MPI_Request MPI_REQUEST_NULL_STD; 21 21 extern ::MPI_Info MPI_INFO_NULL_STD; 22 22 */ 23 23 #undef MPI_INT 24 24 #undef MPI_FLOAT … … 37 37 38 38 #undef MPI_REQUEST_NULL 39 40 41 42 39 #undef MPI_STATUS_IGNORE 40 #undef MPI_INFO_NULL 43 41 44 42 extern ep_lib::MPI_Datatype MPI_INT; … … 59 57 extern ep_lib::MPI_Status MPI_STATUS_IGNORE; 60 58 extern ep_lib::MPI_Request MPI_REQUEST_NULL; 61 //extern ep_lib::MPI_Info MPI_INFO_NULL; 59 extern ep_lib::MPI_Info MPI_INFO_NULL; 60 62 61 63 62 #endif // EP_DECLARATION_HPP_INCLUDED -
XIOS/dev/branch_openmp/extern/src_ep_dev/ep_dup.cpp
r1134 r1287 28 28 29 29 // for intracomm 30 if(comm.mpi_comm == MPI_COMM_NULL_STD) return 0;30 if(comm.mpi_comm == static_cast< ::MPI_Comm >(MPI_COMM_NULL.mpi_comm)) return 0; 31 31 32 32 … … 59 59 { 60 60 61 if(comm.mpi_comm == MPI_COMM_NULL_STD) return 0;61 if(comm.mpi_comm == static_cast< ::MPI_Comm >(MPI_COMM_NULL.mpi_comm)) return 0; 62 62 63 63 int my_rank = comm.ep_comm_ptr->size_rank_info[1].first; -
XIOS/dev/branch_openmp/extern/src_ep_dev/ep_exscan.cpp
r1134 r1287 9 9 #include <mpi.h> 10 10 #include "ep_declaration.hpp" 11 #include "ep_mpi.hpp" 11 12 12 13 using namespace std; … … 26 27 } 27 28 29 template<typename T> 30 void reduce_max(const T * buffer, T* recvbuf, int count) 31 { 32 transform(buffer, buffer+count, recvbuf, recvbuf, max_op<T>); 33 } 34 35 template<typename T> 36 void reduce_min(const T * buffer, T* recvbuf, int count) 37 { 38 transform(buffer, buffer+count, recvbuf, recvbuf, min_op<T>); 39 } 40 41 template<typename T> 42 void reduce_sum(const T * buffer, T* recvbuf, int count) 43 { 44 transform(buffer, buffer+count, recvbuf, recvbuf, std::plus<T>()); 45 } 46 47 28 48 int MPI_Exscan_local(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm) 29 49 { 30 if(datatype == MPI_INT) 31 { 32 return MPI_Exscan_local_int(sendbuf, recvbuf, count, op, comm); 33 } 34 else if(datatype == MPI_FLOAT) 35 { 36 return MPI_Exscan_local_float(sendbuf, recvbuf, count, op, comm); 37 } 38 else if(datatype == MPI_DOUBLE) 39 { 40 return MPI_Exscan_local_double(sendbuf, recvbuf, count, op, comm); 41 } 42 else if(datatype == MPI_LONG) 43 { 44 return MPI_Exscan_local_long(sendbuf, recvbuf, count, op, comm); 45 } 46 else if(datatype == MPI_UNSIGNED_LONG) 47 { 48 return MPI_Exscan_local_ulong(sendbuf, recvbuf, count, op, comm); 49 } 50 else if(datatype == MPI_CHAR) 51 { 52 return MPI_Exscan_local_char(sendbuf, recvbuf, count, op, comm); 53 } 54 else 55 { 56 printf("MPI_Exscan Datatype not supported!\n"); 57 exit(0); 58 } 59 } 60 61 62 63 64 int MPI_Exscan_local_int(const void *sendbuf, void *recvbuf, int count, MPI_Op op, MPI_Comm comm) 65 { 66 int my_rank = comm.ep_comm_ptr->size_rank_info[1].first; 67 int num_ep = comm.ep_comm_ptr->size_rank_info[1].second; 68 69 int *buffer = comm.ep_comm_ptr->comm_list->my_buffer->buf_int; 70 int *send_buf = static_cast<int*>(const_cast<void*>(sendbuf)); 71 int *recv_buf = static_cast<int*>(recvbuf); 72 73 for(int j=0; j<count; j+=BUFFER_SIZE) 74 { 75 76 if(my_rank == 0) 77 { 78 79 #pragma omp critical (write_to_buffer) 80 { 81 copy(send_buf+j, send_buf+j+min(BUFFER_SIZE, count-j), buffer); 82 fill(recv_buf+j, recv_buf+j+min(BUFFER_SIZE, count-j), MPI_UNDEFINED); 83 #pragma omp flush 84 } 85 } 86 87 MPI_Barrier_local(comm); 88 89 for(int k=1; k<num_ep; k++) 90 { 91 #pragma omp critical (write_to_buffer) 92 { 93 if(my_rank == k) 94 { 95 #pragma omp flush 96 if(op == MPI_SUM) 97 { 98 copy(buffer, buffer+min(BUFFER_SIZE, count-j), recv_buf+j); 99 transform(buffer, buffer+min(BUFFER_SIZE, count-j), send_buf+j, buffer, std::plus<int>()); 100 101 } 102 else if(op == MPI_MAX) 103 { 104 copy(buffer, buffer+min(BUFFER_SIZE, count-j), recv_buf+j); 105 transform(buffer, buffer+min(BUFFER_SIZE, count-j), send_buf+j, buffer, max_op<int>); 106 } 107 else if(op == MPI_MIN) 108 { 109 copy(buffer, buffer+min(BUFFER_SIZE, count-j), recv_buf+j); 110 transform(buffer, buffer+min(BUFFER_SIZE, count-j), send_buf+j, buffer, min_op<int>); 111 } 112 else 113 { 114 printf("Supported operation: MPI_SUM, MPI_MAX, MPI_MIN\n"); 115 exit(1); 116 } 117 #pragma omp flush 118 } 119 } 120 121 MPI_Barrier_local(comm); 122 } 123 } 124 125 } 126 127 int MPI_Exscan_local_float(const void *sendbuf, void *recvbuf, int count, MPI_Op op, MPI_Comm comm) 128 { 129 int my_rank = comm.ep_comm_ptr->size_rank_info[1].first; 130 int num_ep = comm.ep_comm_ptr->size_rank_info[1].second; 131 132 float *buffer = comm.ep_comm_ptr->comm_list->my_buffer->buf_float; 133 float *send_buf = static_cast<float*>(const_cast<void*>(sendbuf)); 134 float *recv_buf = static_cast<float*>(recvbuf); 135 136 for(int j=0; j<count; j+=BUFFER_SIZE) 137 { 138 if(my_rank == 0) 139 { 140 141 #pragma omp critical (write_to_buffer) 142 { 143 copy(send_buf+j, send_buf+j+min(BUFFER_SIZE, count-j), buffer); 144 fill(recv_buf+j, recv_buf+j+min(BUFFER_SIZE, count-j), MPI_UNDEFINED); 145 #pragma omp flush 146 } 147 } 148 149 MPI_Barrier_local(comm); 150 151 for(int k=1; k<num_ep; k++) 152 { 153 #pragma omp critical (write_to_buffer) 154 { 155 if(my_rank == k) 156 { 157 #pragma omp flush 158 if(op == MPI_SUM) 159 { 160 copy(buffer, buffer+min(BUFFER_SIZE, count-j), recv_buf+j); 161 transform(buffer, buffer+min(BUFFER_SIZE, count-j), send_buf+j, buffer, std::plus<float>()); 162 } 163 else if(op == MPI_MAX) 164 { 165 copy(buffer, buffer+min(BUFFER_SIZE, count-j), recv_buf+j); 166 transform(buffer, buffer+min(BUFFER_SIZE, count-j), send_buf+j, buffer, max_op<float>); 167 } 168 else if(op == MPI_MIN) 169 { 170 copy(buffer, buffer+min(BUFFER_SIZE, count-j), recv_buf+j); 171 transform(buffer, buffer+min(BUFFER_SIZE, count-j), send_buf+j, buffer, min_op<float>); 172 } 173 else 174 { 175 printf("Supported operation: MPI_SUM, MPI_MAX, MPI_MIN\n"); 176 exit(1); 177 } 178 #pragma omp flush 179 } 180 } 181 182 MPI_Barrier_local(comm); 183 } 184 } 185 } 186 187 int MPI_Exscan_local_double(const void *sendbuf, void *recvbuf, int count, MPI_Op op, MPI_Comm comm) 188 { 189 190 int my_rank = comm.ep_comm_ptr->size_rank_info[1].first; 191 int num_ep = comm.ep_comm_ptr->size_rank_info[1].second; 192 193 double *buffer = comm.ep_comm_ptr->comm_list->my_buffer->buf_double; 194 double *send_buf = static_cast<double*>(const_cast<void*>(sendbuf)); 195 double *recv_buf = static_cast<double*>(recvbuf); 196 197 for(int j=0; j<count; j+=BUFFER_SIZE) 198 { 199 if(my_rank == 0) 200 { 201 202 #pragma omp critical (write_to_buffer) 203 { 204 copy(send_buf+j, send_buf+j+min(BUFFER_SIZE, count-j), buffer); 205 fill(recv_buf+j, recv_buf+j+min(BUFFER_SIZE, count-j), MPI_UNDEFINED); 206 #pragma omp flush 207 } 208 } 209 210 MPI_Barrier_local(comm); 211 212 for(int k=1; k<num_ep; k++) 213 { 214 #pragma omp critical (write_to_buffer) 215 { 216 if(my_rank == k) 217 { 218 #pragma omp flush 219 if(op == MPI_SUM) 220 { 221 copy(buffer, buffer+min(BUFFER_SIZE, count-j), recv_buf+j); 222 transform(buffer, buffer+min(BUFFER_SIZE, count-j), send_buf+j, buffer, std::plus<double>()); 223 } 224 else if(op == MPI_MAX) 225 { 226 copy(buffer, buffer+min(BUFFER_SIZE, count-j), recv_buf+j); 227 transform(buffer, buffer+min(BUFFER_SIZE, count-j), send_buf+j, buffer, max_op<double>); 228 } 229 else if(op == MPI_MIN) 230 { 231 copy(buffer, buffer+min(BUFFER_SIZE, count-j), recv_buf+j); 232 transform(buffer, buffer+min(BUFFER_SIZE, count-j), send_buf+j, buffer, min_op<double>); 233 } 234 else 235 { 236 printf("Supported operation: MPI_SUM, MPI_MAX, MPI_MIN\n"); 237 exit(1); 238 } 239 #pragma omp flush 240 } 241 } 242 243 MPI_Barrier_local(comm); 244 } 245 } 246 } 247 248 int MPI_Exscan_local_long(const void *sendbuf, void *recvbuf, int count, MPI_Op op, MPI_Comm comm) 249 { 250 251 int my_rank = comm.ep_comm_ptr->size_rank_info[1].first; 252 int num_ep = comm.ep_comm_ptr->size_rank_info[1].second; 253 254 long *buffer = comm.ep_comm_ptr->comm_list->my_buffer->buf_long; 255 long *send_buf = static_cast<long*>(const_cast<void*>(sendbuf)); 256 long *recv_buf = static_cast<long*>(recvbuf); 257 258 for(int j=0; j<count; j+=BUFFER_SIZE) 259 { 260 if(my_rank == 0) 261 { 262 263 #pragma omp critical (write_to_buffer) 264 { 265 copy(send_buf+j, send_buf+j+min(BUFFER_SIZE, count-j), buffer); 266 fill(recv_buf+j, recv_buf+j+min(BUFFER_SIZE, count-j), MPI_UNDEFINED); 267 #pragma omp flush 268 } 269 } 270 271 MPI_Barrier_local(comm); 272 273 for(int k=1; k<num_ep; k++) 274 { 275 #pragma omp critical (write_to_buffer) 276 { 277 if(my_rank == k) 278 { 279 #pragma omp flush 280 if(op == MPI_SUM) 281 { 282 copy(buffer, buffer+min(BUFFER_SIZE, count-j), recv_buf+j); 283 transform(buffer, buffer+min(BUFFER_SIZE, count-j), send_buf+j, buffer, std::plus<long>()); 284 } 285 else if(op == MPI_MAX) 286 { 287 copy(buffer, buffer+min(BUFFER_SIZE, count-j), recv_buf+j); 288 transform(buffer, buffer+min(BUFFER_SIZE, count-j), send_buf+j, buffer, max_op<long>); 289 } 290 else if(op == MPI_MIN) 291 { 292 copy(buffer, buffer+min(BUFFER_SIZE, count-j), recv_buf+j); 293 transform(buffer, buffer+min(BUFFER_SIZE, count-j), send_buf+j, buffer, min_op<long>); 294 } 295 else 296 { 297 printf("Supported operation: MPI_SUM, MPI_MAX, MPI_MIN\n"); 298 exit(1); 299 } 300 #pragma omp flush 301 } 302 } 303 304 MPI_Barrier_local(comm); 305 } 306 } 307 } 308 309 int MPI_Exscan_local_ulong(const void *sendbuf, void *recvbuf, int count, MPI_Op op, MPI_Comm comm) 310 { 311 312 int my_rank = comm.ep_comm_ptr->size_rank_info[1].first; 313 int num_ep = comm.ep_comm_ptr->size_rank_info[1].second; 314 315 unsigned long *buffer = comm.ep_comm_ptr->comm_list->my_buffer->buf_ulong; 316 unsigned long *send_buf = static_cast<unsigned long*>(const_cast<void*>(sendbuf)); 317 unsigned long *recv_buf = static_cast<unsigned long*>(recvbuf); 318 319 for(int j=0; j<count; j+=BUFFER_SIZE) 320 { 321 if(my_rank == 0) 322 { 323 324 #pragma omp critical (write_to_buffer) 325 { 326 copy(send_buf+j, send_buf+j+min(BUFFER_SIZE, count-j), buffer); 327 fill(recv_buf+j, recv_buf+j+min(BUFFER_SIZE, count-j), MPI_UNDEFINED); 328 #pragma omp flush 329 } 330 } 331 332 MPI_Barrier_local(comm); 333 334 for(int k=1; k<num_ep; k++) 335 { 336 #pragma omp critical (write_to_buffer) 337 { 338 if(my_rank == k) 339 { 340 #pragma omp flush 341 if(op == MPI_SUM) 342 { 343 copy(buffer, buffer+min(BUFFER_SIZE, count-j), recv_buf+j); 344 transform(buffer, buffer+min(BUFFER_SIZE, count-j), send_buf+j, buffer, std::plus<unsigned long>()); 345 } 346 else if(op == MPI_MAX) 347 { 348 copy(buffer, buffer+min(BUFFER_SIZE, count-j), recv_buf+j); 349 transform(buffer, buffer+min(BUFFER_SIZE, count-j), send_buf+j, buffer, max_op<unsigned long>); 350 } 351 else if(op == MPI_MIN) 352 { 353 copy(buffer, buffer+min(BUFFER_SIZE, count-j), recv_buf+j); 354 transform(buffer, buffer+min(BUFFER_SIZE, count-j), send_buf+j, buffer, min_op<unsigned long>); 355 } 356 else 357 { 358 printf("Supported operation: MPI_SUM, MPI_MAX, MPI_MIN\n"); 359 exit(1); 360 } 361 #pragma omp flush 362 } 363 } 364 365 MPI_Barrier_local(comm); 366 } 367 } 368 } 369 370 int MPI_Exscan_local_char(const void *sendbuf, void *recvbuf, int count, MPI_Op op, MPI_Comm comm) 371 { 372 373 int my_rank = comm.ep_comm_ptr->size_rank_info[1].first; 374 int num_ep = comm.ep_comm_ptr->size_rank_info[1].second; 375 376 char *buffer = comm.ep_comm_ptr->comm_list->my_buffer->buf_char; 377 char *send_buf = static_cast<char*>(const_cast<void*>(sendbuf)); 378 char *recv_buf = static_cast<char*>(recvbuf); 379 380 for(int j=0; j<count; j+=BUFFER_SIZE) 381 { 382 if(my_rank == 0) 383 { 384 385 #pragma omp critical (write_to_buffer) 386 { 387 copy(send_buf+j, send_buf+j+min(BUFFER_SIZE, count-j), buffer); 388 fill(recv_buf+j, recv_buf+j+min(BUFFER_SIZE, count-j), MPI_UNDEFINED); 389 #pragma omp flush 390 } 391 } 392 393 MPI_Barrier_local(comm); 394 395 for(int k=1; k<num_ep; k++) 396 { 397 #pragma omp critical (write_to_buffer) 398 { 399 if(my_rank == k) 400 { 401 #pragma omp flush 402 if(op == MPI_SUM) 403 { 404 copy(buffer, buffer+min(BUFFER_SIZE, count-j), recv_buf+j); 405 transform(buffer, buffer+min(BUFFER_SIZE, count-j), send_buf+j, buffer, std::plus<char>()); 406 } 407 else if(op == MPI_MAX) 408 { 409 copy(buffer, buffer+min(BUFFER_SIZE, count-j), recv_buf+j); 410 transform(buffer, buffer+min(BUFFER_SIZE, count-j), send_buf+j, buffer, max_op<char>); 411 } 412 else if(op == MPI_MIN) 413 { 414 copy(buffer, buffer+min(BUFFER_SIZE, count-j), recv_buf+j); 415 transform(buffer, buffer+min(BUFFER_SIZE, count-j), send_buf+j, buffer, min_op<char>); 416 } 417 else 418 { 419 printf("Supported operation: MPI_SUM, MPI_MAX, MPI_MIN\n"); 420 exit(1); 421 } 422 #pragma omp flush 423 } 424 } 425 426 MPI_Barrier_local(comm); 427 } 428 } 429 } 430 50 valid_op(op); 51 52 int ep_rank_loc = comm.ep_comm_ptr->size_rank_info[1].first; 53 int num_ep = comm.ep_comm_ptr->size_rank_info[1].second; 54 int mpi_rank = comm.ep_comm_ptr->size_rank_info[2].first; 55 56 57 ::MPI_Aint datasize, lb; 58 ::MPI_Type_get_extent(to_mpi_type(datatype), &lb, &datasize); 59 60 if(ep_rank_loc == 0 && mpi_rank != 0) 61 { 62 comm.my_buffer->void_buffer[0] = recvbuf; 63 } 64 if(ep_rank_loc == 0 && mpi_rank == 0) 65 { 66 comm.my_buffer->void_buffer[0] = const_cast<void*>(sendbuf); 67 } 68 69 70 MPI_Barrier_local(comm); 71 72 memcpy(recvbuf, comm.my_buffer->void_buffer[0], datasize*count); 73 74 MPI_Barrier_local(comm); 75 76 comm.my_buffer->void_buffer[ep_rank_loc] = const_cast<void*>(sendbuf); 77 78 MPI_Barrier_local(comm); 79 80 if(op == MPI_SUM) 81 { 82 if(datatype == MPI_INT && datasize == sizeof(int)) 83 { 84 for(int i=0; i<ep_rank_loc; i++) 85 reduce_sum<int>(static_cast<int*>(comm.my_buffer->void_buffer[i]), static_cast<int*>(recvbuf), count); 86 } 87 88 else if(datatype == MPI_FLOAT && datasize == sizeof(float)) 89 { 90 for(int i=0; i<ep_rank_loc; i++) 91 reduce_sum<float>(static_cast<float*>(comm.my_buffer->void_buffer[i]), static_cast<float*>(recvbuf), count); 92 } 93 94 95 else if(datatype == MPI_DOUBLE && datasize == sizeof(double)) 96 { 97 for(int i=0; i<ep_rank_loc; i++) 98 reduce_sum<double>(static_cast<double*>(comm.my_buffer->void_buffer[i]), static_cast<double*>(recvbuf), count); 99 } 100 101 else if(datatype == MPI_CHAR && datasize == sizeof(char)) 102 { 103 for(int i=0; i<ep_rank_loc; i++) 104 reduce_sum<char>(static_cast<char*>(comm.my_buffer->void_buffer[i]), static_cast<char*>(recvbuf), count); 105 } 106 107 else if(datatype == MPI_LONG && datasize == sizeof(long)) 108 { 109 for(int i=0; i<ep_rank_loc; i++) 110 reduce_sum<long>(static_cast<long*>(comm.my_buffer->void_buffer[i]), static_cast<long*>(recvbuf), count); 111 } 112 113 else if(datatype == MPI_UNSIGNED_LONG && datasize == sizeof(unsigned long)) 114 { 115 for(int i=0; i<ep_rank_loc; i++) 116 reduce_sum<unsigned long>(static_cast<unsigned long*>(comm.my_buffer->void_buffer[i]), static_cast<unsigned long*>(recvbuf), count); 117 } 118 119 else printf("datatype Error\n"); 120 121 122 } 123 124 else if(op == MPI_MAX) 125 { 126 if(datatype == MPI_INT && datasize == sizeof(int)) 127 for(int i=0; i<ep_rank_loc; i++) 128 reduce_max<int>(static_cast<int*>(comm.my_buffer->void_buffer[i]), static_cast<int*>(recvbuf), count); 129 130 else if(datatype == MPI_FLOAT && datasize == sizeof(float)) 131 for(int i=0; i<ep_rank_loc; i++) 132 reduce_max<float>(static_cast<float*>(comm.my_buffer->void_buffer[i]), static_cast<float*>(recvbuf), count); 133 134 else if(datatype == MPI_DOUBLE && datasize == sizeof(double)) 135 for(int i=0; i<ep_rank_loc; i++) 136 reduce_max<double>(static_cast<double*>(comm.my_buffer->void_buffer[i]), static_cast<double*>(recvbuf), count); 137 138 else if(datatype == MPI_CHAR && datasize == sizeof(char)) 139 for(int i=0; i<ep_rank_loc; i++) 140 reduce_max<char>(static_cast<char*>(comm.my_buffer->void_buffer[i]), static_cast<char*>(recvbuf), count); 141 142 else if(datatype == MPI_LONG && datasize == sizeof(long)) 143 for(int i=0; i<ep_rank_loc; i++) 144 reduce_max<long>(static_cast<long*>(comm.my_buffer->void_buffer[i]), static_cast<long*>(recvbuf), count); 145 146 else if(datatype == MPI_UNSIGNED_LONG && datasize == sizeof(unsigned long)) 147 for(int i=0; i<ep_rank_loc; i++) 148 reduce_max<unsigned long>(static_cast<unsigned long*>(comm.my_buffer->void_buffer[i]), static_cast<unsigned long*>(recvbuf), count); 149 150 else printf("datatype Error\n"); 151 } 152 153 else //if(op == MPI_MIN) 154 { 155 if(datatype == MPI_INT && datasize == sizeof(int)) 156 for(int i=0; i<ep_rank_loc; i++) 157 reduce_min<int>(static_cast<int*>(comm.my_buffer->void_buffer[i]), static_cast<int*>(recvbuf), count); 158 159 else if(datatype == MPI_FLOAT && datasize == sizeof(float)) 160 for(int i=0; i<ep_rank_loc; i++) 161 reduce_min<float>(static_cast<float*>(comm.my_buffer->void_buffer[i]), static_cast<float*>(recvbuf), count); 162 163 else if(datatype == MPI_DOUBLE && datasize == sizeof(double)) 164 for(int i=0; i<ep_rank_loc; i++) 165 reduce_min<double>(static_cast<double*>(comm.my_buffer->void_buffer[i]), static_cast<double*>(recvbuf), count); 166 167 else if(datatype == MPI_CHAR && datasize == sizeof(char)) 168 for(int i=0; i<ep_rank_loc; i++) 169 reduce_min<char>(static_cast<char*>(comm.my_buffer->void_buffer[i]), static_cast<char*>(recvbuf), count); 170 171 else if(datatype == MPI_LONG && datasize == sizeof(long)) 172 for(int i=0; i<ep_rank_loc; i++) 173 reduce_min<long>(static_cast<long*>(comm.my_buffer->void_buffer[i]), static_cast<long*>(recvbuf), count); 174 175 else if(datatype == MPI_UNSIGNED_LONG && datasize == sizeof(unsigned long)) 176 for(int i=0; i<ep_rank_loc; i++) 177 reduce_min<unsigned long>(static_cast<unsigned long*>(comm.my_buffer->void_buffer[i]), static_cast<unsigned long*>(recvbuf), count); 178 179 else printf("datatype Error\n"); 180 } 181 182 MPI_Barrier_local(comm); 183 184 } 431 185 432 186 int MPI_Exscan(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm) 433 187 { 434 435 188 if(!comm.is_ep) 436 189 { 437 ::MPI_Exscan(const_cast<void*>(sendbuf), recvbuf, count, static_cast< ::MPI_Datatype>(datatype), 438 static_cast< ::MPI_Op>(op), static_cast< ::MPI_Comm>(comm.mpi_comm)); 439 return 0; 440 } 441 if(!comm.mpi_comm) return 0; 442 443 int ep_rank, ep_rank_loc, mpi_rank; 444 int ep_size, num_ep, mpi_size; 445 446 ep_rank = comm.ep_comm_ptr->size_rank_info[0].first; 447 ep_rank_loc = comm.ep_comm_ptr->size_rank_info[1].first; 448 mpi_rank = comm.ep_comm_ptr->size_rank_info[2].first; 449 ep_size = comm.ep_comm_ptr->size_rank_info[0].second; 450 num_ep = comm.ep_comm_ptr->size_rank_info[1].second; 451 mpi_size = comm.ep_comm_ptr->size_rank_info[2].second; 452 453 190 return ::MPI_Scan(sendbuf, recvbuf, count, to_mpi_type(datatype), to_mpi_op(op), to_mpi_comm(comm.mpi_comm)); 191 } 192 193 valid_type(datatype); 194 195 int ep_rank = comm.ep_comm_ptr->size_rank_info[0].first; 196 int ep_rank_loc = comm.ep_comm_ptr->size_rank_info[1].first; 197 int mpi_rank = comm.ep_comm_ptr->size_rank_info[2].first; 198 int ep_size = comm.ep_comm_ptr->size_rank_info[0].second; 199 int num_ep = comm.ep_comm_ptr->size_rank_info[1].second; 200 int mpi_size = comm.ep_comm_ptr->size_rank_info[2].second; 454 201 455 202 ::MPI_Aint datasize, lb; 456 457 ::MPI_Type_get_extent(static_cast< ::MPI_Datatype>(datatype), &lb, &datasize); 458 459 void* local_scan_recvbuf; 460 local_scan_recvbuf = new void*[datasize * count]; 461 462 463 // local scan 464 MPI_Exscan_local(sendbuf, recvbuf, count, datatype, op, comm); 465 466 // MPI_scan 467 void* local_sum; 468 void* mpi_scan_recvbuf; 469 470 471 mpi_scan_recvbuf = new void*[datasize*count]; 203 ::MPI_Type_get_extent(to_mpi_type(datatype), &lb, &datasize); 204 205 void* tmp_sendbuf; 206 tmp_sendbuf = new void*[datasize * count]; 207 208 int my_src = 0; 209 int my_dst = ep_rank; 210 211 std::vector<int> my_map(mpi_size, 0); 212 213 for(int i=0; i<comm.rank_map->size(); i++) my_map[comm.rank_map->at(i).second]++; 214 215 for(int i=0; i<mpi_rank; i++) my_src += my_map[i]; 216 my_src += ep_rank_loc; 217 218 219 for(int i=0; i<mpi_size; i++) 220 { 221 if(my_dst < my_map[i]) 222 { 223 my_dst = get_ep_rank(comm, my_dst, i); 224 break; 225 } 226 else 227 my_dst -= my_map[i]; 228 } 229 230 if(ep_rank != my_dst) 231 { 232 MPI_Request request[2]; 233 MPI_Status status[2]; 234 235 MPI_Isend(sendbuf, count, datatype, my_dst, my_dst, comm, &request[0]); 236 237 MPI_Irecv(tmp_sendbuf, count, datatype, my_src, ep_rank, comm, &request[1]); 238 239 MPI_Waitall(2, request, status); 240 } 241 242 else memcpy(tmp_sendbuf, sendbuf, datasize*count); 243 244 245 void* tmp_recvbuf; 246 tmp_recvbuf = new void*[datasize * count]; 247 248 MPI_Reduce_local(tmp_sendbuf, tmp_recvbuf, count, datatype, op, 0, comm); 472 249 473 250 if(ep_rank_loc == 0) 474 { 475 local_sum = new void*[datasize*count]; 476 } 477 478 479 MPI_Reduce_local(sendbuf, local_sum, count, datatype, op, comm); 480 481 if(ep_rank_loc == 0) 482 { 483 ::MPI_Exscan(local_sum, mpi_scan_recvbuf, count, static_cast< ::MPI_Datatype>(datatype), static_cast< ::MPI_Op>(op), static_cast< ::MPI_Comm>(comm.mpi_comm)); 484 } 485 486 487 if(mpi_rank > 0) 488 { 489 MPI_Bcast_local(mpi_scan_recvbuf, count, datatype, comm); 490 } 491 492 493 if(datatype == MPI_DOUBLE) 494 { 495 double* sum_buf = static_cast<double*>(mpi_scan_recvbuf); 496 double* recv_buf = static_cast<double*>(recvbuf); 497 498 if(mpi_rank != 0) 499 { 500 if(op == MPI_SUM) 501 { 502 if(ep_rank_loc == 0) 503 { 504 copy(sum_buf, sum_buf+count, recv_buf); 505 } 506 else 507 { 508 for(int i=0; i<count; i++) 509 { 510 recv_buf[i] += sum_buf[i]; 511 } 512 } 513 } 514 else if (op == MPI_MAX) 515 { 516 if(ep_rank_loc == 0) 517 { 518 copy(sum_buf, sum_buf+count, recv_buf); 519 } 520 else 521 { 522 for(int i=0; i<count; i++) 523 { 524 recv_buf[i] = max(recv_buf[i], sum_buf[i]); 525 } 526 } 527 } 528 else if(op == MPI_MIN) 529 { 530 if(ep_rank_loc == 0) 531 { 532 copy(sum_buf, sum_buf+count, recv_buf); 533 } 534 else 535 { 536 for(int i=0; i<count; i++) 537 { 538 recv_buf[i] = min(recv_buf[i], sum_buf[i]); 539 } 540 } 541 } 542 else 543 { 544 printf("Support operator for MPI_Scan is MPI_SUM, MPI_MAX, and MPI_MIN\n"); 545 exit(1); 546 } 547 } 548 549 delete[] static_cast<double*>(mpi_scan_recvbuf); 550 if(ep_rank_loc == 0) 551 { 552 delete[] static_cast<double*>(local_sum); 553 } 554 } 555 556 else if(datatype == MPI_FLOAT) 557 { 558 float* sum_buf = static_cast<float*>(mpi_scan_recvbuf); 559 float* recv_buf = static_cast<float*>(recvbuf); 560 561 if(mpi_rank != 0) 562 { 563 if(op == MPI_SUM) 564 { 565 if(ep_rank_loc == 0) 566 { 567 copy(sum_buf, sum_buf+count, recv_buf); 568 } 569 else 570 { 571 for(int i=0; i<count; i++) 572 { 573 recv_buf[i] += sum_buf[i]; 574 } 575 } 576 } 577 else if (op == MPI_MAX) 578 { 579 if(ep_rank_loc == 0) 580 { 581 copy(sum_buf, sum_buf+count, recv_buf); 582 } 583 else 584 { 585 for(int i=0; i<count; i++) 586 { 587 recv_buf[i] = max(recv_buf[i], sum_buf[i]); 588 } 589 } 590 } 591 else if(op == MPI_MIN) 592 { 593 if(ep_rank_loc == 0) 594 { 595 copy(sum_buf, sum_buf+count, recv_buf); 596 } 597 else 598 { 599 for(int i=0; i<count; i++) 600 { 601 recv_buf[i] = min(recv_buf[i], sum_buf[i]); 602 } 603 } 604 } 605 else 606 { 607 printf("Support operator for MPI_Scan is MPI_SUM, MPI_MAX, and MPI_MIN\n"); 608 exit(1); 609 } 610 } 611 612 delete[] static_cast<float*>(mpi_scan_recvbuf); 613 if(ep_rank_loc == 0) 614 { 615 delete[] static_cast<float*>(local_sum); 616 } 617 } 618 619 else if(datatype == MPI_INT) 620 { 621 int* sum_buf = static_cast<int*>(mpi_scan_recvbuf); 622 int* recv_buf = static_cast<int*>(recvbuf); 623 624 if(mpi_rank != 0) 625 { 626 if(op == MPI_SUM) 627 { 628 if(ep_rank_loc == 0) 629 { 630 copy(sum_buf, sum_buf+count, recv_buf); 631 } 632 else 633 { 634 for(int i=0; i<count; i++) 635 { 636 recv_buf[i] += sum_buf[i]; 637 } 638 } 639 } 640 else if (op == MPI_MAX) 641 { 642 if(ep_rank_loc == 0) 643 { 644 copy(sum_buf, sum_buf+count, recv_buf); 645 } 646 else 647 { 648 for(int i=0; i<count; i++) 649 { 650 recv_buf[i] = max(recv_buf[i], sum_buf[i]); 651 } 652 } 653 } 654 else if(op == MPI_MIN) 655 { 656 if(ep_rank_loc == 0) 657 { 658 copy(sum_buf, sum_buf+count, recv_buf); 659 } 660 else 661 { 662 for(int i=0; i<count; i++) 663 { 664 recv_buf[i] = min(recv_buf[i], sum_buf[i]); 665 } 666 } 667 } 668 else 669 { 670 printf("Support operator for MPI_Scan is MPI_SUM, MPI_MAX, and MPI_MIN\n"); 671 exit(1); 672 } 673 } 674 675 delete[] static_cast<int*>(mpi_scan_recvbuf); 676 if(ep_rank_loc == 0) 677 { 678 delete[] static_cast<int*>(local_sum); 679 } 680 } 681 682 else if(datatype == MPI_CHAR) 683 { 684 char* sum_buf = static_cast<char*>(mpi_scan_recvbuf); 685 char* recv_buf = static_cast<char*>(recvbuf); 686 687 if(mpi_rank != 0) 688 { 689 if(op == MPI_SUM) 690 { 691 if(ep_rank_loc == 0) 692 { 693 copy(sum_buf, sum_buf+count, recv_buf); 694 } 695 else 696 { 697 for(int i=0; i<count; i++) 698 { 699 recv_buf[i] += sum_buf[i]; 700 } 701 } 702 } 703 else if (op == MPI_MAX) 704 { 705 if(ep_rank_loc == 0) 706 { 707 copy(sum_buf, sum_buf+count, recv_buf); 708 } 709 else 710 { 711 for(int i=0; i<count; i++) 712 { 713 recv_buf[i] = max(recv_buf[i], sum_buf[i]); 714 } 715 } 716 } 717 else if(op == MPI_MIN) 718 { 719 if(ep_rank_loc == 0) 720 { 721 copy(sum_buf, sum_buf+count, recv_buf); 722 } 723 else 724 { 725 for(int i=0; i<count; i++) 726 { 727 recv_buf[i] = min(recv_buf[i], sum_buf[i]); 728 } 729 } 730 } 731 else 732 { 733 printf("Support operator for MPI_Scan is MPI_SUM, MPI_MAX, and MPI_MIN\n"); 734 exit(1); 735 } 736 } 737 738 delete[] static_cast<char*>(mpi_scan_recvbuf); 739 if(ep_rank_loc == 0) 740 { 741 delete[] static_cast<char*>(local_sum); 742 } 743 } 744 745 else if(datatype == MPI_LONG) 746 { 747 long* sum_buf = static_cast<long*>(mpi_scan_recvbuf); 748 long* recv_buf = static_cast<long*>(recvbuf); 749 750 if(mpi_rank != 0) 751 { 752 if(op == MPI_SUM) 753 { 754 if(ep_rank_loc == 0) 755 { 756 copy(sum_buf, sum_buf+count, recv_buf); 757 } 758 else 759 { 760 for(int i=0; i<count; i++) 761 { 762 recv_buf[i] += sum_buf[i]; 763 } 764 } 765 } 766 else if (op == MPI_MAX) 767 { 768 if(ep_rank_loc == 0) 769 { 770 copy(sum_buf, sum_buf+count, recv_buf); 771 } 772 else 773 { 774 for(int i=0; i<count; i++) 775 { 776 recv_buf[i] = max(recv_buf[i], sum_buf[i]); 777 } 778 } 779 } 780 else if(op == MPI_MIN) 781 { 782 if(ep_rank_loc == 0) 783 { 784 copy(sum_buf, sum_buf+count, recv_buf); 785 } 786 else 787 { 788 for(int i=0; i<count; i++) 789 { 790 recv_buf[i] = min(recv_buf[i], sum_buf[i]); 791 } 792 } 793 } 794 else 795 { 796 printf("Support operator for MPI_Scan is MPI_SUM, MPI_MAX, and MPI_MIN\n"); 797 exit(1); 798 } 799 } 800 801 delete[] static_cast<long*>(mpi_scan_recvbuf); 802 if(ep_rank_loc == 0) 803 { 804 delete[] static_cast<long*>(local_sum); 805 } 806 } 807 808 else if(datatype == MPI_UNSIGNED_LONG) 809 { 810 unsigned long* sum_buf = static_cast<unsigned long*>(mpi_scan_recvbuf); 811 unsigned long* recv_buf = static_cast<unsigned long*>(recvbuf); 812 813 if(mpi_rank != 0) 814 { 815 if(op == MPI_SUM) 816 { 817 if(ep_rank_loc == 0) 818 { 819 copy(sum_buf, sum_buf+count, recv_buf); 820 } 821 else 822 { 823 for(int i=0; i<count; i++) 824 { 825 recv_buf[i] += sum_buf[i]; 826 } 827 } 828 } 829 else if (op == MPI_MAX) 830 { 831 if(ep_rank_loc == 0) 832 { 833 copy(sum_buf, sum_buf+count, recv_buf); 834 } 835 else 836 { 837 for(int i=0; i<count; i++) 838 { 839 recv_buf[i] = max(recv_buf[i], sum_buf[i]); 840 } 841 } 842 } 843 else if(op == MPI_MIN) 844 { 845 if(ep_rank_loc == 0) 846 { 847 copy(sum_buf, sum_buf+count, recv_buf); 848 } 849 else 850 { 851 for(int i=0; i<count; i++) 852 { 853 recv_buf[i] = min(recv_buf[i], sum_buf[i]); 854 } 855 } 856 } 857 else 858 { 859 printf("Support operator for MPI_Scan is MPI_SUM, MPI_MAX, and MPI_MIN\n"); 860 exit(1); 861 } 862 } 863 864 delete[] static_cast<unsigned long*>(mpi_scan_recvbuf); 865 if(ep_rank_loc == 0) 866 { 867 delete[] static_cast<unsigned long*>(local_sum); 868 } 869 } 870 871 872 } 873 874 251 ::MPI_Exscan(MPI_IN_PLACE, tmp_recvbuf, count, to_mpi_type(datatype), to_mpi_op(op), to_mpi_comm(comm.mpi_comm)); 252 253 // printf(" ID=%d : %d %d \n", ep_rank, static_cast<int*>(tmp_recvbuf)[0], static_cast<int*>(tmp_recvbuf)[1]); 254 255 MPI_Exscan_local(tmp_sendbuf, tmp_recvbuf, count, datatype, op, comm); 256 257 // printf(" ID=%d : after local tmp_sendbuf = %d %d ; tmp_recvbuf = %d %d \n", ep_rank, static_cast<int*>(tmp_sendbuf)[0], static_cast<int*>(tmp_sendbuf)[1], static_cast<int*>(tmp_recvbuf)[0], static_cast<int*>(tmp_recvbuf)[1]); 258 259 260 261 if(ep_rank != my_src) 262 { 263 MPI_Request request[2]; 264 MPI_Status status[2]; 265 266 MPI_Isend(tmp_recvbuf, count, datatype, my_src, my_src, comm, &request[0]); 267 268 MPI_Irecv(recvbuf, count, datatype, my_dst, ep_rank, comm, &request[1]); 269 270 MPI_Waitall(2, request, status); 271 } 272 273 else memcpy(recvbuf, tmp_recvbuf, datasize*count); 274 275 276 277 278 delete[] tmp_sendbuf; 279 delete[] tmp_recvbuf; 280 281 } 875 282 876 283 } -
XIOS/dev/branch_openmp/extern/src_ep_dev/ep_fortran.cpp
r1196 r1287 65 65 #endif 66 66 67 if(base_comm != MPI_COMM_NULL_STD)67 if(base_comm != static_cast< ::MPI_Comm>(MPI_COMM_NULL.mpi_comm)) 68 68 { 69 69 if(omp_get_thread_num() == 0) -
XIOS/dev/branch_openmp/extern/src_ep_dev/ep_free.cpp
r1134 r1287 6 6 { 7 7 8 9 10 8 int MPI_Comm_free(MPI_Comm *comm) 11 9 { … … 13 11 if(! comm->is_ep) 14 12 { 15 if(comm->mpi_comm != MPI_COMM_NULL_STD)13 if(comm->mpi_comm != static_cast< ::MPI_Comm>(MPI_COMM_NULL.mpi_comm)) 16 14 { 17 15 ::MPI_Comm mpi_comm = static_cast< ::MPI_Comm>(comm->mpi_comm); … … 41 39 Debug("comm is EP, mpi_comm_ptr != NULL\n"); 42 40 43 if(comm->my_buffer != NULL)44 {45 if(comm->my_buffer->buf_int != NULL) delete[] comm->my_buffer->buf_int; Debug("buf_int freed\n");46 if(comm->my_buffer->buf_float != NULL) delete[] comm->my_buffer->buf_float; Debug("buf_float freed\n");47 if(comm->my_buffer->buf_double != NULL) delete[] comm->my_buffer->buf_double; Debug("buf_double freed\n");48 if(comm->my_buffer->buf_long != NULL) delete[] comm->my_buffer->buf_long; Debug("buf_long freed\n");49 if(comm->my_buffer->buf_ulong != NULL) delete[] comm->my_buffer->buf_ulong; Debug("buf_ulong freed\n");50 if(comm->my_buffer->buf_char != NULL) delete[] comm->my_buffer->buf_char; Debug("buf_char freed\n");51 }52 41 53 42 if(comm->ep_barrier != NULL) … … 77 66 } 78 67 79 if(comm->mpi_comm != MPI_COMM_NULL_STD) 68 if( comm->mpi_comm != static_cast< ::MPI_Comm>(MPI_COMM_NULL.mpi_comm) 69 && comm->mpi_comm != static_cast< ::MPI_Comm>(MPI_COMM_WORLD.mpi_comm)) 80 70 { 81 71 ::MPI_Comm mpi_comm = static_cast< ::MPI_Comm>(comm->mpi_comm); … … 107 97 Debug("comm is EP, mpi_comm_ptr != NULL\n"); 108 98 109 if(comm->my_buffer != NULL)110 {111 if(comm->my_buffer->buf_int != NULL) delete[] comm->my_buffer->buf_int; Debug("buf_int freed\n");112 if(comm->my_buffer->buf_float != NULL) delete[] comm->my_buffer->buf_float; Debug("buf_float freed\n");113 if(comm->my_buffer->buf_double != NULL) delete[] comm->my_buffer->buf_double; Debug("buf_double freed\n");114 if(comm->my_buffer->buf_long != NULL) delete[] comm->my_buffer->buf_long; Debug("buf_long freed\n");115 if(comm->my_buffer->buf_ulong != NULL) delete[] comm->my_buffer->buf_ulong; Debug("buf_ulong freed\n");116 if(comm->my_buffer->buf_char != NULL) delete[] comm->my_buffer->buf_char; Debug("buf_char freed\n");117 }118 99 119 100 if(comm->ep_barrier != NULL) … … 151 132 } 152 133 153 if(comm->mpi_comm != MPI_COMM_NULL_STD)134 if(comm->mpi_comm != static_cast< ::MPI_Comm>(MPI_COMM_NULL.mpi_comm)) 154 135 { 155 136 ::MPI_Comm mpi_comm = static_cast< ::MPI_Comm>(comm->mpi_comm); -
XIOS/dev/branch_openmp/extern/src_ep_dev/ep_gather.cpp
r1164 r1287 9 9 #include <mpi.h> 10 10 #include "ep_declaration.hpp" 11 11 #include "ep_mpi.hpp" 12 12 13 13 using namespace std; … … 16 16 { 17 17 18 int MPI_Gather_local(const void *sendbuf, int count, MPI_Datatype datatype, void *recvbuf, MPI_Comm comm)18 int MPI_Gather_local(const void *sendbuf, int count, MPI_Datatype datatype, void *recvbuf, int local_root, MPI_Comm comm) 19 19 { 20 if(datatype == MPI_INT) 20 assert(valid_type(datatype)); 21 22 ::MPI_Aint datasize, lb; 23 ::MPI_Type_get_extent(to_mpi_type(datatype), &lb, &datasize); 24 25 int ep_rank_loc = comm.ep_comm_ptr->size_rank_info[1].first; 26 int num_ep = comm.ep_comm_ptr->size_rank_info[1].second; 27 28 #pragma omp critical (_gather) 29 comm.my_buffer->void_buffer[ep_rank_loc] = const_cast< void* >(sendbuf); 30 31 MPI_Barrier_local(comm); 32 33 if(ep_rank_loc == local_root) 21 34 { 22 Debug("datatype is INT\n"); 23 return MPI_Gather_local_int(sendbuf, count, recvbuf, comm); 24 } 25 else if(datatype == MPI_FLOAT) 26 { 27 Debug("datatype is FLOAT\n"); 28 return MPI_Gather_local_float(sendbuf, count, recvbuf, comm); 29 } 30 else if(datatype == MPI_DOUBLE) 31 { 32 Debug("datatype is DOUBLE\n"); 33 return MPI_Gather_local_double(sendbuf, count, recvbuf, comm); 34 } 35 else if(datatype == MPI_LONG) 36 { 37 Debug("datatype is LONG\n"); 38 return MPI_Gather_local_long(sendbuf, count, recvbuf, comm); 39 } 40 else if(datatype == MPI_UNSIGNED_LONG) 41 { 42 Debug("datatype is uLONG\n"); 43 return MPI_Gather_local_ulong(sendbuf, count, recvbuf, comm); 44 } 45 else if(datatype == MPI_CHAR) 46 { 47 Debug("datatype is CHAR\n"); 48 return MPI_Gather_local_char(sendbuf, count, recvbuf, comm); 49 } 50 else 51 { 52 printf("MPI_Gather Datatype not supported!\n"); 53 exit(0); 54 } 55 } 35 for(int i=0; i<num_ep; i++) 36 memcpy(recvbuf + datasize * i * count, comm.my_buffer->void_buffer[i], datasize * count); 56 37 57 int MPI_Gather_local_int(const void *sendbuf, int count, void *recvbuf, MPI_Comm comm) 58 { 59 int my_rank = comm.ep_comm_ptr->size_rank_info[1].first; 60 int num_ep = comm.ep_comm_ptr->size_rank_info[1].second; 61 62 int *buffer = comm.my_buffer->buf_int; 63 int *send_buf = static_cast<int*>(const_cast<void*>(sendbuf)); 64 int *recv_buf = static_cast<int*>(recvbuf); 65 66 if(my_rank == 0) 67 { 68 copy(send_buf, send_buf+count, recv_buf); 38 //printf("local_recvbuf = %d %d \n", static_cast<int*>(recvbuf)[0], static_cast<int*>(recvbuf)[1] ); 69 39 } 70 40 71 for(int j=0; j<count; j+=BUFFER_SIZE) 72 { 73 for(int k=1; k<num_ep; k++) 74 { 75 if(my_rank == k) 76 { 77 #pragma omp critical (write_to_buffer) 78 { 79 copy(send_buf+j, send_buf+j+min(BUFFER_SIZE, count-j), buffer); 80 #pragma omp flush 81 } 82 } 83 84 MPI_Barrier_local(comm); 85 86 if(my_rank == 0) 87 { 88 #pragma omp flush 89 #pragma omp critical (read_from_buffer) 90 { 91 copy(buffer, buffer+min(BUFFER_SIZE, count-j), recv_buf+j+k*count); 92 } 93 } 94 95 MPI_Barrier_local(comm); 96 } 97 } 41 MPI_Barrier_local(comm); 98 42 } 99 100 int MPI_Gather_local_float(const void *sendbuf, int count, void *recvbuf, MPI_Comm comm)101 {102 int my_rank = comm.ep_comm_ptr->size_rank_info[1].first;103 int num_ep = comm.ep_comm_ptr->size_rank_info[1].second;104 105 float *buffer = comm.my_buffer->buf_float;106 float *send_buf = static_cast<float*>(const_cast<void*>(sendbuf));107 float *recv_buf = static_cast<float*>(recvbuf);108 109 if(my_rank == 0)110 {111 copy(send_buf, send_buf+count, recv_buf);112 }113 114 for(int j=0; j<count; j+=BUFFER_SIZE)115 {116 for(int k=1; k<num_ep; k++)117 {118 if(my_rank == k)119 {120 #pragma omp critical (write_to_buffer)121 {122 copy(send_buf+j, send_buf+j+min(BUFFER_SIZE, count-j), buffer);123 #pragma omp flush124 }125 }126 127 MPI_Barrier_local(comm);128 129 if(my_rank == 0)130 {131 #pragma omp flush132 #pragma omp critical (read_from_buffer)133 {134 copy(buffer, buffer+min(BUFFER_SIZE, count-j), recv_buf+j+k*count);135 }136 }137 138 MPI_Barrier_local(comm);139 }140 }141 }142 143 int MPI_Gather_local_double(const void *sendbuf, int count, void *recvbuf, MPI_Comm comm)144 {145 int my_rank = comm.ep_comm_ptr->size_rank_info[1].first;146 int num_ep = comm.ep_comm_ptr->size_rank_info[1].second;147 148 double *buffer = comm.my_buffer->buf_double;149 double *send_buf = static_cast<double*>(const_cast<void*>(sendbuf));150 double *recv_buf = static_cast<double*>(recvbuf);151 152 if(my_rank == 0)153 {154 copy(send_buf, send_buf+count, recv_buf);155 }156 157 for(int j=0; j<count; j+=BUFFER_SIZE)158 {159 for(int k=1; k<num_ep; k++)160 {161 if(my_rank == k)162 {163 #pragma omp critical (write_to_buffer)164 {165 copy(send_buf+j, send_buf+j+min(BUFFER_SIZE, count-j), buffer);166 #pragma omp flush167 }168 }169 170 MPI_Barrier_local(comm);171 172 if(my_rank == 0)173 {174 #pragma omp flush175 #pragma omp critical (read_from_buffer)176 {177 copy(buffer, buffer+min(BUFFER_SIZE, count-j), recv_buf+j+k*count);178 }179 }180 181 MPI_Barrier_local(comm);182 }183 }184 }185 186 int MPI_Gather_local_long(const void *sendbuf, int count, void *recvbuf, MPI_Comm comm)187 {188 int my_rank = comm.ep_comm_ptr->size_rank_info[1].first;189 int num_ep = comm.ep_comm_ptr->size_rank_info[1].second;190 191 long *buffer = comm.my_buffer->buf_long;192 long *send_buf = static_cast<long*>(const_cast<void*>(sendbuf));193 long *recv_buf = static_cast<long*>(recvbuf);194 195 if(my_rank == 0)196 {197 copy(send_buf, send_buf+count, recv_buf);198 }199 200 for(int j=0; j<count; j+=BUFFER_SIZE)201 {202 for(int k=1; k<num_ep; k++)203 {204 if(my_rank == k)205 {206 #pragma omp critical (write_to_buffer)207 {208 copy(send_buf+j, send_buf+j+min(BUFFER_SIZE, count-j), buffer);209 #pragma omp flush210 }211 }212 213 MPI_Barrier_local(comm);214 215 if(my_rank == 0)216 {217 #pragma omp flush218 #pragma omp critical (read_from_buffer)219 {220 copy(buffer, buffer+min(BUFFER_SIZE, count-j), recv_buf+j+k*count);221 }222 }223 224 MPI_Barrier_local(comm);225 }226 }227 }228 229 int MPI_Gather_local_ulong(const void *sendbuf, int count, void *recvbuf, MPI_Comm comm)230 {231 int my_rank = comm.ep_comm_ptr->size_rank_info[1].first;232 int num_ep = comm.ep_comm_ptr->size_rank_info[1].second;233 234 unsigned long *buffer = comm.my_buffer->buf_ulong;235 unsigned long *send_buf = static_cast<unsigned long*>(const_cast<void*>(sendbuf));236 unsigned long *recv_buf = static_cast<unsigned long*>(recvbuf);237 238 if(my_rank == 0)239 {240 copy(send_buf, send_buf+count, recv_buf);241 }242 243 for(int j=0; j<count; j+=BUFFER_SIZE)244 {245 for(int k=1; k<num_ep; k++)246 {247 if(my_rank == k)248 {249 #pragma omp critical (write_to_buffer)250 {251 copy(send_buf+j, send_buf+j+min(BUFFER_SIZE, count-j), buffer);252 #pragma omp flush253 }254 }255 256 MPI_Barrier_local(comm);257 258 if(my_rank == 0)259 {260 #pragma omp flush261 #pragma omp critical (read_from_buffer)262 {263 copy(buffer, buffer+min(BUFFER_SIZE, count-j), recv_buf+j+k*count);264 }265 }266 267 MPI_Barrier_local(comm);268 }269 }270 }271 272 273 int MPI_Gather_local_char(const void *sendbuf, int count, void *recvbuf, MPI_Comm comm)274 {275 int my_rank = comm.ep_comm_ptr->size_rank_info[1].first;276 int num_ep = comm.ep_comm_ptr->size_rank_info[1].second;277 278 char *buffer = comm.my_buffer->buf_char;279 char *send_buf = static_cast<char*>(const_cast<void*>(sendbuf));280 char *recv_buf = static_cast<char*>(recvbuf);281 282 if(my_rank == 0)283 {284 copy(send_buf, send_buf+count, recv_buf);285 }286 287 for(int j=0; j<count; j+=BUFFER_SIZE)288 {289 for(int k=1; k<num_ep; k++)290 {291 if(my_rank == k)292 {293 #pragma omp critical (write_to_buffer)294 {295 copy(send_buf+j, send_buf+j+min(BUFFER_SIZE, count-j), buffer);296 #pragma omp flush297 }298 }299 300 MPI_Barrier_local(comm);301 302 if(my_rank == 0)303 {304 #pragma omp flush305 #pragma omp critical (read_from_buffer)306 {307 copy(buffer, buffer+min(BUFFER_SIZE, count-j), recv_buf+j+k*count);308 }309 }310 311 MPI_Barrier_local(comm);312 }313 }314 }315 316 317 43 318 44 int MPI_Gather(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPI_Comm comm) 319 45 { 320 if(!comm.is_ep && comm.mpi_comm)46 if(!comm.is_ep) 321 47 { 322 ::MPI_Gather(const_cast<void*>(sendbuf), sendcount, static_cast< ::MPI_Datatype>(sendtype), recvbuf, recvcount, static_cast< ::MPI_Datatype>(recvtype), 323 root, static_cast< ::MPI_Comm>(comm.mpi_comm)); 324 return 0; 48 return ::MPI_Gather(const_cast<void*>(sendbuf), sendcount, to_mpi_type(sendtype), recvbuf, recvcount, to_mpi_type(recvtype), 49 root, to_mpi_comm(comm.mpi_comm)); 325 50 } 326 51 327 if(!comm.mpi_comm) return 0; 328 329 MPI_Bcast(&recvcount, 1, MPI_INT, root, comm); 52 assert(sendcount == recvcount && sendtype == recvtype); 330 53 331 assert(static_cast< ::MPI_Datatype>(sendtype) == static_cast< ::MPI_Datatype>(recvtype) && sendcount == recvcount); 332 333 MPI_Datatype datatype = sendtype; 334 int count = sendcount; 335 336 int ep_rank, ep_rank_loc, mpi_rank; 337 int ep_size, num_ep, mpi_size; 338 339 ep_rank = comm.ep_comm_ptr->size_rank_info[0].first; 340 ep_rank_loc = comm.ep_comm_ptr->size_rank_info[1].first; 341 mpi_rank = comm.ep_comm_ptr->size_rank_info[2].first; 342 ep_size = comm.ep_comm_ptr->size_rank_info[0].second; 343 num_ep = comm.ep_comm_ptr->size_rank_info[1].second; 344 mpi_size = comm.ep_comm_ptr->size_rank_info[2].second; 345 54 int ep_rank = comm.ep_comm_ptr->size_rank_info[0].first; 55 int ep_rank_loc = comm.ep_comm_ptr->size_rank_info[1].first; 56 int mpi_rank = comm.ep_comm_ptr->size_rank_info[2].first; 57 int ep_size = comm.ep_comm_ptr->size_rank_info[0].second; 58 int num_ep = comm.ep_comm_ptr->size_rank_info[1].second; 59 int mpi_size = comm.ep_comm_ptr->size_rank_info[2].second; 346 60 347 61 int root_mpi_rank = comm.rank_map->at(root).second; 348 62 int root_ep_loc = comm.rank_map->at(root).first; 349 63 64 ::MPI_Aint datasize, lb; 65 ::MPI_Type_get_extent(to_mpi_type(sendtype), &lb, &datasize); 350 66 351 ::MPI_Aint datasize, lb; 67 bool is_master = (ep_rank_loc==0 && mpi_rank != root_mpi_rank ) || ep_rank == root; 68 bool is_root = ep_rank == root; 352 69 353 ::MPI_Type_get_extent(static_cast< ::MPI_Datatype>(datatype), &lb, &datasize);70 void* local_recvbuf; 354 71 355 void *local_gather_recvbuf; 356 void *master_recvbuf; 357 if(ep_rank_loc == 0 && mpi_rank == root_mpi_rank && root_ep_loc != 0) 72 if(is_master) 358 73 { 359 master_recvbuf = new void*[datasize*ep_size*count];74 local_recvbuf = new void*[datasize * num_ep * sendcount]; 360 75 } 361 76 362 if(ep_rank_loc==0) 363 { 364 local_gather_recvbuf = new void*[datasize*num_ep*count]; 365 } 366 367 // local gather to master 368 MPI_Gather_local(sendbuf, count, datatype, local_gather_recvbuf, comm); 369 370 //MPI_Gather 371 372 if(ep_rank_loc == 0) 373 { 374 int *gatherv_recvcnt; 375 int *gatherv_displs; 376 int gatherv_cnt = count*num_ep; 377 378 gatherv_recvcnt = new int[mpi_size]; 379 gatherv_displs = new int[mpi_size]; 77 void* tmp_recvbuf; 78 if(is_root) tmp_recvbuf = new void*[datasize * recvcount * ep_size]; 380 79 381 80 382 ::MPI_Allgather(&gatherv_cnt, 1, MPI_INT_STD, gatherv_recvcnt, 1, MPI_INT_STD, static_cast< ::MPI_Comm>(comm.mpi_comm)); 81 if(mpi_rank == root_mpi_rank) MPI_Gather_local(sendbuf, sendcount, sendtype, local_recvbuf, root_ep_loc, comm); 82 else MPI_Gather_local(sendbuf, sendcount, sendtype, local_recvbuf, 0, comm); 383 83 384 gatherv_displs[0] = 0; 385 for(int i=1; i<mpi_size; i++) 84 std::vector<int> recvcounts(mpi_size, 0); 85 std::vector<int> displs(mpi_size, 0); 86 87 88 if(is_master) 89 { 90 for(int i=0; i<ep_size; i++) 386 91 { 387 gatherv_displs[i] = gatherv_recvcnt[i-1] + gatherv_displs[i-1];92 recvcounts[comm.rank_map->at(i).second]+=sendcount; 388 93 } 389 94 390 if(root_ep_loc != 0) // gather to root_master 95 for(int i=1; i<mpi_size; i++) 96 displs[i] = displs[i-1] + recvcounts[i-1]; 97 98 ::MPI_Gatherv(local_recvbuf, sendcount*num_ep, sendtype, tmp_recvbuf, recvcounts.data(), displs.data(), recvtype, root_mpi_rank, to_mpi_comm(comm.mpi_comm)); 99 } 100 101 102 // reorder data 103 if(is_root) 104 { 105 // printf("tmp_recvbuf = %d %d %d %d %d %d %d %d\n", static_cast<int*>(tmp_recvbuf)[0], static_cast<int*>(tmp_recvbuf)[1], 106 // static_cast<int*>(tmp_recvbuf)[2], static_cast<int*>(tmp_recvbuf)[3], 107 // static_cast<int*>(tmp_recvbuf)[4], static_cast<int*>(tmp_recvbuf)[5], 108 // static_cast<int*>(tmp_recvbuf)[6], static_cast<int*>(tmp_recvbuf)[7] ); 109 110 int offset; 111 for(int i=0; i<ep_size; i++) 391 112 { 392 ::MPI_Gatherv(local_gather_recvbuf, count*num_ep, static_cast< ::MPI_Datatype>(datatype), master_recvbuf, gatherv_recvcnt, 393 gatherv_displs, static_cast< ::MPI_Datatype>(datatype), root_mpi_rank, static_cast< ::MPI_Comm>(comm.mpi_comm)); 394 } 395 else 396 { 397 ::MPI_Gatherv(local_gather_recvbuf, count*num_ep, static_cast< ::MPI_Datatype>(datatype), recvbuf, gatherv_recvcnt, 398 gatherv_displs, static_cast< ::MPI_Datatype>(datatype), root_mpi_rank, static_cast< ::MPI_Comm>(comm.mpi_comm)); 113 offset = displs[comm.rank_map->at(i).second] + comm.rank_map->at(i).first * sendcount; 114 memcpy(recvbuf + i*sendcount*datasize, tmp_recvbuf+offset*datasize, sendcount*datasize); 115 116 399 117 } 400 118 401 delete[] gatherv_recvcnt;402 delete[] gatherv_displs;403 119 } 404 120 405 121 406 if( root_ep_loc != 0 && mpi_rank == root_mpi_rank) // root is not master, master send to root and root receive from master122 if(is_master) 407 123 { 408 innode_memcpy(0, master_recvbuf, root_ep_loc, recvbuf, count*ep_size, datatype, comm);124 delete[] local_recvbuf; 409 125 } 410 411 412 413 if(ep_rank_loc==0) 414 { 415 if(datatype == MPI_INT) 416 { 417 delete[] static_cast<int*>(local_gather_recvbuf); 418 } 419 else if(datatype == MPI_FLOAT) 420 { 421 delete[] static_cast<float*>(local_gather_recvbuf); 422 } 423 else if(datatype == MPI_DOUBLE) 424 { 425 delete[] static_cast<double*>(local_gather_recvbuf); 426 } 427 else if(datatype == MPI_CHAR) 428 { 429 delete[] static_cast<char*>(local_gather_recvbuf); 430 } 431 else if(datatype == MPI_LONG) 432 { 433 delete[] static_cast<long*>(local_gather_recvbuf); 434 } 435 else// if(datatype == MPI_UNSIGNED_LONG) 436 { 437 delete[] static_cast<unsigned long*>(local_gather_recvbuf); 438 } 439 440 if(root_ep_loc != 0 && mpi_rank == root_mpi_rank) delete[] master_recvbuf; 441 } 126 if(is_root) delete[] tmp_recvbuf; 127 442 128 } 443 129 444 445 int MPI_Allgather(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPI_Comm comm)446 {447 if(!comm.is_ep && comm.mpi_comm)448 {449 ::MPI_Allgather(const_cast<void*>(sendbuf), sendcount, static_cast< ::MPI_Datatype>(sendtype), recvbuf, recvcount, static_cast< ::MPI_Datatype>(recvtype),450 static_cast< ::MPI_Comm>(comm.mpi_comm));451 return 0;452 }453 454 if(!comm.mpi_comm) return 0;455 456 assert(static_cast< ::MPI_Datatype>(sendtype) == static_cast< ::MPI_Datatype>(recvtype) && sendcount == recvcount);457 458 MPI_Datatype datatype = sendtype;459 int count = sendcount;460 461 int ep_rank, ep_rank_loc, mpi_rank;462 int ep_size, num_ep, mpi_size;463 464 ep_rank = comm.ep_comm_ptr->size_rank_info[0].first;465 ep_rank_loc = comm.ep_comm_ptr->size_rank_info[1].first;466 mpi_rank = comm.ep_comm_ptr->size_rank_info[2].first;467 ep_size = comm.ep_comm_ptr->size_rank_info[0].second;468 num_ep = comm.ep_comm_ptr->size_rank_info[1].second;469 mpi_size = comm.ep_comm_ptr->size_rank_info[2].second;470 471 472 ::MPI_Aint datasize, lb;473 474 ::MPI_Type_get_extent(static_cast< ::MPI_Datatype>(datatype), &lb, &datasize);475 476 void *local_gather_recvbuf;477 478 if(ep_rank_loc==0)479 {480 local_gather_recvbuf = new void*[datasize*num_ep*count];481 }482 483 // local gather to master484 MPI_Gather_local(sendbuf, count, datatype, local_gather_recvbuf, comm);485 486 //MPI_Gather487 488 if(ep_rank_loc == 0)489 {490 int *gatherv_recvcnt;491 int *gatherv_displs;492 int gatherv_cnt = count*num_ep;493 494 gatherv_recvcnt = new int[mpi_size];495 gatherv_displs = new int[mpi_size];496 497 ::MPI_Allgather(&gatherv_cnt, 1, MPI_INT_STD, gatherv_recvcnt, 1, MPI_INT_STD, static_cast< ::MPI_Comm>(comm.mpi_comm));498 499 gatherv_displs[0] = 0;500 for(int i=1; i<mpi_size; i++)501 {502 gatherv_displs[i] = gatherv_recvcnt[i-1] + gatherv_displs[i-1];503 }504 505 ::MPI_Allgatherv(local_gather_recvbuf, count*num_ep, static_cast< ::MPI_Datatype>(datatype), recvbuf, gatherv_recvcnt,506 gatherv_displs, static_cast< ::MPI_Datatype>(datatype), static_cast< ::MPI_Comm>(comm.mpi_comm));507 508 delete[] gatherv_recvcnt;509 delete[] gatherv_displs;510 }511 512 MPI_Bcast_local(recvbuf, count*ep_size, datatype, comm);513 514 515 if(ep_rank_loc==0)516 {517 if(datatype == MPI_INT)518 {519 delete[] static_cast<int*>(local_gather_recvbuf);520 }521 else if(datatype == MPI_FLOAT)522 {523 delete[] static_cast<float*>(local_gather_recvbuf);524 }525 else if(datatype == MPI_DOUBLE)526 {527 delete[] static_cast<double*>(local_gather_recvbuf);528 }529 else if(datatype == MPI_CHAR)530 {531 delete[] static_cast<char*>(local_gather_recvbuf);532 }533 else if(datatype == MPI_LONG)534 {535 delete[] static_cast<long*>(local_gather_recvbuf);536 }537 else// if(datatype == MPI_UNSIGNED_LONG)538 {539 delete[] static_cast<unsigned long*>(local_gather_recvbuf);540 }541 }542 }543 544 545 130 } -
XIOS/dev/branch_openmp/extern/src_ep_dev/ep_gatherv.cpp
r1218 r1287 9 9 #include <mpi.h> 10 10 #include "ep_declaration.hpp" 11 11 #include "ep_mpi.hpp" 12 12 13 13 using namespace std; … … 16 16 { 17 17 18 int MPI_Gatherv_local(const void *sendbuf, int count, MPI_Datatype datatype, void *recvbuf, const int recvcounts[], const int displs[], MPI_Comm comm)18 int MPI_Gatherv_local(const void *sendbuf, int count, MPI_Datatype datatype, void *recvbuf, const int recvcounts[], const int displs[], int local_root, MPI_Comm comm) 19 19 { 20 if(datatype == MPI_INT) 20 assert(valid_type(datatype)); 21 22 ::MPI_Aint datasize, lb; 23 ::MPI_Type_get_extent(to_mpi_type(datatype), &lb, &datasize); 24 25 int ep_rank_loc = comm.ep_comm_ptr->size_rank_info[1].first; 26 int num_ep = comm.ep_comm_ptr->size_rank_info[1].second; 27 28 //if(ep_rank_loc == local_root) printf("local_gatherv : recvcounts = %d %d\n\n", recvcounts[0], recvcounts[1]); 29 //if(ep_rank_loc == local_root) printf("local_gatherv : displs = %d %d\n\n", displs[0], displs[1]); 30 31 #pragma omp critical (_gatherv) 32 comm.my_buffer->void_buffer[ep_rank_loc] = const_cast< void* >(sendbuf); 33 34 MPI_Barrier_local(comm); 35 36 if(ep_rank_loc == local_root) 21 37 { 22 Debug("datatype is INT\n"); 23 return MPI_Gatherv_local_int(sendbuf, count, recvbuf, recvcounts, displs, comm); 38 for(int i=0; i<num_ep; i++) 39 memcpy(recvbuf + datasize*displs[i], comm.my_buffer->void_buffer[i], datasize*recvcounts[i]); 40 24 41 } 25 else if(datatype == MPI_FLOAT) 26 { 27 Debug("datatype is FLOAT\n"); 28 return MPI_Gatherv_local_float(sendbuf, count, recvbuf, recvcounts, displs, comm); 29 } 30 else if(datatype == MPI_DOUBLE) 31 { 32 Debug("datatype is DOUBLE\n"); 33 return MPI_Gatherv_local_double(sendbuf, count, recvbuf, recvcounts, displs, comm); 34 } 35 else if(datatype == MPI_LONG) 36 { 37 Debug("datatype is LONG\n"); 38 return MPI_Gatherv_local_long(sendbuf, count, recvbuf, recvcounts, displs, comm); 39 } 40 else if(datatype == MPI_UNSIGNED_LONG) 41 { 42 Debug("datatype is uLONG\n"); 43 return MPI_Gatherv_local_ulong(sendbuf, count, recvbuf, recvcounts, displs, comm); 44 } 45 else if(datatype == MPI_CHAR) 46 { 47 Debug("datatype is CHAR\n"); 48 return MPI_Gatherv_local_char(sendbuf, count, recvbuf, recvcounts, displs, comm); 49 } 50 else 51 { 52 printf("MPI_Gatherv Datatype not supported!\n"); 53 exit(0); 54 } 42 43 MPI_Barrier_local(comm); 55 44 } 56 45 57 int MPI_Gatherv_local_int(const void *sendbuf, int count, void *recvbuf, const int recvcounts[], const int displs[], MPI_Comm comm) 58 { 59 int my_rank = comm.ep_comm_ptr->size_rank_info[1].first; 60 int num_ep = comm.ep_comm_ptr->size_rank_info[1].second; 61 62 int *buffer = comm.my_buffer->buf_int; 63 int *send_buf = static_cast<int*>(const_cast<void*>(sendbuf)); 64 int *recv_buf = static_cast<int*>(recvbuf); 65 66 if(my_rank == 0) 67 { 68 assert(count == recvcounts[0]); 69 copy(send_buf, send_buf+count, recv_buf + displs[0]); 70 } 71 72 for(int j=0; count!=0? j<count: j<count+1; j+=BUFFER_SIZE) 73 { 74 for(int k=1; k<num_ep; k++) 75 { 76 if(my_rank == k) 77 { 78 #pragma omp critical (write_to_buffer) 79 { 80 if(count!=0) copy(send_buf+j, send_buf + min(BUFFER_SIZE, count-j) , buffer); 81 #pragma omp flush 82 } 83 } 84 85 MPI_Barrier_local(comm); 86 87 if(my_rank == 0) 88 { 89 #pragma omp flush 90 #pragma omp critical (read_from_buffer) 91 { 92 copy(buffer, buffer+min(BUFFER_SIZE, recvcounts[k]-j), recv_buf+j+displs[k]); 93 } 94 } 95 96 MPI_Barrier_local(comm); 97 } 98 } 99 } 100 101 int MPI_Gatherv_local_float(const void *sendbuf, int count, void *recvbuf, const int recvcounts[], const int displs[], MPI_Comm comm) 102 { 103 int my_rank = comm.ep_comm_ptr->size_rank_info[1].first; 104 int num_ep = comm.ep_comm_ptr->size_rank_info[1].second; 105 106 float *buffer = comm.my_buffer->buf_float; 107 float *send_buf = static_cast<float*>(const_cast<void*>(sendbuf)); 108 float *recv_buf = static_cast<float*>(recvbuf); 109 110 if(my_rank == 0) 111 { 112 assert(count == recvcounts[0]); 113 copy(send_buf, send_buf+count, recv_buf + displs[0]); 114 } 115 116 for(int j=0; count!=0? j<count: j<count+1; j+=BUFFER_SIZE) 117 { 118 for(int k=1; k<num_ep; k++) 119 { 120 if(my_rank == k) 121 { 122 #pragma omp critical (write_to_buffer) 123 { 124 if(count!=0) copy(send_buf+j, send_buf + min(BUFFER_SIZE, count-j) , buffer); 125 #pragma omp flush 126 } 127 } 128 129 MPI_Barrier_local(comm); 130 131 if(my_rank == 0) 132 { 133 #pragma omp flush 134 #pragma omp critical (read_from_buffer) 135 { 136 copy(buffer, buffer+min(BUFFER_SIZE, recvcounts[k]-j), recv_buf+j+displs[k]); 137 } 138 } 139 140 MPI_Barrier_local(comm); 141 } 142 } 143 } 144 145 int MPI_Gatherv_local_double(const void *sendbuf, int count, void *recvbuf, const int recvcounts[], const int displs[], MPI_Comm comm) 146 { 147 int my_rank = comm.ep_comm_ptr->size_rank_info[1].first; 148 int num_ep = comm.ep_comm_ptr->size_rank_info[1].second; 149 150 double *buffer = comm.my_buffer->buf_double; 151 double *send_buf = static_cast<double*>(const_cast<void*>(sendbuf)); 152 double *recv_buf = static_cast<double*>(recvbuf); 153 154 if(my_rank == 0) 155 { 156 assert(count == recvcounts[0]); 157 copy(send_buf, send_buf+count, recv_buf + displs[0]); 158 } 159 160 for(int j=0; count!=0? j<count: j<count+1; j+=BUFFER_SIZE) 161 { 162 for(int k=1; k<num_ep; k++) 163 { 164 if(my_rank == k) 165 { 166 #pragma omp critical (write_to_buffer) 167 { 168 if(count!=0) copy(send_buf+j, send_buf + min(BUFFER_SIZE, count-j) , buffer); 169 #pragma omp flush 170 } 171 } 172 173 MPI_Barrier_local(comm); 174 175 if(my_rank == 0) 176 { 177 #pragma omp flush 178 #pragma omp critical (read_from_buffer) 179 { 180 copy(buffer, buffer+min(BUFFER_SIZE, recvcounts[k]-j), recv_buf+j+displs[k]); 181 } 182 } 183 184 MPI_Barrier_local(comm); 185 } 186 } 187 } 188 189 int MPI_Gatherv_local_long(const void *sendbuf, int count, void *recvbuf, const int recvcounts[], const int displs[], MPI_Comm comm) 190 { 191 int my_rank = comm.ep_comm_ptr->size_rank_info[1].first; 192 int num_ep = comm.ep_comm_ptr->size_rank_info[1].second; 193 194 long *buffer = comm.my_buffer->buf_long; 195 long *send_buf = static_cast<long*>(const_cast<void*>(sendbuf)); 196 long *recv_buf = static_cast<long*>(recvbuf); 197 198 if(my_rank == 0) 199 { 200 assert(count == recvcounts[0]); 201 copy(send_buf, send_buf+count, recv_buf + displs[0]); 202 } 203 204 for(int j=0; count!=0? j<count: j<count+1; j+=BUFFER_SIZE) 205 { 206 for(int k=1; k<num_ep; k++) 207 { 208 if(my_rank == k) 209 { 210 #pragma omp critical (write_to_buffer) 211 { 212 if(count!=0)copy(send_buf+j, send_buf + min(BUFFER_SIZE, count-j) , buffer); 213 #pragma omp flush 214 } 215 } 216 217 MPI_Barrier_local(comm); 218 219 if(my_rank == 0) 220 { 221 #pragma omp flush 222 #pragma omp critical (read_from_buffer) 223 { 224 copy(buffer, buffer+min(BUFFER_SIZE, recvcounts[k]-j), recv_buf+j+displs[k]); 225 } 226 } 227 228 MPI_Barrier_local(comm); 229 } 230 } 231 } 232 233 int MPI_Gatherv_local_ulong(const void *sendbuf, int count, void *recvbuf, const int recvcounts[], const int displs[], MPI_Comm comm) 234 { 235 int my_rank = comm.ep_comm_ptr->size_rank_info[1].first; 236 int num_ep = comm.ep_comm_ptr->size_rank_info[1].second; 237 238 unsigned long *buffer = comm.my_buffer->buf_ulong; 239 unsigned long *send_buf = static_cast<unsigned long*>(const_cast<void*>(sendbuf)); 240 unsigned long *recv_buf = static_cast<unsigned long*>(recvbuf); 241 242 if(my_rank == 0) 243 { 244 assert(count == recvcounts[0]); 245 copy(send_buf, send_buf+count, recv_buf + displs[0]); 246 } 247 248 for(int j=0; count!=0? j<count: j<count+1; j+=BUFFER_SIZE) 249 { 250 for(int k=1; k<num_ep; k++) 251 { 252 if(my_rank == k) 253 { 254 #pragma omp critical (write_to_buffer) 255 { 256 if(count!=0) copy(send_buf+j, send_buf + min(BUFFER_SIZE, count-j) , buffer); 257 #pragma omp flush 258 } 259 } 260 261 MPI_Barrier_local(comm); 262 263 if(my_rank == 0) 264 { 265 #pragma omp flush 266 #pragma omp critical (read_from_buffer) 267 { 268 copy(buffer, buffer+min(BUFFER_SIZE, recvcounts[k]-j), recv_buf+j+displs[k]); 269 } 270 } 271 272 MPI_Barrier_local(comm); 273 } 274 } 275 } 276 277 int MPI_Gatherv_local_char(const void *sendbuf, int count, void *recvbuf, const int recvcounts[], const int displs[], MPI_Comm comm) 278 { 279 int my_rank = comm.ep_comm_ptr->size_rank_info[1].first; 280 int num_ep = comm.ep_comm_ptr->size_rank_info[1].second; 281 282 char *buffer = comm.my_buffer->buf_char; 283 char *send_buf = static_cast<char*>(const_cast<void*>(sendbuf)); 284 char *recv_buf = static_cast<char*>(recvbuf); 285 286 if(my_rank == 0) 287 { 288 assert(count == recvcounts[0]); 289 copy(send_buf, send_buf+count, recv_buf + displs[0]); 290 } 291 292 for(int j=0; count!=0? j<count: j<count+1; j+=BUFFER_SIZE) 293 { 294 for(int k=1; k<num_ep; k++) 295 { 296 if(my_rank == k) 297 { 298 #pragma omp critical (write_to_buffer) 299 { 300 if(count!=0) copy(send_buf+j, send_buf + min(BUFFER_SIZE, count-j) , buffer); 301 #pragma omp flush 302 } 303 } 304 305 MPI_Barrier_local(comm); 306 307 if(my_rank == 0) 308 { 309 #pragma omp flush 310 #pragma omp critical (read_from_buffer) 311 { 312 copy(buffer, buffer+min(BUFFER_SIZE, recvcounts[k]-j), recv_buf+j+displs[k]); 313 } 314 } 315 316 MPI_Barrier_local(comm); 317 } 318 } 319 } 320 321 322 int MPI_Gatherv(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, const int recvcounts[], const int displs[], 46 int MPI_Gatherv(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, const int input_recvcounts[], const int input_displs[], 323 47 MPI_Datatype recvtype, int root, MPI_Comm comm) 324 48 { 325 49 326 if(!comm.is_ep && comm.mpi_comm)50 if(!comm.is_ep) 327 51 { 328 ::MPI_Gatherv(const_cast<void*>(sendbuf), sendcount, static_cast< ::MPI_Datatype>(sendtype), recvbuf, const_cast<int*>( recvcounts), const_cast<int*>(displs),52 ::MPI_Gatherv(const_cast<void*>(sendbuf), sendcount, static_cast< ::MPI_Datatype>(sendtype), recvbuf, const_cast<int*>(input_recvcounts), const_cast<int*>(input_displs), 329 53 static_cast< ::MPI_Datatype>(recvtype), root, static_cast< ::MPI_Comm>(comm.mpi_comm)); 330 54 return 0; 331 55 } 332 56 333 if(!comm.mpi_comm) return 0;334 57 335 assert(s tatic_cast< ::MPI_Datatype>(sendtype) == static_cast< ::MPI_Datatype>(recvtype));58 assert(sendtype == recvtype); 336 59 337 MPI_Datatype datatype = sendtype;338 int count = sendcount;339 340 int ep_rank, ep_rank_loc, mpi_rank;341 int ep_size, num_ep, mpi_size;342 343 ep_rank = comm.ep_comm_ptr->size_rank_info[0].first;344 ep_rank_loc = comm.ep_comm_ptr->size_rank_info[1].first;345 mpi_rank = comm.ep_comm_ptr->size_rank_info[2].first;346 ep_size = comm.ep_comm_ptr->size_rank_info[0].second;347 num_ep = comm.ep_comm_ptr->size_rank_info[1].second;348 mpi_size = comm.ep_comm_ptr->size_rank_info[2].second;349 60 350 351 352 if(ep_size == mpi_size) 353 return ::MPI_Gatherv(sendbuf, sendcount, static_cast< ::MPI_Datatype>(datatype), recvbuf, recvcounts, displs, 354 static_cast< ::MPI_Datatype>(datatype), root, static_cast< ::MPI_Comm>(comm.mpi_comm)); 355 356 if(ep_rank != root) 357 { 358 recvcounts = new int[ep_size]; 359 displs = new int[ep_size]; 360 } 361 362 MPI_Bcast(const_cast< int* >(displs), ep_size, MPI_INT, root, comm); 363 MPI_Bcast(const_cast< int* >(recvcounts), ep_size, MPI_INT, root, comm); 364 365 366 int recv_plus_displs[ep_size]; 367 for(int i=0; i<ep_size; i++) recv_plus_displs[i] = recvcounts[i] + displs[i]; 368 369 for(int j=0; j<mpi_size; j++) 370 { 371 if(recv_plus_displs[j*num_ep] < displs[j*num_ep+1] || 372 recv_plus_displs[j*num_ep + num_ep -1] < displs[j*num_ep + num_ep -2]) 373 { 374 Debug("Call special implementation of mpi_gatherv. 1st condition not OK\n"); 375 return MPI_Allgatherv_special(sendbuf, sendcount, sendtype, recvbuf, recvcounts, displs, recvtype, comm); 376 } 377 378 for(int i=1; i<num_ep-1; i++) 379 { 380 if(recv_plus_displs[j*num_ep+i] < displs[j*num_ep+i+1] || 381 recv_plus_displs[j*num_ep+i] < displs[j*num_ep+i-1]) 382 { 383 Debug("Call special implementation of mpi_gatherv. 2nd condition not OK\n"); 384 return MPI_Allgatherv_special(sendbuf, sendcount, sendtype, recvbuf, recvcounts, displs, recvtype, comm); 385 } 386 } 387 } 388 389 390 int root_mpi_rank = comm.rank_map->at(root).second; 391 int root_ep_loc = comm.rank_map->at(root).first; 392 393 394 ::MPI_Aint datasize, lb; 395 396 ::MPI_Type_get_extent(static_cast< ::MPI_Datatype>(datatype), &lb, &datasize); 397 398 void *local_gather_recvbuf; 399 int buffer_size; 400 void *master_recvbuf; 401 402 if(ep_rank_loc == 0 && mpi_rank == root_mpi_rank && root_ep_loc != 0) 403 { 404 master_recvbuf = new void*[sizeof(recvbuf)]; 405 assert(root_ep_loc == 0); 406 } 407 408 if(ep_rank_loc==0) 409 { 410 buffer_size = *std::max_element(recv_plus_displs+ep_rank, recv_plus_displs+ep_rank+num_ep); 411 412 local_gather_recvbuf = new void*[datasize*buffer_size]; 413 } 414 415 MPI_Gatherv_local(sendbuf, count, datatype, local_gather_recvbuf, recvcounts+ep_rank-ep_rank_loc, displs+ep_rank-ep_rank_loc, comm); 416 417 //MPI_Gather 418 if(ep_rank_loc == 0) 419 { 420 int *mpi_recvcnt= new int[mpi_size]; 421 int *mpi_displs= new int[mpi_size]; 422 423 int buff_start = *std::min_element(displs+ep_rank, displs+ep_rank+num_ep);; 424 int buff_end = buffer_size; 425 426 int mpi_sendcnt = buff_end - buff_start; 427 428 429 ::MPI_Gather(&mpi_sendcnt, 1, MPI_INT_STD, mpi_recvcnt, 1, MPI_INT_STD, root_mpi_rank, static_cast< ::MPI_Comm>(comm.mpi_comm)); 430 ::MPI_Gather(&buff_start, 1, MPI_INT_STD, mpi_displs, 1, MPI_INT_STD, root_mpi_rank, static_cast< ::MPI_Comm>(comm.mpi_comm)); 431 432 if(root_ep_loc == 0) 433 { ::MPI_Gatherv(local_gather_recvbuf + datasize*buff_start, mpi_sendcnt, static_cast< ::MPI_Datatype>(datatype), recvbuf, mpi_recvcnt, 434 mpi_displs, static_cast< ::MPI_Datatype>(datatype), root_mpi_rank, static_cast< ::MPI_Comm>(comm.mpi_comm)); 435 } 436 else // gatherv to master_recvbuf 437 { ::MPI_Gatherv(local_gather_recvbuf + datasize*buff_start, mpi_sendcnt, static_cast< ::MPI_Datatype>(datatype), master_recvbuf, mpi_recvcnt, 438 mpi_displs, static_cast< ::MPI_Datatype>(datatype), root_mpi_rank, static_cast< ::MPI_Comm>(comm.mpi_comm)); 439 } 440 441 delete[] mpi_recvcnt; 442 delete[] mpi_displs; 443 } 444 445 int global_min_displs = *std::min_element(displs, displs+ep_size); 446 int global_recvcnt = *std::max_element(recv_plus_displs, recv_plus_displs+ep_size); 447 448 449 if(root_ep_loc != 0 && mpi_rank == root_mpi_rank) // root is not master, master send to root and root receive from master 450 { 451 innode_memcpy(0, master_recvbuf+datasize*global_min_displs, root_ep_loc, recvbuf+datasize*global_min_displs, global_recvcnt, datatype, comm); 452 if(ep_rank_loc == 0) delete[] master_recvbuf; 453 } 454 455 456 457 if(ep_rank_loc==0) 458 { 459 if(datatype == MPI_INT) 460 { 461 delete[] static_cast<int*>(local_gather_recvbuf); 462 } 463 else if(datatype == MPI_FLOAT) 464 { 465 delete[] static_cast<float*>(local_gather_recvbuf); 466 } 467 else if(datatype == MPI_DOUBLE) 468 { 469 delete[] static_cast<double*>(local_gather_recvbuf); 470 } 471 else if(datatype == MPI_LONG) 472 { 473 delete[] static_cast<long*>(local_gather_recvbuf); 474 } 475 else if(datatype == MPI_UNSIGNED_LONG) 476 { 477 delete[] static_cast<unsigned long*>(local_gather_recvbuf); 478 } 479 else // if(datatype == MPI_CHAR) 480 { 481 delete[] static_cast<char*>(local_gather_recvbuf); 482 } 483 } 484 else 485 { 486 delete[] recvcounts; 487 delete[] displs; 488 } 489 return 0; 490 } 491 492 493 494 int MPI_Allgatherv(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, const int recvcounts[], const int displs[], 495 MPI_Datatype recvtype, MPI_Comm comm) 496 { 497 498 if(!comm.is_ep && comm.mpi_comm) 499 { 500 ::MPI_Allgatherv(sendbuf, sendcount, static_cast< ::MPI_Datatype>(sendtype), recvbuf, recvcounts, displs, 501 static_cast< ::MPI_Datatype>(recvtype), static_cast< ::MPI_Comm>(comm.mpi_comm)); 502 return 0; 503 } 504 505 if(!comm.mpi_comm) return 0; 506 507 assert(static_cast< ::MPI_Datatype>(sendtype) == static_cast< ::MPI_Datatype>(recvtype)); 508 509 510 MPI_Datatype datatype = sendtype; 511 int count = sendcount; 512 513 int ep_rank, ep_rank_loc, mpi_rank; 514 int ep_size, num_ep, mpi_size; 515 516 ep_rank = comm.ep_comm_ptr->size_rank_info[0].first; 517 ep_rank_loc = comm.ep_comm_ptr->size_rank_info[1].first; 518 mpi_rank = comm.ep_comm_ptr->size_rank_info[2].first; 519 ep_size = comm.ep_comm_ptr->size_rank_info[0].second; 520 num_ep = comm.ep_comm_ptr->size_rank_info[1].second; 521 mpi_size = comm.ep_comm_ptr->size_rank_info[2].second; 522 523 if(ep_size == mpi_size) // needed by servers 524 return ::MPI_Allgatherv(sendbuf, sendcount, static_cast< ::MPI_Datatype>(datatype), recvbuf, recvcounts, displs, 525 static_cast< ::MPI_Datatype>(datatype), static_cast< ::MPI_Comm>(comm.mpi_comm)); 526 527 int recv_plus_displs[ep_size]; 528 for(int i=0; i<ep_size; i++) recv_plus_displs[i] = recvcounts[i] + displs[i]; 529 530 531 for(int j=0; j<mpi_size; j++) 532 { 533 if(recv_plus_displs[j*num_ep] < displs[j*num_ep+1] || 534 recv_plus_displs[j*num_ep + num_ep -1] < displs[j*num_ep + num_ep -2]) 535 { 536 printf("proc %d/%d Call special implementation of mpi_allgatherv.\n", ep_rank, ep_size); 537 for(int k=0; k<ep_size; k++) 538 printf("recv_plus_displs[%d] = %d\t displs[%d] = %d\n", k, recv_plus_displs[k], k, displs[k]); 539 540 return MPI_Allgatherv_special(sendbuf, sendcount, sendtype, recvbuf, recvcounts, displs, recvtype, comm); 541 } 542 543 for(int i=1; i<num_ep-1; i++) 544 { 545 if(recv_plus_displs[j*num_ep+i] < displs[j*num_ep+i+1] || 546 recv_plus_displs[j*num_ep+i] < displs[j*num_ep+i-1]) 547 { 548 printf("proc %d/%d Call special implementation of mpi_allgatherv.\n", ep_rank, ep_size); 549 return MPI_Allgatherv_special(sendbuf, sendcount, sendtype, recvbuf, recvcounts, displs, recvtype, comm); 550 } 551 } 552 } 553 554 ::MPI_Aint datasize, lb; 555 556 ::MPI_Type_get_extent(static_cast< ::MPI_Datatype>(datatype), &lb, &datasize); 557 558 void *local_gather_recvbuf; 559 int buffer_size; 560 561 if(ep_rank_loc==0) 562 { 563 buffer_size = *std::max_element(recv_plus_displs+ep_rank, recv_plus_displs+ep_rank+num_ep); 564 565 local_gather_recvbuf = new void*[datasize*buffer_size]; 566 } 567 568 // local gather to master 569 MPI_Gatherv_local(sendbuf, count, datatype, local_gather_recvbuf, recvcounts+ep_rank-ep_rank_loc, displs+ep_rank-ep_rank_loc, comm); 570 571 //MPI_Gather 572 if(ep_rank_loc == 0) 573 { 574 int *mpi_recvcnt= new int[mpi_size]; 575 int *mpi_displs= new int[mpi_size]; 576 577 int buff_start = *std::min_element(displs+ep_rank, displs+ep_rank+num_ep);; 578 int buff_end = buffer_size; 579 580 int mpi_sendcnt = buff_end - buff_start; 581 582 583 ::MPI_Allgather(&mpi_sendcnt, 1, MPI_INT_STD, mpi_recvcnt, 1, MPI_INT_STD, static_cast< ::MPI_Comm>(comm.mpi_comm)); 584 ::MPI_Allgather(&buff_start, 1, MPI_INT_STD, mpi_displs, 1, MPI_INT_STD, static_cast< ::MPI_Comm>(comm.mpi_comm)); 585 586 587 ::MPI_Allgatherv((char*)local_gather_recvbuf + datasize*buff_start, mpi_sendcnt, static_cast< ::MPI_Datatype>(datatype), recvbuf, mpi_recvcnt, 588 mpi_displs, static_cast< ::MPI_Datatype>(datatype), static_cast< ::MPI_Comm>(comm.mpi_comm)); 589 590 delete[] mpi_recvcnt; 591 delete[] mpi_displs; 592 } 593 594 int global_min_displs = *std::min_element(displs, displs+ep_size); 595 int global_recvcnt = *std::max_element(recv_plus_displs, recv_plus_displs+ep_size); 596 597 MPI_Bcast_local(recvbuf+datasize*global_min_displs, global_recvcnt, datatype, comm); 598 599 if(ep_rank_loc==0) 600 { 601 if(datatype == MPI_INT) 602 { 603 delete[] static_cast<int*>(local_gather_recvbuf); 604 } 605 else if(datatype == MPI_FLOAT) 606 { 607 delete[] static_cast<float*>(local_gather_recvbuf); 608 } 609 else if(datatype == MPI_DOUBLE) 610 { 611 delete[] static_cast<double*>(local_gather_recvbuf); 612 } 613 else if(datatype == MPI_LONG) 614 { 615 delete[] static_cast<long*>(local_gather_recvbuf); 616 } 617 else if(datatype == MPI_UNSIGNED_LONG) 618 { 619 delete[] static_cast<unsigned long*>(local_gather_recvbuf); 620 } 621 else // if(datatype == MPI_CHAR) 622 { 623 delete[] static_cast<char*>(local_gather_recvbuf); 624 } 625 } 626 } 627 628 int MPI_Gatherv_special(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, const int recvcounts[], const int displs[], 629 MPI_Datatype recvtype, int root, MPI_Comm comm) 630 { 631 int ep_rank, ep_rank_loc, mpi_rank; 632 int ep_size, num_ep, mpi_size; 633 634 ep_rank = comm.ep_comm_ptr->size_rank_info[0].first; 635 ep_rank_loc = comm.ep_comm_ptr->size_rank_info[1].first; 636 mpi_rank = comm.ep_comm_ptr->size_rank_info[2].first; 637 ep_size = comm.ep_comm_ptr->size_rank_info[0].second; 638 num_ep = comm.ep_comm_ptr->size_rank_info[1].second; 639 mpi_size = comm.ep_comm_ptr->size_rank_info[2].second; 61 int ep_rank = comm.ep_comm_ptr->size_rank_info[0].first; 62 int ep_rank_loc = comm.ep_comm_ptr->size_rank_info[1].first; 63 int mpi_rank = comm.ep_comm_ptr->size_rank_info[2].first; 64 int ep_size = comm.ep_comm_ptr->size_rank_info[0].second; 65 int num_ep = comm.ep_comm_ptr->size_rank_info[1].second; 66 int mpi_size = comm.ep_comm_ptr->size_rank_info[2].second; 640 67 641 68 int root_mpi_rank = comm.rank_map->at(root).second; … … 643 70 644 71 ::MPI_Aint datasize, lb; 645 ::MPI_Type_get_extent( static_cast< ::MPI_Datatype>(sendtype), &lb, &datasize);72 ::MPI_Type_get_extent(to_mpi_type(sendtype), &lb, &datasize); 646 73 647 void *local_gather_recvbuf;648 int buffer_size;74 int *recvcounts; 75 int* displs; 649 76 650 int *local_displs = new int[num_ep]; 651 int *local_rvcnts = new int[num_ep]; 652 for(int i=0; i<num_ep; i++) local_rvcnts[i] = recvcounts[ep_rank-ep_rank_loc + i]; 653 local_displs[0] = 0; 654 for(int i=1; i<num_ep; i++) local_displs[i] = local_displs[i-1] + local_rvcnts[i-1]; 77 recvcounts = new int[ep_size]; 78 displs = new int[ep_size]; 655 79 656 if(ep_rank_loc==0) 657 { 658 buffer_size = local_displs[num_ep-1] + recvcounts[ep_rank+num_ep-1]; 659 local_gather_recvbuf = new void*[datasize*buffer_size]; 80 81 bool is_master = (ep_rank_loc==0 && mpi_rank != root_mpi_rank ) || ep_rank == root; 82 bool is_root = ep_rank == root; 83 84 void* local_recvbuf; 85 std::vector<int>local_recvcounts(num_ep, 0); 86 std::vector<int>local_displs(num_ep, 0); 87 88 89 if(is_root) 90 { 91 copy(input_recvcounts, input_recvcounts+ep_size, recvcounts); 92 copy(input_displs, input_displs+ep_size, displs); 660 93 } 661 94 662 // local gather to master663 MPI_ Gatherv_local(sendbuf, sendcount, sendtype, local_gather_recvbuf, local_rvcnts, local_displs, comm); // all sendbuf gathered to master95 MPI_Bcast(recvcounts, ep_size, MPI_INT, root, comm); 96 MPI_Bcast(displs, ep_size, MPI_INT, root, comm); 664 97 665 int **mpi_recvcnts = new int*[num_ep]; 666 int **mpi_displs = new int*[num_ep]; 667 for(int i=0; i<num_ep; i++) 98 if(mpi_rank == root_mpi_rank) MPI_Gather_local(&sendcount, 1, MPI_INT, local_recvcounts.data(), root_ep_loc, comm); 99 else MPI_Gather_local(&sendcount, 1, MPI_INT, local_recvcounts.data(), 0, comm); 100 101 102 103 if(is_master) 668 104 { 669 mpi_recvcnts[i] = new int[mpi_size]; 670 mpi_displs[i] = new int[mpi_size]; 671 for(int j=0; j<mpi_size; j++) 105 int local_recvbuf_size = std::accumulate(local_recvcounts.begin(), local_recvcounts.end(), 0); 106 107 for(int i=1; i<num_ep; i++) 108 local_displs[i] = local_displs[i-1] + local_recvcounts[i-1]; 109 110 local_recvbuf = new void*[datasize * local_recvbuf_size]; 111 } 112 113 if(mpi_rank == root_mpi_rank) MPI_Gatherv_local(sendbuf, sendcount, sendtype, local_recvbuf, local_recvcounts.data(), local_displs.data(), root_ep_loc, comm); 114 else MPI_Gatherv_local(sendbuf, sendcount, sendtype, local_recvbuf, local_recvcounts.data(), local_displs.data(), 0, comm); 115 116 //if(is_master) printf("local_recvbuf = %d %d %d %d\n", static_cast<int*>(local_recvbuf)[0], static_cast<int*>(local_recvbuf)[1], static_cast<int*>(local_recvbuf)[2], static_cast<int*>(local_recvbuf)[3]); 117 118 void* tmp_recvbuf; 119 int tmp_recvbuf_size = std::accumulate(recvcounts, recvcounts+ep_size, 0); 120 121 if(is_root) tmp_recvbuf = new void*[datasize * tmp_recvbuf_size]; 122 123 124 std::vector<int> mpi_recvcounts(mpi_size, 0); 125 std::vector<int> mpi_displs(mpi_size, 0); 126 127 128 if(is_master) 129 { 130 for(int i=0; i<ep_size; i++) 672 131 { 673 mpi_recvcnts[i][j] = recvcounts[j*num_ep + i]; 674 mpi_displs[i][j] = displs[j*num_ep + i]; 675 } 676 } 677 678 void *master_recvbuf; 679 if(ep_rank_loc == 0 && mpi_rank == root_mpi_rank && root_ep_loc != 0) master_recvbuf = new void*[sizeof(recvbuf)]; 680 681 if(ep_rank_loc == 0 && root_ep_loc == 0) // master in MPI_Allgatherv loop 682 for(int i=0; i<num_ep; i++) 683 { 684 ::MPI_Gatherv(local_gather_recvbuf + datasize*local_displs[i], recvcounts[ep_rank+i], static_cast< ::MPI_Datatype>(sendtype), recvbuf, mpi_recvcnts[i], mpi_displs[i], 685 static_cast< ::MPI_Datatype>(recvtype), root_mpi_rank, static_cast< ::MPI_Comm>(comm.mpi_comm)); 686 } 687 if(ep_rank_loc == 0 && root_ep_loc != 0) 688 for(int i=0; i<num_ep; i++) 689 { 690 ::MPI_Gatherv(local_gather_recvbuf + datasize*local_displs[i], recvcounts[ep_rank+i], static_cast< ::MPI_Datatype>(sendtype), master_recvbuf, mpi_recvcnts[i], mpi_displs[i], 691 static_cast< ::MPI_Datatype>(recvtype), root_mpi_rank, static_cast< ::MPI_Comm>(comm.mpi_comm)); 132 mpi_recvcounts[comm.rank_map->at(i).second]+=recvcounts[i]; 692 133 } 693 134 694 135 695 if(root_ep_loc != 0 && mpi_rank == root_mpi_rank) // root is not master, master send to root and root receive from master 136 137 for(int i=1; i<mpi_size; i++) 138 mpi_displs[i] = mpi_displs[i-1] + mpi_recvcounts[i-1]; 139 140 141 142 ::MPI_Gatherv(local_recvbuf, sendcount*num_ep, sendtype, tmp_recvbuf, mpi_recvcounts.data(), mpi_displs.data(), recvtype, root_mpi_rank, to_mpi_comm(comm.mpi_comm)); 143 } 144 145 146 // reorder data 147 if(is_root) 696 148 { 149 // printf("tmp_recvbuf =\n"); 150 // for(int i=0; i<ep_size*sendcount; i++) printf("%d\t", static_cast<int*>(tmp_recvbuf)[i]); 151 // printf("\n"); 152 153 int offset; 697 154 for(int i=0; i<ep_size; i++) 698 innode_memcpy(0, master_recvbuf + datasize*displs[i], root_ep_loc, recvbuf + datasize*displs[i], recvcounts[i], sendtype, comm); 155 { 156 int extra = 0; 157 for(int j=0, k=0; j<ep_size, k<comm.rank_map->at(i).first; j++) 158 if(comm.rank_map->at(i).second == comm.rank_map->at(j).second) 159 { 160 extra += recvcounts[j]; 161 k++; 162 } 699 163 700 if(ep_rank_loc == 0) delete[] master_recvbuf; 164 offset = mpi_displs[comm.rank_map->at(i).second] + extra; 165 166 memcpy(recvbuf+displs[i]*datasize, tmp_recvbuf+offset*datasize, recvcounts[i]*datasize); 167 168 //printf("recvbuf[%d] = tmp_recvbuf[%d] \n", i, offset); 169 170 } 171 172 // printf("recvbuf =\n"); 173 // for(int i=0; i<ep_size*sendcount; i++) printf("%d\t", static_cast<int*>(recvbuf)[i]); 174 // printf("\n"); 175 701 176 } 702 177 703 704 delete[] local_displs; 705 delete[] local_rvcnts; 706 for(int i=0; i<num_ep; i++) { delete[] mpi_recvcnts[i]; 707 delete[] mpi_displs[i]; } 708 delete[] mpi_recvcnts; 709 delete[] mpi_displs; 710 if(ep_rank_loc==0) 178 delete[] recvcounts; 179 delete[] displs; 180 181 if(is_master) 711 182 { 712 if(sendtype == MPI_INT) 713 { 714 delete[] static_cast<int*>(local_gather_recvbuf); 715 } 716 else if(sendtype == MPI_FLOAT) 717 { 718 delete[] static_cast<float*>(local_gather_recvbuf); 719 } 720 else if(sendtype == MPI_DOUBLE) 721 { 722 delete[] static_cast<double*>(local_gather_recvbuf); 723 } 724 else if(sendtype == MPI_LONG) 725 { 726 delete[] static_cast<long*>(local_gather_recvbuf); 727 } 728 else if(sendtype == MPI_UNSIGNED_LONG) 729 { 730 delete[] static_cast<unsigned long*>(local_gather_recvbuf); 731 } 732 else // if(sendtype == MPI_CHAR) 733 { 734 delete[] static_cast<char*>(local_gather_recvbuf); 735 } 183 delete[] local_recvbuf; 736 184 } 185 if(is_root) delete[] tmp_recvbuf; 737 186 } 738 187 739 int MPI_Allgatherv_special(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, const int recvcounts[], const int displs[],740 MPI_Datatype recvtype, MPI_Comm comm)741 {742 int ep_rank, ep_rank_loc, mpi_rank;743 int ep_size, num_ep, mpi_size;744 745 ep_rank = comm.ep_comm_ptr->size_rank_info[0].first;746 ep_rank_loc = comm.ep_comm_ptr->size_rank_info[1].first;747 mpi_rank = comm.ep_comm_ptr->size_rank_info[2].first;748 ep_size = comm.ep_comm_ptr->size_rank_info[0].second;749 num_ep = comm.ep_comm_ptr->size_rank_info[1].second;750 mpi_size = comm.ep_comm_ptr->size_rank_info[2].second;751 752 753 ::MPI_Aint datasize, lb;754 ::MPI_Type_get_extent(static_cast< ::MPI_Datatype>(sendtype), &lb, &datasize);755 756 void *local_gather_recvbuf;757 int buffer_size;758 759 int *local_displs = new int[num_ep];760 int *local_rvcnts = new int[num_ep];761 for(int i=0; i<num_ep; i++) local_rvcnts[i] = recvcounts[ep_rank-ep_rank_loc + i];762 local_displs[0] = 0;763 for(int i=1; i<num_ep; i++) local_displs[i] = local_displs[i-1] + local_rvcnts[i-1];764 765 if(ep_rank_loc==0)766 {767 buffer_size = local_displs[num_ep-1] + recvcounts[ep_rank+num_ep-1];768 local_gather_recvbuf = new void*[datasize*buffer_size];769 }770 771 // local gather to master772 MPI_Gatherv_local(sendbuf, sendcount, sendtype, local_gather_recvbuf, local_rvcnts, local_displs, comm); // all sendbuf gathered to master773 774 int **mpi_recvcnts = new int*[num_ep];775 int **mpi_displs = new int*[num_ep];776 for(int i=0; i<num_ep; i++)777 {778 mpi_recvcnts[i] = new int[mpi_size];779 mpi_displs[i] = new int[mpi_size];780 for(int j=0; j<mpi_size; j++)781 {782 mpi_recvcnts[i][j] = recvcounts[j*num_ep + i];783 mpi_displs[i][j] = displs[j*num_ep + i];784 }785 }786 787 if(ep_rank_loc == 0) // master in MPI_Allgatherv loop788 for(int i=0; i<num_ep; i++)789 {790 ::MPI_Allgatherv(local_gather_recvbuf + datasize*local_displs[i], recvcounts[ep_rank+i], static_cast< ::MPI_Datatype>(sendtype), recvbuf, mpi_recvcnts[i], mpi_displs[i],791 static_cast< ::MPI_Datatype>(recvtype), static_cast< ::MPI_Comm>(comm.mpi_comm));792 }793 794 for(int i=0; i<ep_size; i++)795 MPI_Bcast_local(recvbuf + datasize*displs[i], recvcounts[i], recvtype, comm);796 797 798 delete[] local_displs;799 delete[] local_rvcnts;800 for(int i=0; i<num_ep; i++) { delete[] mpi_recvcnts[i];801 delete[] mpi_displs[i]; }802 delete[] mpi_recvcnts;803 delete[] mpi_displs;804 if(ep_rank_loc==0)805 {806 if(sendtype == MPI_INT)807 {808 delete[] static_cast<int*>(local_gather_recvbuf);809 }810 else if(sendtype == MPI_FLOAT)811 {812 delete[] static_cast<float*>(local_gather_recvbuf);813 }814 else if(sendtype == MPI_DOUBLE)815 {816 delete[] static_cast<double*>(local_gather_recvbuf);817 }818 else if(sendtype == MPI_LONG)819 {820 delete[] static_cast<long*>(local_gather_recvbuf);821 }822 else if(sendtype == MPI_UNSIGNED_LONG)823 {824 delete[] static_cast<unsigned long*>(local_gather_recvbuf);825 }826 else // if(sendtype == MPI_CHAR)827 {828 delete[] static_cast<char*>(local_gather_recvbuf);829 }830 }831 }832 833 834 188 } -
XIOS/dev/branch_openmp/extern/src_ep_dev/ep_intercomm.cpp
r1196 r1287 32 32 33 33 34 35 34 if(ep_rank == local_leader) 36 35 { 37 ::MPI_Comm_rank(MPI_COMM_WORLD_STD, &leader_ranks[0]);36 MPI_Comm_rank(MPI_COMM_WORLD, &leader_ranks[0]); 38 37 39 38 leader_ranks[1] = mpi_size; 40 39 MPI_Comm_rank(peer_comm, &leader_ranks[2]); 41 40 42 MPI_Request req_s, req_r; 43 MPI_Isend(&leader_ranks[0], 3, MPI_INT_STD, remote_leader, tag, peer_comm, &req_s); 44 MPI_Status status; 45 MPI_Wait(&req_s, &status); 46 47 MPI_Irecv(&leader_ranks[3], 3, MPI_INT_STD, remote_leader, tag, peer_comm, &req_r); 48 MPI_Wait(&req_r, &status); 41 //printf("leader_ranks = %d, %d, %d\n", leader_ranks[0], leader_ranks[1], leader_ranks[2]); 42 MPI_Request request[2]; 43 MPI_Status status[2]; 44 45 MPI_Isend(&leader_ranks[0], 3, static_cast< ::MPI_Datatype>(MPI_INT), remote_leader, tag, peer_comm, &request[0]); 46 MPI_Irecv(&leader_ranks[3], 3, static_cast< ::MPI_Datatype>(MPI_INT), remote_leader, tag, peer_comm, &request[1]); 47 48 MPI_Waitall(2, request, status); 49 49 } 50 50 51 MPI_Bcast(leader_ranks, 6, MPI_INT_STD, local_leader, local_comm); 52 51 52 MPI_Bcast(leader_ranks, 6, static_cast< ::MPI_Datatype>(MPI_INT), local_leader, local_comm); 53 54 53 55 MPI_Barrier(local_comm); 56 54 57 55 58 if(leader_ranks[0] == leader_ranks[3]) … … 65 68 { 66 69 // change leader 67 if(ep_rank == local_leader) Debug("calling MPI_Intercomm_create_from_world\n");68 69 70 int new_local_leader; 70 71 … … 128 129 new_tag_in_world = TAG++; 129 130 } 130 MPI_Bcast(&new_tag_in_world, 1, MPI_INT_STD, new_local_leader, local_comm);131 if(ep_rank == local_leader) MPI_Send(&new_tag_in_world, 1, MPI_INT_STD, remote_leader, tag, peer_comm);131 MPI_Bcast(&new_tag_in_world, 1, static_cast< ::MPI_Datatype> (MPI_INT), new_local_leader, local_comm); 132 if(ep_rank == local_leader) MPI_Send(&new_tag_in_world, 1, static_cast< ::MPI_Datatype> (MPI_INT), remote_leader, tag, peer_comm); 132 133 } 133 134 else … … 136 137 { 137 138 MPI_Status status; 138 MPI_Recv(&new_tag_in_world, 1, MPI_INT_STD, remote_leader, tag, peer_comm, &status);139 } 140 MPI_Bcast(&new_tag_in_world, 1, MPI_INT_STD, new_local_leader, local_comm);139 MPI_Recv(&new_tag_in_world, 1, static_cast< ::MPI_Datatype> (MPI_INT), remote_leader, tag, peer_comm, &status); 140 } 141 MPI_Bcast(&new_tag_in_world, 1, static_cast< ::MPI_Datatype> (MPI_INT), new_local_leader, local_comm); 141 142 } 142 143 … … 144 145 if(ep_rank == new_local_leader) 145 146 { 146 ::MPI_Comm_rank( MPI_COMM_WORLD_STD, &leader_in_world[0]);147 } 148 149 MPI_Bcast(&leader_in_world[0], 1, MPI_INT_STD, new_local_leader, local_comm);147 ::MPI_Comm_rank(static_cast< ::MPI_Comm >(MPI_COMM_WORLD.mpi_comm), &leader_in_world[0]); 148 } 149 150 MPI_Bcast(&leader_in_world[0], 1, static_cast< ::MPI_Datatype> (MPI_INT), new_local_leader, local_comm); 150 151 151 152 152 153 if(ep_rank == local_leader) 153 154 { 154 MPI_Request req_s, req_r; 155 156 MPI_Isend(&leader_in_world[0], 1, MPI_INT_STD, remote_leader, tag, peer_comm, &req_s); 157 MPI_Irecv(&leader_in_world[1], 1, MPI_INT_STD, remote_leader, tag, peer_comm, &req_r); 158 159 MPI_Status status; 160 MPI_Wait(&req_s, &status); 161 MPI_Wait(&req_r, &status); 162 163 /* 164 MPI_Send(&leader_in_world[0], 1, MPI_INT_STD, remote_leader, tag, peer_comm); 165 MPI_Status status; 166 MPI_Recv(&leader_in_world[1], 1, MPI_INT_STD, remote_leader, tag, peer_comm, &status); 167 */ 168 } 169 170 155 MPI_Request request[2]; 156 MPI_Status status[2]; 157 158 MPI_Isend(&leader_in_world[0], 1, static_cast< ::MPI_Datatype> (MPI_INT), remote_leader, tag, peer_comm, &request[0]); 159 MPI_Irecv(&leader_in_world[1], 1, static_cast< ::MPI_Datatype> (MPI_INT), remote_leader, tag, peer_comm, &request[1]); 160 161 MPI_Waitall(2, request, status); 162 } 171 163 172 164 MPI_Bcast(&leader_in_world[1], 1, MPI_INT, local_leader, local_comm); 173 165 174 175 176 166 local_comm.ep_comm_ptr->comm_label = tag; 177 167 178 return MPI_Intercomm_create_from_world(local_comm, new_local_leader, MPI_COMM_WORLD_STD, leader_in_world[1], new_tag_in_world, newintercomm); 168 if(ep_rank == local_leader) Debug("calling MPI_Intercomm_create_from_world\n"); 169 170 return MPI_Intercomm_create_from_world(local_comm, new_local_leader, static_cast< ::MPI_Comm >(MPI_COMM_WORLD.mpi_comm), leader_in_world[1], new_tag_in_world, newintercomm); 171 179 172 } 180 173 } … … 194 187 return 0; 195 188 } 196 else if(comm.mpi_comm != MPI_COMM_NULL_STD)189 else if(comm.mpi_comm != static_cast< ::MPI_Comm>(MPI_COMM_NULL.mpi_comm)) 197 190 { 198 191 ::MPI_Comm mpi_comm = static_cast< ::MPI_Comm> (comm.mpi_comm); -
XIOS/dev/branch_openmp/extern/src_ep_dev/ep_intercomm_kernel.cpp
r1185 r1287 10 10 int MPI_Intercomm_create_kernel(MPI_Comm local_comm, int local_leader, MPI_Comm peer_comm, int remote_leader, int tag, MPI_Comm *newintercomm) 11 11 { 12 13 12 int ep_rank, ep_rank_loc, mpi_rank; 14 13 int ep_size, num_ep, mpi_size; … … 36 35 37 36 38 ::MPI_Comm_rank( MPI_COMM_WORLD_STD, &rank_in_world);37 ::MPI_Comm_rank(static_cast< ::MPI_Comm>(MPI_COMM_WORLD.mpi_comm), &rank_in_world); 39 38 ::MPI_Comm_rank(static_cast< ::MPI_Comm>(local_comm.mpi_comm), &rank_in_local_parent); 40 39 … … 80 79 send_buf[2] = num_ep; 81 80 82 ::MPI_Allgather(send_buf.data(), 3, MPI_INT_STD, recv_buf.data(), 3, MPI_INT_STD, local_mpi_comm);81 ::MPI_Allgather(send_buf.data(), 3, static_cast< ::MPI_Datatype> (MPI_INT), recv_buf.data(), 3, static_cast< ::MPI_Datatype> (MPI_INT), local_mpi_comm); 83 82 84 83 for(int i=0; i<size_info[0]; i++) … … 105 104 MPI_Status sta_send, sta_recv; 106 105 107 MPI_Isend(send_buf.data(), 3, MPI_INT_STD, remote_leader, tag, peer_comm, &req_send);108 MPI_Irecv(recv_buf.data(), 3, MPI_INT_STD, remote_leader, tag, peer_comm, &req_recv);106 MPI_Isend(send_buf.data(), 3, static_cast< ::MPI_Datatype> (MPI_INT), remote_leader, tag, peer_comm, &req_send); 107 MPI_Irecv(recv_buf.data(), 3, static_cast< ::MPI_Datatype> (MPI_INT), remote_leader, tag, peer_comm, &req_recv); 109 108 110 109 … … 118 117 } 119 118 119 120 120 121 send_buf[0] = size_info[1]; 121 122 send_buf[1] = leader_info[0]; … … 124 125 send_buf[4] = rank_in_peer_mpi[1]; 125 126 126 ::MPI_Bcast(send_buf.data(), 5, MPI_INT_STD, local_comm.rank_map->at(local_leader).second, local_mpi_comm);127 ::MPI_Bcast(send_buf.data(), 5, static_cast< ::MPI_Datatype> (MPI_INT), local_comm.rank_map->at(local_leader).second, local_mpi_comm); 127 128 128 129 size_info[1] = send_buf[0]; … … 149 150 std::copy ( ep_info[0].data(), ep_info[0].data() + size_info[0], send_buf.begin() + 2*size_info[0] ); 150 151 151 MPI_Send(send_buf.data(), 3*size_info[0], MPI_INT_STD, remote_leader, tag+1, peer_comm);152 MPI_Recv(recv_buf.data(), 3*size_info[1], MPI_INT_STD, remote_leader, tag+1, peer_comm, &status);153 } 154 155 ::MPI_Bcast(recv_buf.data(), 3*size_info[1], MPI_INT_STD, local_comm.rank_map->at(local_leader).second, local_mpi_comm);152 MPI_Send(send_buf.data(), 3*size_info[0], static_cast< ::MPI_Datatype> (MPI_INT), remote_leader, tag+1, peer_comm); 153 MPI_Recv(recv_buf.data(), 3*size_info[1], static_cast< ::MPI_Datatype> (MPI_INT), remote_leader, tag+1, peer_comm, &status); 154 } 155 156 ::MPI_Bcast(recv_buf.data(), 3*size_info[1], static_cast< ::MPI_Datatype> (MPI_INT), local_comm.rank_map->at(local_leader).second, local_mpi_comm); 156 157 157 158 std::copy ( recv_buf.data(), recv_buf.data() + size_info[1], rank_info[2].begin() ); … … 270 271 size_info[2] = new_ep_info[0].size(); 271 272 MPI_Status status; 272 MPI_Send(&size_info[2], 1, MPI_INT_STD, remote_leader, tag+2, peer_comm);273 MPI_Recv(&size_info[3], 1, MPI_INT_STD, remote_leader, tag+2, peer_comm, &status);274 } 275 276 ::MPI_Bcast(&size_info[2], 2, MPI_INT_STD, local_comm.rank_map->at(local_leader).second, local_mpi_comm);273 MPI_Send(&size_info[2], 1, static_cast< ::MPI_Datatype> (MPI_INT), remote_leader, tag+2, peer_comm); 274 MPI_Recv(&size_info[3], 1, static_cast< ::MPI_Datatype> (MPI_INT), remote_leader, tag+2, peer_comm, &status); 275 } 276 277 ::MPI_Bcast(&size_info[2], 2, static_cast< ::MPI_Datatype> (MPI_INT), local_comm.rank_map->at(local_leader).second, local_mpi_comm); 277 278 278 279 new_rank_info[2].resize(size_info[3]); … … 291 292 std::copy ( new_ep_info[0].data(), new_ep_info[0].data() + size_info[0], send_buf.begin() + 2*size_info[2] ); 292 293 293 MPI_Send(send_buf.data(), 3*size_info[2], MPI_INT_STD, remote_leader, tag+3, peer_comm);294 MPI_Recv(recv_buf.data(), 3*size_info[3], MPI_INT_STD, remote_leader, tag+3, peer_comm, &status);295 } 296 297 ::MPI_Bcast(recv_buf.data(), 3*size_info[3], MPI_INT_STD, local_comm.rank_map->at(local_leader).second, local_mpi_comm);294 MPI_Send(send_buf.data(), 3*size_info[2], static_cast< ::MPI_Datatype> (MPI_INT), remote_leader, tag+3, peer_comm); 295 MPI_Recv(recv_buf.data(), 3*size_info[3], static_cast< ::MPI_Datatype> (MPI_INT), remote_leader, tag+3, peer_comm, &status); 296 } 297 298 ::MPI_Bcast(recv_buf.data(), 3*size_info[3], static_cast< ::MPI_Datatype> (MPI_INT), local_comm.rank_map->at(local_leader).second, local_mpi_comm); 298 299 299 300 std::copy ( recv_buf.data(), recv_buf.data() + size_info[3], new_rank_info[2].begin() ); … … 303 304 } 304 305 305 306 306 307 307 308 if(is_proc_master) … … 327 328 } 328 329 329 ::MPI_Bcast(&leader_info[2], 1, MPI_INT_STD, local_comm.rank_map->at(local_leader).second, local_mpi_comm);330 331 if(new_comm != MPI_COMM_NULL_STD)330 ::MPI_Bcast(&leader_info[2], 1, static_cast< ::MPI_Datatype> (MPI_INT), local_comm.rank_map->at(local_leader).second, local_mpi_comm); 331 332 if(new_comm != static_cast< ::MPI_Comm>(MPI_COMM_NULL.mpi_comm)) 332 333 { 333 334 … … 360 361 //printf("tag_list size = %lu\n", tag_list.size()); 361 362 } 362 363 364 } 365 366 367 MPI_Barrier_local(local_comm); 363 } 368 364 369 365 vector<int> bcast_buf(8); … … 374 370 } 375 371 376 MPI_Bcast(bcast_buf.data(), 8, MPI_INT_STD, local_leader, local_comm);372 MPI_Bcast(bcast_buf.data(), 8, static_cast< ::MPI_Datatype> (MPI_INT), local_leader, local_comm); 377 373 378 374 if(!is_local_leader) … … 399 395 } 400 396 401 MPI_Bcast(bcast_buf.data(), size_info[2]+size_info[1]+size_info[0]+1, MPI_INT_STD, local_leader, local_comm);397 MPI_Bcast(bcast_buf.data(), size_info[2]+size_info[1]+size_info[0]+1, static_cast< ::MPI_Datatype> (MPI_INT), local_leader, local_comm); 402 398 403 399 if(!is_local_leader) … … 410 406 411 407 int my_position = offset[rank_in_local_parent]+ep_rank_loc; 412 413 408 414 409 MPI_Barrier_local(local_comm); 415 410 #pragma omp flush … … 425 420 if((*iter).first == make_pair(tag, min(leader_info[0], leader_info[1]))) 426 421 { 427 *newintercomm = 422 *newintercomm = iter->second[my_position]; 428 423 found = true; 429 424 break; … … 433 428 } 434 429 435 MPI_Barrier_local(local_comm); 430 MPI_Barrier(local_comm); 431 432 if(is_local_leader) 433 { 434 int local_flag = true; 435 int remote_flag = false; 436 MPI_Status mpi_status; 437 438 MPI_Send(&local_flag, 1, MPI_INT, remote_leader, tag, peer_comm); 439 440 MPI_Recv(&remote_flag, 1, MPI_INT, remote_leader, tag, peer_comm, &mpi_status); 441 } 442 443 444 MPI_Barrier(local_comm); 445 436 446 if(is_proc_master) 437 447 { … … 456 466 intercomm_mpi_size = newintercomm->ep_comm_ptr->size_rank_info[2].second; 457 467 458 MPI_Bcast(&remote_ep_size, 1, MPI_INT_STD, local_leader, local_comm);468 MPI_Bcast(&remote_ep_size, 1, static_cast< ::MPI_Datatype> (MPI_INT), local_leader, local_comm); 459 469 460 470 int my_rank_map_elem[2]; … … 470 480 (*newintercomm).ep_comm_ptr->intercomm->local_rank_map->resize(local_ep_size); 471 481 472 MPI_Allgather(my_rank_map_elem, 2, MPI_INT_STD, (*newintercomm).ep_comm_ptr->intercomm->local_rank_map->data(), 2, MPI_INT_STD, local_comm); 482 MPI_Allgather(my_rank_map_elem, 2, static_cast< ::MPI_Datatype> (MPI_INT), 483 (*newintercomm).ep_comm_ptr->intercomm->local_rank_map->data(), 2, static_cast< ::MPI_Datatype> (MPI_INT), local_comm); 473 484 474 485 (*newintercomm).ep_comm_ptr->intercomm->remote_rank_map = new RANK_MAP; … … 489 500 { 490 501 MPI_Status status; 491 MPI_Send((*newintercomm).ep_comm_ptr->intercomm->local_rank_map->data(), 2*local_ep_size, MPI_INT_STD, remote_leader, tag+4, peer_comm);492 MPI_Recv((*newintercomm).ep_comm_ptr->intercomm->remote_rank_map->data(), 2*remote_ep_size, MPI_INT_STD, remote_leader, tag+4, peer_comm, &status);493 494 MPI_Send(&local_intercomm_size, 1, MPI_INT_STD, remote_leader, tag+5, peer_comm);495 MPI_Recv(&remote_intercomm_size, 1, MPI_INT_STD, remote_leader, tag+5, peer_comm, &status);502 MPI_Send((*newintercomm).ep_comm_ptr->intercomm->local_rank_map->data(), 2*local_ep_size, static_cast< ::MPI_Datatype> (MPI_INT), remote_leader, tag+4, peer_comm); 503 MPI_Recv((*newintercomm).ep_comm_ptr->intercomm->remote_rank_map->data(), 2*remote_ep_size, static_cast< ::MPI_Datatype> (MPI_INT), remote_leader, tag+4, peer_comm, &status); 504 505 MPI_Send(&local_intercomm_size, 1, static_cast< ::MPI_Datatype> (MPI_INT), remote_leader, tag+5, peer_comm); 506 MPI_Recv(&remote_intercomm_size, 1, static_cast< ::MPI_Datatype> (MPI_INT), remote_leader, tag+5, peer_comm, &status); 496 507 497 508 new_bcast_root_0 = intercomm_ep_rank; 498 509 } 499 510 500 MPI_Allreduce(&new_bcast_root_0, &new_bcast_root, 1, MPI_INT_STD, MPI_SUM_STD, *newintercomm);501 502 503 MPI_Bcast((*newintercomm).ep_comm_ptr->intercomm->remote_rank_map->data(), 2*remote_ep_size, MPI_INT_STD, local_leader, local_comm);504 MPI_Bcast(&remote_intercomm_size, 1, MPI_INT_STD, new_bcast_root, *newintercomm);511 MPI_Allreduce(&new_bcast_root_0, &new_bcast_root, 1, static_cast< ::MPI_Datatype> (MPI_INT), static_cast< ::MPI_Op>(MPI_SUM), *newintercomm); 512 513 514 MPI_Bcast((*newintercomm).ep_comm_ptr->intercomm->remote_rank_map->data(), 2*remote_ep_size, static_cast< ::MPI_Datatype> (MPI_INT), local_leader, local_comm); 515 MPI_Bcast(&remote_intercomm_size, 1, static_cast< ::MPI_Datatype> (MPI_INT), new_bcast_root, *newintercomm); 505 516 506 517 … … 514 525 { 515 526 MPI_Status status; 516 MPI_Send((*newintercomm).rank_map->data(), 2*local_intercomm_size, MPI_INT_STD, remote_leader, tag+6, peer_comm);517 MPI_Recv((*newintercomm).ep_comm_ptr->intercomm->intercomm_rank_map->data(), 2*remote_intercomm_size, MPI_INT_STD, remote_leader, tag+6, peer_comm, &status);518 } 519 520 MPI_Bcast((*newintercomm).ep_comm_ptr->intercomm->intercomm_rank_map->data(), 2*remote_intercomm_size, MPI_INT_STD, new_bcast_root, *newintercomm);527 MPI_Send((*newintercomm).rank_map->data(), 2*local_intercomm_size, static_cast< ::MPI_Datatype> (MPI_INT), remote_leader, tag+6, peer_comm); 528 MPI_Recv((*newintercomm).ep_comm_ptr->intercomm->intercomm_rank_map->data(), 2*remote_intercomm_size, static_cast< ::MPI_Datatype> (MPI_INT), remote_leader, tag+6, peer_comm, &status); 529 } 530 531 MPI_Bcast((*newintercomm).ep_comm_ptr->intercomm->intercomm_rank_map->data(), 2*remote_intercomm_size, static_cast< ::MPI_Datatype> (MPI_INT), new_bcast_root, *newintercomm); 521 532 522 533 (*newintercomm).ep_comm_ptr->intercomm->local_comm = &(local_comm.ep_comm_ptr->comm_list[ep_rank_loc]); … … 579 590 int rank_in_peer_mpi[2]; 580 591 581 ::MPI_Comm_rank( MPI_COMM_WORLD_STD, &rank_in_world);592 ::MPI_Comm_rank(static_cast< ::MPI_Comm >(MPI_COMM_WORLD.mpi_comm), &rank_in_world); 582 593 583 594 … … 608 619 MPI_Request req_s, req_r; 609 620 610 MPI_Isend(send_buf.data(), 2, MPI_INT_STD, remote_leader, tag, peer_comm, &req_s);611 MPI_Irecv(recv_buf.data(), 2, MPI_INT_STD, remote_leader, tag, peer_comm, &req_r);621 MPI_Isend(send_buf.data(), 2, static_cast< ::MPI_Datatype> (MPI_INT), remote_leader, tag, peer_comm, &req_s); 622 MPI_Irecv(recv_buf.data(), 2, static_cast< ::MPI_Datatype> (MPI_INT), remote_leader, tag, peer_comm, &req_r); 612 623 613 624 … … 619 630 } 620 631 621 MPI_Bcast(recv_buf.data(), 3, MPI_INT_STD, local_leader, local_comm);632 MPI_Bcast(recv_buf.data(), 3, static_cast< ::MPI_Datatype> (MPI_INT), local_leader, local_comm); 622 633 623 634 remote_num_ep = recv_buf[0]; … … 660 671 MPI_Request req_s; 661 672 MPI_Status sta_s; 662 MPI_Isend(tag_label, 2, MPI_INT_STD, remote_leader, tag, peer_comm, &req_s);673 MPI_Isend(tag_label, 2, static_cast< ::MPI_Datatype> (MPI_INT), remote_leader, tag, peer_comm, &req_s); 663 674 664 675 MPI_Wait(&req_s, &sta_s); … … 674 685 MPI_Status status; 675 686 MPI_Request req_r; 676 MPI_Irecv(tag_label, 2, MPI_INT_STD, remote_leader, tag, peer_comm, &req_r);687 MPI_Irecv(tag_label, 2, static_cast< ::MPI_Datatype> (MPI_INT), remote_leader, tag, peer_comm, &req_r); 677 688 MPI_Wait(&req_r, &status); 678 689 } 679 690 } 680 691 681 MPI_Bcast(tag_label, 2, MPI_INT_STD, local_leader, local_comm);692 MPI_Bcast(tag_label, 2, static_cast< ::MPI_Datatype> (MPI_INT), local_leader, local_comm); 682 693 683 694 … … 745 756 local_rank_map_ele[1] = (*newintercomm).ep_comm_ptr->comm_label; 746 757 747 MPI_Allgather(local_rank_map_ele, 2, MPI_INT_STD, (*newintercomm).ep_comm_ptr->intercomm->local_rank_map->data(), 2, MPI_INT_STD, local_comm); 758 MPI_Allgather(local_rank_map_ele, 2, static_cast< ::MPI_Datatype> (MPI_INT), 759 (*newintercomm).ep_comm_ptr->intercomm->local_rank_map->data(), 2, static_cast< ::MPI_Datatype> (MPI_INT), local_comm); 748 760 749 761 if(ep_rank == local_leader) … … 752 764 MPI_Request req_s, req_r; 753 765 754 MPI_Isend((*newintercomm).ep_comm_ptr->intercomm->local_rank_map->data(), 2*local_num_ep, MPI_INT_STD, remote_leader, tag, peer_comm, &req_s);755 MPI_Irecv((*newintercomm).ep_comm_ptr->intercomm->remote_rank_map->data(), 2*remote_num_ep, MPI_INT_STD, remote_leader, tag, peer_comm, &req_r);766 MPI_Isend((*newintercomm).ep_comm_ptr->intercomm->local_rank_map->data(), 2*local_num_ep, static_cast< ::MPI_Datatype> (MPI_INT), remote_leader, tag, peer_comm, &req_s); 767 MPI_Irecv((*newintercomm).ep_comm_ptr->intercomm->remote_rank_map->data(), 2*remote_num_ep, static_cast< ::MPI_Datatype> (MPI_INT), remote_leader, tag, peer_comm, &req_r); 756 768 757 769 … … 761 773 } 762 774 763 MPI_Bcast((*newintercomm).ep_comm_ptr->intercomm->remote_rank_map->data(), 2*remote_num_ep, MPI_INT_STD, local_leader, local_comm);775 MPI_Bcast((*newintercomm).ep_comm_ptr->intercomm->remote_rank_map->data(), 2*remote_num_ep, static_cast< ::MPI_Datatype> (MPI_INT), local_leader, local_comm); 764 776 (*newintercomm).ep_comm_ptr->intercomm->local_comm = &(local_comm.ep_comm_ptr->comm_list[ep_rank_loc]); 765 777 (*newintercomm).ep_comm_ptr->intercomm->intercomm_tag = tag; -
XIOS/dev/branch_openmp/extern/src_ep_dev/ep_intercomm_world.cpp
r1134 r1287 80 80 send_buf[2] = num_ep; 81 81 82 ::MPI_Allgather(send_buf.data(), 3, MPI_INT_STD, recv_buf.data(), 3, MPI_INT_STD, local_mpi_comm);82 ::MPI_Allgather(send_buf.data(), 3, static_cast< ::MPI_Datatype> (MPI_INT), recv_buf.data(), 3, static_cast< ::MPI_Datatype> (MPI_INT), local_mpi_comm); 83 83 84 84 for(int i=0; i<size_info[0]; i++) … … 99 99 send_buf[1] = local_ep_size; 100 100 101 ::MPI_Send(send_buf.data(), 2, MPI_INT_STD, mpi_remote_leader, tag, peer_comm);102 103 ::MPI_Recv(recv_buf.data(), 2, MPI_INT_STD, mpi_remote_leader, tag, peer_comm, &mpi_status);101 ::MPI_Send(send_buf.data(), 2, static_cast< ::MPI_Datatype> (MPI_INT), mpi_remote_leader, tag, peer_comm); 102 103 ::MPI_Recv(recv_buf.data(), 2, static_cast< ::MPI_Datatype> (MPI_INT), mpi_remote_leader, tag, peer_comm, &mpi_status); 104 104 105 105 recv_buf[2] = rank_in_world; … … 108 108 } 109 109 110 ::MPI_Bcast(recv_buf.data(), 4, MPI_INT_STD, local_comm.rank_map->at(local_leader).second, local_mpi_comm);110 ::MPI_Bcast(recv_buf.data(), 4, static_cast< ::MPI_Datatype> (MPI_INT), local_comm.rank_map->at(local_leader).second, local_mpi_comm); 111 111 112 112 size_info[1] = recv_buf[0]; … … 132 132 std::copy ( ep_info[0].data(), ep_info[0].data() + size_info[0], send_buf.begin() + 2*size_info[0] ); 133 133 134 ::MPI_Send(send_buf.data(), 3*size_info[0], MPI_INT_STD, mpi_remote_leader, tag, peer_comm);135 136 ::MPI_Recv(recv_buf.data(), 3*size_info[1], MPI_INT_STD, mpi_remote_leader, tag, peer_comm, &mpi_status);137 138 } 139 140 ::MPI_Bcast(recv_buf.data(), 3*size_info[1], MPI_INT_STD, local_comm.rank_map->at(local_leader).second, local_mpi_comm);134 ::MPI_Send(send_buf.data(), 3*size_info[0], static_cast< ::MPI_Datatype> (MPI_INT), mpi_remote_leader, tag, peer_comm); 135 136 ::MPI_Recv(recv_buf.data(), 3*size_info[1], static_cast< ::MPI_Datatype> (MPI_INT), mpi_remote_leader, tag, peer_comm, &mpi_status); 137 138 } 139 140 ::MPI_Bcast(recv_buf.data(), 3*size_info[1], static_cast< ::MPI_Datatype> (MPI_INT), local_comm.rank_map->at(local_leader).second, local_mpi_comm); 141 141 142 142 std::copy ( recv_buf.data(), recv_buf.data() + size_info[1], rank_info[2].begin() ); … … 259 259 ::MPI_Status mpi_status; 260 260 261 ::MPI_Send(&size_info[2], 1, MPI_INT_STD, mpi_remote_leader, tag, peer_comm);262 263 ::MPI_Recv(&size_info[3], 1, MPI_INT_STD, mpi_remote_leader, tag, peer_comm, &mpi_status);264 } 265 266 ::MPI_Bcast(&size_info[2], 2, MPI_INT_STD, local_comm.rank_map->at(local_leader).second, local_mpi_comm);261 ::MPI_Send(&size_info[2], 1, static_cast< ::MPI_Datatype> (MPI_INT), mpi_remote_leader, tag, peer_comm); 262 263 ::MPI_Recv(&size_info[3], 1, static_cast< ::MPI_Datatype> (MPI_INT), mpi_remote_leader, tag, peer_comm, &mpi_status); 264 } 265 266 ::MPI_Bcast(&size_info[2], 2, static_cast< ::MPI_Datatype> (MPI_INT), local_comm.rank_map->at(local_leader).second, local_mpi_comm); 267 267 268 268 new_rank_info[2].resize(size_info[3]); … … 281 281 std::copy ( new_ep_info[0].data(), new_ep_info[0].data() + size_info[0], send_buf.begin() + 2*size_info[2] ); 282 282 283 ::MPI_Send(send_buf.data(), 3*size_info[2], MPI_INT_STD, mpi_remote_leader, tag, peer_comm);284 ::MPI_Recv(recv_buf.data(), 3*size_info[3], MPI_INT_STD, mpi_remote_leader, tag, peer_comm, &mpi_status);285 286 } 287 288 ::MPI_Bcast(recv_buf.data(), 3*size_info[3], MPI_INT_STD, local_comm.rank_map->at(local_leader).second, local_mpi_comm);283 ::MPI_Send(send_buf.data(), 3*size_info[2], static_cast< ::MPI_Datatype> (MPI_INT), mpi_remote_leader, tag, peer_comm); 284 ::MPI_Recv(recv_buf.data(), 3*size_info[3], static_cast< ::MPI_Datatype> (MPI_INT), mpi_remote_leader, tag, peer_comm, &mpi_status); 285 286 } 287 288 ::MPI_Bcast(recv_buf.data(), 3*size_info[3], static_cast< ::MPI_Datatype> (MPI_INT), local_comm.rank_map->at(local_leader).second, local_mpi_comm); 289 289 290 290 std::copy ( recv_buf.data(), recv_buf.data() + size_info[3], new_rank_info[2].begin() ); … … 318 318 } 319 319 320 ::MPI_Bcast(&leader_info[2], 1, MPI_INT_STD, local_comm.rank_map->at(local_leader).second, local_mpi_comm);321 322 if(new_comm != MPI_COMM_NULL_STD)320 ::MPI_Bcast(&leader_info[2], 1, static_cast< ::MPI_Datatype> (MPI_INT), local_comm.rank_map->at(local_leader).second, local_mpi_comm); 321 322 if(new_comm != static_cast< ::MPI_Comm>(MPI_COMM_NULL.mpi_comm)) 323 323 { 324 324 ::MPI_Barrier(new_comm); … … 362 362 } 363 363 364 MPI_Bcast(bcast_buf.data(), 8, MPI_INT_STD, local_leader, local_comm);364 MPI_Bcast(bcast_buf.data(), 8, static_cast< ::MPI_Datatype> (MPI_INT), local_leader, local_comm); 365 365 366 366 if(!is_local_leader) … … 387 387 } 388 388 389 MPI_Bcast(bcast_buf.data(), size_info[2]+size_info[1]+size_info[0]+1, MPI_INT_STD, local_leader, local_comm);389 MPI_Bcast(bcast_buf.data(), size_info[2]+size_info[1]+size_info[0]+1, static_cast< ::MPI_Datatype> (MPI_INT), local_leader, local_comm); 390 390 391 391 if(!is_local_leader) … … 424 424 } 425 425 426 MPI_Barrier_local(local_comm); 426 MPI_Barrier(local_comm); 427 428 if(is_local_leader) 429 { 430 int local_flag = true; 431 int remote_flag = false; 432 ::MPI_Status mpi_status; 433 434 ::MPI_Send(&local_flag, 1, static_cast< ::MPI_Datatype>(MPI_INT), mpi_remote_leader, tag, peer_comm); 435 436 ::MPI_Recv(&remote_flag, 1, static_cast< ::MPI_Datatype>(MPI_INT), mpi_remote_leader, tag, peer_comm, &mpi_status); 437 } 438 439 MPI_Barrier(local_comm); 427 440 428 441 if(is_proc_master) … … 450 463 intercomm_mpi_size = newintercomm->ep_comm_ptr->size_rank_info[2].second; 451 464 452 MPI_Bcast(&remote_ep_size, 1, MPI_INT_STD, local_leader, local_comm);465 MPI_Bcast(&remote_ep_size, 1, static_cast< ::MPI_Datatype> (MPI_INT), local_leader, local_comm); 453 466 454 467 int my_rank_map_elem[2]; … … 466 479 (*newintercomm).ep_comm_ptr->intercomm->local_rank_map->resize(local_ep_size); 467 480 468 MPI_Allgather (my_rank_map_elem, 2, MPI_INT_STD, (*newintercomm).ep_comm_ptr->intercomm->local_rank_map->data(), 2, MPI_INT_STD, local_comm);481 MPI_Allgather2(my_rank_map_elem, 2, MPI_INT, (*newintercomm).ep_comm_ptr->intercomm->local_rank_map->data(), 2, MPI_INT, local_comm); 469 482 470 483 (*newintercomm).ep_comm_ptr->intercomm->remote_rank_map = new RANK_MAP; … … 481 494 ::MPI_Status status; 482 495 483 ::MPI_Send((*newintercomm).ep_comm_ptr->intercomm->local_rank_map->data(), 2*local_ep_size, MPI_INT_STD, mpi_remote_leader, tag, peer_comm);484 485 ::MPI_Recv((*newintercomm).ep_comm_ptr->intercomm->remote_rank_map->data(), 2*remote_ep_size, MPI_INT_STD, mpi_remote_leader, tag, peer_comm, &status);486 487 ::MPI_Send(&local_intercomm_size, 1, MPI_INT_STD, mpi_remote_leader, tag, peer_comm);488 489 ::MPI_Recv(&remote_intercomm_size, 1, MPI_INT_STD, mpi_remote_leader, tag, peer_comm, &status);490 } 491 492 MPI_Bcast((*newintercomm).ep_comm_ptr->intercomm->remote_rank_map->data(), 2*remote_ep_size, MPI_INT_STD, local_leader, local_comm);493 MPI_Bcast(&remote_intercomm_size, 1, MPI_INT_STD, 0, *newintercomm);496 ::MPI_Send((*newintercomm).ep_comm_ptr->intercomm->local_rank_map->data(), 2*local_ep_size, static_cast< ::MPI_Datatype> (MPI_INT), mpi_remote_leader, tag, peer_comm); 497 498 ::MPI_Recv((*newintercomm).ep_comm_ptr->intercomm->remote_rank_map->data(), 2*remote_ep_size, static_cast< ::MPI_Datatype> (MPI_INT), mpi_remote_leader, tag, peer_comm, &status); 499 500 ::MPI_Send(&local_intercomm_size, 1, static_cast< ::MPI_Datatype> (MPI_INT), mpi_remote_leader, tag, peer_comm); 501 502 ::MPI_Recv(&remote_intercomm_size, 1, static_cast< ::MPI_Datatype> (MPI_INT), mpi_remote_leader, tag, peer_comm, &status); 503 } 504 505 MPI_Bcast((*newintercomm).ep_comm_ptr->intercomm->remote_rank_map->data(), 2*remote_ep_size, static_cast< ::MPI_Datatype> (MPI_INT), local_leader, local_comm); 506 MPI_Bcast(&remote_intercomm_size, 1, static_cast< ::MPI_Datatype> (MPI_INT), 0, *newintercomm); 494 507 495 508 … … 506 519 ::MPI_Status status; 507 520 508 ::MPI_Send((*newintercomm).rank_map->data(), 2*local_intercomm_size, MPI_INT_STD, mpi_remote_leader, tag, peer_comm);509 510 ::MPI_Recv((*newintercomm).ep_comm_ptr->intercomm->intercomm_rank_map->data(), 2*remote_intercomm_size, MPI_INT_STD, mpi_remote_leader, tag, peer_comm, &status);511 } 512 513 MPI_Bcast((*newintercomm).ep_comm_ptr->intercomm->intercomm_rank_map->data(), 2*remote_intercomm_size, MPI_INT_STD, 0, *newintercomm);521 ::MPI_Send((*newintercomm).rank_map->data(), 2*local_intercomm_size, static_cast< ::MPI_Datatype> (MPI_INT), mpi_remote_leader, tag, peer_comm); 522 523 ::MPI_Recv((*newintercomm).ep_comm_ptr->intercomm->intercomm_rank_map->data(), 2*remote_intercomm_size, static_cast< ::MPI_Datatype> (MPI_INT), mpi_remote_leader, tag, peer_comm, &status); 524 } 525 526 MPI_Bcast((*newintercomm).ep_comm_ptr->intercomm->intercomm_rank_map->data(), 2*remote_intercomm_size, static_cast< ::MPI_Datatype> (MPI_INT), 0, *newintercomm); 514 527 515 528 (*newintercomm).ep_comm_ptr->intercomm->local_comm = &(local_comm.ep_comm_ptr->comm_list[ep_rank_loc]); … … 590 603 send_buf[2] = num_ep; 591 604 592 ::MPI_Allgather(send_buf.data(), 3, MPI_INT_STD, recv_buf.data(), 3, MPI_INT_STD, local_mpi_comm);605 ::MPI_Allgather(send_buf.data(), 3, static_cast< ::MPI_Datatype> (MPI_INT), recv_buf.data(), 3, static_cast< ::MPI_Datatype> (MPI_INT), local_mpi_comm); 593 606 594 607 for(int i=0; i<size_info[0]; i++) … … 610 623 send_buf[1] = local_ep_size; 611 624 612 ::MPI_Send(send_buf.data(), 2, MPI_INT_STD, mpi_remote_leader, tag, peer_comm);613 614 ::MPI_Recv(recv_buf.data(), 2, MPI_INT_STD, mpi_remote_leader, tag, peer_comm, &mpi_status);625 ::MPI_Send(send_buf.data(), 2, static_cast< ::MPI_Datatype> (MPI_INT), mpi_remote_leader, tag, peer_comm); 626 627 ::MPI_Recv(recv_buf.data(), 2, static_cast< ::MPI_Datatype> (MPI_INT), mpi_remote_leader, tag, peer_comm, &mpi_status); 615 628 616 629 recv_buf[2] = rank_in_world; … … 619 632 } 620 633 621 ::MPI_Bcast(recv_buf.data(), 4, MPI_INT_STD, local_comm.rank_map->at(local_leader).second, local_mpi_comm);634 ::MPI_Bcast(recv_buf.data(), 4, static_cast< ::MPI_Datatype> (MPI_INT), local_comm.rank_map->at(local_leader).second, local_mpi_comm); 622 635 623 636 size_info[1] = recv_buf[0]; … … 643 656 std::copy ( ep_info[0].data(), ep_info[0].data() + size_info[0], send_buf.begin() + 2*size_info[0] ); 644 657 645 ::MPI_Send(send_buf.data(), 3*size_info[0], MPI_INT_STD, mpi_remote_leader, tag, peer_comm);646 647 ::MPI_Recv(recv_buf.data(), 3*size_info[1], MPI_INT_STD, mpi_remote_leader, tag, peer_comm, &mpi_status);648 649 } 650 651 ::MPI_Bcast(recv_buf.data(), 3*size_info[1], MPI_INT_STD, local_comm.rank_map->at(local_leader).second, local_mpi_comm);658 ::MPI_Send(send_buf.data(), 3*size_info[0], static_cast< ::MPI_Datatype> (MPI_INT), mpi_remote_leader, tag, peer_comm); 659 660 ::MPI_Recv(recv_buf.data(), 3*size_info[1], static_cast< ::MPI_Datatype> (MPI_INT), mpi_remote_leader, tag, peer_comm, &mpi_status); 661 662 } 663 664 ::MPI_Bcast(recv_buf.data(), 3*size_info[1], static_cast< ::MPI_Datatype> (MPI_INT), local_comm.rank_map->at(local_leader).second, local_mpi_comm); 652 665 653 666 std::copy ( recv_buf.data(), recv_buf.data() + size_info[1], rank_info[2].begin() ); … … 770 783 ::MPI_Status mpi_status; 771 784 772 ::MPI_Send(&size_info[2], 1, MPI_INT_STD, mpi_remote_leader, tag, peer_comm);773 774 ::MPI_Recv(&size_info[3], 1, MPI_INT_STD, mpi_remote_leader, tag, peer_comm, &mpi_status);775 } 776 777 ::MPI_Bcast(&size_info[2], 2, MPI_INT_STD, local_comm.rank_map->at(local_leader).second, local_mpi_comm);785 ::MPI_Send(&size_info[2], 1, static_cast< ::MPI_Datatype> (MPI_INT), mpi_remote_leader, tag, peer_comm); 786 787 ::MPI_Recv(&size_info[3], 1, static_cast< ::MPI_Datatype> (MPI_INT), mpi_remote_leader, tag, peer_comm, &mpi_status); 788 } 789 790 ::MPI_Bcast(&size_info[2], 2, static_cast< ::MPI_Datatype> (MPI_INT), local_comm.rank_map->at(local_leader).second, local_mpi_comm); 778 791 779 792 new_rank_info[2].resize(size_info[3]); … … 792 805 std::copy ( new_ep_info[0].data(), new_ep_info[0].data() + size_info[0], send_buf.begin() + 2*size_info[2] ); 793 806 794 ::MPI_Send(send_buf.data(), 3*size_info[2], MPI_INT_STD, mpi_remote_leader, tag, peer_comm);795 796 ::MPI_Recv(recv_buf.data(), 3*size_info[3], MPI_INT_STD, mpi_remote_leader, tag, peer_comm, &mpi_status);797 } 798 799 ::MPI_Bcast(recv_buf.data(), 3*size_info[3], MPI_INT_STD, local_comm.rank_map->at(local_leader).second, local_mpi_comm);807 ::MPI_Send(send_buf.data(), 3*size_info[2], static_cast< ::MPI_Datatype> (MPI_INT), mpi_remote_leader, tag, peer_comm); 808 809 ::MPI_Recv(recv_buf.data(), 3*size_info[3], static_cast< ::MPI_Datatype> (MPI_INT), mpi_remote_leader, tag, peer_comm, &mpi_status); 810 } 811 812 ::MPI_Bcast(recv_buf.data(), 3*size_info[3], static_cast< ::MPI_Datatype> (MPI_INT), local_comm.rank_map->at(local_leader).second, local_mpi_comm); 800 813 801 814 std::copy ( recv_buf.data(), recv_buf.data() + size_info[3], new_rank_info[2].begin() ); … … 829 842 } 830 843 831 ::MPI_Bcast(&leader_info[2], 1, MPI_INT_STD, local_comm.rank_map->at(local_leader).second, local_mpi_comm);832 833 if(new_comm != MPI_COMM_NULL_STD)844 ::MPI_Bcast(&leader_info[2], 1, static_cast< ::MPI_Datatype> (MPI_INT), local_comm.rank_map->at(local_leader).second, local_mpi_comm); 845 846 if(new_comm != static_cast< ::MPI_Comm >(MPI_COMM_NULL.mpi_comm)) 834 847 { 835 848 ::MPI_Barrier(new_comm); … … 857 870 #pragma omp critical (write_to_tag_list) 858 871 tag_list.push_back(make_pair( make_pair(tag, min(leader_info[0], leader_info[1])) , ep_intercomm)); 859 860 } 861 862 863 } 864 865 866 MPI_Barrier_local(local_comm); 872 873 874 } 875 876 877 } 867 878 868 879 … … 876 887 877 888 878 MPI_Bcast(bcast_buf.data(), 8, MPI_INT_STD, local_leader, local_comm);889 MPI_Bcast(bcast_buf.data(), 8, static_cast< ::MPI_Datatype> (MPI_INT), local_leader, local_comm); 879 890 880 891 … … 904 915 } 905 916 906 MPI_Bcast(bcast_buf.data(), size_info[2]+size_info[1]+size_info[0]+1, MPI_INT_STD, local_leader, local_comm);917 MPI_Bcast(bcast_buf.data(), size_info[2]+size_info[1]+size_info[0]+1, static_cast< ::MPI_Datatype> (MPI_INT), local_leader, local_comm); 907 918 908 919 … … 919 930 920 931 921 MPI_Barrier_local(local_comm);922 #pragma omp flush923 924 925 932 #pragma omp critical (read_from_tag_list) 926 933 { … … 935 942 936 943 found = true; 937 //tag_list.erase(iter);938 944 break; 939 945 } … … 942 948 } 943 949 944 MPI_Barrier_local(local_comm); 950 MPI_Barrier(local_comm); 951 952 if(is_local_leader) 953 { 954 int local_flag = true; 955 int remote_flag = false; 956 ::MPI_Status mpi_status; 957 958 ::MPI_Send(&local_flag, 1, static_cast< ::MPI_Datatype>(MPI_INT), mpi_remote_leader, tag, peer_comm); 959 960 ::MPI_Recv(&remote_flag, 1, static_cast< ::MPI_Datatype>(MPI_INT), mpi_remote_leader, tag, peer_comm, &mpi_status); 961 } 962 963 MPI_Barrier(local_comm); 945 964 946 965 if(is_proc_master) … … 967 986 968 987 969 MPI_Bcast(&remote_ep_size, 1, MPI_INT_STD, local_leader, local_comm);988 MPI_Bcast(&remote_ep_size, 1, static_cast< ::MPI_Datatype> (MPI_INT), local_leader, local_comm); 970 989 971 990 int my_rank_map_elem[2]; … … 983 1002 (*newintercomm).ep_comm_ptr->intercomm->local_rank_map->resize(local_ep_size); 984 1003 985 MPI_Allgather(my_rank_map_elem, 2, MPI_INT _STD, (*newintercomm).ep_comm_ptr->intercomm->local_rank_map->data(), 2, MPI_INT_STD, local_comm);986 1004 MPI_Allgather(my_rank_map_elem, 2, MPI_INT, (*newintercomm).ep_comm_ptr->intercomm->local_rank_map->data(), 2, MPI_INT, local_comm); 1005 987 1006 (*newintercomm).ep_comm_ptr->intercomm->remote_rank_map = new RANK_MAP; 988 1007 (*newintercomm).ep_comm_ptr->intercomm->remote_rank_map->resize(remote_ep_size); … … 996 1015 ::MPI_Status status; 997 1016 998 ::MPI_Send((*newintercomm).ep_comm_ptr->intercomm->local_rank_map->data(), 2*local_ep_size, MPI_INT_STD, mpi_remote_leader, tag, peer_comm);999 1000 ::MPI_Recv((*newintercomm).ep_comm_ptr->intercomm->remote_rank_map->data(), 2*remote_ep_size, MPI_INT_STD, mpi_remote_leader, tag, peer_comm, &status);1001 1002 ::MPI_Send(&local_intercomm_size, 1, MPI_INT_STD, mpi_remote_leader, tag, peer_comm);1003 1004 ::MPI_Recv(&remote_intercomm_size, 1, MPI_INT_STD, mpi_remote_leader, tag, peer_comm, &status);1005 } 1006 1007 MPI_Bcast((*newintercomm).ep_comm_ptr->intercomm->remote_rank_map->data(), 2*remote_ep_size, MPI_INT_STD, local_leader, local_comm);1008 MPI_Bcast(&remote_intercomm_size, 1, MPI_INT_STD, 0, *newintercomm);1017 ::MPI_Send((*newintercomm).ep_comm_ptr->intercomm->local_rank_map->data(), 2*local_ep_size, static_cast< ::MPI_Datatype> (MPI_INT), mpi_remote_leader, tag, peer_comm); 1018 1019 ::MPI_Recv((*newintercomm).ep_comm_ptr->intercomm->remote_rank_map->data(), 2*remote_ep_size, static_cast< ::MPI_Datatype> (MPI_INT), mpi_remote_leader, tag, peer_comm, &status); 1020 1021 ::MPI_Send(&local_intercomm_size, 1, static_cast< ::MPI_Datatype> (MPI_INT), mpi_remote_leader, tag, peer_comm); 1022 1023 ::MPI_Recv(&remote_intercomm_size, 1, static_cast< ::MPI_Datatype> (MPI_INT), mpi_remote_leader, tag, peer_comm, &status); 1024 } 1025 1026 MPI_Bcast((*newintercomm).ep_comm_ptr->intercomm->remote_rank_map->data(), 2*remote_ep_size, static_cast< ::MPI_Datatype> (MPI_INT), local_leader, local_comm); 1027 MPI_Bcast(&remote_intercomm_size, 1, static_cast< ::MPI_Datatype> (MPI_INT), 0, *newintercomm); 1009 1028 1010 1029 … … 1021 1040 ::MPI_Status status; 1022 1041 1023 ::MPI_Send((*newintercomm).rank_map->data(), 2*local_intercomm_size, MPI_INT_STD, mpi_remote_leader, tag, peer_comm);1024 1025 ::MPI_Recv((*newintercomm).ep_comm_ptr->intercomm->intercomm_rank_map->data(), 2*remote_intercomm_size, MPI_INT_STD, mpi_remote_leader, tag, peer_comm, &status);1026 } 1027 1028 MPI_Bcast((*newintercomm).ep_comm_ptr->intercomm->intercomm_rank_map->data(), 2*remote_intercomm_size, MPI_INT_STD, 0, *newintercomm);1042 ::MPI_Send((*newintercomm).rank_map->data(), 2*local_intercomm_size, static_cast< ::MPI_Datatype> (MPI_INT), mpi_remote_leader, tag, peer_comm); 1043 1044 ::MPI_Recv((*newintercomm).ep_comm_ptr->intercomm->intercomm_rank_map->data(), 2*remote_intercomm_size, static_cast< ::MPI_Datatype> (MPI_INT), mpi_remote_leader, tag, peer_comm, &status); 1045 } 1046 1047 MPI_Bcast((*newintercomm).ep_comm_ptr->intercomm->intercomm_rank_map->data(), 2*remote_intercomm_size, static_cast< ::MPI_Datatype> (MPI_INT), 0, *newintercomm); 1029 1048 1030 1049 (*newintercomm).ep_comm_ptr->intercomm->local_comm = &(local_comm.ep_comm_ptr->comm_list[ep_rank_loc]); 1031 1050 (*newintercomm).ep_comm_ptr->intercomm->intercomm_tag = local_comm.ep_comm_ptr->comm_label; 1032 1051 1033 1034 // for(int i=0; i<local_ep_size; i++)1035 // if(local_comm.ep_comm_ptr->comm_label != 99) printf("ep_rank = %d, remote_rank_map[%d] = (%d,%d)\n", intercomm_ep_rank, i,1036 // (*newintercomm).ep_comm_ptr->intercomm->local_rank_map->at(i).first, (*newintercomm).ep_comm_ptr->intercomm->local_rank_map->at(i).second);1037 1038 // for(int i=0; i<remote_intercomm_size; i++)1039 // if(local_comm.ep_comm_ptr->comm_label != 99) printf("ep_rank = %d, intercomm_rank_map[%d] = (%d,%d)\n", intercomm_ep_rank, i,1040 // (*newintercomm).ep_comm_ptr->intercomm->intercomm_rank_map->at(i).first, (*newintercomm).ep_comm_ptr->intercomm->intercomm_rank_map->at(i).second);1041 1042 // for(int i=0; i<(*newintercomm).rank_map->size(); i++)1043 // if(local_comm.ep_comm_ptr->comm_label != 99) printf("ep_rank = %d, rank_map[%d] = (%d,%d)\n", intercomm_ep_rank, i,1044 // (*newintercomm).rank_map->at(i).first, (*newintercomm).rank_map->at(i).second);1045 1046 1047 1048 1049 1050 1052 return MPI_SUCCESS; 1051 1053 -
XIOS/dev/branch_openmp/extern/src_ep_dev/ep_lib.cpp
r1220 r1287 63 63 64 64 65 int innode_memcpy(int sender, const void* sendbuf, int receiver, void* recvbuf, int count, MPI_Datatype datatype, MPI_Comm comm)66 {67 int ep_rank, ep_rank_loc, mpi_rank;68 int ep_size, num_ep, mpi_size;69 70 ep_rank = comm.ep_comm_ptr->size_rank_info[0].first;71 ep_rank_loc = comm.ep_comm_ptr->size_rank_info[1].first;72 mpi_rank = comm.ep_comm_ptr->size_rank_info[2].first;73 ep_size = comm.ep_comm_ptr->size_rank_info[0].second;74 num_ep = comm.ep_comm_ptr->size_rank_info[1].second;75 mpi_size = comm.ep_comm_ptr->size_rank_info[2].second;76 77 78 79 if(datatype == MPI_INT)80 {81 82 int* send_buf = static_cast<int*>(const_cast<void*>(sendbuf));83 int* recv_buf = static_cast<int*>(recvbuf);84 int* buffer = comm.my_buffer->buf_int;85 86 for(int j=0; j<count; j+=BUFFER_SIZE)87 {88 if(ep_rank_loc == sender)89 {90 #pragma omp critical (write_to_buffer)91 {92 copy(send_buf+j, send_buf+j+min(BUFFER_SIZE, count-j), buffer);93 }94 #pragma omp flush95 }96 97 MPI_Barrier_local(comm);98 99 100 if(ep_rank_loc == receiver)101 {102 #pragma omp flush103 #pragma omp critical (read_from_buffer)104 {105 copy(buffer, buffer+min(BUFFER_SIZE, count-j), recv_buf+j);106 }107 }108 109 MPI_Barrier_local(comm);110 }111 }112 else if(datatype == MPI_FLOAT)113 {114 115 float* send_buf = static_cast<float*>(const_cast<void*>(sendbuf));116 float* recv_buf = static_cast<float*>(recvbuf);117 float* buffer = comm.my_buffer->buf_float;118 119 for(int j=0; j<count; j+=BUFFER_SIZE)120 {121 if(ep_rank_loc == sender)122 {123 #pragma omp critical (write_to_buffer)124 {125 copy(send_buf+j, send_buf+j+min(BUFFER_SIZE, count-j), buffer);126 }127 #pragma omp flush128 }129 130 MPI_Barrier_local(comm);131 132 133 if(ep_rank_loc == receiver)134 {135 #pragma omp flush136 #pragma omp critical (read_from_buffer)137 {138 copy(buffer, buffer+min(BUFFER_SIZE, count-j), recv_buf+j);139 }140 }141 142 MPI_Barrier_local(comm);143 }144 }145 else if(datatype == MPI_DOUBLE)146 {147 148 149 double* send_buf = static_cast<double*>(const_cast<void*>(sendbuf));150 double* recv_buf = static_cast<double*>(recvbuf);151 double* buffer = comm.my_buffer->buf_double;152 153 for(int j=0; j<count; j+=BUFFER_SIZE)154 {155 if(ep_rank_loc == sender)156 {157 #pragma omp critical (write_to_buffer)158 {159 copy(send_buf+j, send_buf+j+min(BUFFER_SIZE, count-j), buffer);160 }161 #pragma omp flush162 }163 164 MPI_Barrier_local(comm);165 166 167 if(ep_rank_loc == receiver)168 {169 #pragma omp flush170 #pragma omp critical (read_from_buffer)171 {172 copy(buffer, buffer+min(BUFFER_SIZE, count-j), recv_buf+j);173 }174 }175 176 MPI_Barrier_local(comm);177 }178 }179 else if(datatype == MPI_LONG)180 {181 long* send_buf = static_cast<long*>(const_cast<void*>(sendbuf));182 long* recv_buf = static_cast<long*>(recvbuf);183 long* buffer = comm.my_buffer->buf_long;184 185 for(int j=0; j<count; j+=BUFFER_SIZE)186 {187 if(ep_rank_loc == sender)188 {189 #pragma omp critical (write_to_buffer)190 {191 copy(send_buf+j, send_buf+j+min(BUFFER_SIZE, count-j), buffer);192 }193 #pragma omp flush194 }195 196 MPI_Barrier_local(comm);197 198 199 if(ep_rank_loc == receiver)200 {201 #pragma omp flush202 #pragma omp critical (read_from_buffer)203 {204 copy(buffer, buffer+min(BUFFER_SIZE, count-j), recv_buf+j);205 }206 }207 208 MPI_Barrier_local(comm);209 }210 }211 else if(datatype == MPI_UNSIGNED_LONG)212 {213 unsigned long* send_buf = static_cast<unsigned long*>(const_cast<void*>(sendbuf));214 unsigned long* recv_buf = static_cast<unsigned long*>(recvbuf);215 unsigned long* buffer = comm.my_buffer->buf_ulong;216 217 for(int j=0; j<count; j+=BUFFER_SIZE)218 {219 if(ep_rank_loc == sender)220 {221 #pragma omp critical (write_to_buffer)222 {223 copy(send_buf+j, send_buf+j+min(BUFFER_SIZE, count-j), buffer);224 }225 #pragma omp flush226 }227 228 MPI_Barrier_local(comm);229 230 231 if(ep_rank_loc == receiver)232 {233 #pragma omp flush234 #pragma omp critical (read_from_buffer)235 {236 copy(buffer, buffer+min(BUFFER_SIZE, count-j), recv_buf+j);237 }238 }239 240 MPI_Barrier_local(comm);241 }242 }243 else if(datatype == MPI_CHAR)244 {245 char* send_buf = static_cast<char*>(const_cast<void*>(sendbuf));246 char* recv_buf = static_cast<char*>(recvbuf);247 char* buffer = comm.my_buffer->buf_char;248 249 for(int j=0; j<count; j+=BUFFER_SIZE)250 {251 if(ep_rank_loc == sender)252 {253 #pragma omp critical (write_to_buffer)254 {255 copy(send_buf+j, send_buf+j+min(BUFFER_SIZE, count-j), buffer);256 }257 #pragma omp flush258 }259 260 MPI_Barrier_local(comm);261 262 263 if(ep_rank_loc == receiver)264 {265 #pragma omp flush266 #pragma omp critical (read_from_buffer)267 {268 copy(buffer, buffer+min(BUFFER_SIZE, count-j), recv_buf+j);269 }270 }271 272 MPI_Barrier_local(comm);273 }274 }275 else276 {277 printf("datatype not supported!!\n");278 exit(1);279 }280 return 0;281 }282 283 284 65 int MPI_Get_count(const MPI_Status *status, MPI_Datatype datatype, int *count) 285 66 { 286 /* 287 ::MPI_Aint datasize, char_size, lb; 288 289 ::MPI_Type_get_extent(static_cast< ::MPI_Datatype>(datatype), &lb, &datasize); 290 ::MPI_Type_get_extent(MPI_CHAR, &lb, &char_size); 291 292 *count = status->char_count / ( datasize/ char_size); 293 294 //printf("MPI_Get_count, status_count = %d\n", *count); 295 return 0; 296 */ 67 297 68 ::MPI_Status *mpi_status = static_cast< ::MPI_Status* >(status->mpi_status); 298 69 ::MPI_Datatype mpi_datatype = static_cast< ::MPI_Datatype >(datatype); … … 314 85 315 86 ::MPI_Type_get_extent(static_cast< ::MPI_Datatype>(datatype), &lb, &datasize); 316 ::MPI_Type_get_extent( MPI_CHAR_STD, &lb, &intsize);87 ::MPI_Type_get_extent(static_cast< ::MPI_Datatype> (MPI_CHAR), &lb, &intsize); 317 88 318 89 int_count = count * datasize / intsize ; … … 346 117 347 118 ::MPI_Type_get_extent(static_cast< ::MPI_Datatype>(datatype), &lb, &datasize); 348 ::MPI_Type_get_extent( MPI_CHAR_STD, &lb, &intsize);119 ::MPI_Type_get_extent(static_cast< ::MPI_Datatype> (MPI_CHAR), &lb, &intsize); 349 120 350 121 int_count = count * datasize / intsize ; … … 424 195 } 425 196 197 bool valid_type(MPI_Datatype datatype) 198 { 199 if( datatype == MPI_INT 200 || datatype == MPI_FLOAT 201 || datatype == MPI_DOUBLE 202 || datatype == MPI_CHAR 203 || datatype == MPI_LONG 204 || datatype == MPI_UNSIGNED_LONG) 205 { 206 return true; 207 } 208 209 return false; 210 } 211 212 213 bool valid_op(MPI_Op op) 214 { 215 if( op == MPI_MAX 216 || op == MPI_MIN 217 || op == MPI_SUM) 218 { 219 return true; 220 } 221 222 return false; 223 } 224 225 426 226 } 427 227 428 228 429 430 431 432 433 229 MPI_Datatype to_mpi_type(ep_lib::MPI_Datatype type) 230 { 231 return static_cast< MPI_Datatype >(type); 232 } 233 234 MPI_Op to_mpi_op(ep_lib::MPI_Op op) 235 { 236 return static_cast< MPI_Op >(op); 237 } 238 239 MPI_Comm to_mpi_comm(int comm) 240 { 241 return static_cast< MPI_Comm >(comm); 242 } 243 244 245 246 247 248 -
XIOS/dev/branch_openmp/extern/src_ep_dev/ep_lib.hpp
r1196 r1287 8 8 #include "ep_tag.hpp" 9 9 #include "ep_lib_fortran.hpp" 10 11 10 12 11 namespace ep_lib … … 87 86 void check_sum_send(const void *buf, int count, MPI_Datatype datatype, int dest, int tag, MPI_Comm comm, int type); 88 87 void check_sum_recv(void *buf, int count, MPI_Datatype datatype, int src, int tag, MPI_Comm comm, int type); 88 89 bool valid_type(MPI_Datatype type); 90 bool valid_op(MPI_Op op); 91 89 92 } 90 93 94 //MPI_Datatype to_mpi(ep_lib::MPI_Datatype type); 91 95 92 96 -
XIOS/dev/branch_openmp/extern/src_ep_dev/ep_lib_collective.hpp
r1164 r1287 17 17 18 18 int MPI_Reduce(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, MPI_Comm comm); 19 19 20 int MPI_Allreduce(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm); 20 21 … … 22 23 23 24 int MPI_Scan(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm); 25 24 26 int MPI_Exscan(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm); 25 27 … … 31 33 int MPI_Gatherv(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, const int recvcounts[], const int displs[], 32 34 MPI_Datatype recvtype, int root, MPI_Comm comm); 33 int MPI_Gatherv_special(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, const int recvcounts[], const int displs[],34 MPI_Datatype recvtype, int root, MPI_Comm comm);35 35 int MPI_Allgatherv(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, const int recvcounts[], const int displs[], 36 36 MPI_Datatype recvtype, MPI_Comm comm); 37 int MPI_Allgatherv_special(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, const int recvcounts[], const int displs[],38 MPI_Datatype recvtype, MPI_Comm comm);39 37 40 38 41 39 int MPI_Scatter(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPI_Comm comm); 40 42 41 int MPI_Scatterv(const void *sendbuf, const int sendcounts[], const int displs[], MPI_Datatype sendtype, void *recvbuf, int recvcount, 43 42 MPI_Datatype recvtype, int root, MPI_Comm comm); -
XIOS/dev/branch_openmp/extern/src_ep_dev/ep_lib_fortran.hpp
r1134 r1287 6 6 namespace ep_lib 7 7 { 8 9 // #ifdef _intelmpi10 11 // MPI_Fint MPI_Comm_c2f(MPI_Comm comm);12 // MPI_Comm MPI_Comm_f2c(MPI_Fint comm);13 14 // #elif _openmpi15 16 // int MPI_Comm_c2f(MPI_Comm comm);17 // ep_lib::MPI_Comm MPI_Comm_f2c(MPI_Fint comm);18 19 // #endif20 8 21 9 int EP_Comm_c2f(MPI_Comm comm); -
XIOS/dev/branch_openmp/extern/src_ep_dev/ep_lib_local.hpp
r1134 r1287 12 12 #endif 13 13 14 15 int MPI_Reduce_local (const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm); 16 int MPI_Reduce_local_int (const void *sendbuf, void *recvbuf, int count, MPI_Op op, MPI_Comm comm); 17 int MPI_Reduce_local_float (const void *sendbuf, void *recvbuf, int count, MPI_Op op, MPI_Comm comm); 18 int MPI_Reduce_local_double(const void *sendbuf, void *recvbuf, int count, MPI_Op op, MPI_Comm comm); 19 int MPI_Reduce_local_long (const void *sendbuf, void *recvbuf, int count, MPI_Op op, MPI_Comm comm); 20 int MPI_Reduce_local_ulong (const void *sendbuf, void *recvbuf, int count, MPI_Op op, MPI_Comm comm); 21 int MPI_Reduce_local_char (const void *sendbuf, void *recvbuf, int count, MPI_Op op, MPI_Comm comm); 22 14 int MPI_Reduce_local (const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int local_root, MPI_Comm comm); 23 15 24 16 int MPI_Scan_local (const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm); 25 int MPI_Scan_local_int (const void *sendbuf, void *recvbuf, int count, MPI_Op op, MPI_Comm comm);26 int MPI_Scan_local_float (const void *sendbuf, void *recvbuf, int count, MPI_Op op, MPI_Comm comm);27 int MPI_Scan_local_double(const void *sendbuf, void *recvbuf, int count, MPI_Op op, MPI_Comm comm);28 int MPI_Scan_local_long (const void *sendbuf, void *recvbuf, int count, MPI_Op op, MPI_Comm comm);29 int MPI_Scan_local_ulong (const void *sendbuf, void *recvbuf, int count, MPI_Op op, MPI_Comm comm);30 int MPI_Scan_local_char (const void *sendbuf, void *recvbuf, int count, MPI_Op op, MPI_Comm comm);31 17 32 18 int MPI_Exscan_local (const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm); 33 int MPI_Exscan_local_int (const void *sendbuf, void *recvbuf, int count, MPI_Op op, MPI_Comm comm);34 int MPI_Exscan_local_float (const void *sendbuf, void *recvbuf, int count, MPI_Op op, MPI_Comm comm);35 int MPI_Exscan_local_double(const void *sendbuf, void *recvbuf, int count, MPI_Op op, MPI_Comm comm);36 int MPI_Exscan_local_long (const void *sendbuf, void *recvbuf, int count, MPI_Op op, MPI_Comm comm);37 int MPI_Exscan_local_ulong (const void *sendbuf, void *recvbuf, int count, MPI_Op op, MPI_Comm comm);38 int MPI_Exscan_local_char (const void *sendbuf, void *recvbuf, int count, MPI_Op op, MPI_Comm comm);39 19 40 int MPI_Bcast_local (void *buffer, int count, MPI_Datatype datatype, MPI_Comm comm); 41 int MPI_Bcast_local_int (void *buffer, int count, MPI_Comm comm); 42 int MPI_Bcast_local_float (void *buffer, int count, MPI_Comm comm); 43 int MPI_Bcast_local_double(void *buffer, int count, MPI_Comm comm); 44 int MPI_Bcast_local_long (void *buffer, int count, MPI_Comm comm); 45 int MPI_Bcast_local_ulong (void *buffer, int count, MPI_Comm comm); 46 int MPI_Bcast_local_char (void *buffer, int count, MPI_Comm comm); 47 48 int MPI_Gather_local (const void *sendbuf, int count, MPI_Datatype datatype, void *recvbuf, MPI_Comm comm); 49 int MPI_Gather_local_int (const void *sendbuf, int count, void *recvbuf, MPI_Comm comm); 50 int MPI_Gather_local_float (const void *sendbuf, int count, void *recvbuf, MPI_Comm comm); 51 int MPI_Gather_local_double(const void *sendbuf, int count, void *recvbuf, MPI_Comm comm); 52 int MPI_Gather_local_long (const void *sendbuf, int count, void *recvbuf, MPI_Comm comm); 53 int MPI_Gather_local_ulong (const void *sendbuf, int count, void *recvbuf, MPI_Comm comm); 54 int MPI_Gather_local_char (const void *sendbuf, int count, void *recvbuf, MPI_Comm comm); 55 20 int MPI_Bcast_local (void *buffer, int count, MPI_Datatype datatype, int local_root, MPI_Comm comm); 21 22 int MPI_Gather_local (const void *sendbuf, int count, MPI_Datatype datatype, void *recvbuf, int local_root, MPI_Comm comm); 56 23 57 24 int MPI_Gatherv_local (const void *sendbuf, int count, MPI_Datatype datatype, void *recvbuf, 58 const int recvcounts[], const int displs[], MPI_Comm comm); 59 int MPI_Gatherv_local_int (const void *sendbuf, int count, void *recvbuf, const int recvcounts[], const int displs[], MPI_Comm comm); 60 int MPI_Gatherv_local_float (const void *sendbuf, int count, void *recvbuf, const int recvcounts[], const int displs[], MPI_Comm comm); 61 int MPI_Gatherv_local_double(const void *sendbuf, int count, void *recvbuf, const int recvcounts[], const int displs[], MPI_Comm comm); 62 int MPI_Gatherv_local_long (const void *sendbuf, int count, void *recvbuf, const int recvcounts[], const int displs[], MPI_Comm comm); 63 int MPI_Gatherv_local_ulong (const void *sendbuf, int count, void *recvbuf, const int recvcounts[], const int displs[], MPI_Comm comm); 64 int MPI_Gatherv_local_char (const void *sendbuf, int count, void *recvbuf, const int recvcounts[], const int displs[], MPI_Comm comm); 25 const int recvcounts[], const int displs[], int local_root, MPI_Comm comm); 65 26 66 int MPI_Scatter_local (const void *sendbuf, int count, MPI_Datatype datatype, void *recvbuf, MPI_Comm comm); 67 int MPI_Scatter_local_int (const void *sendbuf, int count, void *recvbuf, MPI_Comm comm); 68 int MPI_Scatter_local_float (const void *sendbuf, int count, void *recvbuf, MPI_Comm comm); 69 int MPI_Scatter_local_double(const void *sendbuf, int count, void *recvbuf, MPI_Comm comm); 70 int MPI_Scatter_local_long (const void *sendbuf, int count, void *recvbuf, MPI_Comm comm); 71 int MPI_Scatter_local_ulong (const void *sendbuf, int count, void *recvbuf, MPI_Comm comm); 72 int MPI_Scatter_local_char (const void *sendbuf, int count, void *recvbuf, MPI_Comm comm); 27 int MPI_Scatter_local(void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, int local_root, MPI_Comm comm); 73 28 74 int MPI_Scatterv_local (const void *sendbuf, const int sendcounts[], const int displs[], MPI_Datatype datatype, void *recvbuf, MPI_Comm comm); 75 int MPI_Scatterv_local_int (const void *sendbuf, const int sendcounts[], const int displs[], void *recvbuf, MPI_Comm comm); 76 int MPI_Scatterv_local_float (const void *sendbuf, const int sendcounts[], const int displs[], void *recvbuf, MPI_Comm comm); 77 int MPI_Scatterv_local_double(const void *sendbuf, const int sendcounts[], const int displs[], void *recvbuf, MPI_Comm comm); 78 int MPI_Scatterv_local_long (const void *sendbuf, const int sendcounts[], const int displs[], void *recvbuf, MPI_Comm comm); 79 int MPI_Scatterv_local_ulong (const void *sendbuf, const int sendcounts[], const int displs[], void *recvbuf, MPI_Comm comm); 80 int MPI_Scatterv_local_char (const void *sendbuf, const int sendcounts[], const int displs[], void *recvbuf, MPI_Comm comm); 81 82 int innode_memcpy(int sender, const void* sendbuf, int receiver, void* recvbuf, int count, MPI_Datatype datatype, MPI_Comm comm); 29 int MPI_Scatterv_local(const void *sendbuf, const int sendcounts[], const int displs[], MPI_Datatype sendtype, void *recvbuf, int recvcount, 30 MPI_Datatype recvtype, int local_root, MPI_Comm comm); 83 31 84 32 int MPI_Barrier_local(MPI_Comm comm); -
XIOS/dev/branch_openmp/extern/src_ep_dev/ep_memory.cpp
r1138 r1287 10 10 int MPI_Alloc_mem(MPI_Aint size, MPI_Info info, void *baseptr) 11 11 { 12 //::MPI_Alloc_mem(size.mpi_aint, static_cast< ::MPI_Info>(info.mpi_info), baseptr);13 ::MPI_Alloc_mem(size.mpi_aint, MPI_INFO_NULL_STD, baseptr);12 ::MPI_Alloc_mem(size.mpi_aint, static_cast< ::MPI_Info>(info.mpi_info), baseptr); 13 //::MPI_Alloc_mem(size.mpi_aint, MPI_INFO_NULL_STD, baseptr); 14 14 return 0; 15 15 } … … 17 17 int MPI_Alloc_mem(unsigned long size, MPI_Info info, void *baseptr) 18 18 { 19 //::MPI_Alloc_mem(size, static_cast< ::MPI_Info>(info.mpi_info), baseptr);20 ::MPI_Alloc_mem(size, MPI_INFO_NULL_STD, baseptr);19 ::MPI_Alloc_mem(size, static_cast< ::MPI_Info>(info.mpi_info), baseptr); 20 //::MPI_Alloc_mem(size, MPI_INFO_NULL_STD, baseptr); 21 21 return 0; 22 22 } -
XIOS/dev/branch_openmp/extern/src_ep_dev/ep_merge.cpp
r1134 r1287 42 42 if(local_ep_rank == 0) 43 43 { 44 MPI_Status status; 45 MPI_Request req_s, req_r; 46 MPI_Isend(&local_high, 1, MPI_INT, 0, inter_comm.ep_comm_ptr->intercomm->intercomm_tag, inter_comm, &req_s); 47 MPI_Irecv(&remote_high, 1, MPI_INT, 0, inter_comm.ep_comm_ptr->intercomm->intercomm_tag, inter_comm, &req_r); 48 49 MPI_Wait(&req_s, &status); 50 MPI_Wait(&req_r, &status); 44 MPI_Status status[2]; 45 MPI_Request request[2]; 46 MPI_Isend(&local_high, 1, MPI_INT, 0, inter_comm.ep_comm_ptr->intercomm->intercomm_tag, inter_comm, &request[0]); 47 MPI_Irecv(&remote_high, 1, MPI_INT, 0, inter_comm.ep_comm_ptr->intercomm->intercomm_tag, inter_comm, &request[1]); 48 49 MPI_Waitall(2, request, status); 51 50 } 52 51 … … 54 53 MPI_Bcast(&remote_high, 1, MPI_INT, 0, *(inter_comm.ep_comm_ptr->intercomm->local_comm)); 55 54 56 // printf("%d, %d, %d, %d\n", local_ep_size, remote_ep_size, local_high, remote_high);57 55 58 56 … … 86 84 if(intra_ep_rank_loc == 0) 87 85 { 88 ::MPI_Bcast(reorder, intra_ep_size, MPI_INT_STD, 0, static_cast< ::MPI_Comm>(newintracomm->mpi_comm));86 ::MPI_Bcast(reorder, intra_ep_size, static_cast< ::MPI_Datatype> (MPI_INT), 0, static_cast< ::MPI_Comm>(newintracomm->mpi_comm)); 89 87 90 88 vector< pair<int, int> > tmp_rank_map(intra_ep_size); … … 130 128 Debug("intercomm_merge kernel\n"); 131 129 132 int ep_rank, ep_rank_loc, mpi_rank; 133 int ep_size, num_ep, mpi_size; 134 135 ep_rank = inter_comm.ep_comm_ptr->size_rank_info[0].first; 130 int ep_rank_loc; 131 int num_ep; 132 136 133 ep_rank_loc = inter_comm.ep_comm_ptr->size_rank_info[1].first; 137 mpi_rank = inter_comm.ep_comm_ptr->size_rank_info[2].first;138 ep_size = inter_comm.ep_comm_ptr->size_rank_info[0].second;139 134 num_ep = inter_comm.ep_comm_ptr->size_rank_info[1].second; 140 mpi_size = inter_comm.ep_comm_ptr->size_rank_info[2].second; 141 142 143 int local_ep_rank, local_ep_rank_loc, local_mpi_rank; 144 int local_ep_size, local_num_ep, local_mpi_size; 145 146 147 local_ep_rank = inter_comm.ep_comm_ptr->intercomm->local_comm->ep_comm_ptr->size_rank_info[0].first; 148 local_ep_rank_loc = inter_comm.ep_comm_ptr->intercomm->local_comm->ep_comm_ptr->size_rank_info[1].first; 149 local_mpi_rank = inter_comm.ep_comm_ptr->intercomm->local_comm->ep_comm_ptr->size_rank_info[2].first; 150 local_ep_size = inter_comm.ep_comm_ptr->intercomm->local_comm->ep_comm_ptr->size_rank_info[0].second; 151 local_num_ep = inter_comm.ep_comm_ptr->intercomm->local_comm->ep_comm_ptr->size_rank_info[1].second; 152 local_mpi_size = inter_comm.ep_comm_ptr->intercomm->local_comm->ep_comm_ptr->size_rank_info[2].second; 135 136 153 137 154 138 int remote_ep_size = inter_comm.ep_comm_ptr->intercomm->remote_rank_map->size(); 155 139 156 int local_high = high;157 int remote_high;158 140 159 141 MPI_Barrier(inter_comm); 160 161 // if(local_ep_rank == 0 && high == false)162 // {163 // MPI_Status status;164 // MPI_Send(&local_high, 1, MPI_INT, 0, inter_comm.ep_comm_ptr->intercomm->intercomm_tag, inter_comm);165 // MPI_Recv(&remote_high, 1, MPI_INT, 0, inter_comm.ep_comm_ptr->intercomm->intercomm_tag, inter_comm, &status);166 // }167 //168 // if(local_ep_rank == 0 && high == true)169 // {170 // MPI_Status status;171 // MPI_Recv(&remote_high, 1, MPI_INT, 0, inter_comm.ep_comm_ptr->intercomm->intercomm_tag, inter_comm, &status);172 // MPI_Send(&local_high, 1, MPI_INT, 0, inter_comm.ep_comm_ptr->intercomm->intercomm_tag, inter_comm);173 // }174 175 if(local_ep_rank == 0)176 {177 MPI_Status status;178 MPI_Request req_s, req_r;179 MPI_Isend(&local_high, 1, MPI_INT, 0, inter_comm.ep_comm_ptr->intercomm->intercomm_tag, inter_comm, &req_s);180 MPI_Irecv(&remote_high, 1, MPI_INT, 0, inter_comm.ep_comm_ptr->intercomm->intercomm_tag, inter_comm, &req_r);181 182 MPI_Wait(&req_s, &status);183 MPI_Wait(&req_r, &status);184 }185 186 MPI_Bcast(&remote_high, 1, MPI_INT, 0, *(inter_comm.ep_comm_ptr->intercomm->local_comm));187 188 int intercomm_high;189 if(ep_rank == 0) intercomm_high = local_high;190 MPI_Bcast(&intercomm_high, 1, MPI_INT, 0, inter_comm);191 192 //printf("remote_ep_size = %d, local_high = %d, remote_high = %d, intercomm_high = %d\n", remote_ep_size, local_high, remote_high, intercomm_high);193 142 194 143 … … 201 150 ::MPI_Comm mpi_comm = static_cast< ::MPI_Comm>(inter_comm.ep_comm_ptr->intercomm->mpi_inter_comm); 202 151 203 ::MPI_Intercomm_merge(mpi_comm, intercomm_high, &mpi_intracomm);152 ::MPI_Intercomm_merge(mpi_comm, high, &mpi_intracomm); 204 153 MPI_Info info; 205 154 MPI_Comm_create_endpoints(mpi_intracomm, num_ep, info, ep_intracomm); … … 209 158 } 210 159 211 212 213 160 MPI_Barrier_local(inter_comm); 214 161 215 *newintracomm = inter_comm.ep_comm_ptr->comm_list->mem_bridge[ep_rank_loc]; 216 217 int my_ep_rank = local_high<remote_high? local_ep_rank: local_ep_rank+remote_ep_size; 218 219 int intra_ep_rank, intra_ep_rank_loc, intra_mpi_rank; 220 int intra_ep_size, intra_num_ep, intra_mpi_size; 221 222 intra_ep_rank = newintracomm->ep_comm_ptr->size_rank_info[0].first; 223 intra_ep_rank_loc = newintracomm->ep_comm_ptr->size_rank_info[1].first; 224 intra_mpi_rank = newintracomm->ep_comm_ptr->size_rank_info[2].first; 225 intra_ep_size = newintracomm->ep_comm_ptr->size_rank_info[0].second; 226 intra_num_ep = newintracomm->ep_comm_ptr->size_rank_info[1].second; 227 intra_mpi_size = newintracomm->ep_comm_ptr->size_rank_info[2].second; 228 229 230 231 MPI_Barrier_local(*newintracomm); 232 233 234 int *reorder; 235 if(intra_ep_rank_loc == 0) 236 { 237 reorder = new int[intra_ep_size]; 238 } 239 240 241 242 MPI_Gather(&my_ep_rank, 1, MPI_INT, reorder, 1, MPI_INT, 0, *newintracomm); 243 if(intra_ep_rank_loc == 0) 244 { 245 246 ::MPI_Bcast(reorder, intra_ep_size, MPI_INT_STD, 0, static_cast< ::MPI_Comm>(newintracomm->mpi_comm)); 247 248 vector< pair<int, int> > tmp_rank_map(intra_ep_size); 249 250 251 for(int i=0; i<intra_ep_size; i++) 252 { 253 tmp_rank_map[reorder[i]] = newintracomm->rank_map->at(i) ; 254 } 255 256 newintracomm->rank_map->swap(tmp_rank_map); 257 258 tmp_rank_map.clear(); 259 } 260 261 MPI_Barrier_local(*newintracomm); 162 int inter_rank; 163 MPI_Comm_rank(inter_comm, &inter_rank); 164 165 int my_ep_rank = high? inter_rank+remote_ep_size : inter_rank; 166 int my_ep_rank_loc = inter_comm.ep_comm_ptr->intercomm->local_comm->ep_comm_ptr->size_rank_info[1].first; 167 int my_num_ep_loc = inter_comm.ep_comm_ptr->intercomm->local_comm->ep_comm_ptr->size_rank_info[1].second; 168 int my_num_ep_total = inter_comm.ep_comm_ptr->comm_list->mem_bridge[0].ep_comm_ptr->size_rank_info[1].second; 169 int my_ep_size = inter_comm.ep_comm_ptr->comm_list->mem_bridge[0].ep_comm_ptr->size_rank_info[0].second; 170 171 int tmp_intra_ep_rank_loc = high?my_ep_rank_loc+my_num_ep_total-my_num_ep_loc: my_ep_rank_loc; 172 173 174 *newintracomm = inter_comm.ep_comm_ptr->comm_list->mem_bridge[tmp_intra_ep_rank_loc]; 175 176 int newintracomm_ep_rank = (*newintracomm).ep_comm_ptr->size_rank_info[0].first; 177 int newintracomm_ep_rank_loc = (*newintracomm).ep_comm_ptr->size_rank_info[1].first; 178 int newintracomm_mpi_rank = (*newintracomm).ep_comm_ptr->size_rank_info[2].first; 179 int newintracomm_ep_size = (*newintracomm).ep_comm_ptr->size_rank_info[0].second; 180 int newintracomm_num_ep = (*newintracomm).ep_comm_ptr->size_rank_info[1].second; 181 int newintracomm_mpi_size = (*newintracomm).ep_comm_ptr->size_rank_info[2].second; 182 183 184 int buf[3]; 185 buf[0] = my_ep_rank; 186 buf[1] = tmp_intra_ep_rank_loc; 187 buf[2] = newintracomm->ep_comm_ptr->size_rank_info[2].first; 188 189 // printf("my_ep_rank = %d, tmp_intra_ep_rank_loc = %d, mpi_rank = %d\n", my_ep_rank, tmp_intra_ep_rank_loc, newintracomm->ep_comm_ptr->size_rank_info[2].first); 190 191 int *rankmap_buf; 192 rankmap_buf = new int [3*my_ep_size]; 193 194 MPI_Allgather(buf, 3, MPI_INT, rankmap_buf, 3, MPI_INT, *newintracomm); 195 196 197 // printf(" ID = %d : rankmap_buf = (%d %d %d), (%d %d %d), (%d %d %d), (%d %d %d), (%d %d %d), (%d %d %d), (%d %d %d), (%d %d %d), (%d %d %d), (%d %d %d), (%d %d %d), (%d %d %d), (%d %d %d), (%d %d %d), (%d %d %d), (%d %d %d)\n", newintracomm_ep_rank, 198 // rankmap_buf[0], rankmap_buf[1], rankmap_buf[2], rankmap_buf[3], rankmap_buf[4], rankmap_buf[5], rankmap_buf[6], rankmap_buf[7], rankmap_buf[8], rankmap_buf[9], 199 // rankmap_buf[10], rankmap_buf[11], rankmap_buf[12], rankmap_buf[13], rankmap_buf[14], rankmap_buf[15], rankmap_buf[16], rankmap_buf[17], rankmap_buf[18], rankmap_buf[19], 200 // rankmap_buf[20], rankmap_buf[21], rankmap_buf[22], rankmap_buf[23], rankmap_buf[24], rankmap_buf[25], rankmap_buf[26], rankmap_buf[27], rankmap_buf[28], rankmap_buf[29], 201 // rankmap_buf[30], rankmap_buf[31], rankmap_buf[32], rankmap_buf[33], rankmap_buf[34], rankmap_buf[35], rankmap_buf[36], rankmap_buf[37], rankmap_buf[38], rankmap_buf[39], 202 // rankmap_buf[40], rankmap_buf[41], rankmap_buf[42], rankmap_buf[43], rankmap_buf[44], rankmap_buf[45], rankmap_buf[46], rankmap_buf[47]); 203 204 205 for(int i=0; i<newintracomm_ep_size; i++) 206 { 207 (*newintracomm).rank_map->at(rankmap_buf[3*i]).first = rankmap_buf[3*i+1]; 208 (*newintracomm).rank_map->at(rankmap_buf[3*i]).second = rankmap_buf[3*i+2]; 209 } 210 262 211 263 212 (*newintracomm).ep_comm_ptr->size_rank_info[0].first = my_ep_rank; 264 265 266 if(intra_ep_rank_loc == 0) 267 { 268 delete[] reorder; 269 270 } 271 272 /* 273 if(intra_ep_rank == 0) 274 { 275 for(int i=0; i<intra_ep_size; i++) 276 { 277 printf("intra rank_map[%d] = (%d, %d)\n", i, newintracomm->rank_map->at(i).first, newintracomm->rank_map->at(i).second); 278 } 279 } 280 */ 213 (*newintracomm).ep_comm_ptr->size_rank_info[1].first = tmp_intra_ep_rank_loc; 214 215 216 delete[] rankmap_buf; 217 218 281 219 return MPI_SUCCESS; 282 220 -
XIOS/dev/branch_openmp/extern/src_ep_dev/ep_message.cpp
r1220 r1287 9 9 #include <mpi.h> 10 10 #include "ep_declaration.hpp" 11 #include "ep_mpi.hpp" 11 12 12 13 using namespace std; … … 20 21 int Message_Check(MPI_Comm comm) 21 22 { 22 int myRank;23 MPI_Comm_rank(comm, &myRank);24 25 23 if(!comm.is_ep) return 0; 26 24 … … 38 36 { 39 37 Debug("Message probing for intracomm\n"); 40 ::MPI_Comm mpi_comm = static_cast< ::MPI_Comm> (comm.mpi_comm); 38 39 41 40 #ifdef _openmpi 42 41 #pragma omp critical (_mpi_call) 43 42 { 44 ::MPI_Iprobe(MPI_ANY_SOURCE, MPI_ANY_TAG, mpi_comm, &flag, &status);43 ::MPI_Iprobe(MPI_ANY_SOURCE, MPI_ANY_TAG, to_mpi_comm(comm.mpi_comm), &flag, &status); 45 44 if(flag) 46 45 { … … 48 47 mpi_source = status.MPI_SOURCE; 49 48 int tag = status.MPI_TAG; 50 ::MPI_Mprobe(mpi_source, tag, mpi_comm, &message, &status);49 ::MPI_Mprobe(mpi_source, tag, to_mpi_comm(comm.mpi_comm), &message, &status); 51 50 52 51 } 53 52 } 54 53 #elif _intelmpi 55 #pragma omp critical (_mpi_call) 56 ::MPI_Improbe(MPI_ANY_SOURCE, MPI_ANY_TAG, mpi_comm, &flag, &message, &status); 54 ::MPI_Improbe(MPI_ANY_SOURCE, MPI_ANY_TAG, to_mpi_comm(comm.mpi_comm), &flag, &message, &status); 57 55 #endif 58 56 … … 68 66 69 67 msg_block->ep_src = get_ep_rank(comm, src_loc, src_mpi); 70 int dest_mpi = comm.ep_comm_ptr->size_rank_info[2].first;71 int ep_dest = get_ep_rank(comm, dest_loc, dest_mpi);72 68 msg_block->mpi_status = new ::MPI_Status(status); 73 69 … … 79 75 { 80 76 #pragma omp flush 81 ptr_comm_target->ep_comm_ptr->message_queue->push_back(*msg_block); 82 77 comm.ep_comm_ptr->comm_list[dest_loc].ep_comm_ptr->message_queue->push_back(*msg_block); 83 78 #pragma omp flush 84 79 } … … 110 105 { 111 106 Debug("Message probing for intracomm\n"); 112 ::MPI_Comm mpi_comm = static_cast< ::MPI_Comm> (comm.ep_comm_ptr->intercomm->mpi_inter_comm); // => mpi_intercomm 113 107 114 108 #ifdef _openmpi 115 109 #pragma omp critical (_mpi_call) 116 110 { 117 ::MPI_Iprobe(MPI_ANY_SOURCE, MPI_ANY_TAG, mpi_comm, &flag, &status);111 ::MPI_Iprobe(MPI_ANY_SOURCE, MPI_ANY_TAG, to_mpi_comm(comm.ep_comm_ptr->intercomm->mpi_inter_comm), &flag, &status); 118 112 if(flag) 119 113 { … … 121 115 mpi_source = status.MPI_SOURCE; 122 116 int tag = status.MPI_TAG; 123 ::MPI_Mprobe(mpi_source, tag, mpi_comm, &message, &status);117 ::MPI_Mprobe(mpi_source, tag, to_mpi_comm(comm.ep_comm_ptr->intercomm->mpi_inter_comm), &message, &status); 124 118 125 119 } 126 120 } 127 121 #elif _intelmpi 128 #pragma omp critical (_mpi_call) 129 ::MPI_Improbe(MPI_ANY_SOURCE, MPI_ANY_TAG, mpi_comm, &flag, &message, &status); 122 ::MPI_Improbe(MPI_ANY_SOURCE, MPI_ANY_TAG, to_mpi_comm(comm.ep_comm_ptr->intercomm->mpi_inter_comm), &flag, &message, &status); 130 123 #endif 131 124 … … 153 146 { 154 147 #pragma omp flush 155 ptr_comm_target->ep_comm_ptr->message_queue->push_back(*msg_block);148 comm.ep_comm_ptr->comm_list[dest_loc].ep_comm_ptr->message_queue->push_back(*msg_block); 156 149 #pragma omp flush 157 150 } … … 167 160 { 168 161 Debug("Message probing for intracomm\n"); 169 ::MPI_Comm mpi_comm = static_cast< ::MPI_Comm> (comm.mpi_comm);162 170 163 #ifdef _openmpi 171 164 #pragma omp critical (_mpi_call) 172 165 { 173 ::MPI_Iprobe(MPI_ANY_SOURCE, MPI_ANY_TAG, mpi_comm, &flag, &status);166 ::MPI_Iprobe(MPI_ANY_SOURCE, MPI_ANY_TAG, to_mpi_comm(comm.mpi_comm), &flag, &status); 174 167 if(flag) 175 168 { … … 177 170 mpi_source = status.MPI_SOURCE; 178 171 int tag = status.MPI_TAG; 179 ::MPI_Mprobe(mpi_source, tag, mpi_comm, &message, &status);172 ::MPI_Mprobe(mpi_source, tag, to_mpi_comm(comm.mpi_comm), &message, &status); 180 173 181 174 } 182 175 } 183 176 #elif _intelmpi 184 #pragma omp critical (_mpi_call) 185 ::MPI_Improbe(MPI_ANY_SOURCE, MPI_ANY_TAG, mpi_comm, &flag, &message, &status); 177 ::MPI_Improbe(MPI_ANY_SOURCE, MPI_ANY_TAG, to_mpi_comm(comm.mpi_comm), &flag, &message, &status); 186 178 #endif 187 179 … … 196 188 int dest_loc = bitset<8> (status.MPI_TAG) .to_ulong(); 197 189 int src_mpi = status.MPI_SOURCE; 198 int current_inter = comm.ep_comm_ptr->intercomm->local_rank_map->at(current_ep_rank).first;199 190 200 191 msg_block->ep_src = get_ep_rank_intercomm(comm, src_loc, src_mpi); … … 209 200 { 210 201 #pragma omp flush 211 ptr_comm_target->ep_comm_ptr->message_queue->push_back(*msg_block);202 comm.ep_comm_ptr->comm_list[dest_loc].ep_comm_ptr->message_queue->push_back(*msg_block); 212 203 #pragma omp flush 213 204 } -
XIOS/dev/branch_openmp/extern/src_ep_dev/ep_recv.cpp
r1220 r1287 56 56 int dest_rank; 57 57 MPI_Comm_rank(comm, &dest_rank); 58 int world_rank; 59 MPI_Comm_rank(MPI_COMM_WORLD_STD, &world_rank); 58 60 59 61 60 if(!comm.is_ep) … … 71 70 } 72 71 73 request->mpi_request = MPI_REQUEST_NULL _STD;72 request->mpi_request = MPI_REQUEST_NULL.mpi_request; 74 73 request->buf = buf; 75 74 request->comm = comm; -
XIOS/dev/branch_openmp/extern/src_ep_dev/ep_reduce.cpp
r1134 r1287 9 9 #include <mpi.h> 10 10 #include "ep_declaration.hpp" 11 #include "ep_mpi.hpp" 11 12 12 13 using namespace std; … … 27 28 } 28 29 29 30 int MPI_Reduce_local(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm) 31 { 32 if(datatype == MPI_INT) 33 { 34 Debug("datatype is INT\n"); 35 return MPI_Reduce_local_int(sendbuf, recvbuf, count, op, comm); 36 } 37 else if(datatype == MPI_FLOAT) 38 { 39 Debug("datatype is FLOAT\n"); 40 return MPI_Reduce_local_float(sendbuf, recvbuf, count, op, comm); 41 } 42 else if(datatype == MPI_DOUBLE) 43 { 44 Debug("datatype is DOUBLE\n"); 45 return MPI_Reduce_local_double(sendbuf, recvbuf, count, op, comm); 46 } 47 else if(datatype == MPI_LONG) 48 { 49 Debug("datatype is DOUBLE\n"); 50 return MPI_Reduce_local_long(sendbuf, recvbuf, count, op, comm); 51 } 52 else if(datatype == MPI_UNSIGNED_LONG) 53 { 54 Debug("datatype is DOUBLE\n"); 55 return MPI_Reduce_local_ulong(sendbuf, recvbuf, count, op, comm); 56 } 57 else if(datatype == MPI_CHAR) 58 { 59 Debug("datatype is DOUBLE\n"); 60 return MPI_Reduce_local_char(sendbuf, recvbuf, count, op, comm); 61 } 62 else 63 { 64 printf("MPI_Reduce Datatype not supported!\n"); 65 exit(0); 66 } 67 } 68 69 70 int MPI_Reduce_local_int(const void *sendbuf, void *recvbuf, int count, MPI_Op op, MPI_Comm comm) 71 { 72 int my_rank = comm.ep_comm_ptr->size_rank_info[1].first; 73 int num_ep = comm.ep_comm_ptr->size_rank_info[1].second; 74 75 int *buffer = comm.my_buffer->buf_int; 76 int *send_buf = static_cast<int*>(const_cast<void*>(sendbuf)); 77 int *recv_buf = static_cast<int*>(const_cast<void*>(recvbuf)); 78 79 for(int j=0; j<count; j+=BUFFER_SIZE) 80 { 81 if( 0 == my_rank ) 30 template<typename T> 31 void reduce_max(const T * buffer, T* recvbuf, int count) 32 { 33 transform(buffer, buffer+count, recvbuf, recvbuf, max_op<T>); 34 } 35 36 template<typename T> 37 void reduce_min(const T * buffer, T* recvbuf, int count) 38 { 39 transform(buffer, buffer+count, recvbuf, recvbuf, min_op<T>); 40 } 41 42 template<typename T> 43 void reduce_sum(const T * buffer, T* recvbuf, int count) 44 { 45 transform(buffer, buffer+count, recvbuf, recvbuf, std::plus<T>()); 46 } 47 48 int MPI_Reduce_local(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int local_root, MPI_Comm comm) 49 { 50 assert(valid_type(datatype)); 51 assert(valid_op(op)); 52 53 ::MPI_Aint datasize, lb; 54 ::MPI_Type_get_extent(to_mpi_type(datatype), &lb, &datasize); 55 56 int ep_rank_loc = comm.ep_comm_ptr->size_rank_info[1].first; 57 int num_ep = comm.ep_comm_ptr->size_rank_info[1].second; 58 59 #pragma omp critical (_reduce) 60 comm.my_buffer->void_buffer[ep_rank_loc] = const_cast< void* >(sendbuf); 61 62 MPI_Barrier_local(comm); 63 64 if(ep_rank_loc == local_root) 65 { 66 memcpy(recvbuf, comm.my_buffer->void_buffer[0], datasize * count); 67 68 if(op == MPI_MAX) 82 69 { 83 #pragma omp critical (write_to_buffer) 84 copy(send_buf+j, send_buf+j + min(BUFFER_SIZE, count-j), buffer); 85 #pragma omp flush 70 if(datasize == sizeof(int)) 71 { 72 for(int i=1; i<num_ep; i++) 73 reduce_max<int>(static_cast<int*>(comm.my_buffer->void_buffer[i]), static_cast<int*>(recvbuf), count); 74 } 75 76 else if(datasize == sizeof(float)) 77 { 78 for(int i=1; i<num_ep; i++) 79 reduce_max<float>(static_cast<float*>(comm.my_buffer->void_buffer[i]), static_cast<float*>(recvbuf), count); 80 } 81 82 else if(datasize == sizeof(double)) 83 { 84 for(int i=1; i<num_ep; i++) 85 reduce_max<double>(static_cast<double*>(comm.my_buffer->void_buffer[i]), static_cast<double*>(recvbuf), count); 86 } 87 88 else if(datasize == sizeof(char)) 89 { 90 for(int i=1; i<num_ep; i++) 91 reduce_max<char>(static_cast<char*>(comm.my_buffer->void_buffer[i]), static_cast<char*>(recvbuf), count); 92 } 93 94 else if(datasize == sizeof(long)) 95 { 96 for(int i=1; i<num_ep; i++) 97 reduce_max<long>(static_cast<long*>(comm.my_buffer->void_buffer[i]), static_cast<long*>(recvbuf), count); 98 } 99 100 else if(datasize == sizeof(unsigned long)) 101 { 102 for(int i=1; i<num_ep; i++) 103 reduce_max<unsigned long>(static_cast<unsigned long*>(comm.my_buffer->void_buffer[i]), static_cast<unsigned long*>(recvbuf), count); 104 } 105 106 else printf("datatype Error\n"); 107 86 108 } 87 109 88 MPI_Barrier_local(comm); 89 90 if(my_rank !=0 ) 110 if(op == MPI_MIN) 91 111 { 92 #pragma omp critical (write_to_buffer) 93 { 94 #pragma omp flush 95 if(op == MPI_SUM) 96 { 97 transform(buffer, buffer+min(BUFFER_SIZE, count-j), send_buf+j, buffer, std::plus<int>()); 98 } 99 100 else if (op == MPI_MAX) 101 { 102 transform(buffer, buffer+min(BUFFER_SIZE, count-j), send_buf+j, buffer, max_op<int>); 103 } 104 105 else if (op == MPI_MIN) 106 { 107 transform(buffer, buffer+min(BUFFER_SIZE, count-j), send_buf+j, buffer, min_op<int>); 108 } 109 110 else 111 { 112 printf("Supported operation: MPI_SUM, MPI_MAX, MPI_MIN\n"); 113 exit(1); 114 } 115 #pragma omp flush 116 } 112 if(datasize == sizeof(int)) 113 { 114 for(int i=1; i<num_ep; i++) 115 reduce_min<int>(static_cast<int*>(comm.my_buffer->void_buffer[i]), static_cast<int*>(recvbuf), count); 116 } 117 118 else if(datasize == sizeof(float)) 119 { 120 for(int i=1; i<num_ep; i++) 121 reduce_min<float>(static_cast<float*>(comm.my_buffer->void_buffer[i]), static_cast<float*>(recvbuf), count); 122 } 123 124 else if(datasize == sizeof(double)) 125 { 126 for(int i=1; i<num_ep; i++) 127 reduce_min<double>(static_cast<double*>(comm.my_buffer->void_buffer[i]), static_cast<double*>(recvbuf), count); 128 } 129 130 else if(datasize == sizeof(char)) 131 { 132 for(int i=1; i<num_ep; i++) 133 reduce_min<char>(static_cast<char*>(comm.my_buffer->void_buffer[i]), static_cast<char*>(recvbuf), count); 134 } 135 136 else if(datasize == sizeof(long)) 137 { 138 for(int i=1; i<num_ep; i++) 139 reduce_min<long>(static_cast<long*>(comm.my_buffer->void_buffer[i]), static_cast<long*>(recvbuf), count); 140 } 141 142 else if(datasize == sizeof(unsigned long)) 143 { 144 for(int i=1; i<num_ep; i++) 145 reduce_min<unsigned long>(static_cast<unsigned long*>(comm.my_buffer->void_buffer[i]), static_cast<unsigned long*>(recvbuf), count); 146 } 147 148 else printf("datatype Error\n"); 149 117 150 } 118 151 119 MPI_Barrier_local(comm); 120 121 if(my_rank == 0) 152 153 if(op == MPI_SUM) 122 154 { 123 #pragma omp flush 124 copy(buffer, buffer+min(BUFFER_SIZE, count-j), recv_buf+j); 155 if(datasize == sizeof(int)) 156 { 157 for(int i=1; i<num_ep; i++) 158 reduce_sum<int>(static_cast<int*>(comm.my_buffer->void_buffer[i]), static_cast<int*>(recvbuf), count); 159 } 160 161 else if(datasize == sizeof(float)) 162 { 163 for(int i=1; i<num_ep; i++) 164 reduce_sum<float>(static_cast<float*>(comm.my_buffer->void_buffer[i]), static_cast<float*>(recvbuf), count); 165 } 166 167 else if(datasize == sizeof(double)) 168 { 169 for(int i=1; i<num_ep; i++) 170 reduce_sum<double>(static_cast<double*>(comm.my_buffer->void_buffer[i]), static_cast<double*>(recvbuf), count); 171 } 172 173 else if(datasize == sizeof(char)) 174 { 175 for(int i=1; i<num_ep; i++) 176 reduce_sum<char>(static_cast<char*>(comm.my_buffer->void_buffer[i]), static_cast<char*>(recvbuf), count); 177 } 178 179 else if(datasize == sizeof(long)) 180 { 181 for(int i=1; i<num_ep; i++) 182 reduce_sum<long>(static_cast<long*>(comm.my_buffer->void_buffer[i]), static_cast<long*>(recvbuf), count); 183 } 184 185 else if(datasize == sizeof(unsigned long)) 186 { 187 for(int i=1; i<num_ep; i++) 188 reduce_sum<unsigned long>(static_cast<unsigned long*>(comm.my_buffer->void_buffer[i]), static_cast<unsigned long*>(recvbuf), count); 189 } 190 191 else printf("datatype Error\n"); 192 125 193 } 126 MPI_Barrier_local(comm); 127 } 128 } 129 130 131 int MPI_Reduce_local_float(const void *sendbuf, void *recvbuf, int count, MPI_Op op, MPI_Comm comm) 132 { 133 int my_rank = comm.ep_comm_ptr->size_rank_info[1].first; 134 int num_ep = comm.ep_comm_ptr->size_rank_info[1].second; 135 136 float *buffer = comm.my_buffer->buf_float; 137 float *send_buf = static_cast<float*>(const_cast<void*>(sendbuf)); 138 float *recv_buf = static_cast<float*>(const_cast<void*>(recvbuf)); 139 140 for(int j=0; j<count; j+=BUFFER_SIZE) 141 { 142 if( 0 == my_rank ) 143 { 144 #pragma omp critical (write_to_buffer) 145 copy(send_buf+j, send_buf+j + min(BUFFER_SIZE, count-j), buffer); 146 #pragma omp flush 147 } 148 149 MPI_Barrier_local(comm); 150 151 if(my_rank !=0 ) 152 { 153 #pragma omp critical (write_to_buffer) 154 { 155 #pragma omp flush 156 157 if(op == MPI_SUM) 158 { 159 transform(buffer, buffer+min(BUFFER_SIZE, count-j), send_buf+j, buffer, std::plus<float>()); 160 } 161 162 else if (op == MPI_MAX) 163 { 164 transform(buffer, buffer+min(BUFFER_SIZE, count-j), send_buf+j, buffer, max_op<float>); 165 } 166 167 else if (op == MPI_MIN) 168 { 169 transform(buffer, buffer+min(BUFFER_SIZE, count-j), send_buf+j, buffer, min_op<float>); 170 } 171 172 else 173 { 174 printf("Supported operation: MPI_SUM, MPI_MAX, MPI_MIN\n"); 175 exit(1); 176 } 177 #pragma omp flush 178 } 179 } 180 181 MPI_Barrier_local(comm); 182 183 if(my_rank == 0) 184 { 185 #pragma omp flush 186 copy(buffer, buffer+min(BUFFER_SIZE, count-j), recv_buf+j); 187 } 188 MPI_Barrier_local(comm); 189 } 190 } 191 192 int MPI_Reduce_local_double(const void *sendbuf, void *recvbuf, int count, MPI_Op op, MPI_Comm comm) 193 { 194 int my_rank = comm.ep_comm_ptr->size_rank_info[1].first; 195 int num_ep = comm.ep_comm_ptr->size_rank_info[1].second; 196 197 double *buffer = comm.my_buffer->buf_double; 198 double *send_buf = static_cast<double*>(const_cast<void*>(sendbuf)); 199 double *recv_buf = static_cast<double*>(const_cast<void*>(recvbuf)); 200 201 for(int j=0; j<count; j+=BUFFER_SIZE) 202 { 203 if( 0 == my_rank ) 204 { 205 #pragma omp critical (write_to_buffer) 206 copy(send_buf+j, send_buf+j + min(BUFFER_SIZE, count-j), buffer); 207 #pragma omp flush 208 } 209 210 MPI_Barrier_local(comm); 211 212 if(my_rank !=0 ) 213 { 214 #pragma omp critical (write_to_buffer) 215 { 216 #pragma omp flush 217 218 219 if(op == MPI_SUM) 220 { 221 transform(buffer, buffer+min(BUFFER_SIZE, count-j), send_buf+j, buffer, std::plus<double>()); 222 } 223 224 else if (op == MPI_MAX) 225 { 226 transform(buffer, buffer+min(BUFFER_SIZE, count-j), send_buf+j, buffer, max_op<double>); 227 } 228 229 230 else if (op == MPI_MIN) 231 { 232 transform(buffer, buffer+min(BUFFER_SIZE, count-j), send_buf+j, buffer, min_op<double>); 233 } 234 235 else 236 { 237 printf("Supported operation: MPI_SUM, MPI_MAX, MPI_MIN\n"); 238 exit(1); 239 } 240 #pragma omp flush 241 } 242 } 243 244 MPI_Barrier_local(comm); 245 246 if(my_rank == 0) 247 { 248 #pragma omp flush 249 copy(buffer, buffer+min(BUFFER_SIZE, count-j), recv_buf+j); 250 } 251 MPI_Barrier_local(comm); 252 } 253 } 254 255 int MPI_Reduce_local_long(const void *sendbuf, void *recvbuf, int count, MPI_Op op, MPI_Comm comm) 256 { 257 int my_rank = comm.ep_comm_ptr->size_rank_info[1].first; 258 int num_ep = comm.ep_comm_ptr->size_rank_info[1].second; 259 260 long *buffer = comm.my_buffer->buf_long; 261 long *send_buf = static_cast<long*>(const_cast<void*>(sendbuf)); 262 long *recv_buf = static_cast<long*>(const_cast<void*>(recvbuf)); 263 264 for(int j=0; j<count; j+=BUFFER_SIZE) 265 { 266 if( 0 == my_rank ) 267 { 268 #pragma omp critical (write_to_buffer) 269 copy(send_buf+j, send_buf+j + min(BUFFER_SIZE, count-j), buffer); 270 #pragma omp flush 271 } 272 273 MPI_Barrier_local(comm); 274 275 if(my_rank !=0 ) 276 { 277 #pragma omp critical (write_to_buffer) 278 { 279 #pragma omp flush 280 281 282 if(op == MPI_SUM) 283 { 284 transform(buffer, buffer+min(BUFFER_SIZE, count-j), send_buf+j, buffer, std::plus<long>()); 285 } 286 287 else if (op == MPI_MAX) 288 { 289 transform(buffer, buffer+min(BUFFER_SIZE, count-j), send_buf+j, buffer, max_op<long>); 290 } 291 292 293 else if (op == MPI_MIN) 294 { 295 transform(buffer, buffer+min(BUFFER_SIZE, count-j), send_buf+j, buffer, min_op<long>); 296 } 297 298 else 299 { 300 printf("Supported operation: MPI_SUM, MPI_MAX, MPI_MIN\n"); 301 exit(1); 302 } 303 #pragma omp flush 304 } 305 } 306 307 MPI_Barrier_local(comm); 308 309 if(my_rank == 0) 310 { 311 #pragma omp flush 312 copy(buffer, buffer+min(BUFFER_SIZE, count-j), recv_buf+j); 313 } 314 MPI_Barrier_local(comm); 315 } 316 } 317 318 int MPI_Reduce_local_ulong(const void *sendbuf, void *recvbuf, int count, MPI_Op op, MPI_Comm comm) 319 { 320 int my_rank = comm.ep_comm_ptr->size_rank_info[1].first; 321 int num_ep = comm.ep_comm_ptr->size_rank_info[1].second; 322 323 unsigned long *buffer = comm.my_buffer->buf_ulong; 324 unsigned long *send_buf = static_cast<unsigned long*>(const_cast<void*>(sendbuf)); 325 unsigned long *recv_buf = static_cast<unsigned long*>(const_cast<void*>(recvbuf)); 326 327 for(int j=0; j<count; j+=BUFFER_SIZE) 328 { 329 if( 0 == my_rank ) 330 { 331 #pragma omp critical (write_to_buffer) 332 copy(send_buf+j, send_buf+j + min(BUFFER_SIZE, count-j), buffer); 333 #pragma omp flush 334 } 335 336 MPI_Barrier_local(comm); 337 338 if(my_rank !=0 ) 339 { 340 #pragma omp critical (write_to_buffer) 341 { 342 #pragma omp flush 343 344 345 if(op == MPI_SUM) 346 { 347 transform(buffer, buffer+min(BUFFER_SIZE, count-j), send_buf+j, buffer, std::plus<unsigned long>()); 348 } 349 350 else if (op == MPI_MAX) 351 { 352 transform(buffer, buffer+min(BUFFER_SIZE, count-j), send_buf+j, buffer, max_op<unsigned long>); 353 } 354 355 356 else if (op == MPI_MIN) 357 { 358 transform(buffer, buffer+min(BUFFER_SIZE, count-j), send_buf+j, buffer, min_op<unsigned long>); 359 } 360 361 else 362 { 363 printf("Supported operation: MPI_SUM, MPI_MAX, MPI_MIN\n"); 364 exit(1); 365 } 366 #pragma omp flush 367 } 368 } 369 370 MPI_Barrier_local(comm); 371 372 if(my_rank == 0) 373 { 374 #pragma omp flush 375 copy(buffer, buffer+min(BUFFER_SIZE, count-j), recv_buf+j); 376 } 377 MPI_Barrier_local(comm); 378 } 379 } 380 381 int MPI_Reduce_local_char(const void *sendbuf, void *recvbuf, int count, MPI_Op op, MPI_Comm comm) 382 { 383 int my_rank = comm.ep_comm_ptr->size_rank_info[1].first; 384 int num_ep = comm.ep_comm_ptr->size_rank_info[1].second; 385 386 char *buffer = comm.my_buffer->buf_char; 387 char *send_buf = static_cast<char*>(const_cast<void*>(sendbuf)); 388 char *recv_buf = static_cast<char*>(const_cast<void*>(recvbuf)); 389 390 for(int j=0; j<count; j+=BUFFER_SIZE) 391 { 392 if( 0 == my_rank ) 393 { 394 #pragma omp critical (write_to_buffer) 395 copy(send_buf+j, send_buf+j + min(BUFFER_SIZE, count-j), buffer); 396 #pragma omp flush 397 } 398 399 MPI_Barrier_local(comm); 400 401 if(my_rank !=0 ) 402 { 403 #pragma omp critical (write_to_buffer) 404 { 405 #pragma omp flush 406 407 408 if(op == MPI_SUM) 409 { 410 transform(buffer, buffer+min(BUFFER_SIZE, count-j), send_buf+j, buffer, std::plus<char>()); 411 } 412 413 else if (op == MPI_MAX) 414 { 415 transform(buffer, buffer+min(BUFFER_SIZE, count-j), send_buf+j, buffer, max_op<char>); 416 } 417 418 419 else if (op == MPI_MIN) 420 { 421 transform(buffer, buffer+min(BUFFER_SIZE, count-j), send_buf+j, buffer, min_op<char>); 422 } 423 424 else 425 { 426 printf("Supported operation: MPI_SUM, MPI_MAX, MPI_MIN\n"); 427 exit(1); 428 } 429 #pragma omp flush 430 } 431 } 432 433 MPI_Barrier_local(comm); 434 435 if(my_rank == 0) 436 { 437 #pragma omp flush 438 copy(buffer, buffer+min(BUFFER_SIZE, count-j), recv_buf+j); 439 } 440 MPI_Barrier_local(comm); 441 } 194 } 195 196 MPI_Barrier_local(comm); 197 442 198 } 443 199 … … 445 201 int MPI_Reduce(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, MPI_Comm comm) 446 202 { 203 447 204 if(!comm.is_ep && comm.mpi_comm) 448 205 { 449 ::MPI_Reduce(sendbuf, recvbuf, count, static_cast< ::MPI_Datatype>(datatype), static_cast< ::MPI_Op>(op), root, 450 static_cast< ::MPI_Comm>(comm.mpi_comm)); 451 return 0; 452 } 453 454 455 if(!comm.mpi_comm) return 0; 206 return ::MPI_Reduce(sendbuf, recvbuf, count, to_mpi_type(datatype), to_mpi_op(op), root, to_mpi_comm(comm.mpi_comm)); 207 } 208 209 210 211 int ep_rank = comm.ep_comm_ptr->size_rank_info[0].first; 212 int ep_rank_loc = comm.ep_comm_ptr->size_rank_info[1].first; 213 int mpi_rank = comm.ep_comm_ptr->size_rank_info[2].first; 214 int ep_size = comm.ep_comm_ptr->size_rank_info[0].second; 215 int num_ep = comm.ep_comm_ptr->size_rank_info[1].second; 216 int mpi_size = comm.ep_comm_ptr->size_rank_info[2].second; 456 217 457 218 int root_mpi_rank = comm.rank_map->at(root).second; 458 219 int root_ep_loc = comm.rank_map->at(root).first; 459 220 460 int ep_rank, ep_rank_loc, mpi_rank;461 int ep_size, num_ep, mpi_size;462 463 ep_rank = comm.ep_comm_ptr->size_rank_info[0].first;464 ep_rank_loc = comm.ep_comm_ptr->size_rank_info[1].first;465 mpi_rank = comm.ep_comm_ptr->size_rank_info[2].first;466 ep_size = comm.ep_comm_ptr->size_rank_info[0].second;467 num_ep = comm.ep_comm_ptr->size_rank_info[1].second;468 mpi_size = comm.ep_comm_ptr->size_rank_info[2].second;469 470 471 ::MPI_Aint recvsize, lb;472 473 ::MPI_Type_get_extent(static_cast< ::MPI_Datatype>(datatype), &lb, &recvsize);474 475 void *local_recvbuf;476 if(ep_rank_loc==0)477 {478 local_recvbuf = new void*[recvsize*count];479 }480 481 MPI_Reduce_local(sendbuf, local_recvbuf, count, datatype, op, comm);482 483 484 if(ep_rank_loc==0)485 {486 ::MPI_Reduce(local_recvbuf, recvbuf, count, static_cast< ::MPI_Datatype>(datatype), static_cast< ::MPI_Op>(op), root_mpi_rank, static_cast< ::MPI_Comm>(comm.mpi_comm));487 }488 489 if(root_ep_loc != 0 && mpi_rank == root_mpi_rank) // root is not master, master send to root and root receive from master490 {491 innode_memcpy(0, recvbuf, root_ep_loc, recvbuf, count, datatype, comm);492 }493 494 if(ep_rank_loc==0)495 {496 if(datatype == MPI_INT) delete[] static_cast<int*>(local_recvbuf);497 else if(datatype == MPI_FLOAT) delete[] static_cast<float*>(local_recvbuf);498 else if(datatype == MPI_DOUBLE) delete[] static_cast<double*>(local_recvbuf);499 else if(datatype == MPI_LONG) delete[] static_cast<long*>(local_recvbuf);500 else if(datatype == MPI_UNSIGNED_LONG) delete[] static_cast<unsigned long*>(local_recvbuf);501 else delete[] static_cast<char*>(local_recvbuf);502 }503 504 Message_Check(comm);505 506 return 0;507 }508 509 510 511 512 int MPI_Allreduce(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm)513 {514 if(!comm.is_ep && comm.mpi_comm)515 {516 ::MPI_Allreduce(sendbuf, recvbuf, count, static_cast< ::MPI_Datatype>(datatype), static_cast< ::MPI_Op>(op),517 static_cast< ::MPI_Comm>(comm.mpi_comm));518 return 0;519 }520 521 if(!comm.mpi_comm) return 0;522 523 524 int ep_rank, ep_rank_loc, mpi_rank;525 int ep_size, num_ep, mpi_size;526 527 ep_rank = comm.ep_comm_ptr->size_rank_info[0].first;528 ep_rank_loc = comm.ep_comm_ptr->size_rank_info[1].first;529 mpi_rank = comm.ep_comm_ptr->size_rank_info[2].first;530 ep_size = comm.ep_comm_ptr->size_rank_info[0].second;531 num_ep = comm.ep_comm_ptr->size_rank_info[1].second;532 mpi_size = comm.ep_comm_ptr->size_rank_info[2].second;533 534 535 ::MPI_Aint recvsize, lb;536 537 ::MPI_Type_get_extent(static_cast< ::MPI_Datatype>(datatype), &lb, &recvsize);538 539 void *local_recvbuf;540 if(ep_rank_loc==0)541 {542 local_recvbuf = new void*[recvsize*count];543 }544 545 MPI_Reduce_local(sendbuf, local_recvbuf, count, datatype, op, comm);546 547 548 if(ep_rank_loc==0)549 {550 ::MPI_Allreduce(local_recvbuf, recvbuf, count, static_cast< ::MPI_Datatype>(datatype), static_cast< ::MPI_Op>(op), static_cast< ::MPI_Comm>(comm.mpi_comm));551 }552 553 MPI_Bcast_local(recvbuf, count, datatype, comm);554 555 if(ep_rank_loc==0)556 {557 if(datatype == MPI_INT) delete[] static_cast<int*>(local_recvbuf);558 else if(datatype == MPI_FLOAT) delete[] static_cast<float*>(local_recvbuf);559 else if(datatype == MPI_DOUBLE) delete[] static_cast<double*>(local_recvbuf);560 else if(datatype == MPI_LONG) delete[] static_cast<long*>(local_recvbuf);561 else if(datatype == MPI_UNSIGNED_LONG) delete[] static_cast<unsigned long*>(local_recvbuf);562 else delete[] static_cast<char*>(local_recvbuf);563 }564 565 Message_Check(comm);566 567 return 0;568 }569 570 571 int MPI_Reduce_scatter(const void *sendbuf, void *recvbuf, const int recvcounts[], MPI_Datatype datatype, MPI_Op op, MPI_Comm comm)572 {573 574 if(!comm.is_ep && comm.mpi_comm)575 {576 ::MPI_Reduce_scatter(sendbuf, recvbuf, recvcounts, static_cast< ::MPI_Datatype>(datatype), static_cast< ::MPI_Op>(op),577 static_cast< ::MPI_Comm>(comm.mpi_comm));578 return 0;579 }580 581 if(!comm.mpi_comm) return 0;582 583 int ep_rank, ep_rank_loc, mpi_rank;584 int ep_size, num_ep, mpi_size;585 586 ep_rank = comm.ep_comm_ptr->size_rank_info[0].first;587 ep_rank_loc = comm.ep_comm_ptr->size_rank_info[1].first;588 mpi_rank = comm.ep_comm_ptr->size_rank_info[2].first;589 ep_size = comm.ep_comm_ptr->size_rank_info[0].second;590 num_ep = comm.ep_comm_ptr->size_rank_info[1].second;591 mpi_size = comm.ep_comm_ptr->size_rank_info[2].second;592 593 void* local_buf;594 void* local_buf2;595 int local_buf_size = accumulate(recvcounts, recvcounts+ep_size, 0);596 int local_buf2_size = accumulate(recvcounts+ep_rank-ep_rank_loc, recvcounts+ep_rank-ep_rank_loc+num_ep, 0);597 598 221 ::MPI_Aint datasize, lb; 599 222 600 223 ::MPI_Type_get_extent(static_cast< ::MPI_Datatype>(datatype), &lb, &datasize); 601 224 602 if(ep_rank_loc == 0) 603 { 604 local_buf = new void*[local_buf_size*datasize]; 605 local_buf2 = new void*[local_buf2_size*datasize]; 606 } 607 MPI_Reduce_local(sendbuf, local_buf, local_buf_size, MPI_INT, op, comm); 608 609 610 if(ep_rank_loc == 0) 611 { 612 int local_recvcnt[mpi_size]; 613 for(int i=0; i<mpi_size; i++) 614 { 615 local_recvcnt[i] = accumulate(recvcounts+ep_rank, recvcounts+ep_rank+num_ep, 0); 616 } 617 618 ::MPI_Reduce_scatter(local_buf, local_buf2, local_recvcnt, static_cast< ::MPI_Datatype>(datatype), 619 static_cast< ::MPI_Op>(op), static_cast< ::MPI_Comm>(comm.mpi_comm)); 620 } 621 622 623 int displs[num_ep]; 624 displs[0] = 0; 625 for(int i=1; i<num_ep; i++) 626 { 627 displs[i] = displs[i-1] + recvcounts[ep_rank-ep_rank_loc+i-1]; 628 } 629 630 MPI_Scatterv_local(local_buf2, recvcounts+ep_rank-ep_rank_loc, displs, datatype, recvbuf, comm); 631 632 if(ep_rank_loc == 0) 633 { 634 if(datatype == MPI_INT) 635 { 636 delete[] static_cast<int*>(local_buf); 637 delete[] static_cast<int*>(local_buf2); 638 } 639 else if(datatype == MPI_FLOAT) 640 { 641 delete[] static_cast<float*>(local_buf); 642 delete[] static_cast<float*>(local_buf2); 643 } 644 else if(datatype == MPI_DOUBLE) 645 { 646 delete[] static_cast<double*>(local_buf); 647 delete[] static_cast<double*>(local_buf2); 648 } 649 else if(datatype == MPI_LONG) 650 { 651 delete[] static_cast<long*>(local_buf); 652 delete[] static_cast<long*>(local_buf2); 653 } 654 else if(datatype == MPI_UNSIGNED_LONG) 655 { 656 delete[] static_cast<unsigned long*>(local_buf); 657 delete[] static_cast<unsigned long*>(local_buf2); 658 } 659 else // if(datatype == MPI_DOUBLE) 660 { 661 delete[] static_cast<char*>(local_buf); 662 delete[] static_cast<char*>(local_buf2); 663 } 664 } 665 666 Message_Check(comm); 667 return 0; 668 } 225 bool is_master = (ep_rank_loc==0 && mpi_rank != root_mpi_rank ) || ep_rank == root; 226 bool is_root = ep_rank == root; 227 228 void* local_recvbuf; 229 230 if(is_master) 231 { 232 local_recvbuf = new void*[datasize * count]; 233 } 234 235 if(mpi_rank == root_mpi_rank) MPI_Reduce_local(sendbuf, local_recvbuf, count, datatype, op, root_ep_loc, comm); 236 else MPI_Reduce_local(sendbuf, local_recvbuf, count, datatype, op, 0, comm); 237 238 239 240 if(is_master) 241 { 242 ::MPI_Reduce(local_recvbuf, recvbuf, count, to_mpi_type(datatype), to_mpi_op(op), root_mpi_rank, to_mpi_comm(comm.mpi_comm)); 243 244 } 245 246 if(is_master) 247 { 248 delete[] local_recvbuf; 249 } 250 251 MPI_Barrier_local(comm); 252 } 253 254 669 255 } 670 256 -
XIOS/dev/branch_openmp/extern/src_ep_dev/ep_scan.cpp
r1134 r1287 9 9 #include <mpi.h> 10 10 #include "ep_declaration.hpp" 11 #include "ep_mpi.hpp" 11 12 12 13 using namespace std; … … 26 27 } 27 28 29 template<typename T> 30 void reduce_max(const T * buffer, T* recvbuf, int count) 31 { 32 transform(buffer, buffer+count, recvbuf, recvbuf, max_op<T>); 33 } 34 35 template<typename T> 36 void reduce_min(const T * buffer, T* recvbuf, int count) 37 { 38 transform(buffer, buffer+count, recvbuf, recvbuf, min_op<T>); 39 } 40 41 template<typename T> 42 void reduce_sum(const T * buffer, T* recvbuf, int count) 43 { 44 transform(buffer, buffer+count, recvbuf, recvbuf, std::plus<T>()); 45 } 46 28 47 29 48 int MPI_Scan_local(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm) 30 49 { 31 if(datatype == MPI_INT) 32 { 33 return MPI_Scan_local_int(sendbuf, recvbuf, count, op, comm); 34 } 35 else if(datatype == MPI_FLOAT) 36 { 37 return MPI_Scan_local_float(sendbuf, recvbuf, count, op, comm); 38 } 39 else if(datatype == MPI_DOUBLE) 40 { 41 return MPI_Scan_local_double(sendbuf, recvbuf, count, op, comm); 42 } 43 else if(datatype == MPI_LONG) 44 { 45 return MPI_Scan_local_long(sendbuf, recvbuf, count, op, comm); 46 } 47 else if(datatype == MPI_UNSIGNED_LONG) 48 { 49 return MPI_Scan_local_ulong(sendbuf, recvbuf, count, op, comm); 50 } 51 else if(datatype == MPI_CHAR) 52 { 53 return MPI_Scan_local_char(sendbuf, recvbuf, count, op, comm); 50 valid_op(op); 51 52 int ep_rank_loc = comm.ep_comm_ptr->size_rank_info[1].first; 53 int num_ep = comm.ep_comm_ptr->size_rank_info[1].second; 54 int mpi_rank = comm.ep_comm_ptr->size_rank_info[2].first; 55 56 57 ::MPI_Aint datasize, lb; 58 ::MPI_Type_get_extent(to_mpi_type(datatype), &lb, &datasize); 59 60 if(ep_rank_loc == 0 && mpi_rank != 0) 61 { 62 if(op == MPI_SUM) 63 { 64 if(datatype == MPI_INT && datasize == sizeof(int)) 65 reduce_sum<int>(static_cast<int*>(const_cast<void*>(sendbuf)), static_cast<int*>(recvbuf), count); 66 67 else if(datatype == MPI_FLOAT && datasize == sizeof(float)) 68 reduce_sum<float>(static_cast<float*>(const_cast<void*>(sendbuf)), static_cast<float*>(recvbuf), count); 69 70 else if(datatype == MPI_DOUBLE && datasize == sizeof(double)) 71 reduce_sum<double>(static_cast<double*>(const_cast<void*>(sendbuf)), static_cast<double*>(recvbuf), count); 72 73 else if(datatype == MPI_CHAR && datasize == sizeof(char)) 74 reduce_sum<char>(static_cast<char*>(const_cast<void*>(sendbuf)), static_cast<char*>(recvbuf), count); 75 76 else if(datatype == MPI_LONG && datasize == sizeof(long)) 77 reduce_sum<long>(static_cast<long*>(const_cast<void*>(sendbuf)), static_cast<long*>(recvbuf), count); 78 79 else if(datatype == MPI_UNSIGNED_LONG && datasize == sizeof(unsigned long)) 80 reduce_sum<unsigned long>(static_cast<unsigned long*>(const_cast<void*>(sendbuf)), static_cast<unsigned long*>(recvbuf), count); 81 82 else printf("datatype Error\n"); 83 } 84 85 else if(op == MPI_MAX) 86 { 87 if(datatype == MPI_INT && datasize == sizeof(int)) 88 reduce_max<int>(static_cast<int*>(const_cast<void*>(sendbuf)), static_cast<int*>(recvbuf), count); 89 90 else if(datatype == MPI_FLOAT && datasize == sizeof(float)) 91 reduce_max<float>(static_cast<float*>(const_cast<void*>(sendbuf)), static_cast<float*>(recvbuf), count); 92 93 else if(datatype == MPI_DOUBLE && datasize == sizeof(double)) 94 reduce_max<double>(static_cast<double*>(const_cast<void*>(sendbuf)), static_cast<double*>(recvbuf), count); 95 96 else if(datatype == MPI_CHAR && datasize == sizeof(char)) 97 reduce_max<char>(static_cast<char*>(const_cast<void*>(sendbuf)), static_cast<char*>(recvbuf), count); 98 99 else if(datatype == MPI_LONG && datasize == sizeof(long)) 100 reduce_max<long>(static_cast<long*>(const_cast<void*>(sendbuf)), static_cast<long*>(recvbuf), count); 101 102 else if(datatype == MPI_UNSIGNED_LONG && datasize == sizeof(unsigned long)) 103 reduce_max<unsigned long>(static_cast<unsigned long*>(const_cast<void*>(sendbuf)), static_cast<unsigned long*>(recvbuf), count); 104 105 else printf("datatype Error\n"); 106 } 107 108 else //(op == MPI_MIN) 109 { 110 if(datatype == MPI_INT && datasize == sizeof(int)) 111 reduce_min<int>(static_cast<int*>(const_cast<void*>(sendbuf)), static_cast<int*>(recvbuf), count); 112 113 else if(datatype == MPI_FLOAT && datasize == sizeof(float)) 114 reduce_min<float>(static_cast<float*>(const_cast<void*>(sendbuf)), static_cast<float*>(recvbuf), count); 115 116 else if(datatype == MPI_DOUBLE && datasize == sizeof(double)) 117 reduce_min<double>(static_cast<double*>(const_cast<void*>(sendbuf)), static_cast<double*>(recvbuf), count); 118 119 else if(datatype == MPI_CHAR && datasize == sizeof(char)) 120 reduce_min<char>(static_cast<char*>(const_cast<void*>(sendbuf)), static_cast<char*>(recvbuf), count); 121 122 else if(datatype == MPI_LONG && datasize == sizeof(long)) 123 reduce_min<long>(static_cast<long*>(const_cast<void*>(sendbuf)), static_cast<long*>(recvbuf), count); 124 125 else if(datatype == MPI_UNSIGNED_LONG && datasize == sizeof(unsigned long)) 126 reduce_min<unsigned long>(static_cast<unsigned long*>(const_cast<void*>(sendbuf)), static_cast<unsigned long*>(recvbuf), count); 127 128 else printf("datatype Error\n"); 129 } 130 131 comm.my_buffer->void_buffer[0] = recvbuf; 54 132 } 55 133 else 56 134 { 57 printf("MPI_Scan Datatype not supported!\n"); 58 exit(0); 59 } 60 61 } 62 63 64 65 66 int MPI_Scan_local_int(const void *sendbuf, void *recvbuf, int count, MPI_Op op, MPI_Comm comm) 67 { 68 int my_rank = comm.ep_comm_ptr->size_rank_info[1].first; 69 int num_ep = comm.ep_comm_ptr->size_rank_info[1].second; 70 71 int *buffer = comm.my_buffer->buf_int; 72 int *send_buf = static_cast<int*>(const_cast<void*>(sendbuf)); 73 int *recv_buf = static_cast<int*>(recvbuf); 74 75 for(int j=0; j<count; j+=BUFFER_SIZE) 76 { 77 if(my_rank == 0) 78 { 79 80 #pragma omp critical (write_to_buffer) 81 { 82 copy(send_buf+j, send_buf+j+min(BUFFER_SIZE, count-j), buffer); 83 copy(send_buf+j, send_buf+j+min(BUFFER_SIZE, count-j), recv_buf+j); 84 #pragma omp flush 85 } 86 } 87 88 MPI_Barrier_local(comm); 89 90 for(int k=1; k<num_ep; k++) 91 { 92 #pragma omp critical (write_to_buffer) 93 { 94 if(my_rank == k) 95 { 96 #pragma omp flush 97 if(op == MPI_SUM) 98 { 99 transform(buffer, buffer+min(BUFFER_SIZE, count-j), send_buf+j, buffer, std::plus<int>()); 100 copy(buffer, buffer+min(BUFFER_SIZE, count-j), recv_buf+j); 101 } 102 else if(op == MPI_MAX) 103 { 104 transform(buffer, buffer+min(BUFFER_SIZE, count-j), send_buf+j, buffer, max_op<int>); 105 copy(buffer, buffer+min(BUFFER_SIZE, count-j), recv_buf+j); 106 } 107 else if(op == MPI_MIN) 108 { 109 transform(buffer, buffer+min(BUFFER_SIZE, count-j), send_buf+j, buffer, min_op<int>); 110 copy(buffer, buffer+min(BUFFER_SIZE, count-j), recv_buf+j); 111 } 112 else 113 { 114 printf("Supported operation: MPI_SUM, MPI_MAX, MPI_MIN\n"); 115 exit(1); 116 } 117 #pragma omp flush 118 } 119 } 120 121 MPI_Barrier_local(comm); 122 } 123 } 124 125 } 126 127 int MPI_Scan_local_float(const void *sendbuf, void *recvbuf, int count, MPI_Op op, MPI_Comm comm) 128 { 129 int my_rank = comm.ep_comm_ptr->size_rank_info[1].first; 130 int num_ep = comm.ep_comm_ptr->size_rank_info[1].second; 131 132 float *buffer = comm.my_buffer->buf_float; 133 float *send_buf = static_cast<float*>(const_cast<void*>(sendbuf)); 134 float *recv_buf = static_cast<float*>(recvbuf); 135 136 for(int j=0; j<count; j+=BUFFER_SIZE) 137 { 138 if(my_rank == 0) 139 { 140 141 #pragma omp critical (write_to_buffer) 142 { 143 copy(send_buf+j, send_buf+j+min(BUFFER_SIZE, count-j), buffer); 144 copy(send_buf+j, send_buf+j+min(BUFFER_SIZE, count-j), recv_buf+j); 145 #pragma omp flush 146 } 147 } 148 149 MPI_Barrier_local(comm); 150 151 for(int k=1; k<num_ep; k++) 152 { 153 #pragma omp critical (write_to_buffer) 154 { 155 if(my_rank == k) 156 { 157 #pragma omp flush 158 if(op == MPI_SUM) 159 { 160 transform(buffer, buffer+min(BUFFER_SIZE, count-j), send_buf+j, buffer, std::plus<float>()); 161 copy(buffer, buffer+min(BUFFER_SIZE, count-j), recv_buf+j); 162 } 163 else if(op == MPI_MAX) 164 { 165 transform(buffer, buffer+min(BUFFER_SIZE, count-j), send_buf+j, buffer, max_op<float>); 166 copy(buffer, buffer+min(BUFFER_SIZE, count-j), recv_buf+j); 167 168 } 169 else if(op == MPI_MIN) 170 { 171 transform(buffer, buffer+min(BUFFER_SIZE, count-j), send_buf+j, buffer, min_op<float>); 172 copy(buffer, buffer+min(BUFFER_SIZE, count-j), recv_buf+j); 173 174 } 175 else 176 { 177 printf("Supported operation: MPI_SUM, MPI_MAX, MPI_MIN\n"); 178 exit(1); 179 } 180 #pragma omp flush 181 } 182 } 183 184 MPI_Barrier_local(comm); 185 } 186 } 187 } 188 189 int MPI_Scan_local_double(const void *sendbuf, void *recvbuf, int count, MPI_Op op, MPI_Comm comm) 190 { 191 int my_rank = comm.ep_comm_ptr->size_rank_info[1].first; 192 int num_ep = comm.ep_comm_ptr->size_rank_info[1].second; 193 194 double *buffer = comm.my_buffer->buf_double; 195 double *send_buf = static_cast<double*>(const_cast<void*>(sendbuf)); 196 double *recv_buf = static_cast<double*>(recvbuf); 197 198 for(int j=0; j<count; j+=BUFFER_SIZE) 199 { 200 if(my_rank == 0) 201 { 202 203 #pragma omp critical (write_to_buffer) 204 { 205 copy(send_buf+j, send_buf+j+min(BUFFER_SIZE, count-j), buffer); 206 copy(send_buf+j, send_buf+j+min(BUFFER_SIZE, count-j), recv_buf+j); 207 #pragma omp flush 208 } 209 } 210 211 MPI_Barrier_local(comm); 212 213 for(int k=1; k<num_ep; k++) 214 { 215 #pragma omp critical (write_to_buffer) 216 { 217 if(my_rank == k) 218 { 219 #pragma omp flush 220 if(op == MPI_SUM) 221 { 222 transform(buffer, buffer+min(BUFFER_SIZE, count-j), send_buf+j, buffer, std::plus<double>()); 223 copy(buffer, buffer+min(BUFFER_SIZE, count-j), recv_buf+j); 224 } 225 else if(op == MPI_MAX) 226 { 227 transform(buffer, buffer+min(BUFFER_SIZE, count-j), send_buf+j, buffer, max_op<double>); 228 copy(buffer, buffer+min(BUFFER_SIZE, count-j), recv_buf+j); 229 } 230 else if(op == MPI_MIN) 231 { 232 transform(buffer, buffer+min(BUFFER_SIZE, count-j), send_buf+j, buffer, min_op<double>); 233 copy(buffer, buffer+min(BUFFER_SIZE, count-j), recv_buf+j); 234 } 235 else 236 { 237 printf("Supported operation: MPI_SUM, MPI_MAX, MPI_MIN\n"); 238 exit(1); 239 } 240 #pragma omp flush 241 } 242 } 243 244 MPI_Barrier_local(comm); 245 } 246 } 247 } 248 249 int MPI_Scan_local_long(const void *sendbuf, void *recvbuf, int count, MPI_Op op, MPI_Comm comm) 250 { 251 int my_rank = comm.ep_comm_ptr->size_rank_info[1].first; 252 int num_ep = comm.ep_comm_ptr->size_rank_info[1].second; 253 254 long *buffer = comm.my_buffer->buf_long; 255 long *send_buf = static_cast<long*>(const_cast<void*>(sendbuf)); 256 long *recv_buf = static_cast<long*>(recvbuf); 257 258 for(int j=0; j<count; j+=BUFFER_SIZE) 259 { 260 if(my_rank == 0) 261 { 262 263 #pragma omp critical (write_to_buffer) 264 { 265 copy(send_buf+j, send_buf+j+min(BUFFER_SIZE, count-j), buffer); 266 copy(send_buf+j, send_buf+j+min(BUFFER_SIZE, count-j), recv_buf+j); 267 #pragma omp flush 268 } 269 } 270 271 MPI_Barrier_local(comm); 272 273 for(int k=1; k<num_ep; k++) 274 { 275 #pragma omp critical (write_to_buffer) 276 { 277 if(my_rank == k) 278 { 279 #pragma omp flush 280 if(op == MPI_SUM) 281 { 282 transform(buffer, buffer+min(BUFFER_SIZE, count-j), send_buf+j, buffer, std::plus<long>()); 283 copy(buffer, buffer+min(BUFFER_SIZE, count-j), recv_buf+j); 284 } 285 else if(op == MPI_MAX) 286 { 287 transform(buffer, buffer+min(BUFFER_SIZE, count-j), send_buf+j, buffer, max_op<long>); 288 copy(buffer, buffer+min(BUFFER_SIZE, count-j), recv_buf+j); 289 } 290 else if(op == MPI_MIN) 291 { 292 transform(buffer, buffer+min(BUFFER_SIZE, count-j), send_buf+j, buffer, min_op<long>); 293 copy(buffer, buffer+min(BUFFER_SIZE, count-j), recv_buf+j); 294 } 295 else 296 { 297 printf("Supported operation: MPI_SUM, MPI_MAX, MPI_MIN\n"); 298 exit(1); 299 } 300 #pragma omp flush 301 } 302 } 303 304 MPI_Barrier_local(comm); 305 } 306 } 307 } 308 309 int MPI_Scan_local_ulong(const void *sendbuf, void *recvbuf, int count, MPI_Op op, MPI_Comm comm) 310 { 311 int my_rank = comm.ep_comm_ptr->size_rank_info[1].first; 312 int num_ep = comm.ep_comm_ptr->size_rank_info[1].second; 313 314 unsigned long *buffer = comm.my_buffer->buf_ulong; 315 unsigned long *send_buf = static_cast<unsigned long*>(const_cast<void*>(sendbuf)); 316 unsigned long *recv_buf = static_cast<unsigned long*>(recvbuf); 317 318 for(int j=0; j<count; j+=BUFFER_SIZE) 319 { 320 if(my_rank == 0) 321 { 322 323 #pragma omp critical (write_to_buffer) 324 { 325 copy(send_buf+j, send_buf+j+min(BUFFER_SIZE, count-j), buffer); 326 copy(send_buf+j, send_buf+j+min(BUFFER_SIZE, count-j), recv_buf+j); 327 #pragma omp flush 328 } 329 } 330 331 MPI_Barrier_local(comm); 332 333 for(int k=1; k<num_ep; k++) 334 { 335 #pragma omp critical (write_to_buffer) 336 { 337 if(my_rank == k) 338 { 339 #pragma omp flush 340 if(op == MPI_SUM) 341 { 342 transform(buffer, buffer+min(BUFFER_SIZE, count-j), send_buf+j, buffer, std::plus<unsigned long>()); 343 copy(buffer, buffer+min(BUFFER_SIZE, count-j), recv_buf+j); 344 } 345 else if(op == MPI_MAX) 346 { 347 transform(buffer, buffer+min(BUFFER_SIZE, count-j), send_buf+j, buffer, max_op<unsigned long>); 348 copy(buffer, buffer+min(BUFFER_SIZE, count-j), recv_buf+j); 349 } 350 else if(op == MPI_MIN) 351 { 352 transform(buffer, buffer+min(BUFFER_SIZE, count-j), send_buf+j, buffer, min_op<unsigned long>); 353 copy(buffer, buffer+min(BUFFER_SIZE, count-j), recv_buf+j); 354 } 355 else 356 { 357 printf("Supported operation: MPI_SUM, MPI_MAX, MPI_MIN\n"); 358 exit(1); 359 } 360 #pragma omp flush 361 } 362 } 363 364 MPI_Barrier_local(comm); 365 } 366 } 367 } 368 369 int MPI_Scan_local_char(const void *sendbuf, void *recvbuf, int count, MPI_Op op, MPI_Comm comm) 370 { 371 int my_rank = comm.ep_comm_ptr->size_rank_info[1].first; 372 int num_ep = comm.ep_comm_ptr->size_rank_info[1].second; 373 374 char *buffer = comm.my_buffer->buf_char; 375 char *send_buf = static_cast<char*>(const_cast<void*>(sendbuf)); 376 char *recv_buf = static_cast<char*>(recvbuf); 377 378 for(int j=0; j<count; j+=BUFFER_SIZE) 379 { 380 if(my_rank == 0) 381 { 382 383 #pragma omp critical (write_to_buffer) 384 { 385 copy(send_buf+j, send_buf+j+min(BUFFER_SIZE, count-j), buffer); 386 copy(send_buf+j, send_buf+j+min(BUFFER_SIZE, count-j), recv_buf+j); 387 #pragma omp flush 388 } 389 } 390 391 MPI_Barrier_local(comm); 392 393 for(int k=1; k<num_ep; k++) 394 { 395 #pragma omp critical (write_to_buffer) 396 { 397 if(my_rank == k) 398 { 399 #pragma omp flush 400 if(op == MPI_SUM) 401 { 402 transform(buffer, buffer+min(BUFFER_SIZE, count-j), send_buf+j, buffer, std::plus<char>()); 403 copy(buffer, buffer+min(BUFFER_SIZE, count-j), recv_buf+j); 404 } 405 else if(op == MPI_MAX) 406 { 407 transform(buffer, buffer+min(BUFFER_SIZE, count-j), send_buf+j, buffer, max_op<char>); 408 copy(buffer, buffer+min(BUFFER_SIZE, count-j), recv_buf+j); 409 } 410 else if(op == MPI_MIN) 411 { 412 transform(buffer, buffer+min(BUFFER_SIZE, count-j), send_buf+j, buffer, min_op<char>); 413 copy(buffer, buffer+min(BUFFER_SIZE, count-j), recv_buf+j); 414 } 415 else 416 { 417 printf("Supported operation: MPI_SUM, MPI_MAX, MPI_MIN\n"); 418 exit(1); 419 } 420 #pragma omp flush 421 } 422 } 423 424 MPI_Barrier_local(comm); 425 } 426 } 135 comm.my_buffer->void_buffer[ep_rank_loc] = const_cast<void*>(sendbuf); 136 memcpy(recvbuf, sendbuf, datasize*count); 137 } 138 139 140 141 MPI_Barrier_local(comm); 142 143 memcpy(recvbuf, comm.my_buffer->void_buffer[0], datasize*count); 144 145 146 if(op == MPI_SUM) 147 { 148 if(datatype == MPI_INT && datasize == sizeof(int)) 149 { 150 for(int i=1; i<ep_rank_loc+1; i++) 151 reduce_sum<int>(static_cast<int*>(comm.my_buffer->void_buffer[i]), static_cast<int*>(recvbuf), count); 152 } 153 154 else if(datatype == MPI_FLOAT && datasize == sizeof(float)) 155 { 156 for(int i=1; i<ep_rank_loc+1; i++) 157 reduce_sum<float>(static_cast<float*>(comm.my_buffer->void_buffer[i]), static_cast<float*>(recvbuf), count); 158 } 159 160 161 else if(datatype == MPI_DOUBLE && datasize == sizeof(double)) 162 { 163 for(int i=1; i<ep_rank_loc+1; i++) 164 reduce_sum<double>(static_cast<double*>(comm.my_buffer->void_buffer[i]), static_cast<double*>(recvbuf), count); 165 } 166 167 else if(datatype == MPI_CHAR && datasize == sizeof(char)) 168 { 169 for(int i=1; i<ep_rank_loc+1; i++) 170 reduce_sum<char>(static_cast<char*>(comm.my_buffer->void_buffer[i]), static_cast<char*>(recvbuf), count); 171 } 172 173 else if(datatype == MPI_LONG && datasize == sizeof(long)) 174 { 175 for(int i=1; i<ep_rank_loc+1; i++) 176 reduce_sum<long>(static_cast<long*>(comm.my_buffer->void_buffer[i]), static_cast<long*>(recvbuf), count); 177 } 178 179 else if(datatype == MPI_UNSIGNED_LONG && datasize == sizeof(unsigned long)) 180 { 181 for(int i=1; i<ep_rank_loc+1; i++) 182 reduce_sum<unsigned long>(static_cast<unsigned long*>(comm.my_buffer->void_buffer[i]), static_cast<unsigned long*>(recvbuf), count); 183 } 184 185 else printf("datatype Error\n"); 186 187 188 } 189 190 else if(op == MPI_MAX) 191 { 192 if(datatype == MPI_INT && datasize == sizeof(int)) 193 for(int i=1; i<ep_rank_loc+1; i++) 194 reduce_max<int>(static_cast<int*>(comm.my_buffer->void_buffer[i]), static_cast<int*>(recvbuf), count); 195 196 else if(datatype == MPI_FLOAT && datasize == sizeof(float)) 197 for(int i=1; i<ep_rank_loc+1; i++) 198 reduce_max<float>(static_cast<float*>(comm.my_buffer->void_buffer[i]), static_cast<float*>(recvbuf), count); 199 200 else if(datatype == MPI_DOUBLE && datasize == sizeof(double)) 201 for(int i=1; i<ep_rank_loc+1; i++) 202 reduce_max<double>(static_cast<double*>(comm.my_buffer->void_buffer[i]), static_cast<double*>(recvbuf), count); 203 204 else if(datatype == MPI_CHAR && datasize == sizeof(char)) 205 for(int i=1; i<ep_rank_loc+1; i++) 206 reduce_max<char>(static_cast<char*>(comm.my_buffer->void_buffer[i]), static_cast<char*>(recvbuf), count); 207 208 else if(datatype == MPI_LONG && datasize == sizeof(long)) 209 for(int i=1; i<ep_rank_loc+1; i++) 210 reduce_max<long>(static_cast<long*>(comm.my_buffer->void_buffer[i]), static_cast<long*>(recvbuf), count); 211 212 else if(datatype == MPI_UNSIGNED_LONG && datasize == sizeof(unsigned long)) 213 for(int i=1; i<ep_rank_loc+1; i++) 214 reduce_max<unsigned long>(static_cast<unsigned long*>(comm.my_buffer->void_buffer[i]), static_cast<unsigned long*>(recvbuf), count); 215 216 else printf("datatype Error\n"); 217 } 218 219 else //if(op == MPI_MIN) 220 { 221 if(datatype == MPI_INT && datasize == sizeof(int)) 222 for(int i=1; i<ep_rank_loc+1; i++) 223 reduce_min<int>(static_cast<int*>(comm.my_buffer->void_buffer[i]), static_cast<int*>(recvbuf), count); 224 225 else if(datatype == MPI_FLOAT && datasize == sizeof(float)) 226 for(int i=1; i<ep_rank_loc+1; i++) 227 reduce_min<float>(static_cast<float*>(comm.my_buffer->void_buffer[i]), static_cast<float*>(recvbuf), count); 228 229 else if(datatype == MPI_DOUBLE && datasize == sizeof(double)) 230 for(int i=1; i<ep_rank_loc+1; i++) 231 reduce_min<double>(static_cast<double*>(comm.my_buffer->void_buffer[i]), static_cast<double*>(recvbuf), count); 232 233 else if(datatype == MPI_CHAR && datasize == sizeof(char)) 234 for(int i=1; i<ep_rank_loc+1; i++) 235 reduce_min<char>(static_cast<char*>(comm.my_buffer->void_buffer[i]), static_cast<char*>(recvbuf), count); 236 237 else if(datatype == MPI_LONG && datasize == sizeof(long)) 238 for(int i=1; i<ep_rank_loc+1; i++) 239 reduce_min<long>(static_cast<long*>(comm.my_buffer->void_buffer[i]), static_cast<long*>(recvbuf), count); 240 241 else if(datatype == MPI_UNSIGNED_LONG && datasize == sizeof(unsigned long)) 242 for(int i=1; i<ep_rank_loc+1; i++) 243 reduce_min<unsigned long>(static_cast<unsigned long*>(comm.my_buffer->void_buffer[i]), static_cast<unsigned long*>(recvbuf), count); 244 245 else printf("datatype Error\n"); 246 } 247 248 MPI_Barrier_local(comm); 249 427 250 } 428 251 … … 432 255 if(!comm.is_ep) 433 256 { 434 435 ::MPI_Scan(sendbuf, recvbuf, count, static_cast< ::MPI_Datatype>(datatype), 436 static_cast< ::MPI_Op>(op), static_cast< ::MPI_Comm>(comm.mpi_comm)); 437 return 0; 438 } 439 440 if(!comm.mpi_comm) return 0; 441 442 int ep_rank, ep_rank_loc, mpi_rank; 443 int ep_size, num_ep, mpi_size; 444 445 446 ep_rank = comm.ep_comm_ptr->size_rank_info[0].first; 447 ep_rank_loc = comm.ep_comm_ptr->size_rank_info[1].first; 448 mpi_rank = comm.ep_comm_ptr->size_rank_info[2].first; 449 ep_size = comm.ep_comm_ptr->size_rank_info[0].second; 450 num_ep = comm.ep_comm_ptr->size_rank_info[1].second; 451 mpi_size = comm.ep_comm_ptr->size_rank_info[2].second; 452 257 return ::MPI_Scan(sendbuf, recvbuf, count, to_mpi_type(datatype), to_mpi_op(op), to_mpi_comm(comm.mpi_comm)); 258 } 259 260 valid_type(datatype); 261 262 int ep_rank = comm.ep_comm_ptr->size_rank_info[0].first; 263 int ep_rank_loc = comm.ep_comm_ptr->size_rank_info[1].first; 264 int mpi_rank = comm.ep_comm_ptr->size_rank_info[2].first; 265 int ep_size = comm.ep_comm_ptr->size_rank_info[0].second; 266 int num_ep = comm.ep_comm_ptr->size_rank_info[1].second; 267 int mpi_size = comm.ep_comm_ptr->size_rank_info[2].second; 453 268 454 269 ::MPI_Aint datasize, lb; 455 456 ::MPI_Type_get_extent(static_cast< ::MPI_Datatype>(datatype), &lb, &datasize); 457 458 void* local_scan_recvbuf; 459 local_scan_recvbuf = new void*[datasize * count]; 460 461 462 // local scan 463 MPI_Scan_local(sendbuf, recvbuf, count, datatype, op, comm); 464 465 // MPI_scan 466 void* local_sum; 467 void* mpi_scan_recvbuf; 468 469 470 mpi_scan_recvbuf = new void*[datasize*count]; 270 ::MPI_Type_get_extent(to_mpi_type(datatype), &lb, &datasize); 271 272 void* tmp_sendbuf; 273 tmp_sendbuf = new void*[datasize * count]; 274 275 int my_src = 0; 276 int my_dst = ep_rank; 277 278 std::vector<int> my_map(mpi_size, 0); 279 280 for(int i=0; i<comm.rank_map->size(); i++) my_map[comm.rank_map->at(i).second]++; 281 282 for(int i=0; i<mpi_rank; i++) my_src += my_map[i]; 283 my_src += ep_rank_loc; 284 285 286 for(int i=0; i<mpi_size; i++) 287 { 288 if(my_dst < my_map[i]) 289 { 290 my_dst = get_ep_rank(comm, my_dst, i); 291 break; 292 } 293 else 294 my_dst -= my_map[i]; 295 } 296 297 //printf("ID = %d : send to %d, recv from %d\n", ep_rank, my_dst, my_src); 298 MPI_Barrier(comm); 299 300 if(my_dst == ep_rank && my_src == ep_rank) memcpy(tmp_sendbuf, sendbuf, datasize*count); 301 302 if(ep_rank != my_dst) 303 { 304 MPI_Request request[2]; 305 MPI_Status status[2]; 306 307 MPI_Isend(sendbuf, count, datatype, my_dst, my_dst, comm, &request[0]); 308 309 MPI_Irecv(tmp_sendbuf, count, datatype, my_src, ep_rank, comm, &request[1]); 310 311 MPI_Waitall(2, request, status); 312 } 313 314 315 void* tmp_recvbuf; 316 tmp_recvbuf = new void*[datasize * count]; 317 318 MPI_Reduce_local(tmp_sendbuf, tmp_recvbuf, count, datatype, op, 0, comm); 471 319 472 320 if(ep_rank_loc == 0) 473 { 474 local_sum = new void*[datasize*count]; 475 } 476 477 478 MPI_Reduce_local(sendbuf, local_sum, count, datatype, op, comm); 479 480 if(ep_rank_loc == 0) 481 { 482 ::MPI_Exscan(local_sum, mpi_scan_recvbuf, count, static_cast< ::MPI_Datatype>(datatype), static_cast< ::MPI_Op>(op), static_cast< ::MPI_Comm>(comm.mpi_comm)); 483 } 484 485 486 if(mpi_rank > 0) 487 { 488 MPI_Bcast_local(mpi_scan_recvbuf, count, datatype, comm); 489 } 490 491 492 if(datatype == MPI_DOUBLE) 493 { 494 double* sum_buf = static_cast<double*>(mpi_scan_recvbuf); 495 double* recv_buf = static_cast<double*>(recvbuf); 496 497 if(mpi_rank != 0) 498 { 499 if(op == MPI_SUM) 500 { 501 for(int i=0; i<count; i++) 502 { 503 recv_buf[i] += sum_buf[i]; 504 } 505 } 506 else if (op == MPI_MAX) 507 { 508 for(int i=0; i<count; i++) 509 { 510 recv_buf[i] = max(recv_buf[i], sum_buf[i]); 511 } 512 } 513 else if(op == MPI_MIN) 514 { 515 for(int i=0; i<count; i++) 516 { 517 recv_buf[i] = min(recv_buf[i], sum_buf[i]); 518 } 519 } 520 else 521 { 522 printf("Support operator for MPI_Scan is MPI_SUM, MPI_MAX, and MPI_MIN\n"); 523 exit(1); 524 } 525 } 526 527 delete[] static_cast<double*>(mpi_scan_recvbuf); 528 if(ep_rank_loc == 0) 529 { 530 delete[] static_cast<double*>(local_sum); 531 } 532 } 533 534 else if(datatype == MPI_FLOAT) 535 { 536 float* sum_buf = static_cast<float*>(mpi_scan_recvbuf); 537 float* recv_buf = static_cast<float*>(recvbuf); 538 539 if(mpi_rank != 0) 540 { 541 if(op == MPI_SUM) 542 { 543 for(int i=0; i<count; i++) 544 { 545 recv_buf[i] += sum_buf[i]; 546 } 547 } 548 else if (op == MPI_MAX) 549 { 550 for(int i=0; i<count; i++) 551 { 552 recv_buf[i] = max(recv_buf[i], sum_buf[i]); 553 } 554 } 555 else if(op == MPI_MIN) 556 { 557 for(int i=0; i<count; i++) 558 { 559 recv_buf[i] = min(recv_buf[i], sum_buf[i]); 560 } 561 } 562 else 563 { 564 printf("Support operator for MPI_Scan is MPI_SUM, MPI_MAX, and MPI_MIN\n"); 565 exit(1); 566 } 567 } 568 569 delete[] static_cast<float*>(mpi_scan_recvbuf); 570 if(ep_rank_loc == 0) 571 { 572 delete[] static_cast<float*>(local_sum); 573 } 574 } 575 576 else if(datatype == MPI_INT) 577 { 578 int* sum_buf = static_cast<int*>(mpi_scan_recvbuf); 579 int* recv_buf = static_cast<int*>(recvbuf); 580 581 if(mpi_rank != 0) 582 { 583 if(op == MPI_SUM) 584 { 585 for(int i=0; i<count; i++) 586 { 587 recv_buf[i] += sum_buf[i]; 588 } 589 } 590 else if (op == MPI_MAX) 591 { 592 for(int i=0; i<count; i++) 593 { 594 recv_buf[i] = max(recv_buf[i], sum_buf[i]); 595 } 596 } 597 else if(op == MPI_MIN) 598 { 599 for(int i=0; i<count; i++) 600 { 601 recv_buf[i] = min(recv_buf[i], sum_buf[i]); 602 } 603 } 604 else 605 { 606 printf("Support operator for MPI_Scan is MPI_SUM, MPI_MAX, and MPI_MIN\n"); 607 exit(1); 608 } 609 } 610 611 delete[] static_cast<int*>(mpi_scan_recvbuf); 612 if(ep_rank_loc == 0) 613 { 614 delete[] static_cast<int*>(local_sum); 615 } 616 } 617 618 else if(datatype == MPI_LONG) 619 { 620 long* sum_buf = static_cast<long*>(mpi_scan_recvbuf); 621 long* recv_buf = static_cast<long*>(recvbuf); 622 623 if(mpi_rank != 0) 624 { 625 if(op == MPI_SUM) 626 { 627 for(int i=0; i<count; i++) 628 { 629 recv_buf[i] += sum_buf[i]; 630 } 631 } 632 else if (op == MPI_MAX) 633 { 634 for(int i=0; i<count; i++) 635 { 636 recv_buf[i] = max(recv_buf[i], sum_buf[i]); 637 } 638 } 639 else if(op == MPI_MIN) 640 { 641 for(int i=0; i<count; i++) 642 { 643 recv_buf[i] = min(recv_buf[i], sum_buf[i]); 644 } 645 } 646 else 647 { 648 printf("Support operator for MPI_Scan is MPI_SUM, MPI_MAX, and MPI_MIN\n"); 649 exit(1); 650 } 651 } 652 653 delete[] static_cast<long*>(mpi_scan_recvbuf); 654 if(ep_rank_loc == 0) 655 { 656 delete[] static_cast<long*>(local_sum); 657 } 658 } 659 660 else if(datatype == MPI_UNSIGNED_LONG) 661 { 662 unsigned long* sum_buf = static_cast<unsigned long*>(mpi_scan_recvbuf); 663 unsigned long* recv_buf = static_cast<unsigned long*>(recvbuf); 664 665 if(mpi_rank != 0) 666 { 667 if(op == MPI_SUM) 668 { 669 for(int i=0; i<count; i++) 670 { 671 recv_buf[i] += sum_buf[i]; 672 } 673 } 674 else if (op == MPI_MAX) 675 { 676 for(int i=0; i<count; i++) 677 { 678 recv_buf[i] = max(recv_buf[i], sum_buf[i]); 679 } 680 } 681 else if(op == MPI_MIN) 682 { 683 for(int i=0; i<count; i++) 684 { 685 recv_buf[i] = min(recv_buf[i], sum_buf[i]); 686 } 687 } 688 else 689 { 690 printf("Support operator for MPI_Scan is MPI_SUM, MPI_MAX, and MPI_MIN\n"); 691 exit(1); 692 } 693 } 694 695 delete[] static_cast<unsigned long*>(mpi_scan_recvbuf); 696 if(ep_rank_loc == 0) 697 { 698 delete[] static_cast<unsigned long*>(local_sum); 699 } 700 } 701 702 else if(datatype == MPI_CHAR) 703 { 704 char* sum_buf = static_cast<char*>(mpi_scan_recvbuf); 705 char* recv_buf = static_cast<char*>(recvbuf); 706 707 if(mpi_rank != 0) 708 { 709 if(op == MPI_SUM) 710 { 711 for(int i=0; i<count; i++) 712 { 713 recv_buf[i] += sum_buf[i]; 714 } 715 } 716 else if (op == MPI_MAX) 717 { 718 for(int i=0; i<count; i++) 719 { 720 recv_buf[i] = max(recv_buf[i], sum_buf[i]); 721 } 722 } 723 else if(op == MPI_MIN) 724 { 725 for(int i=0; i<count; i++) 726 { 727 recv_buf[i] = min(recv_buf[i], sum_buf[i]); 728 } 729 } 730 else 731 { 732 printf("Support operator for MPI_Scan is MPI_SUM, MPI_MAX, and MPI_MIN\n"); 733 exit(1); 734 } 735 } 736 737 delete[] static_cast<char*>(mpi_scan_recvbuf); 738 if(ep_rank_loc == 0) 739 { 740 delete[] static_cast<char*>(local_sum); 741 } 742 } 743 321 ::MPI_Exscan(MPI_IN_PLACE, tmp_recvbuf, count, to_mpi_type(datatype), to_mpi_op(op), to_mpi_comm(comm.mpi_comm)); 322 323 //printf(" ID=%d : %d %d \n", ep_rank, static_cast<int*>(tmp_recvbuf)[0], static_cast<int*>(tmp_recvbuf)[1]); 324 325 MPI_Scan_local(tmp_sendbuf, tmp_recvbuf, count, datatype, op, comm); 326 327 // printf(" ID=%d : after local tmp_sendbuf = %d %d ; tmp_recvbuf = %d %d \n", ep_rank, static_cast<int*>(tmp_sendbuf)[0], static_cast<int*>(tmp_sendbuf)[1], static_cast<int*>(tmp_recvbuf)[0], static_cast<int*>(tmp_recvbuf)[1]); 328 329 330 331 if(ep_rank != my_src) 332 { 333 MPI_Request request[2]; 334 MPI_Status status[2]; 335 336 MPI_Isend(tmp_recvbuf, count, datatype, my_src, my_src, comm, &request[0]); 337 338 MPI_Irecv(recvbuf, count, datatype, my_dst, ep_rank, comm, &request[1]); 339 340 MPI_Waitall(2, request, status); 341 } 342 343 else memcpy(recvbuf, tmp_recvbuf, datasize*count); 344 345 346 347 348 delete[] tmp_sendbuf; 349 delete[] tmp_recvbuf; 744 350 745 351 } -
XIOS/dev/branch_openmp/extern/src_ep_dev/ep_scatter.cpp
r1134 r1287 9 9 #include <mpi.h> 10 10 #include "ep_declaration.hpp" 11 #include "ep_mpi.hpp" 11 12 12 13 using namespace std; … … 15 16 { 16 17 17 int MPI_Scatter_local( const void *sendbuf, int count, MPI_Datatype datatype, void *recvbuf, MPI_Comm comm)18 int MPI_Scatter_local(void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, int local_root, MPI_Comm comm) 18 19 { 19 if(datatype == MPI_INT) 20 { 21 Debug("datatype is INT\n"); 22 return MPI_Scatter_local_int(sendbuf, count, recvbuf, comm); 23 } 24 else if(datatype == MPI_FLOAT) 25 { 26 Debug("datatype is FLOAT\n"); 27 return MPI_Scatter_local_float(sendbuf, count, recvbuf, comm); 28 } 29 else if(datatype == MPI_DOUBLE) 30 { 31 Debug("datatype is DOUBLE\n"); 32 return MPI_Scatter_local_double(sendbuf, count, recvbuf, comm); 33 } 34 else if(datatype == MPI_LONG) 35 { 36 Debug("datatype is LONG\n"); 37 return MPI_Scatter_local_long(sendbuf, count, recvbuf, comm); 38 } 39 else if(datatype == MPI_UNSIGNED_LONG) 40 { 41 Debug("datatype is uLONG\n"); 42 return MPI_Scatter_local_ulong(sendbuf, count, recvbuf, comm); 43 } 44 else if(datatype == MPI_CHAR) 45 { 46 Debug("datatype is CHAR\n"); 47 return MPI_Scatter_local_char(sendbuf, count, recvbuf, comm); 48 } 49 else 50 { 51 printf("MPI_Scatter Datatype not supported!\n"); 52 exit(0); 53 } 54 } 20 assert(valid_type(sendtype) && valid_type(recvtype)); 21 assert(recvcount == sendcount); 55 22 56 int MPI_Scatter_local_int(const void *sendbuf, int count, void *recvbuf, MPI_Comm comm) 57 { 58 int my_rank = comm.ep_comm_ptr->size_rank_info[1].first; 59 int num_ep = comm.ep_comm_ptr->size_rank_info[1].second; 23 ::MPI_Aint datasize, lb; 24 ::MPI_Type_get_extent(to_mpi_type(sendtype), &lb, &datasize); 25 26 int ep_rank_loc = comm.ep_comm_ptr->size_rank_info[1].first; 27 int num_ep = comm.ep_comm_ptr->size_rank_info[1].second; 60 28 61 29 62 int *buffer = comm.my_buffer->buf_int; 63 int *send_buf = static_cast<int*>(const_cast<void*>(sendbuf)); 64 int *recv_buf = static_cast<int*>(recvbuf); 30 if(ep_rank_loc == local_root) 31 comm.my_buffer->void_buffer[local_root] = const_cast<void*>(sendbuf); 65 32 66 for(int k=0; k<num_ep; k++) 67 { 68 for(int j=0; j<count; j+=BUFFER_SIZE) 69 { 70 if(my_rank == 0) 71 { 72 #pragma omp critical (write_to_buffer) 73 { 74 copy(send_buf+k*count+j, send_buf+k*count+j+min(BUFFER_SIZE, count-j), buffer); 75 #pragma omp flush 76 } 77 } 33 MPI_Barrier_local(comm); 78 34 79 MPI_Barrier_local(comm); 35 #pragma omp critical (_scatter) 36 memcpy(recvbuf, comm.my_buffer->void_buffer[local_root]+datasize*ep_rank_loc*sendcount, datasize * recvcount); 37 80 38 81 if(my_rank == k) 82 { 83 #pragma omp critical (read_from_buffer) 84 { 85 #pragma omp flush 86 copy(buffer, buffer+min(BUFFER_SIZE, count-j), recv_buf+j); 87 } 88 } 89 MPI_Barrier_local(comm); 90 } 91 } 39 MPI_Barrier_local(comm); 92 40 } 93 94 int MPI_Scatter_local_float(const void *sendbuf, int count, void *recvbuf, MPI_Comm comm)95 {96 int my_rank = comm.ep_comm_ptr->size_rank_info[1].first;97 int num_ep = comm.ep_comm_ptr->size_rank_info[1].second;98 99 float *buffer = comm.my_buffer->buf_float;100 float *send_buf = static_cast<float*>(const_cast<void*>(sendbuf));101 float *recv_buf = static_cast<float*>(recvbuf);102 103 for(int k=0; k<num_ep; k++)104 {105 for(int j=0; j<count; j+=BUFFER_SIZE)106 {107 if(my_rank == 0)108 {109 #pragma omp critical (write_to_buffer)110 {111 copy(send_buf+k*count+j, send_buf+k*count+j+min(BUFFER_SIZE, count-j), buffer);112 #pragma omp flush113 }114 }115 116 MPI_Barrier_local(comm);117 118 if(my_rank == k)119 {120 #pragma omp critical (read_from_buffer)121 {122 #pragma omp flush123 copy(buffer, buffer+min(BUFFER_SIZE, count-j), recv_buf+j);124 }125 }126 MPI_Barrier_local(comm);127 }128 }129 }130 131 int MPI_Scatter_local_double(const void *sendbuf, int count, void *recvbuf, MPI_Comm comm)132 {133 int my_rank = comm.ep_comm_ptr->size_rank_info[1].first;134 int num_ep = comm.ep_comm_ptr->size_rank_info[1].second;135 136 double *buffer = comm.my_buffer->buf_double;137 double *send_buf = static_cast<double*>(const_cast<void*>(sendbuf));138 double *recv_buf = static_cast<double*>(recvbuf);139 140 for(int k=0; k<num_ep; k++)141 {142 for(int j=0; j<count; j+=BUFFER_SIZE)143 {144 if(my_rank == 0)145 {146 #pragma omp critical (write_to_buffer)147 {148 copy(send_buf+k*count+j, send_buf+k*count+j+min(BUFFER_SIZE, count-j), buffer);149 #pragma omp flush150 }151 }152 153 MPI_Barrier_local(comm);154 155 if(my_rank == k)156 {157 #pragma omp critical (read_from_buffer)158 {159 #pragma omp flush160 copy(buffer, buffer+min(BUFFER_SIZE, count-j), recv_buf+j);161 }162 }163 MPI_Barrier_local(comm);164 }165 }166 }167 168 int MPI_Scatter_local_long(const void *sendbuf, int count, void *recvbuf, MPI_Comm comm)169 {170 int my_rank = comm.ep_comm_ptr->size_rank_info[1].first;171 int num_ep = comm.ep_comm_ptr->size_rank_info[1].second;172 173 long *buffer = comm.my_buffer->buf_long;174 long *send_buf = static_cast<long*>(const_cast<void*>(sendbuf));175 long *recv_buf = static_cast<long*>(recvbuf);176 177 for(int k=0; k<num_ep; k++)178 {179 for(int j=0; j<count; j+=BUFFER_SIZE)180 {181 if(my_rank == 0)182 {183 #pragma omp critical (write_to_buffer)184 {185 copy(send_buf+k*count+j, send_buf+k*count+j+min(BUFFER_SIZE, count-j), buffer);186 #pragma omp flush187 }188 }189 190 MPI_Barrier_local(comm);191 192 if(my_rank == k)193 {194 #pragma omp critical (read_from_buffer)195 {196 #pragma omp flush197 copy(buffer, buffer+min(BUFFER_SIZE, count-j), recv_buf+j);198 }199 }200 MPI_Barrier_local(comm);201 }202 }203 }204 205 206 int MPI_Scatter_local_ulong(const void *sendbuf, int count, void *recvbuf, MPI_Comm comm)207 {208 int my_rank = comm.ep_comm_ptr->size_rank_info[1].first;209 int num_ep = comm.ep_comm_ptr->size_rank_info[1].second;210 211 unsigned long *buffer = comm.my_buffer->buf_ulong;212 unsigned long *send_buf = static_cast<unsigned long*>(const_cast<void*>(sendbuf));213 unsigned long *recv_buf = static_cast<unsigned long*>(recvbuf);214 215 for(int k=0; k<num_ep; k++)216 {217 for(int j=0; j<count; j+=BUFFER_SIZE)218 {219 if(my_rank == 0)220 {221 #pragma omp critical (write_to_buffer)222 {223 copy(send_buf+k*count+j, send_buf+k*count+j+min(BUFFER_SIZE, count-j), buffer);224 #pragma omp flush225 }226 }227 228 MPI_Barrier_local(comm);229 230 if(my_rank == k)231 {232 #pragma omp critical (read_from_buffer)233 {234 #pragma omp flush235 copy(buffer, buffer+min(BUFFER_SIZE, count-j), recv_buf+j);236 }237 }238 MPI_Barrier_local(comm);239 }240 }241 }242 243 244 int MPI_Scatter_local_char(const void *sendbuf, int count, void *recvbuf, MPI_Comm comm)245 {246 int my_rank = comm.ep_comm_ptr->size_rank_info[1].first;247 int num_ep = comm.ep_comm_ptr->size_rank_info[1].second;248 249 char *buffer = comm.my_buffer->buf_char;250 char *send_buf = static_cast<char*>(const_cast<void*>(sendbuf));251 char *recv_buf = static_cast<char*>(recvbuf);252 253 for(int k=0; k<num_ep; k++)254 {255 for(int j=0; j<count; j+=BUFFER_SIZE)256 {257 if(my_rank == 0)258 {259 #pragma omp critical (write_to_buffer)260 {261 copy(send_buf+k*count+j, send_buf+k*count+j+min(BUFFER_SIZE, count-j), buffer);262 #pragma omp flush263 }264 }265 266 MPI_Barrier_local(comm);267 268 if(my_rank == k)269 {270 #pragma omp critical (read_from_buffer)271 {272 #pragma omp flush273 copy(buffer, buffer+min(BUFFER_SIZE, count-j), recv_buf+j);274 }275 }276 MPI_Barrier_local(comm);277 }278 }279 }280 281 282 283 284 41 285 42 int MPI_Scatter(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPI_Comm comm) … … 287 44 if(!comm.is_ep) 288 45 { 289 ::MPI_Scatter(sendbuf, sendcount, static_cast< ::MPI_Datatype>(sendtype), recvbuf, recvcount, static_cast< ::MPI_Datatype>(recvtype), 290 root, static_cast< ::MPI_Comm>(comm.mpi_comm)); 291 return 0; 46 return ::MPI_Scatter(sendbuf, sendcount, to_mpi_type(sendtype), recvbuf, recvcount, to_mpi_type(recvtype), root, to_mpi_comm(comm.mpi_comm)); 292 47 } 48 49 assert(sendcount == recvcount); 293 50 294 if(!comm.mpi_comm) return 0; 295 296 assert(static_cast< ::MPI_Datatype>(sendtype) == static_cast< ::MPI_Datatype>(recvtype) && sendcount == recvcount); 51 int ep_rank = comm.ep_comm_ptr->size_rank_info[0].first; 52 int ep_rank_loc = comm.ep_comm_ptr->size_rank_info[1].first; 53 int mpi_rank = comm.ep_comm_ptr->size_rank_info[2].first; 54 int ep_size = comm.ep_comm_ptr->size_rank_info[0].second; 55 int num_ep = comm.ep_comm_ptr->size_rank_info[1].second; 56 int mpi_size = comm.ep_comm_ptr->size_rank_info[2].second; 297 57 298 58 int root_mpi_rank = comm.rank_map->at(root).second; 299 59 int root_ep_loc = comm.rank_map->at(root).first; 300 60 301 int ep_rank, ep_rank_loc, mpi_rank; 302 int ep_size, num_ep, mpi_size; 303 304 ep_rank = comm.ep_comm_ptr->size_rank_info[0].first; 305 ep_rank_loc = comm.ep_comm_ptr->size_rank_info[1].first; 306 mpi_rank = comm.ep_comm_ptr->size_rank_info[2].first; 307 ep_size = comm.ep_comm_ptr->size_rank_info[0].second; 308 num_ep = comm.ep_comm_ptr->size_rank_info[1].second; 309 mpi_size = comm.ep_comm_ptr->size_rank_info[2].second; 310 61 bool is_master = (ep_rank_loc==0 && mpi_rank != root_mpi_rank ) || ep_rank == root; 62 bool is_root = ep_rank == root; 311 63 312 64 MPI_Datatype datatype = sendtype; … … 314 66 315 67 ::MPI_Aint datasize, lb; 68 ::MPI_Type_get_extent(to_mpi_type(datatype), &lb, &datasize); 69 70 void *tmp_sendbuf; 71 if(is_root) tmp_sendbuf = new void*[ep_size * count * datasize]; 316 72 317 ::MPI_Type_get_extent(static_cast< ::MPI_Datatype>(datatype), &lb, &datasize); 73 // reorder tmp_sendbuf 74 std::vector<int>local_ranks(num_ep); 75 std::vector<int>ranks(ep_size); 76 77 if(mpi_rank == root_mpi_rank) MPI_Gather_local(&ep_rank, 1, MPI_INT, local_ranks.data(), root_ep_loc, comm); 78 else MPI_Gather_local(&ep_rank, 1, MPI_INT, local_ranks.data(), 0, comm); 318 79 319 80 320 void *master_sendbuf;321 void *local_recvbuf;81 std::vector<int> recvcounts(mpi_size, 0); 82 std::vector<int> displs(mpi_size, 0); 322 83 323 if(root_ep_loc!=0 && mpi_rank == root_mpi_rank) 84 85 if(is_master) 324 86 { 325 if(ep_rank_loc == 0) master_sendbuf = new void*[datasize*count*ep_size]; 87 for(int i=0; i<ep_size; i++) 88 { 89 recvcounts[comm.rank_map->at(i).second]++; 90 } 326 91 327 innode_memcpy(root_ep_loc, sendbuf, 0, master_sendbuf, count*ep_size, datatype, comm); 92 for(int i=1; i<mpi_size; i++) 93 displs[i] = displs[i-1] + recvcounts[i-1]; 94 95 ::MPI_Gatherv(local_ranks.data(), num_ep, MPI_INT, ranks.data(), recvcounts.data(), displs.data(), MPI_INT, root_mpi_rank, to_mpi_comm(comm.mpi_comm)); 328 96 } 329 97 330 98 331 99 332 if(ep_rank_loc == 0) 100 // if(is_root) printf("\nranks = %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d\n", ranks[0], ranks[1], ranks[2], ranks[3], ranks[4], ranks[5], ranks[6], ranks[7], 101 // ranks[8], ranks[9], ranks[10], ranks[11], ranks[12], ranks[13], ranks[14], ranks[15]); 102 103 if(is_root) 104 for(int i=0; i<ep_size; i++) 333 105 { 334 int mpi_sendcnt = count*num_ep; 335 int mpi_scatterv_sendcnt[mpi_size]; 336 int displs[mpi_size]; 106 memcpy(tmp_sendbuf + i*datasize*count, sendbuf + ranks[i]*datasize*count, count*datasize); 107 } 337 108 338 local_recvbuf = new void*[datasize*mpi_sendcnt]; 339 340 ::MPI_Gather(&mpi_sendcnt, 1, MPI_INT_STD, mpi_scatterv_sendcnt, 1, MPI_INT_STD, root_mpi_rank, static_cast< ::MPI_Comm>(comm.mpi_comm)); 341 342 displs[0] = 0; 343 for(int i=1; i<mpi_size; i++) 344 displs[i] = displs[i-1] + mpi_scatterv_sendcnt[i-1]; 109 // if(is_root) printf("\ntmp_sendbuf = %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d\n", static_cast<int*>(tmp_sendbuf)[0], static_cast<int*>(tmp_sendbuf)[2], static_cast<int*>(tmp_sendbuf)[4], static_cast<int*>(tmp_sendbuf)[6], 110 // static_cast<int*>(tmp_sendbuf)[8], static_cast<int*>(tmp_sendbuf)[10], static_cast<int*>(tmp_sendbuf)[12], static_cast<int*>(tmp_sendbuf)[14], 111 // static_cast<int*>(tmp_sendbuf)[16], static_cast<int*>(tmp_sendbuf)[18], static_cast<int*>(tmp_sendbuf)[20], static_cast<int*>(tmp_sendbuf)[22], 112 // static_cast<int*>(tmp_sendbuf)[24], static_cast<int*>(tmp_sendbuf)[26], static_cast<int*>(tmp_sendbuf)[28], static_cast<int*>(tmp_sendbuf)[30] ); 345 113 346 114 347 if(root_ep_loc!=0) 348 { 349 ::MPI_Scatterv(master_sendbuf, mpi_scatterv_sendcnt, displs, static_cast< ::MPI_Datatype>(datatype), 350 local_recvbuf, mpi_sendcnt, static_cast< ::MPI_Datatype>(datatype), root_mpi_rank, static_cast< ::MPI_Comm>(comm.mpi_comm)); 351 } 352 else 353 { 354 ::MPI_Scatterv(sendbuf, mpi_scatterv_sendcnt, displs, static_cast< ::MPI_Datatype>(datatype), 355 local_recvbuf, mpi_sendcnt, static_cast< ::MPI_Datatype>(datatype), root_mpi_rank, static_cast< ::MPI_Comm>(comm.mpi_comm)); 356 } 115 // MPI_Scatterv from root to masters 116 void* local_recvbuf; 117 if(is_master) local_recvbuf = new void*[datasize * num_ep * count]; 118 119 120 if(is_master) 121 { 122 int local_sendcount = num_ep * count; 123 ::MPI_Gather(&local_sendcount, 1, to_mpi_type(MPI_INT), recvcounts.data(), 1, to_mpi_type(MPI_INT), root_mpi_rank, to_mpi_comm(comm.mpi_comm)); 124 125 if(is_root) for(int i=1; i<mpi_size; i++) displs[i] = displs[i-1] + recvcounts[i-1]; 126 127 ::MPI_Scatterv(tmp_sendbuf, recvcounts.data(), displs.data(), to_mpi_type(sendtype), local_recvbuf, num_ep*count, to_mpi_type(recvtype), root_mpi_rank, to_mpi_comm(comm.mpi_comm)); 128 129 // printf("local_recvbuf = %d %d %d %d\n", static_cast<int*>(local_recvbuf)[0], static_cast<int*>(local_recvbuf)[1], static_cast<int*>(local_recvbuf)[2], static_cast<int*>(local_recvbuf)[3]); 130 // static_cast<int*>(local_recvbuf)[4], static_cast<int*>(local_recvbuf)[5], static_cast<int*>(local_recvbuf)[6], static_cast<int*>(local_recvbuf)[7]); 357 131 } 358 132 359 MPI_Scatter_local(local_recvbuf, count, datatype, recvbuf, comm); 133 if(mpi_rank == root_mpi_rank) MPI_Scatter_local(local_recvbuf, count, sendtype, recvbuf, recvcount, recvtype, root_ep_loc, comm); 134 else MPI_Scatter_local(local_recvbuf, count, sendtype, recvbuf, recvcount, recvtype, 0, comm); 360 135 361 if(ep_rank_loc == 0) 362 { 363 if(datatype == MPI_INT) 364 { 365 if(root_ep_loc!=0 && mpi_rank == root_mpi_rank) delete[] static_cast<int*>(master_sendbuf); 366 delete[] static_cast<int*>(local_recvbuf); 367 } 368 else if(datatype == MPI_FLOAT) 369 { 370 if(root_ep_loc!=0 && mpi_rank == root_mpi_rank) delete[] static_cast<float*>(master_sendbuf); 371 delete[] static_cast<float*>(local_recvbuf); 372 } 373 else if(datatype == MPI_DOUBLE) 374 { 375 if(root_ep_loc!=0 && mpi_rank == root_mpi_rank) delete[] static_cast<double*>(master_sendbuf); 376 delete[] static_cast<double*>(local_recvbuf); 377 } 378 else if(datatype == MPI_LONG) 379 { 380 if(root_ep_loc!=0 && mpi_rank == root_mpi_rank) delete[] static_cast<long*>(master_sendbuf); 381 delete[] static_cast<long*>(local_recvbuf); 382 } 383 else if(datatype == MPI_UNSIGNED_LONG) 384 { 385 if(root_ep_loc!=0 && mpi_rank == root_mpi_rank) delete[] static_cast<unsigned long*>(master_sendbuf); 386 delete[] static_cast<unsigned long*>(local_recvbuf); 387 } 388 else //if(datatype == MPI_DOUBLE) 389 { 390 if(root_ep_loc!=0 && mpi_rank == root_mpi_rank) delete[] static_cast<char*>(master_sendbuf); 391 delete[] static_cast<char*>(local_recvbuf); 392 } 393 } 136 if(is_root) delete[] tmp_sendbuf; 137 if(is_master) delete[] local_recvbuf; 138 } 394 139 395 }396 140 } -
XIOS/dev/branch_openmp/extern/src_ep_dev/ep_scatterv.cpp
r1134 r1287 9 9 #include <mpi.h> 10 10 #include "ep_declaration.hpp" 11 #include "ep_mpi.hpp" 11 12 12 13 using namespace std; … … 15 16 { 16 17 17 int MPI_Scatterv_local(const void *sendbuf, const int sendcounts[], const int displs[], MPI_Datatype datatype, void *recvbuf, MPI_Comm comm) 18 int MPI_Scatterv_local(const void *sendbuf, const int sendcounts[], const int displs[], MPI_Datatype sendtype, void *recvbuf, int recvcount, 19 MPI_Datatype recvtype, int local_root, MPI_Comm comm) 18 20 { 19 if(datatype == MPI_INT) 20 { 21 Debug("datatype is INT\n"); 22 return MPI_Scatterv_local_int(sendbuf, sendcounts, displs, recvbuf, comm); 23 } 24 else if(datatype == MPI_FLOAT) 25 { 26 Debug("datatype is FLOAT\n"); 27 return MPI_Scatterv_local_float(sendbuf, sendcounts, displs, recvbuf, comm); 28 } 29 else if(datatype == MPI_DOUBLE) 30 { 31 Debug("datatype is DOUBLE\n"); 32 return MPI_Scatterv_local_double(sendbuf, sendcounts, displs, recvbuf, comm); 33 } 34 else if(datatype == MPI_LONG) 35 { 36 Debug("datatype is LONG\n"); 37 return MPI_Scatterv_local_long(sendbuf, sendcounts, displs, recvbuf, comm); 38 } 39 else if(datatype == MPI_UNSIGNED_LONG) 40 { 41 Debug("datatype is uLONG\n"); 42 return MPI_Scatterv_local_ulong(sendbuf, sendcounts, displs, recvbuf, comm); 43 } 44 else if(datatype == MPI_CHAR) 45 { 46 Debug("datatype is CHAR\n"); 47 return MPI_Scatterv_local_char(sendbuf, sendcounts, displs, recvbuf, comm); 48 } 49 else 50 { 51 printf("MPI_scatterv Datatype not supported!\n"); 52 exit(0); 53 } 21 22 assert(valid_type(sendtype) && valid_type(recvtype)); 23 24 ::MPI_Aint datasize, lb; 25 ::MPI_Type_get_extent(to_mpi_type(sendtype), &lb, &datasize); 26 27 int ep_rank_loc = comm.ep_comm_ptr->size_rank_info[1].first; 28 int num_ep = comm.ep_comm_ptr->size_rank_info[1].second; 29 30 assert(recvcount == sendcounts[ep_rank_loc]); 31 32 if(ep_rank_loc == local_root) 33 comm.my_buffer->void_buffer[local_root] = const_cast<void*>(sendbuf); 34 35 MPI_Barrier_local(comm); 36 37 #pragma omp critical (_scatterv) 38 memcpy(recvbuf, comm.my_buffer->void_buffer[local_root]+datasize*displs[ep_rank_loc], datasize * recvcount); 39 40 41 MPI_Barrier_local(comm); 54 42 } 55 56 int MPI_Scatterv_local_int(const void *sendbuf, const int sendcounts[], const int displs[], void *recvbuf, MPI_Comm comm)57 {58 int my_rank = comm.ep_comm_ptr->size_rank_info[1].first;59 int num_ep = comm.ep_comm_ptr->size_rank_info[1].second;60 61 62 int *buffer = comm.my_buffer->buf_int;63 int *send_buf = static_cast<int*>(const_cast<void*>(sendbuf));64 int *recv_buf = static_cast<int*>(recvbuf);65 66 for(int k=0; k<num_ep; k++)67 {68 int count = sendcounts[k];69 for(int j=0; j<count; j+=BUFFER_SIZE)70 {71 if(my_rank == 0)72 {73 #pragma omp critical (write_to_buffer)74 {75 copy(send_buf+displs[k]+j, send_buf+displs[k]+j+min(BUFFER_SIZE, count-j), buffer);76 #pragma omp flush77 }78 }79 80 MPI_Barrier_local(comm);81 82 if(my_rank == k)83 {84 #pragma omp critical (read_from_buffer)85 {86 #pragma omp flush87 copy(buffer, buffer+min(BUFFER_SIZE, count-j), recv_buf+j);88 }89 }90 MPI_Barrier_local(comm);91 }92 }93 }94 95 int MPI_Scatterv_local_float(const void *sendbuf, const int sendcounts[], const int displs[], void *recvbuf, MPI_Comm comm)96 {97 int my_rank = comm.ep_comm_ptr->size_rank_info[1].first;98 int num_ep = comm.ep_comm_ptr->size_rank_info[1].second;99 100 101 float *buffer = comm.my_buffer->buf_float;102 float *send_buf = static_cast<float*>(const_cast<void*>(sendbuf));103 float *recv_buf = static_cast<float*>(recvbuf);104 105 for(int k=0; k<num_ep; k++)106 {107 int count = sendcounts[k];108 for(int j=0; j<count; j+=BUFFER_SIZE)109 {110 if(my_rank == 0)111 {112 #pragma omp critical (write_to_buffer)113 {114 copy(send_buf+displs[k]+j, send_buf+displs[k]+j+min(BUFFER_SIZE, count-j), buffer);115 #pragma omp flush116 }117 }118 119 MPI_Barrier_local(comm);120 121 if(my_rank == k)122 {123 #pragma omp critical (read_from_buffer)124 {125 #pragma omp flush126 copy(buffer, buffer+min(BUFFER_SIZE, count-j), recv_buf+j);127 }128 }129 MPI_Barrier_local(comm);130 }131 }132 }133 134 int MPI_Scatterv_local_double(const void *sendbuf, const int sendcounts[], const int displs[], void *recvbuf, MPI_Comm comm)135 {136 int my_rank = comm.ep_comm_ptr->size_rank_info[1].first;137 int num_ep = comm.ep_comm_ptr->size_rank_info[1].second;138 139 140 double *buffer = comm.my_buffer->buf_double;141 double *send_buf = static_cast<double*>(const_cast<void*>(sendbuf));142 double *recv_buf = static_cast<double*>(recvbuf);143 144 for(int k=0; k<num_ep; k++)145 {146 int count = sendcounts[k];147 for(int j=0; j<count; j+=BUFFER_SIZE)148 {149 if(my_rank == 0)150 {151 #pragma omp critical (write_to_buffer)152 {153 copy(send_buf+displs[k]+j, send_buf+displs[k]+j+min(BUFFER_SIZE, count-j), buffer);154 #pragma omp flush155 }156 }157 158 MPI_Barrier_local(comm);159 160 if(my_rank == k)161 {162 #pragma omp critical (read_from_buffer)163 {164 #pragma omp flush165 copy(buffer, buffer+min(BUFFER_SIZE, count-j), recv_buf+j);166 }167 }168 MPI_Barrier_local(comm);169 }170 }171 }172 173 int MPI_Scatterv_local_long(const void *sendbuf, const int sendcounts[], const int displs[], void *recvbuf, MPI_Comm comm)174 {175 int my_rank = comm.ep_comm_ptr->size_rank_info[1].first;176 int num_ep = comm.ep_comm_ptr->size_rank_info[1].second;177 178 179 long *buffer = comm.my_buffer->buf_long;180 long *send_buf = static_cast<long*>(const_cast<void*>(sendbuf));181 long *recv_buf = static_cast<long*>(recvbuf);182 183 for(int k=0; k<num_ep; k++)184 {185 int count = sendcounts[k];186 for(int j=0; j<count; j+=BUFFER_SIZE)187 {188 if(my_rank == 0)189 {190 #pragma omp critical (write_to_buffer)191 {192 copy(send_buf+displs[k]+j, send_buf+displs[k]+j+min(BUFFER_SIZE, count-j), buffer);193 #pragma omp flush194 }195 }196 197 MPI_Barrier_local(comm);198 199 if(my_rank == k)200 {201 #pragma omp critical (read_from_buffer)202 {203 #pragma omp flush204 copy(buffer, buffer+min(BUFFER_SIZE, count-j), recv_buf+j);205 }206 }207 MPI_Barrier_local(comm);208 }209 }210 }211 212 213 int MPI_Scatterv_local_ulong(const void *sendbuf, const int sendcounts[], const int displs[], void *recvbuf, MPI_Comm comm)214 {215 int my_rank = comm.ep_comm_ptr->size_rank_info[1].first;216 int num_ep = comm.ep_comm_ptr->size_rank_info[1].second;217 218 219 unsigned long *buffer = comm.my_buffer->buf_ulong;220 unsigned long *send_buf = static_cast<unsigned long*>(const_cast<void*>(sendbuf));221 unsigned long *recv_buf = static_cast<unsigned long*>(recvbuf);222 223 for(int k=0; k<num_ep; k++)224 {225 int count = sendcounts[k];226 for(int j=0; j<count; j+=BUFFER_SIZE)227 {228 if(my_rank == 0)229 {230 #pragma omp critical (write_to_buffer)231 {232 copy(send_buf+displs[k]+j, send_buf+displs[k]+j+min(BUFFER_SIZE, count-j), buffer);233 #pragma omp flush234 }235 }236 237 MPI_Barrier_local(comm);238 239 if(my_rank == k)240 {241 #pragma omp critical (read_from_buffer)242 {243 #pragma omp flush244 copy(buffer, buffer+min(BUFFER_SIZE, count-j), recv_buf+j);245 }246 }247 MPI_Barrier_local(comm);248 }249 }250 }251 252 253 int MPI_Scatterv_local_char(const void *sendbuf, const int sendcounts[], const int displs[], void *recvbuf, MPI_Comm comm)254 {255 int my_rank = comm.ep_comm_ptr->size_rank_info[1].first;256 int num_ep = comm.ep_comm_ptr->size_rank_info[1].second;257 258 259 char *buffer = comm.my_buffer->buf_char;260 char *send_buf = static_cast<char*>(const_cast<void*>(sendbuf));261 char *recv_buf = static_cast<char*>(recvbuf);262 263 for(int k=0; k<num_ep; k++)264 {265 int count = sendcounts[k];266 for(int j=0; j<count; j+=BUFFER_SIZE)267 {268 if(my_rank == 0)269 {270 #pragma omp critical (write_to_buffer)271 {272 copy(send_buf+displs[k]+j, send_buf+displs[k]+j+min(BUFFER_SIZE, count-j), buffer);273 #pragma omp flush274 }275 }276 277 MPI_Barrier_local(comm);278 279 if(my_rank == k)280 {281 #pragma omp critical (read_from_buffer)282 {283 #pragma omp flush284 copy(buffer, buffer+min(BUFFER_SIZE, count-j), recv_buf+j);285 }286 }287 MPI_Barrier_local(comm);288 }289 }290 }291 292 43 293 44 int MPI_Scatterv(const void *sendbuf, const int sendcounts[], const int displs[], MPI_Datatype sendtype, void *recvbuf, int recvcount, … … 296 47 if(!comm.is_ep) 297 48 { 298 ::MPI_Scatterv(sendbuf, sendcounts, displs, static_cast< ::MPI_Datatype>(sendtype), recvbuf, recvcount, 299 static_cast< ::MPI_Datatype>(recvtype), root, static_cast< ::MPI_Comm>(comm.mpi_comm)); 300 return 0; 49 return ::MPI_Scatterv(sendbuf, sendcounts, displs, to_mpi_type(sendtype), recvbuf, recvcount, to_mpi_type(recvtype), root, to_mpi_comm(comm.mpi_comm)); 301 50 } 302 if(!comm.mpi_comm) return 0; 51 52 assert(sendtype == recvtype); 303 53 304 assert(static_cast< ::MPI_Datatype>(sendtype) == static_cast< ::MPI_Datatype>(recvtype)); 305 306 MPI_Datatype datatype = sendtype; 54 int ep_rank = comm.ep_comm_ptr->size_rank_info[0].first; 55 int ep_rank_loc = comm.ep_comm_ptr->size_rank_info[1].first; 56 int mpi_rank = comm.ep_comm_ptr->size_rank_info[2].first; 57 int ep_size = comm.ep_comm_ptr->size_rank_info[0].second; 58 int num_ep = comm.ep_comm_ptr->size_rank_info[1].second; 59 int mpi_size = comm.ep_comm_ptr->size_rank_info[2].second; 307 60 308 61 int root_mpi_rank = comm.rank_map->at(root).second; 309 62 int root_ep_loc = comm.rank_map->at(root).first; 310 63 311 int ep_rank, ep_rank_loc, mpi_rank;312 int ep_size, num_ep, mpi_size;64 bool is_master = (ep_rank_loc==0 && mpi_rank != root_mpi_rank ) || ep_rank == root; 65 bool is_root = ep_rank == root; 313 66 314 ep_rank = comm.ep_comm_ptr->size_rank_info[0].first; 315 ep_rank_loc = comm.ep_comm_ptr->size_rank_info[1].first; 316 mpi_rank = comm.ep_comm_ptr->size_rank_info[2].first; 317 ep_size = comm.ep_comm_ptr->size_rank_info[0].second; 318 num_ep = comm.ep_comm_ptr->size_rank_info[1].second; 319 mpi_size = comm.ep_comm_ptr->size_rank_info[2].second; 320 321 if(ep_rank != root) 322 { 323 sendcounts = new int[ep_size]; 324 displs = new int[ep_size]; 325 } 326 327 MPI_Bcast(const_cast<int*>(sendcounts), ep_size, MPI_INT, root, comm); 328 MPI_Bcast(const_cast<int*>(displs), ep_size, MPI_INT, root, comm); 329 330 67 MPI_Datatype datatype = sendtype; 331 68 int count = recvcount; 332 69 333 70 ::MPI_Aint datasize, lb; 71 ::MPI_Type_get_extent(to_mpi_type(datatype), &lb, &datasize); 72 73 void *tmp_sendbuf; 74 if(is_root) tmp_sendbuf = new void*[ep_size * count * datasize]; 334 75 335 ::MPI_Type_get_extent(static_cast< ::MPI_Datatype>(datatype), &lb, &datasize); 76 // reorder tmp_sendbuf 77 std::vector<int>local_ranks(num_ep); 78 std::vector<int>ranks(ep_size); 336 79 337 assert(accumulate(sendcounts, sendcounts+ep_size-1, 0) == displs[ep_size-1]); // Only for contunuous gather. 80 if(mpi_rank == root_mpi_rank) MPI_Gather_local(&ep_rank, 1, MPI_INT, local_ranks.data(), root_ep_loc, comm); 81 else MPI_Gather_local(&ep_rank, 1, MPI_INT, local_ranks.data(), 0, comm); 338 82 339 83 340 void *master_sendbuf;341 void *local_recvbuf;84 std::vector<int> recvcounts(mpi_size, 0); 85 std::vector<int> my_displs(mpi_size, 0); 342 86 343 if(root_ep_loc!=0 && mpi_rank == root_mpi_rank) 87 88 if(is_master) 344 89 { 345 int count_sum = accumulate(sendcounts, sendcounts+ep_size, 0); 346 if(ep_rank_loc == 0) master_sendbuf = new void*[datasize*count_sum]; 90 for(int i=0; i<ep_size; i++) 91 { 92 recvcounts[comm.rank_map->at(i).second]++; 93 } 347 94 348 innode_memcpy(root_ep_loc, sendbuf, 0, master_sendbuf, count_sum, datatype, comm); 95 for(int i=1; i<mpi_size; i++) 96 my_displs[i] = my_displs[i-1] + recvcounts[i-1]; 97 98 ::MPI_Gatherv(local_ranks.data(), num_ep, MPI_INT, ranks.data(), recvcounts.data(), my_displs.data(), MPI_INT, root_mpi_rank, to_mpi_comm(comm.mpi_comm)); 349 99 } 350 100 351 101 102 if(is_root) 103 { 104 int local_displs = 0; 105 for(int i=0; i<ep_size; i++) 106 { 107 //printf("i=%d : start from %d, src displs = %d, count = %d\n ", i, local_displs/datasize, displs[ranks[i]], sendcounts[ranks[i]]); 108 memcpy(tmp_sendbuf+local_displs, sendbuf + displs[ranks[i]]*datasize, sendcounts[ranks[i]]*datasize); 109 local_displs += sendcounts[ranks[i]]*datasize; 110 } 111 112 //for(int i=0; i<ep_size*2; i++) printf("%d\t", static_cast<int*>(const_cast<void*>(tmp_sendbuf))[i]); 113 } 352 114 353 if(ep_rank_loc == 0) 115 // MPI_Scatterv from root to masters 116 117 void* local_sendbuf; 118 int local_sendcount; 119 if(mpi_rank == root_mpi_rank) MPI_Reduce_local(&recvcount, &local_sendcount, 1, MPI_INT, MPI_SUM, root_ep_loc, comm); 120 else MPI_Reduce_local(&recvcount, &local_sendcount, 1, MPI_INT, MPI_SUM, 0, comm); 121 122 if(is_master) 354 123 { 355 int mpi_sendcnt = accumulate(sendcounts+ep_rank, sendcounts+ep_rank+num_ep, 0);356 int mpi_scatterv_sendcnt[mpi_size];357 int mpi_displs[mpi_size];124 local_sendbuf = new void*[datasize * local_sendcount]; 125 126 ::MPI_Gather(&local_sendcount, 1, to_mpi_type(MPI_INT), recvcounts.data(), 1, to_mpi_type(MPI_INT), root_mpi_rank, to_mpi_comm(comm.mpi_comm)); 358 127 359 local_recvbuf = new void*[datasize*mpi_sendcnt];128 if(is_root) for(int i=1; i<mpi_size; i++) my_displs[i] = my_displs[i-1] + recvcounts[i-1]; 360 129 361 ::MPI_ Gather(&mpi_sendcnt, 1, MPI_INT_STD, mpi_scatterv_sendcnt, 1, MPI_INT_STD, root_mpi_rank, static_cast< ::MPI_Comm>(comm.mpi_comm));130 ::MPI_Scatterv(tmp_sendbuf, recvcounts.data(), my_displs.data(), to_mpi_type(sendtype), local_sendbuf, num_ep*count, to_mpi_type(recvtype), root_mpi_rank, to_mpi_comm(comm.mpi_comm)); 362 131 363 mpi_displs[0] = displs[0]; 364 for(int i=1; i<mpi_size; i++) 365 mpi_displs[i] = mpi_displs[i-1] + mpi_scatterv_sendcnt[i-1]; 132 // printf("my_displs = %d %d %d %d\n", my_displs[0], my_displs[1], my_displs[2], my_displs[3] ); 133 134 // printf("%d %d %d %d %d %d %d %d\n", static_cast<int*>(local_sendbuf)[0], static_cast<int*>(local_sendbuf)[1], static_cast<int*>(local_sendbuf)[2], static_cast<int*>(local_sendbuf)[3], 135 // static_cast<int*>(local_sendbuf)[4], static_cast<int*>(local_sendbuf)[5], static_cast<int*>(local_sendbuf)[6], static_cast<int*>(local_sendbuf)[7]); 136 } 137 138 std::vector<int>local_sendcounts(num_ep, 0); 139 std::vector<int>local_displs(num_ep, 0); 140 141 MPI_Gather_local(&recvcount, 1, MPI_INT, local_sendcounts.data(), 0, comm); 142 MPI_Bcast_local(local_sendcounts.data(), num_ep, MPI_INT, 0, comm); 143 for(int i=1; i<num_ep; i++) 144 local_displs[i] = local_displs[i-1] + local_sendcounts[i-1]; 145 146 147 if(mpi_rank == root_mpi_rank) MPI_Scatterv_local(local_sendbuf, local_sendcounts.data(), local_displs.data(), sendtype, recvbuf, recvcount, recvtype, root_ep_loc, comm); 148 else MPI_Scatterv_local(local_sendbuf, local_sendcounts.data(), local_displs.data(), sendtype, recvbuf, recvcount, recvtype, 0, comm); 366 149 367 150 368 if(root_ep_loc!=0) 369 { 370 ::MPI_Scatterv(master_sendbuf, mpi_scatterv_sendcnt, mpi_displs, static_cast< ::MPI_Datatype>(datatype), 371 local_recvbuf, mpi_sendcnt, static_cast< ::MPI_Datatype>(datatype), root_mpi_rank, static_cast< ::MPI_Comm>(comm.mpi_comm)); 372 } 373 else 374 { 375 ::MPI_Scatterv(sendbuf, mpi_scatterv_sendcnt, mpi_displs, static_cast< ::MPI_Datatype>(datatype), 376 local_recvbuf, mpi_sendcnt, static_cast< ::MPI_Datatype>(datatype), root_mpi_rank, static_cast< ::MPI_Comm>(comm.mpi_comm)); 377 } 378 } 151 if(is_root) delete[] tmp_sendbuf; 152 if(is_master) delete[] local_sendbuf; 153 } 379 154 380 int local_displs[num_ep];381 local_displs[0] = 0;382 for(int i=1; i<num_ep; i++)383 {384 local_displs[i] = displs[ep_rank-ep_rank_loc+i]-displs[ep_rank-ep_rank_loc];385 }386 155 387 MPI_Scatterv_local(local_recvbuf, sendcounts+ep_rank-ep_rank_loc, local_displs, datatype, recvbuf, comm);388 389 if(ep_rank_loc == 0)390 {391 if(datatype == MPI_INT)392 {393 if(root_ep_loc!=0 && mpi_rank == root_mpi_rank) delete[] static_cast<int*>(master_sendbuf);394 delete[] static_cast<int*>(local_recvbuf);395 }396 else if(datatype == MPI_FLOAT)397 {398 if(root_ep_loc!=0 && mpi_rank == root_mpi_rank) delete[] static_cast<float*>(master_sendbuf);399 delete[] static_cast<float*>(local_recvbuf);400 }401 else if(datatype == MPI_DOUBLE)402 {403 if(root_ep_loc!=0 && mpi_rank == root_mpi_rank) delete[] static_cast<double*>(master_sendbuf);404 delete[] static_cast<double*>(local_recvbuf);405 }406 else if(datatype == MPI_LONG)407 {408 if(root_ep_loc!=0 && mpi_rank == root_mpi_rank) delete[] static_cast<long*>(master_sendbuf);409 delete[] static_cast<long*>(local_recvbuf);410 }411 else if(datatype == MPI_UNSIGNED_LONG)412 {413 if(root_ep_loc!=0 && mpi_rank == root_mpi_rank) delete[] static_cast<unsigned long*>(master_sendbuf);414 delete[] static_cast<unsigned long*>(local_recvbuf);415 }416 else // if(datatype == MPI_DOUBLE)417 {418 if(root_ep_loc!=0 && mpi_rank == root_mpi_rank) delete[] static_cast<char*>(master_sendbuf);419 delete[] static_cast<char*>(local_recvbuf);420 }421 }422 else423 {424 delete[] sendcounts;425 delete[] displs;426 }427 428 }429 156 } -
XIOS/dev/branch_openmp/extern/src_ep_dev/ep_send.cpp
r1196 r1287 51 51 } 52 52 53 int MPI_Bsend(const void *buf, int count, MPI_Datatype datatype, int dest, int tag, MPI_Comm comm) 54 { 55 if(!comm.is_ep) 56 { 57 ::MPI_Comm mpi_comm = static_cast< ::MPI_Comm > (comm.mpi_comm); 58 ::MPI_Bsend(buf, count, static_cast< ::MPI_Datatype>(datatype), dest, tag, mpi_comm); 59 return 0; 60 } 61 62 MPI_Request request; 63 MPI_Status status; 64 //MPI_Ibsend(buf, count, datatype, dest, tag, comm, &request); 65 MPI_Wait(&request, &status); 66 67 //check_sum_send(buf, count, datatype, dest, tag, comm); 68 69 return 0; 70 } 71 53 72 54 73 55 … … 187 169 } 188 170 189 int MPI_Ibsend(const void *buf, int count, MPI_Datatype datatype, int dest, int tag, MPI_Comm comm, MPI_Request *request)190 {191 Debug("\nMPI_Isend with EP\n");192 int src_rank;193 MPI_Comm_rank(comm, &src_rank);194 195 196 197 if(!comm.is_ep)198 {199 ::MPI_Request mpi_request;200 ::MPI_Comm mpi_comm = static_cast< ::MPI_Comm > (comm.mpi_comm);201 ::MPI_Ibsend(buf, count, static_cast< ::MPI_Datatype> (datatype), dest, tag, mpi_comm, &mpi_request);202 203 request->mpi_request = mpi_request;204 205 request->ep_src = src_rank;206 request->ep_tag = tag;207 request->ep_datatype = datatype;208 request->type = 1;209 request->comm = comm;210 211 return 0;212 }213 214 if(comm.is_intercomm) return 0;//MPI_Ibsend_intercomm(buf, count, datatype, dest, tag, comm, request);215 216 // EP intracomm217 218 //check_sum_send(buf, count, datatype, dest, tag, comm, 1);219 220 int ep_src_loc = comm.ep_comm_ptr->size_rank_info[1].first;221 int ep_dest_loc = comm.ep_comm_ptr->comm_list->rank_map->at(dest).first;222 int mpi_tag = tag_combine(tag, ep_src_loc, ep_dest_loc);223 int mpi_dest = comm.ep_comm_ptr->comm_list->rank_map->at(dest).second;224 225 request->ep_src = src_rank;226 request->ep_tag = tag;227 request->ep_datatype = datatype;228 229 ::MPI_Comm mpi_comm = static_cast< ::MPI_Comm> (comm.mpi_comm);230 ::MPI_Request mpi_request;231 232 ::MPI_Ibsend(buf, count, static_cast< ::MPI_Datatype>(datatype), mpi_dest, mpi_tag, mpi_comm, &mpi_request);233 234 request->mpi_request = mpi_request;235 request->type = 1; // used in wait236 request->comm = comm;237 request->buf = const_cast<void*>(buf);238 239 //Message_Check(comm);240 241 return 0;242 }243 171 244 172 … … 256 184 int src_comm_label; 257 185 258 for(int i=0; i<comm.ep_comm_ptr->intercomm->local_rank_map->size(); i++) 259 { 260 if(comm.ep_comm_ptr->intercomm->local_rank_map->at(i).first == src_ep_rank) 261 { 262 src_comm_label = comm.ep_comm_ptr->intercomm->local_rank_map->at(i).second; 263 break; 264 } 265 } 186 src_comm_label = comm.ep_comm_ptr->intercomm->local_rank_map->at(src_ep_rank).second; 187 188 189 266 190 267 191 //Message_Check(comm); … … 389 313 } 390 314 391 int MPI_Ibsend_intercomm(const void *buf, int count, MPI_Datatype datatype, int dest, int tag, MPI_Comm comm, MPI_Request *request)392 {393 printf("MPI_Ibsend with intercomm not yet implemented\n");394 MPI_Abort(comm, 0);395 //check_sum_send(buf, count, datatype, dest, tag, comm, 1);396 397 int dest_remote_ep_rank = comm.ep_comm_ptr->intercomm->remote_rank_map->at(dest).first;398 int dest_remote_comm_label = comm.ep_comm_ptr->intercomm->remote_rank_map->at(dest).second;399 400 int src_ep_rank = comm.ep_comm_ptr->intercomm->size_rank_info[0].first;401 int src_comm_label;402 403 for(int i=0; i<comm.ep_comm_ptr->intercomm->local_rank_map->size(); i++)404 {405 if(comm.ep_comm_ptr->intercomm->local_rank_map->at(i).first == src_ep_rank)406 {407 src_comm_label = comm.ep_comm_ptr->intercomm->local_rank_map->at(i).second;408 break;409 }410 }411 412 //Message_Check(comm);413 414 415 if(dest_remote_comm_label == src_comm_label) // mpi_dest differs416 {417 int inter_src = comm.ep_comm_ptr->intercomm->local_rank_map->at(src_ep_rank).first;418 int ep_src_loc = comm.rank_map->at(inter_src).first;419 int ep_dest_loc = comm.rank_map->at(dest_remote_ep_rank).first;420 int mpi_dest = comm.rank_map->at(dest_remote_ep_rank).second;421 int mpi_tag = tag_combine(tag, ep_src_loc, ep_dest_loc);422 423 ::MPI_Comm mpi_comm = static_cast< ::MPI_Comm > (comm.mpi_comm);424 ::MPI_Request mpi_request;425 426 ::MPI_Isend(buf, count, static_cast< ::MPI_Datatype >(datatype), mpi_dest, mpi_tag, mpi_comm, &mpi_request);427 428 request->mpi_request = mpi_request;429 request->type = 1; // used in wait430 request->comm = comm;431 432 request->ep_src = src_ep_rank;433 request->ep_tag = tag;434 request->ep_datatype = datatype;435 }436 437 else // dest_remote_comm_label != src_comm_label438 {439 int inter_src = comm.ep_comm_ptr->intercomm->local_rank_map->at(src_ep_rank).first;440 int ep_src_loc = comm.rank_map->at(inter_src).first;441 int ep_dest_loc = comm.ep_comm_ptr->intercomm->intercomm_rank_map->at(dest_remote_ep_rank).first;442 int mpi_dest = comm.ep_comm_ptr->intercomm->intercomm_rank_map->at(dest_remote_ep_rank).second;443 int mpi_tag = tag_combine(tag, ep_src_loc, ep_dest_loc);444 445 ::MPI_Comm mpi_comm = static_cast< ::MPI_Comm >(comm.ep_comm_ptr->intercomm->mpi_inter_comm);446 ::MPI_Request mpi_request;447 448 ::MPI_Isend(buf, count, static_cast< ::MPI_Datatype >(datatype), mpi_dest, mpi_tag, mpi_comm, &mpi_request);449 450 request->mpi_request = mpi_request;451 request->type = 1; // used in wait452 request->comm = comm;453 454 request->ep_src = src_ep_rank;455 request->ep_tag = tag;456 request->ep_datatype = datatype;457 }458 459 return 0;460 461 }462 315 } 463 316 -
XIOS/dev/branch_openmp/extern/src_ep_dev/ep_size.cpp
r1134 r1287 31 31 Debug("Calling EP_Comm_size\n"); 32 32 33 if(comm.mpi_comm != MPI_COMM_NULL_STD)33 if(comm.mpi_comm != static_cast< ::MPI_Comm>(MPI_COMM_NULL.mpi_comm)) 34 34 { 35 35 ::MPI_Comm mpi_comm = static_cast< ::MPI_Comm > (comm.mpi_comm); … … 54 54 if(!comm.is_ep) 55 55 { 56 if(comm.mpi_comm != MPI_COMM_NULL_STD)56 if(comm.mpi_comm != static_cast< ::MPI_Comm>(MPI_COMM_NULL.mpi_comm)) 57 57 { 58 58 ::MPI_Comm mpi_comm = static_cast< ::MPI_Comm > (comm.mpi_comm); -
XIOS/dev/branch_openmp/extern/src_ep_dev/ep_split.cpp
r1134 r1287 45 45 mpi_size = comm.ep_comm_ptr->size_rank_info[2].second; 46 46 47 48 49 50 47 int num_color = 0; 51 48 … … 59 56 vector<int> all_color_loc(num_ep); 60 57 61 MPI_Gather_local(&color, 1, MPI_INT, all_color_loc.data(), comm);62 MPI_Bcast_local(all_color_loc.data(), num_ep, MPI_INT, comm);58 MPI_Gather_local(&color, 1, MPI_INT, all_color_loc.data(), 0, comm); 59 MPI_Bcast_local(all_color_loc.data(), num_ep, MPI_INT, 0, comm); 63 60 64 61 … … 100 97 vector<int> key_loc(num_ep); 101 98 102 MPI_Gather_local(&key, 1, MPI_INT, key_loc.data(), comm);103 MPI_Bcast_local(key_loc.data(), num_ep, MPI_INT, comm);99 MPI_Gather_local(&key, 1, MPI_INT, key_loc.data(), 0, comm); 100 MPI_Bcast_local(key_loc.data(), num_ep, MPI_INT, 0, comm); 104 101 105 102 for(int i=0; i<num_ep; i++) … … 167 164 { 168 165 *newcomm = comm.ep_comm_ptr->comm_list->mem_bridge[new_ep_rank_loc]; 169 // newcomm = &(comm.ep_comm_ptr->comm_list->mem_bridge[new_ep_rank_loc]);170 166 (*newcomm).ep_comm_ptr->comm_label = color; 171 167 } -
XIOS/dev/branch_openmp/extern/src_ep_dev/ep_test.cpp
r1220 r1287 24 24 if(request->type == 1) // isend 25 25 { 26 ::MPI_Request mpi_request = static_cast< ::MPI_Request >(request->mpi_request);27 26 ::MPI_Status mpi_status; 28 ::MPI_Test(& mpi_request, flag, &mpi_status);27 ::MPI_Test(&(request->mpi_request), flag, &mpi_status); 29 28 30 29 if(*flag) … … 49 48 if(request->type == 3) // imrecv 50 49 { 51 ::MPI_Request *mpi_request = static_cast< ::MPI_Request* >(&(request->mpi_request));52 50 ::MPI_Status mpi_status; 53 51 54 ::MPI_Test( mpi_request, flag, &mpi_status);52 ::MPI_Test(&(request->mpi_request), flag, &mpi_status); 55 53 56 54 … … 68 66 return 0; 69 67 } 70 68 71 69 } 72 70 -
XIOS/dev/branch_openmp/extern/src_ep_dev/ep_type.hpp
r1220 r1287 16 16 #include <assert.h> 17 17 #include <math.h> 18 #include <string.h> 18 19 19 20 #ifdef _Debug … … 247 248 struct BUFFER 248 249 { 249 double *buf_double; 250 float *buf_float; 251 int *buf_int; 252 long *buf_long; 253 unsigned long *buf_ulong; 254 char *buf_char; 250 void * void_buffer[12]; 255 251 }; 256 252 … … 276 272 277 273 MPI_Comm *mem_bridge; 274 278 275 279 276 #ifdef _intelmpi -
XIOS/dev/branch_openmp/extern/src_ep_dev/ep_wait.cpp
r1220 r1287 18 18 19 19 namespace ep_lib 20 { 20 { 21 21 22 22 int MPI_Wait(MPI_Request *request, MPI_Status *status) … … 32 32 33 33 34 ::MPI_Request mpi_request = static_cast< ::MPI_Request >(request->mpi_request);35 34 ::MPI_Status mpi_status; 36 ::MPI_Wait(& mpi_request, &mpi_status);35 ::MPI_Wait(&(request->mpi_request), &mpi_status); 37 36 38 request->mpi_request = mpi_request;39 37 40 38 status->mpi_status = &mpi_status; -
XIOS/dev/branch_openmp/src/client_server_mapping.cpp
r1220 r1287 65 65 66 66 67 for(int i=0; i<nbClient; i++)68 printf("MPI_Allgather : recvCount[%d] = %d\n", i, recvCount[i]);67 // for(int i=0; i<nbClient; i++) 68 // printf("MPI_Allgather : recvCount[%d] = %d\n", i, recvCount[i]); 69 69 70 70 displ[0]=0 ; … … 76 76 MPI_Allgatherv(sendBuff,nbConnectedServer,MPI_INT,recvBuff,recvCount,displ,MPI_INT,clientIntraComm) ; 77 77 78 for(int i=0; i<recvSize; i++)79 printf("MPI_Allgatherv : recvBuff[%d] = %d\n", i, recvBuff[i]);78 // for(int i=0; i<recvSize; i++) 79 // printf("MPI_Allgatherv : recvBuff[%d] = %d\n", i, recvBuff[i]); 80 80 81 81 -
XIOS/dev/branch_openmp/src/data_output.hpp
r1096 r1287 59 59 virtual void writeTimeDimension_(void) = 0; 60 60 virtual void writeTimeAxis_ (CField* field, 61 const shared_ptr<CCalendar> cal) = 0;61 const boost::shared_ptr<CCalendar> cal) = 0; 62 62 63 63 /// Propriétés protégées /// -
XIOS/dev/branch_openmp/src/filter/spatial_transform_filter.cpp
r1220 r1287 53 53 CSpatialTransformFilterEngine* spaceFilter = static_cast<CSpatialTransformFilterEngine*>(engine); 54 54 CDataPacketPtr outputPacket = spaceFilter->applyFilter(data, outputDefaultValue); 55 printf("spalceFilter applied\n");56 55 if (outputPacket) 57 56 onOutputReady(outputPacket); -
XIOS/dev/branch_openmp/src/group_factory_decl.cpp
r976 r1287 5 5 { 6 6 # define macro(U) \ 7 template void CGroupFactory::AddGroup<U>( shared_ptr<U> pgroup,shared_ptr<U> cgroup); \8 template void CGroupFactory::AddChild<U>( shared_ptr<U> group,shared_ptr<U::RelChild> child); \9 template shared_ptr<U> CGroupFactory::GetGroup<U>(shared_ptr<U> group, const StdString & id); \10 template shared_ptr<U::RelChild> CGroupFactory::GetChild<U>(shared_ptr<U> group, const StdString & id); \11 template int CGroupFactory::GetGroupNum<U>( shared_ptr<U> group); \12 template int CGroupFactory::GetGroupIdNum<U>( shared_ptr<U> group); \13 template int CGroupFactory::GetChildNum<U>( shared_ptr<U> group); \7 template void CGroupFactory::AddGroup<U>(boost::shared_ptr<U> pgroup,boost::shared_ptr<U> cgroup); \ 8 template void CGroupFactory::AddChild<U>(boost::shared_ptr<U> group, boost::shared_ptr<U::RelChild> child); \ 9 template boost::shared_ptr<U> CGroupFactory::GetGroup<U>(boost::shared_ptr<U> group, const StdString & id); \ 10 template boost::shared_ptr<U::RelChild> CGroupFactory::GetChild<U>(boost::shared_ptr<U> group, const StdString & id); \ 11 template int CGroupFactory::GetGroupNum<U>(boost::shared_ptr<U> group); \ 12 template int CGroupFactory::GetGroupIdNum<U>(boost::shared_ptr<U> group); \ 13 template int CGroupFactory::GetChildNum<U>(boost::shared_ptr<U> group); \ 14 14 template int CGroupFactory::GetChildIdNum<U>(boost::shared_ptr<U> group); \ 15 template bool CGroupFactory::HasGroup<U>( shared_ptr<U> group, const StdString & id); \15 template bool CGroupFactory::HasGroup<U>(boost::shared_ptr<U> group, const StdString & id); \ 16 16 template bool CGroupFactory::HasChild<U>(boost::shared_ptr<U> group, const StdString & id); \ 17 template shared_ptr<U> CGroupFactory::CreateGroup<U>(shared_ptr<U> group, const StdString & id ); \18 template shared_ptr<U::RelChild> CGroupFactory::CreateChild<U>(shared_ptr<U> group, const StdString & id);17 template boost::shared_ptr<U> CGroupFactory::CreateGroup<U>(boost::shared_ptr<U> group, const StdString & id ); \ 18 template boost::shared_ptr<U::RelChild> CGroupFactory::CreateChild<U>(boost::shared_ptr<U> group, const StdString & id); 19 19 20 20 macro(CFieldGroup) -
XIOS/dev/branch_openmp/src/io/inetcdf4.cpp
r1172 r1287 23 23 24 24 mpi = comm && !multifile; 25 MPI_Info m_info = MPI_INFO_NULL _STD;25 MPI_Info m_info = MPI_INFO_NULL.mpi_info; 26 26 27 27 // The file format will be detected automatically by NetCDF, it is safe to always set NC_MPIIO -
XIOS/dev/branch_openmp/src/io/onetcdf4.cpp
r1205 r1287 56 56 CTimer::get("Files : create").resume(); 57 57 if (wmpi) 58 CNetCdfInterface::createPar(filename, mode, *comm, MPI_INFO_NULL _STD, this->ncidp);58 CNetCdfInterface::createPar(filename, mode, *comm, MPI_INFO_NULL.mpi_info, this->ncidp); 59 59 else 60 60 CNetCdfInterface::create(filename, mode, this->ncidp); … … 68 68 CTimer::get("Files : open").resume(); 69 69 if (wmpi) 70 CNetCdfInterface::openPar(filename, mode, *comm, MPI_INFO_NULL _STD, this->ncidp);70 CNetCdfInterface::openPar(filename, mode, *comm, MPI_INFO_NULL.mpi_info, this->ncidp); 71 71 else 72 72 CNetCdfInterface::open(filename, mode, this->ncidp); -
XIOS/dev/branch_openmp/src/node/context.cpp
r1205 r1287 21 21 22 22 //shared_ptr<CContextGroup> CContext::root; 23 shared_ptr<CContextGroup> * CContext::root_ptr = 0;23 boost::shared_ptr<CContextGroup> * CContext::root_ptr = 0; 24 24 25 25 /// ////////////////////// Dfinitions ////////////////////// /// … … 63 63 if(root_ptr == 0) //root_ptr = new shared_ptr<CContextGroup>; 64 64 // if (root_ptr->get()==NULL) 65 root_ptr = new shared_ptr<CContextGroup>(new CContextGroup(xml::CXMLNode::GetRootName()));65 root_ptr = new boost::shared_ptr<CContextGroup>(new CContextGroup(xml::CXMLNode::GetRootName())); 66 66 return root_ptr->get(); 67 67 } -
XIOS/dev/branch_openmp/src/node/context.hpp
r1134 r1287 208 208 //static shared_ptr<CContextGroup> root; 209 209 210 static shared_ptr<CContextGroup> *root_ptr;210 static boost::shared_ptr<CContextGroup> *root_ptr; 211 211 #pragma omp threadprivate(root_ptr) 212 212 -
XIOS/dev/branch_openmp/src/node/file.cpp
r1205 r1287 482 482 if (isOpen) data_out->closeFile(); 483 483 484 data_out = shared_ptr<CDataOutput>(new CNc4DataOutput(this, oss.str(), append, useClassicFormat, useCFConvention,484 data_out = boost::shared_ptr<CDataOutput>(new CNc4DataOutput(this, oss.str(), append, useClassicFormat, useCFConvention, 485 485 static_cast< ::MPI_Comm >(fileComm.mpi_comm), multifile, isCollective, time_counter_name)); 486 486 isOpen = true; … … 600 600 multifile = true; 601 601 if (isOpen) data_out->closeFile(); 602 if (time_counter_name.isEmpty()) data_in = shared_ptr<CDataInput>(new CNc4DataInput(oss.str(), static_cast< ::MPI_Comm >(fileComm.mpi_comm), multifile, isCollective));603 else data_in = shared_ptr<CDataInput>(new CNc4DataInput(oss.str(), static_cast< ::MPI_Comm >(fileComm.mpi_comm), multifile, isCollective, time_counter_name));602 if (time_counter_name.isEmpty()) data_in = boost::shared_ptr<CDataInput>(new CNc4DataInput(oss.str(), static_cast< ::MPI_Comm >(fileComm.mpi_comm), multifile, isCollective)); 603 else data_in = boost::shared_ptr<CDataInput>(new CNc4DataInput(oss.str(), static_cast< ::MPI_Comm >(fileComm.mpi_comm), multifile, isCollective, time_counter_name)); 604 604 isOpen = true; 605 605 #else 606 606 if (isOpen) data_out->closeFile(); 607 if (time_counter_name.isEmpty()) data_in = shared_ptr<CDataInput>(new CNc4DataInput(oss.str(), static_cast< ::MPI_Comm >(fileComm.mpi_comm), multifile, isCollective));608 else data_in = shared_ptr<CDataInput>(new CNc4DataInput(oss.str(), static_cast< ::MPI_Comm >(fileComm.mpi_comm), multifile, isCollective, time_counter_name));607 if (time_counter_name.isEmpty()) data_in = boost::shared_ptr<CDataInput>(new CNc4DataInput(oss.str(), static_cast< ::MPI_Comm >(fileComm.mpi_comm), multifile, isCollective)); 608 else data_in = boost::shared_ptr<CDataInput>(new CNc4DataInput(oss.str(), static_cast< ::MPI_Comm >(fileComm.mpi_comm), multifile, isCollective, time_counter_name)); 609 609 isOpen = true; 610 610 #endif -
XIOS/dev/branch_openmp/src/object_factory_decl.cpp
r976 r1287 5 5 { 6 6 #define macro(U) \ 7 template shared_ptr<U> CObjectFactory::GetObject<U>(const StdString& id); \8 template shared_ptr<U> CObjectFactory::GetObject<U>(const StdString& context,const StdString& id); \9 template shared_ptr<U> CObjectFactory::GetObject<U>(const U* const object); \7 template boost::shared_ptr<U> CObjectFactory::GetObject<U>(const StdString& id); \ 8 template boost::shared_ptr<U> CObjectFactory::GetObject<U>(const StdString& context,const StdString& id); \ 9 template boost::shared_ptr<U> CObjectFactory::GetObject<U>(const U* const object); \ 10 10 template int CObjectFactory::GetObjectNum<U>(void); \ 11 11 template int CObjectFactory::GetObjectIdNum<U>(void); \ 12 template const std::vector< shared_ptr<U> >& CObjectFactory::GetObjectVector<U>(const StdString& context ); \12 template const std::vector<boost::shared_ptr<U> >& CObjectFactory::GetObjectVector<U>(const StdString& context ); \ 13 13 template bool CObjectFactory::HasObject<U>(const StdString& id); \ 14 14 template bool CObjectFactory::HasObject<U>(const StdString& context,const StdString& id); \ -
XIOS/dev/branch_openmp/src/object_template.hpp
r1134 r1287 77 77 static T* get(const string& contextId, const string& id) ; 78 78 T* get(void) ; 79 shared_ptr<T> getShared(void) ;80 static shared_ptr<T> getShared(const T* ptr) ;79 boost::shared_ptr<T> getShared(void) ; 80 static boost::shared_ptr<T> getShared(const T* ptr) ; 81 81 82 82 static T* create(const string& id=string("")) ; -
XIOS/dev/branch_openmp/src/object_template_impl.hpp
r1134 r1287 321 321 322 322 template <typename T> 323 shared_ptr<T> CObjectTemplate<T>::getShared(const T* ptr)323 boost::shared_ptr<T> CObjectTemplate<T>::getShared(const T* ptr) 324 324 { 325 325 return CObjectFactory::GetObject<T>(ptr); … … 327 327 328 328 template <typename T> 329 shared_ptr<T> CObjectTemplate<T>::getShared(void)329 boost::shared_ptr<T> CObjectTemplate<T>::getShared(void) 330 330 { 331 331 return CObjectFactory::GetObject<T>((T*)this); … … 335 335 const vector<T*> CObjectTemplate<T>::getAll() 336 336 { 337 const vector< shared_ptr<T> >& shared_vect= CObjectFactory::GetObjectVector<T>();337 const vector< boost::shared_ptr<T> >& shared_vect= CObjectFactory::GetObjectVector<T>(); 338 338 vector<T*> vect; 339 339 340 typename vector< shared_ptr<T> >::const_iterator it;340 typename vector<boost::shared_ptr<T> >::const_iterator it; 341 341 for(it=shared_vect.begin();it!=shared_vect.end();++it) vect.push_back(it->get()); 342 342 return vect; … … 346 346 const vector<T*> CObjectTemplate<T>::getAll(const string & id) 347 347 { 348 const vector< shared_ptr<T> >& shared_vect= CObjectFactory::GetObjectVector<T>(id);348 const vector< boost::shared_ptr<T> >& shared_vect= CObjectFactory::GetObjectVector<T>(id); 349 349 vector<T*> vect; 350 350 351 typename vector< shared_ptr<T> >::const_iterator it;351 typename vector<boost::shared_ptr<T> >::const_iterator it; 352 352 for(it=shared_vect.begin();it!=shared_vect.end();++it) vect.push_back(it->get()); 353 353 return vect;
Note: See TracChangeset
for help on using the changeset viewer.