Changeset 1538 for XIOS/dev/branch_openmp/extern
- Timestamp: 06/11/18 19:06:45 (6 years ago)
- Location: XIOS/dev/branch_openmp/extern
- Files: 14 edited
Legend:
- Unmodified (no prefix)
- Added (+)
- Removed (-)
XIOS/dev/branch_openmp/extern/ep_dev/ep_free.cpp
r1527 → r1538

      {
        Debug("MPI_Comm_free with MPI\n");
        return ::MPI_Comm_free(to_mpi_comm_ptr((*comm)->mpi_comm));
      }
  …
        ep_rank_loc = (*comm)->ep_comm_ptr->size_rank_info[1].first;
  -     int ep_rank = (*comm)->ep_comm_ptr->size_rank_info[0].first;
        num_ep = (*comm)->ep_comm_ptr->size_rank_info[1].second;

  -     MPI_Barrier(*comm);
  +     MPI_Barrier_local(*comm);

        if(ep_rank_loc == 0)
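In MPI_Comm_free the full MPI_Barrier over the endpoint communicator is replaced by MPI_Barrier_local, which, as the name suggests, synchronises only the endpoints hosted by the same MPI process before the ep_rank_loc == 0 endpoint performs the actual clean-up. The sketch below shows one way such a process-local barrier can look when endpoints are OpenMP threads; it is an illustration under that assumption, not ep_lib's actual implementation.

    #include <omp.h>

    // Illustration only: if the num_ep endpoints of one MPI process are the threads of an
    // OpenMP parallel region, a process-local barrier reduces to a thread barrier and
    // involves no MPI traffic. ep_lib's MPI_Barrier_local may be implemented differently.
    static inline void barrier_local_sketch()
    {
      #pragma omp barrier   // binds to the enclosing parallel region of the calling threads
    }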
XIOS/dev/branch_openmp/extern/ep_dev/ep_intercomm.cpp
r1533 → r1538

MPI_Intercomm_create is rewritten. The previous endpoint implementation — which asserted
local_comm->is_ep, exchanged mpi_size/ep_size and the per-process world_rank_and_num_ep arrays
between the two leaders, registered MPI groups in the shared tag_group_map, built the merged
communicator with ::MPI_Comm_create_group on the union group, and handed the endpoint
communicators out through tag_comm_map — is removed. It is replaced by a dispatcher and two new
routines, MPI_Intercomm_create_endpoint and MPI_Intercomm_create_mpi.

The new dispatcher:

  + int MPI_Intercomm_create(MPI_Comm local_comm, int local_leader, MPI_Comm peer_comm, int remote_leader, int tag, MPI_Comm *newintercomm)
  + {
  +   if(!local_comm->is_ep) return MPI_Intercomm_create_mpi(local_comm, local_leader, peer_comm, remote_leader, tag, newintercomm);
  +
  +   int ep_rank = local_comm->ep_comm_ptr->size_rank_info[0].first;
  +
  +   // check if local leaders are in the same mpi proc
  +   // by checking their mpi_rank in peer_comm
  +   int mpi_rank_of_leader[2];
  +
  +   if(ep_rank == local_leader)
  +   {
  +     mpi_rank_of_leader[0] = peer_comm->ep_comm_ptr->size_rank_info[2].first;
  +     mpi_rank_of_leader[1] = peer_comm->ep_rank_map->at(remote_leader).second;
  +   }
  +
  +   MPI_Bcast(mpi_rank_of_leader, 2, MPI_INT, local_leader, local_comm);
  +
  +   if(mpi_rank_of_leader[0] != mpi_rank_of_leader[1])
  +   {
  +     Debug("calling MPI_Intercomm_create_kernel\n");
  +     return MPI_Intercomm_create_endpoint(local_comm, local_leader, peer_comm, remote_leader, tag, newintercomm);
  +   }
  +   else
  +   {
  +     printf("local leaders are in the same MPI proc. Routine not yet implemented\n");
  +     MPI_Abort(local_comm, 0);
  +   }
  + }

MPI_Intercomm_create_endpoint (new; condensed here, the full body is in the file) proceeds in
seven steps, following the banner comments in the code:

  step 1 : the two local leaders exchange ep_size, leader_rank_in_peer, leader_rank_in_peer_mpi
           and leader_rank_in_world as a quadruple of ints over peer_comm (Isend/Irecv, ordered by
           the leaders' ranks in peer_comm), then bcast both quadruples to all endpoints of local_comm;
  step 2 : gather ranks in world for both local and remote comm (MPI_Allgather over local_comm,
           leader-to-leader exchange over peer_comm, MPI_Bcast of ranks_in_world_remote);
  step 3 : determine the priority and ownership of each endpoint
           (priority = local_leader_rank_in_peer < remote_leader_rank_in_peer; ownership defaults to
           priority, is forced to 1 on the MPI process hosting the local leader and to 0 on the
           process hosting the remote leader);
  step 4 : extract local_comm and create the intercomm: the involved MPI processes (the local
           leader, plus the first endpoint of every process not hosting the local leader) allgather
           ownership and mpi_rank, build extracted_comm from the owning processes with
           ::MPI_Group_incl / ::MPI_Comm_create, then call ::MPI_Intercomm_create towards
           remote_leader_rank_in_peer_mpi and merge with ::MPI_Intercomm_merge(mpi_inter_comm, !priority, intracomm);
  step 5 : determine the new num_ep as the number of local plus remote endpoints hosted by this
           MPI process;
  step 6 : create endpoints from extracted_comm with MPI_Comm_create_endpoints(intracomm,
           num_ep_count, info, ep_comm); the ep_comm array is published in intercomm_list under the
           key (tag, min(local_leader_rank_in_world, remote_leader_rank_in_world)) inside an OpenMP
           critical section, and each endpoint picks its communicator at index
           my_turn = ownership==1 ? ep_rank_loc : ep_rank_loc + repeated, where repeated counts the
           remote endpoints hosted by the same MPI process;
  step 7 : create intercomm_rank_map for local leaders: (*newintercomm)->is_intercomm is set,
           (ep_rank, new ep_rank) pairs are allgathered over local_comm, exchanged between the two
           leaders over peer_comm, broadcast, and inserted into (*newintercomm)->inter_rank_map.

The plain-MPI fallback remains a stub:

  + int MPI_Intercomm_create_mpi(MPI_Comm local_comm, int local_leader, MPI_Comm peer_comm, int remote_leader, int tag, MPI_Comm *newintercomm)
  + {
  +   printf("MPI_Intercomm_create_mpi not yet implemented\n");
  +   MPI_Abort(local_comm, 0);
  + }
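For orientation, a hedged caller-side sketch of the rewritten entry point follows. It is not part of r1538; the header name and the way the communicators are obtained are assumptions, while the routine names match the declarations added in ep_lib_collective.hpp, ep_lib_endpoint.hpp and ep_lib_mpi.hpp below.

    #include "ep_lib.hpp"   // assumed public ep_lib header; adjust to the actual include

    // Hypothetical helper: build an inter-communicator between this group of endpoints and a
    // remote group whose leader is identified by its rank in peer_comm. Whether the endpoint
    // path (MPI_Intercomm_create_endpoint) or the plain-MPI path (MPI_Intercomm_create_mpi)
    // is taken is decided inside the dispatcher shown above.
    ep_lib::MPI_Comm make_intercomm(ep_lib::MPI_Comm local_comm, ep_lib::MPI_Comm peer_comm,
                                    int local_leader, int remote_leader, int tag)
    {
      ep_lib::MPI_Comm newintercomm;
      ep_lib::MPI_Intercomm_create(local_comm, local_leader, peer_comm,
                                   remote_leader, tag, &newintercomm);
      return newintercomm;
    }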
XIOS/dev/branch_openmp/extern/ep_dev/ep_lib_collective.hpp
r1527 → r1538

      int MPI_Intercomm_create(MPI_Comm local_comm, int local_leader, MPI_Comm peer_comm, int remote_leader, int tag, MPI_Comm *newintercomm);

  -   //int MPI_Intercomm_create_kernel(MPI_Comm local_comm, int local_leader, MPI_Comm peer_comm, int remote_leader, int tag, MPI_Comm *newintercomm);
XIOS/dev/branch_openmp/extern/ep_dev/ep_lib_endpoint.hpp
r1532 → r1538

      int MPI_Iprobe_endpoint(int source, int tag, MPI_Comm comm, int *flag, MPI_Status *status);
      int MPI_Improbe_endpoint(int source, int tag, MPI_Comm comm, int *flag, MPI_Message *message, MPI_Status *status);

  +   int MPI_Intercomm_create_endpoint(MPI_Comm local_comm, int local_leader, MPI_Comm peer_comm, int remote_leader, int tag, MPI_Comm *newintercomm);

    }
XIOS/dev/branch_openmp/extern/ep_dev/ep_lib_mpi.hpp
r1527 → r1538

      int MPI_Barrier_mpi(MPI_Comm comm);

  +   int MPI_Intercomm_create_mpi(MPI_Comm local_comm, int local_leader, MPI_Comm peer_comm, int remote_leader, int tag, MPI_Comm *newintercomm);

    }
XIOS/dev/branch_openmp/extern/ep_dev/ep_message.cpp
r1532 → r1538

At three points in the scan of EP_PendingRequests, requests whose state is 2 (already tested or
waited on) are now removed from the pending list before any further processing. The first
occurrence reads:

      }

  +   if((*(*it))->state == 2)
  +   {
  +     EP_PendingRequests->erase(it);
  +
  +     memcheck("EP_PendingRequests["<<ep_rank<<"]->size() = " << EP_PendingRequests->size());
  +     it = EP_PendingRequests->begin();
  +     continue;
  +   }
  +
      Message_Check(((*(*it))->comm));
    }

The same guard block is inserted at two more places later in the file.
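The purge logic above restarts the scan from the head of the list after each erase. As a side note, the same effect can be obtained with the standard erase-returns-next-iterator idiom; a minimal self-contained sketch (plain C++, not ep_lib code) is shown below.

    #include <list>

    // Illustration only (not part of the changeset): drop every element of a pending list
    // whose state says it has already completed, without restarting the scan after each erase.
    struct Request { int state; };   // 0: new, 1: imrecvd, 2: tested or waited (mirrors ep_request.hpp)

    void purge_completed(std::list<Request*>& pending)
    {
      for (auto it = pending.begin(); it != pending.end(); )
      {
        if ((*it)->state == 2)
          it = pending.erase(it);    // erase() returns the iterator to the next element
        else
          ++it;
      }
    }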
XIOS/dev/branch_openmp/extern/ep_dev/ep_probe.cpp
r1532 → r1538

      if(comm->is_intercomm)
      {
  -     src = comm->inter_rank_map->at(src);
  +     if(src>=0) src = comm->inter_rank_map->at(src);
      }
  …
      status->ep_src = (*it)->ep_src;
      status->ep_tag = (*it)->ep_tag;
  +
  +   if(comm->is_intercomm)
  +   {
  +     for(INTER_RANK_MAP::iterator iter = comm->inter_rank_map->begin(); iter != comm->inter_rank_map->end(); iter++)
  +     {
  +       if(iter->second == (*it)->ep_src) status->ep_src=iter->first;
  +     }
  +   }

      *flag = true;
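The added loop translates the matched source back from the internal endpoint rank to the intercomm rank by scanning inter_rank_map for a matching value. Should that ever matter for performance, the inverse mapping could be materialised once per communicator; a small sketch under the assumption that INTER_RANK_MAP is a std::map<int,int> from intercomm rank to internal ep rank (which is how it is used in this changeset) follows.

    #include <map>

    using INTER_RANK_MAP = std::map<int, int>;   // assumed shape of ep_lib's typedef

    // Illustration only: build the reverse lookup (internal ep rank -> intercomm rank)
    // so a probed source can be translated without a linear scan.
    INTER_RANK_MAP make_reverse_map(const INTER_RANK_MAP& forward)
    {
      INTER_RANK_MAP reverse;
      for (const auto& kv : forward)
        reverse[kv.second] = kv.first;
      return reverse;
    }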
XIOS/dev/branch_openmp/extern/ep_dev/ep_recv.cpp
r1532 → r1538

      if(comm->is_intercomm)
      {
  -     src = comm->inter_rank_map->at(src);
  -     printf("new src = %d\n", src);
  +     if(src>=0) src = comm->inter_rank_map->at(src);
      }
  …
      (*request)->type = 2;
      (*request)->probed = false;
  +   (*request)->state = 0;

      (*request)->ep_src = src;
  …
      if(EP_PendingRequests == 0 ) EP_PendingRequests = new std::list< MPI_Request* >;

      EP_PendingRequests->push_back(request);

      memcheck("EP_PendingRequests["<<ep_rank<<"]->size() = " << EP_PendingRequests->size());
  …
      (*request)->probed = true;
  +   (*request)->state = 1;

      ::MPI_Imrecv(buf, count, to_mpi_type(datatype), to_mpi_message_ptr(*message), to_mpi_request_ptr(*request));
  …
  -   return Request_Check();
  +   //return Request_Check();
    }
XIOS/dev/branch_openmp/extern/ep_dev/ep_request.hpp
r1527 → r1538

      // 3: Imrecv

  -   //int state; // 0: new
  +   int state;   // 0: new
                   // 1: imrecvd
                   // 2: tested or waited
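Taken together with the ep_recv.cpp, ep_message.cpp, ep_test.cpp and ep_wait.cpp hunks of this changeset, the now-active state field tracks a request through a small lifecycle. A minimal sketch of that lifecycle, with an illustrative enum instead of the raw integers ep_lib actually stores, is given below.

    // Illustrative only (ep_lib keeps these as a plain int on ep_lib::MPI_Request);
    // the values mirror the comments next to `int state;` in ep_request.hpp.
    enum RequestState
    {
      EP_REQ_NEW     = 0,  // set when the receive request is created (ep_recv.cpp)
      EP_REQ_IMRECVD = 1,  // set once the matched probe succeeded and ::MPI_Imrecv was posted
      EP_REQ_DONE    = 2   // set by MPI_Test/MPI_Testall/MPI_Wait/MPI_Waitall; such requests
                           // are purged from EP_PendingRequests in ep_message.cpp
    };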
XIOS/dev/branch_openmp/extern/ep_dev/ep_test.cpp
r1527 → r1538

      status->ep_datatype = (*request)->ep_datatype;

  -   //(*request)->state = 2;
  +   (*request)->state = 2;

      memcheck("delete "<< (*request)->mpi_request << " : in ep_lib::MPI_Test, delete (*request)->mpi_request");
  …
      array_of_statuses[i].ep_datatype = array_of_requests[i]->ep_datatype;

  -   //array_of_requests[i]->state = 2;
  +   array_of_requests[i]->state = 2;

      memcheck("delete "<< array_of_requests[i]->mpi_request <<" : in ep_lib::MPI_Testall, array_of_requests["<<i<<"]->mpi_request");
XIOS/dev/branch_openmp/extern/ep_dev/ep_wait.cpp
r1527 → r1538

      status->ep_datatype = (*request)->ep_datatype;

  -   //(*request)->state = 2;
  +   (*request)->state = 2;

      memcheck("delete "<< (*request)->mpi_request << " : in ep_lib::MPI_Wait, delete (*request)->mpi_request");
  …
      array_of_statuses[i].ep_datatype = array_of_requests[i]->ep_datatype;

  -   //array_of_requests[i]->state = 2;
  +   array_of_requests[i]->state = 2;

      memcheck("delete "<< array_of_requests[i]->mpi_request <<" : in ep_lib::MPI_Waitall, array_of_requests["<<i<<"]->mpi_request");
XIOS/dev/branch_openmp/extern/remap/src/mapper.hpp
r1355 → r1538

    {
    public:
  -   //Mapper(ep_lib::MPI_Comm comm) : communicator(comm), verbose(SILENT), neighbourElements(NULL), sstree(comm) {}
  -   Mapper(ep_lib::MPI_Comm comm) : verbose(SILENT), neighbourElements(NULL), sstree(comm)
  -   {
  -     ep_lib::MPI_Comm_dup(comm, &communicator);
  -   }
  +   Mapper(ep_lib::MPI_Comm comm) : communicator(comm), verbose(SILENT), neighbourElements(NULL), sstree(comm) {}

      ~Mapper();
XIOS/dev/branch_openmp/extern/remap/src/mpi_cascade.hpp
r1355 → r1538

    {
    public:
  -   CCascadeLevel(ep_lib::MPI_Comm in_comm) : comm(in_comm)
  +   CCascadeLevel(ep_lib::MPI_Comm comm) : comm(comm)
      {
  -     ep_lib::MPI_Comm_dup(in_comm, &comm);
        ep_lib::MPI_Comm_size(comm, &size);
        ep_lib::MPI_Comm_rank(comm, &rank);
XIOS/dev/branch_openmp/extern/remap/src/parallel_tree.cpp
r1460 → r1538

    }

  - //CParallelTree::CParallelTree(MPI_Comm comm) : communicator(comm), cascade(MIN_NODE_SZ*MIN_NODE_SZ*2, comm)
  - CParallelTree::CParallelTree(MPI_Comm comm) : cascade(MAX_NODE_SZ*MAX_NODE_SZ*2, comm)
  - {
  -   MPI_Comm_dup(comm, &communicator);
  + CParallelTree::CParallelTree(MPI_Comm comm) : communicator(comm), cascade(MIN_NODE_SZ*MIN_NODE_SZ*2, comm)
  + {
      treeCascade.reserve(cascade.num_levels);
      for (int lev = 0; lev < cascade.num_levels; lev++)