1 | #include "ep_lib.hpp" |
---|
2 | #include <mpi.h> |
---|
3 | #include "ep_declaration.hpp" |
---|
4 | #include "ep_mpi.hpp" |
---|
5 | |
---|
6 | using namespace std; |
---|
7 | |
---|
8 | |
---|
9 | namespace ep_lib |
---|
10 | { |
---|
11 | |
---|
12 | |
---|
13 | int MPI_Intercomm_create_unique_leader(MPI_Comm local_comm, int local_leader, MPI_Comm peer_comm, int remote_leader, int tag, MPI_Comm *newintercomm) |
---|
14 | { |
---|
15 | //! mpi_size of local comm = 1 |
---|
16 | //! same world rank of leaders |
---|
17 | |
---|
18 | int ep_rank, ep_rank_loc, mpi_rank; |
---|
19 | int ep_size, num_ep, mpi_size; |
---|
20 | |
---|
21 | ep_rank = local_comm->ep_comm_ptr->size_rank_info[0].first; |
---|
22 | ep_rank_loc = local_comm->ep_comm_ptr->size_rank_info[1].first; |
---|
23 | mpi_rank = local_comm->ep_comm_ptr->size_rank_info[2].first; |
---|
24 | ep_size = local_comm->ep_comm_ptr->size_rank_info[0].second; |
---|
25 | num_ep = local_comm->ep_comm_ptr->size_rank_info[1].second; |
---|
26 | mpi_size = local_comm->ep_comm_ptr->size_rank_info[2].second; |
---|
27 | |
---|
28 | |
---|
29 | |
---|
30 | std::vector<int> rank_info[4]; //! 0->rank_in_world of local_comm, 1->rank_in_local_parent of local_comm |
---|
31 | //! 2->rank_in_world of remote_comm, 3->rank_in_local_parent of remote_comm |
---|
32 | |
---|
33 | int rank_in_world; |
---|
34 | |
---|
35 | int rank_in_peer_mpi[2]; |
---|
36 | |
---|
37 | ::MPI_Comm_rank(to_mpi_comm(MPI_COMM_WORLD->mpi_comm), &rank_in_world); |
---|
38 | |
---|
39 | |
---|
40 | int local_num_ep = num_ep; |
---|
41 | int remote_num_ep; |
---|
42 | int total_num_ep; |
---|
43 | |
---|
44 | int leader_rank_in_peer[2]; |
---|
45 | |
---|
46 | int my_position; |
---|
47 | int tag_label[2]; |
---|
48 | |
---|
49 | vector<int> send_buf(4); |
---|
50 | vector<int> recv_buf(4); |
---|
51 | |
---|
52 | |
---|
53 | if(ep_rank == local_leader) |
---|
54 | { |
---|
55 | MPI_Status status; |
---|
56 | |
---|
57 | |
---|
58 | |
---|
59 | MPI_Comm_rank(peer_comm, &leader_rank_in_peer[0]); |
---|
60 | |
---|
61 | send_buf[0] = local_num_ep; |
---|
62 | send_buf[1] = leader_rank_in_peer[0]; |
---|
63 | |
---|
64 | MPI_Request req_s, req_r; |
---|
65 | |
---|
66 | MPI_Isend(send_buf.data(), 2, MPI_INT, remote_leader, tag, peer_comm, &req_s); |
---|
67 | MPI_Irecv(recv_buf.data(), 2, MPI_INT, remote_leader, tag, peer_comm, &req_r); |
---|
68 | |
---|
69 | |
---|
70 | MPI_Wait(&req_s, &status); |
---|
71 | MPI_Wait(&req_r, &status); |
---|
72 | |
---|
73 | recv_buf[2] = leader_rank_in_peer[0]; |
---|
74 | |
---|
75 | } |
---|
76 | |
---|
77 | MPI_Bcast(recv_buf.data(), 3, MPI_INT, local_leader, local_comm); |
---|
78 | |
---|
79 | remote_num_ep = recv_buf[0]; |
---|
80 | leader_rank_in_peer[1] = recv_buf[1]; |
---|
81 | leader_rank_in_peer[0] = recv_buf[2]; |
---|
82 | |
---|
83 | total_num_ep = local_num_ep + remote_num_ep; |
---|
84 | |
---|
85 | |
---|
86 | if(leader_rank_in_peer[0] < leader_rank_in_peer[1]) |
---|
87 | { |
---|
88 | my_position = ep_rank_loc; |
---|
89 | //! LEADER create EP |
---|
90 | if(ep_rank == local_leader) |
---|
91 | { |
---|
92 | ::MPI_Comm *mpi_dup = new ::MPI_Comm; |
---|
93 | |
---|
94 | ::MPI_Comm_dup(to_mpi_comm(local_comm->mpi_comm), mpi_dup); |
---|
95 | |
---|
96 | MPI_Comm *ep_intercomm; |
---|
97 | MPI_Info info; |
---|
98 | MPI_Comm_create_endpoints(mpi_dup, total_num_ep, info, ep_intercomm); |
---|
99 | |
---|
100 | |
---|
101 | for(int i=0; i<total_num_ep; i++) |
---|
102 | { |
---|
103 | ep_intercomm[i]->is_intercomm = true; |
---|
104 | ep_intercomm[i]->ep_comm_ptr->intercomm = new ep_lib::ep_intercomm; |
---|
105 | ep_intercomm[i]->ep_comm_ptr->intercomm->mpi_inter_comm = 0; |
---|
106 | |
---|
107 | ep_intercomm[i]->ep_comm_ptr->comm_label = leader_rank_in_peer[0]; |
---|
108 | } |
---|
109 | |
---|
110 | tag_label[0] = TAG++; |
---|
111 | tag_label[1] = rank_in_world; |
---|
112 | |
---|
113 | #pragma omp critical (write_to_tag_list) |
---|
114 | tag_list.push_back(make_pair( make_pair(tag_label[0], tag_label[1]) , ep_intercomm)); |
---|
115 | |
---|
116 | MPI_Request req_s; |
---|
117 | MPI_Status sta_s; |
---|
118 | MPI_Isend(tag_label, 2, MPI_INT, remote_leader, tag, peer_comm, &req_s); |
---|
119 | |
---|
120 | MPI_Wait(&req_s, &sta_s); |
---|
121 | |
---|
122 | } |
---|
123 | } |
---|
124 | else |
---|
125 | { |
---|
126 | //! Wait for EP creation |
---|
127 | my_position = remote_num_ep + ep_rank_loc; |
---|
128 | if(ep_rank == local_leader) |
---|
129 | { |
---|
130 | MPI_Status status; |
---|
131 | MPI_Request req_r; |
---|
132 | MPI_Irecv(tag_label, 2, MPI_INT, remote_leader, tag, peer_comm, &req_r); |
---|
133 | MPI_Wait(&req_r, &status); |
---|
134 | } |
---|
135 | } |
---|
136 | |
---|
137 | MPI_Bcast(tag_label, 2, MPI_INT, local_leader, local_comm); |
---|
138 | |
---|
139 | |
---|
140 | |
---|
141 | |
---|
142 | #pragma omp critical (read_from_tag_list) |
---|
143 | { |
---|
144 | bool found = false; |
---|
145 | while(!found) |
---|
146 | { |
---|
147 | for(std::list<std::pair < std::pair<int,int>, MPI_Comm* > >::iterator iter = tag_list.begin(); iter!=tag_list.end(); iter++) |
---|
148 | { |
---|
149 | if((*iter).first == make_pair(tag_label[0], tag_label[1])) |
---|
150 | { |
---|
151 | *newintercomm = iter->second[my_position]; |
---|
152 | found = true; |
---|
153 | // tag_list.erase(iter); |
---|
154 | break; |
---|
155 | } |
---|
156 | } |
---|
157 | } |
---|
158 | } |
---|
159 | |
---|
160 | MPI_Barrier_local(local_comm); |
---|
161 | |
---|
162 | if(leader_rank_in_peer[0] < leader_rank_in_peer[1]) |
---|
163 | { |
---|
164 | for(std::list<std::pair < std::pair<int,int>, MPI_Comm* > >::iterator iter = tag_list.begin(); iter!=tag_list.end(); iter++) |
---|
165 | { |
---|
166 | if((*iter).first == make_pair(tag_label[0], tag_label[1])) |
---|
167 | { |
---|
168 | tag_list.erase(iter); |
---|
169 | break; |
---|
170 | } |
---|
171 | } |
---|
172 | } |
---|
173 | |
---|
174 | |
---|
175 | |
---|
176 | int intercomm_ep_rank, intercomm_ep_rank_loc, intercomm_mpi_rank; |
---|
177 | int intercomm_ep_size, intercomm_num_ep, intercomm_mpi_size; |
---|
178 | |
---|
179 | intercomm_ep_rank = (*newintercomm)->ep_comm_ptr->size_rank_info[0].first; |
---|
180 | intercomm_ep_rank_loc = (*newintercomm)->ep_comm_ptr->size_rank_info[1].first; |
---|
181 | intercomm_mpi_rank = (*newintercomm)->ep_comm_ptr->size_rank_info[2].first; |
---|
182 | intercomm_ep_size = (*newintercomm)->ep_comm_ptr->size_rank_info[0].second; |
---|
183 | intercomm_num_ep = (*newintercomm)->ep_comm_ptr->size_rank_info[1].second; |
---|
184 | intercomm_mpi_size = (*newintercomm)->ep_comm_ptr->size_rank_info[2].second; |
---|
185 | |
---|
186 | |
---|
187 | |
---|
188 | (*newintercomm)->ep_comm_ptr->intercomm->loc_rank_map = new RANK_MAP; |
---|
189 | (*newintercomm)->ep_comm_ptr->intercomm->remote_rank_map = new RANK_MAP; |
---|
190 | (*newintercomm)->ep_comm_ptr->intercomm->loc_rank_map->resize(local_num_ep); |
---|
191 | (*newintercomm)->ep_comm_ptr->intercomm->remote_rank_map->resize(remote_num_ep); |
---|
192 | |
---|
193 | (*newintercomm)->ep_comm_ptr->intercomm->size_rank_info[0] = local_comm->ep_comm_ptr->size_rank_info[0]; |
---|
194 | (*newintercomm)->ep_comm_ptr->intercomm->size_rank_info[1] = local_comm->ep_comm_ptr->size_rank_info[1]; |
---|
195 | (*newintercomm)->ep_comm_ptr->intercomm->size_rank_info[2] = local_comm->ep_comm_ptr->size_rank_info[2]; |
---|
196 | |
---|
197 | |
---|
198 | |
---|
199 | int local_rank_map_ele[2]; |
---|
200 | local_rank_map_ele[0] = intercomm_ep_rank; |
---|
201 | local_rank_map_ele[1] = (*newintercomm)->ep_comm_ptr->comm_label; |
---|
202 | |
---|
203 | MPI_Allgather(local_rank_map_ele, 2, MPI_INT, |
---|
204 | (*newintercomm)->ep_comm_ptr->intercomm->loc_rank_map->data(), 2, MPI_INT, local_comm); |
---|
205 | |
---|
206 | if(ep_rank == local_leader) |
---|
207 | { |
---|
208 | MPI_Status status; |
---|
209 | MPI_Request req_s, req_r; |
---|
210 | |
---|
211 | MPI_Isend((*newintercomm)->ep_comm_ptr->intercomm->loc_rank_map->data(), 2*local_num_ep, MPI_INT, remote_leader, tag, peer_comm, &req_s); |
---|
212 | MPI_Irecv((*newintercomm)->ep_comm_ptr->intercomm->remote_rank_map->data(), 2*remote_num_ep, MPI_INT, remote_leader, tag, peer_comm, &req_r); |
---|
213 | |
---|
214 | |
---|
215 | MPI_Wait(&req_s, &status); |
---|
216 | MPI_Wait(&req_r, &status); |
---|
217 | |
---|
218 | } |
---|
219 | |
---|
220 | MPI_Bcast((*newintercomm)->ep_comm_ptr->intercomm->remote_rank_map->data(), 2*remote_num_ep, MPI_INT, local_leader, local_comm); |
---|
221 | //(*newintercomm)->ep_comm_ptr->intercomm->local_comm = (local_comm->ep_comm_ptr->comm_list[ep_rank_loc]); |
---|
222 | (*newintercomm)->ep_comm_ptr->intercomm->intercomm_tag = tag; |
---|
223 | |
---|
224 | |
---|
225 | return MPI_SUCCESS; |
---|
226 | } |
---|
227 | |
---|
228 | } |
---|