1 | #include "event_scheduler.hpp" |
---|
2 | #include "xios_spl.hpp" |
---|
3 | #include "mpi.hpp" |
---|
4 | #include "tracer.hpp" |
---|
5 | #include "cxios.hpp" |
---|
6 | |
---|
7 | namespace xios |
---|
8 | { |
---|
9 | |
---|
10 | |
---|
11 | CEventScheduler::CEventScheduler(const MPI_Comm& comm) |
---|
12 | { |
---|
13 | schedulerLevel_=0 ; |
---|
14 | parentScheduler_.reset(); |
---|
15 | childScheduler_.reset(); |
---|
16 | initialize(comm) ; |
---|
17 | } |
---|
18 | |
---|
19 | CEventScheduler::CEventScheduler(const MPI_Comm& comm, size_t schedulerLevel) |
---|
20 | { |
---|
21 | schedulerLevel_=schedulerLevel ; |
---|
22 | parentScheduler_.reset(); |
---|
23 | childScheduler_.reset(); |
---|
24 | initialize(comm) ; |
---|
25 | } |
---|
26 | |
---|
// Duplicate the communicator and build this rank's view of the communication
// tree used to gather events from children and broadcast them back down.
// Fills parent_[lev], child_[lev][i] and nbChild_[lev] for each tree level.
void CEventScheduler::initialize(const MPI_Comm& comm)
{
  xios::MPI_Comm_dup(comm, &communicator_) ;
  CXios::getMpiGarbageCollector().registerCommunicator(communicator_) ;
  MPI_Comm_size(communicator_,&mpiSize_) ;
  MPI_Comm_rank(communicator_,&mpiRank_);


  // Choose the smallest fan-out maxChild (>= 2) such that a tree of that
  // arity is large enough: the loop computes m = maxChild^maxChild and grows
  // maxChild until m covers mpiSize_ processes.
  int maxChild=1 ;

  int m ;
  do
  {
    m=1 ;
    maxChild=maxChild+1 ;
    for(int i=0;i<maxChild;i++) m=m*maxChild ;   // m = maxChild^maxChild
  } while(m<mpiSize_) ;


  // Depth of the tree with that fan-out (number of levels needed so that
  // maxChild^maxLevel >= mpiSize_).
  int maxLevel=0 ;
  for(int size=1; size<=mpiSize_; size*=maxChild) maxLevel++ ;

  int begin, end, nb ;
  int pos, n ;

  parent_=vector<int>(maxLevel+1) ;
  child_=vector<vector<int> >(maxLevel+1,vector<int>(maxChild)) ;
  nbChild_=vector<int> (maxLevel+1) ;

  // Starting from the full rank range [0, mpiSize_-1], repeatedly split the
  // range containing this rank into up to maxChild contiguous sub-ranges,
  // one tree level per iteration, until the range holds a single rank.
  level_=0 ;
  begin=0 ;
  end=mpiSize_-1 ;
  nb=end-begin+1 ;

  do
  {
    n=0 ;
    pos=begin ;
    nbChild_[level_]=0 ;
    parent_[level_+1]=begin ;   // first rank of the current range acts as the parent
    for(int i=0;i<maxChild && i<nb ;i++)
    {
      // Spread nb ranks as evenly as possible: the first nb%maxChild
      // sub-ranges receive one extra rank.
      if (i<nb%maxChild) n = nb/maxChild + 1 ;
      else n = nb/maxChild ;

      if (mpiRank_>=pos && mpiRank_<pos+n)
      {
        // Narrow down to the sub-range containing this rank; it becomes the
        // range that is split at the next level.
        begin=pos ;
        end=pos+n-1 ;
      }
      child_[level_][i]=pos ;   // first rank of each sub-range is the child contact
      pos=pos+n ;
      nbChild_[level_]++ ;
    }
    nb=end-begin+1 ;
    level_=level_+1 ;
  } while (nb>1) ;


}
---|
87 | |
---|
88 | CEventScheduler::~CEventScheduler() |
---|
89 | { |
---|
90 | while (!pendingSentParentRequest_.empty() || !pendingRecvParentRequest_.empty() || !pendingRecvChildRequest_.empty() || !pendingSentChildRequest_.empty()) |
---|
91 | { |
---|
92 | checkEvent_() ; |
---|
93 | } |
---|
94 | } |
---|
95 | |
---|
96 | void CEventScheduler::cleanSplitSchedulers() |
---|
97 | { |
---|
98 | // Cleaning is operated recursively going from parent to child |
---|
99 | if (parentScheduler_) |
---|
100 | { |
---|
101 | if (parentScheduler_->childScheduler_.get() == this) |
---|
102 | { |
---|
103 | parentScheduler_.reset(); |
---|
104 | } |
---|
105 | else // if orphan (due to splitScheduler) : clean parent tree (it does not have child) |
---|
106 | { |
---|
107 | parentScheduler_->cleanSplitSchedulers(); |
---|
108 | parentScheduler_.reset(); |
---|
109 | } |
---|
110 | } |
---|
111 | if (childScheduler_) |
---|
112 | { |
---|
113 | childScheduler_->cleanSplitSchedulers(); |
---|
114 | childScheduler_.reset(); |
---|
115 | } |
---|
116 | } |
---|
117 | |
---|
118 | void CEventScheduler::splitScheduler(const MPI_Comm& splittedComm, shared_ptr<CEventScheduler>& parent, shared_ptr<CEventScheduler>& child) |
---|
119 | { |
---|
120 | int color ; |
---|
121 | MPI_Comm newComm ; |
---|
122 | child = make_shared<CEventScheduler>(splittedComm, schedulerLevel_+ 1) ; |
---|
123 | if (child->isRoot()) color=1 ; |
---|
124 | else color=0 ; |
---|
125 | xios::MPI_Comm_split(communicator_, color, mpiRank_, &newComm) ; |
---|
126 | CXios::getMpiGarbageCollector().registerCommunicator(newComm) ; |
---|
127 | |
---|
128 | parent = make_shared<CEventScheduler>(newComm , schedulerLevel_) ; |
---|
129 | child->setParentScheduler(parent) ; |
---|
130 | parent->setChildScheduler(child) ; |
---|
131 | if (parentScheduler_) |
---|
132 | { |
---|
133 | parentScheduler_->setChildScheduler(parent) ; |
---|
134 | parent->setParentScheduler(parentScheduler_) ; |
---|
135 | } |
---|
136 | |
---|
137 | } |
---|
138 | |
---|
139 | void CEventScheduler::registerEvent(const size_t timeLine, const size_t contextHashId) |
---|
140 | { |
---|
141 | getBaseScheduler()->registerEvent(timeLine, contextHashId, schedulerLevel_) ; |
---|
142 | checkEvent_() ; |
---|
143 | } |
---|
144 | |
---|
145 | void CEventScheduler::registerEvent(const size_t timeLine, const size_t contextHashId, const size_t schedulerLevel) |
---|
146 | { |
---|
147 | registerEvent(timeLine, contextHashId, schedulerLevel, level_) ; |
---|
148 | checkEvent_() ; |
---|
149 | } |
---|
150 | |
---|
151 | void CEventScheduler::registerEvent(const size_t timeLine, const size_t contextHashId, const size_t schedulerLevel, const size_t lev) |
---|
152 | { |
---|
153 | |
---|
154 | traceOff() ; |
---|
155 | SPendingRequest* sentRequest=new SPendingRequest ; |
---|
156 | sentRequest->buffer[0]=timeLine ; |
---|
157 | sentRequest->buffer[1]=contextHashId ; |
---|
158 | sentRequest->buffer[2]=schedulerLevel ; |
---|
159 | sentRequest->buffer[3]=lev-1 ; |
---|
160 | |
---|
161 | pendingSentParentRequest_.push(sentRequest) ; |
---|
162 | // info(100)<<"CEventScheduler::registerEvent => send event to parent "<<parent_[lev]<<" of level" <<lev-1<<endl ; |
---|
163 | MPI_Isend(sentRequest->buffer,4, MPI_UNSIGNED_LONG, parent_[lev], 0, communicator_, &sentRequest->request) ; |
---|
164 | traceOn() ; |
---|
165 | } |
---|
166 | |
---|
167 | |
---|
168 | bool CEventScheduler::queryEvent_(const size_t timeLine, const size_t contextHashId) |
---|
169 | { |
---|
170 | checkEvent_() ; |
---|
171 | |
---|
172 | if (! eventStack_.empty() && eventStack_.front().first==timeLine && eventStack_.front().second==contextHashId) |
---|
173 | { |
---|
174 | return true ; |
---|
175 | } |
---|
176 | else return false ; |
---|
177 | } |
---|
178 | |
---|
179 | void CEventScheduler::checkEvent_(void) |
---|
180 | { |
---|
181 | |
---|
182 | if (parentScheduler_) parentScheduler_->checkEvent_() ; |
---|
183 | traceOff() ; |
---|
184 | checkChildRequest() ; |
---|
185 | checkParentRequest() ; |
---|
186 | traceOn() ; |
---|
187 | |
---|
188 | } |
---|
189 | |
---|
190 | void CEventScheduler::checkParentRequest(void) |
---|
191 | { |
---|
192 | int completed ; |
---|
193 | MPI_Status status ; |
---|
194 | int received ; |
---|
195 | SPendingRequest* recvRequest ; |
---|
196 | completed=true ; |
---|
197 | |
---|
198 | // check sent request to parent |
---|
199 | while (! pendingSentParentRequest_.empty() && completed) |
---|
200 | { |
---|
201 | MPI_Test( & pendingSentParentRequest_.front()->request, &completed, &status) ; |
---|
202 | if (completed) |
---|
203 | { |
---|
204 | delete pendingSentParentRequest_.front() ; |
---|
205 | pendingSentParentRequest_.pop() ; |
---|
206 | } |
---|
207 | } |
---|
208 | |
---|
209 | // probe if a message is coming from parent |
---|
210 | received=true ; |
---|
211 | while(received) |
---|
212 | { |
---|
213 | MPI_Iprobe(MPI_ANY_SOURCE,1,communicator_,&received, &status) ; |
---|
214 | if (received) |
---|
215 | { |
---|
216 | recvRequest=new SPendingRequest ; |
---|
217 | MPI_Irecv(recvRequest->buffer, 4, MPI_UNSIGNED_LONG, MPI_ANY_SOURCE, 1, communicator_, &(recvRequest->request)) ; |
---|
218 | pendingRecvParentRequest_.push(recvRequest) ; |
---|
219 | } |
---|
220 | } |
---|
221 | |
---|
222 | // check sent request from parent |
---|
223 | completed=true ; |
---|
224 | while (! pendingRecvParentRequest_.empty() && completed) |
---|
225 | { |
---|
226 | recvRequest=pendingRecvParentRequest_.front() ; |
---|
227 | MPI_Test( &(recvRequest->request), &completed, &status) ; |
---|
228 | |
---|
229 | if (completed) |
---|
230 | { |
---|
231 | size_t timeLine=recvRequest->buffer[0] ; |
---|
232 | size_t hashId=recvRequest->buffer[1] ; |
---|
233 | size_t schedulerLevel=recvRequest->buffer[2] ; |
---|
234 | size_t lev=recvRequest->buffer[3] ; |
---|
235 | delete recvRequest ; |
---|
236 | pendingRecvParentRequest_.pop() ; |
---|
237 | |
---|
238 | // info(100)<<"CEventScheduler::checkParentRequest => receive event from parent "<< status.MPI_SOURCE<<"at level"<< lev<< endl ; |
---|
239 | |
---|
240 | if (lev==level_) |
---|
241 | { |
---|
242 | if (childScheduler_) |
---|
243 | { |
---|
244 | // info(100)<<"CEventScheduler::checkParentRequest => bcast event to child scheduler "<<endl; |
---|
245 | childScheduler_->bcastEvent(timeLine, hashId, schedulerLevel, 0) ; |
---|
246 | } |
---|
247 | else |
---|
248 | { |
---|
249 | // info(100)<<"CEventScheduler::checkParentRequest => put event to stack : timeLine : "<<timeLine<<" hashId : "<<hashId<<endl; |
---|
250 | eventStack_.push(pair<size_t,size_t>(timeLine,hashId)) ; |
---|
251 | } |
---|
252 | } |
---|
253 | else |
---|
254 | { |
---|
255 | // info(100)<<"CEventScheduler::checkParentRequest => bcast event to child process "<<endl; |
---|
256 | bcastEvent(timeLine, hashId, schedulerLevel, lev) ; |
---|
257 | } |
---|
258 | } |
---|
259 | } |
---|
260 | |
---|
261 | } |
---|
262 | |
---|
263 | void CEventScheduler::checkChildRequest(void) |
---|
264 | { |
---|
265 | // function call only by parent mpi process |
---|
266 | |
---|
267 | MPI_Status status ; |
---|
268 | int received ; |
---|
269 | received=true ; |
---|
270 | SPendingRequest* recvRequest ; |
---|
271 | |
---|
272 | // check for posted requests and make the corresponding receive |
---|
273 | while(received) |
---|
274 | { |
---|
275 | MPI_Iprobe(MPI_ANY_SOURCE,0,communicator_,&received, &status) ; |
---|
276 | if (received) |
---|
277 | { |
---|
278 | recvRequest=new SPendingRequest ; |
---|
279 | MPI_Irecv(recvRequest->buffer, 4, MPI_UNSIGNED_LONG, MPI_ANY_SOURCE, 0, communicator_, &recvRequest->request) ; |
---|
280 | pendingRecvChildRequest_.push_back(recvRequest) ; |
---|
281 | } |
---|
282 | } |
---|
283 | |
---|
284 | // check if receive request is achieved |
---|
285 | |
---|
286 | for(list<SPendingRequest*>::iterator it=pendingRecvChildRequest_.begin(); it!=pendingRecvChildRequest_.end() ; ) |
---|
287 | { |
---|
288 | MPI_Test(&((*it)->request),&received,&status) ; |
---|
289 | if (received) |
---|
290 | { |
---|
291 | size_t timeLine=(*it)->buffer[0] ; |
---|
292 | size_t hashId=(*it)->buffer[1] ; |
---|
293 | size_t schedulerLevel=(*it)->buffer[2] ; |
---|
294 | size_t lev=(*it)->buffer[3] ; |
---|
295 | |
---|
296 | // info(100)<<"CEventScheduler::checkChildRequest => received event from child "<<status.MPI_SOURCE<<" at level "<<lev<<endl; |
---|
297 | |
---|
298 | SEvent event={timeLine, hashId, schedulerLevel, lev} ; |
---|
299 | delete *it ; // free mem |
---|
300 | it=pendingRecvChildRequest_.erase(it) ; // get out of the list |
---|
301 | |
---|
302 | map< SEvent,int>::iterator itEvent=recvEvent_.find(event) ; |
---|
303 | if (itEvent==recvEvent_.end()) |
---|
304 | { |
---|
305 | itEvent=(recvEvent_.insert(pair< SEvent ,int > (event,1))).first ; |
---|
306 | |
---|
307 | } |
---|
308 | else (itEvent->second)++ ; |
---|
309 | if (itEvent->second==nbChild_[lev]) |
---|
310 | { |
---|
311 | if (lev==0) |
---|
312 | { |
---|
313 | if (schedulerLevel==schedulerLevel_) |
---|
314 | { |
---|
315 | // info(100)<<"CEventScheduler::checkChildRequest => bcastEvent to child"<<endl ; |
---|
316 | bcastEvent(timeLine, hashId, schedulerLevel, lev) ; |
---|
317 | } |
---|
318 | else |
---|
319 | { |
---|
320 | // info(100)<<"CEventScheduler::checkChildRequest => register event to parent scheduler"<<endl ; |
---|
321 | parentScheduler_->registerEvent(timeLine, hashId, schedulerLevel) ; |
---|
322 | } |
---|
323 | recvEvent_.erase(itEvent) ; |
---|
324 | } |
---|
325 | else |
---|
326 | { |
---|
327 | // info(100)<<"CEventScheduler::checkChildRequest => register event to parent process"<<endl ; |
---|
328 | registerEvent( timeLine,hashId, schedulerLevel, lev) ; |
---|
329 | recvEvent_.erase(itEvent) ; |
---|
330 | } |
---|
331 | } |
---|
332 | } |
---|
333 | else ++it ; |
---|
334 | } |
---|
335 | |
---|
336 | // check if bcast request is achieved |
---|
337 | |
---|
338 | for(list<SPendingRequest*>::iterator it=pendingSentChildRequest_.begin(); it!=pendingSentChildRequest_.end() ; ) |
---|
339 | { |
---|
340 | MPI_Test(&(*it)->request,&received,&status) ; |
---|
341 | if (received) |
---|
342 | { |
---|
343 | delete *it ; // free memory |
---|
344 | it = pendingSentChildRequest_.erase(it) ; // get out of the list |
---|
345 | |
---|
346 | } |
---|
347 | else ++it ; |
---|
348 | |
---|
349 | } |
---|
350 | } |
---|
351 | |
---|
352 | void CEventScheduler::bcastEvent(const size_t timeLine, const size_t contextHashId, const size_t schedulerLevel, const size_t lev) |
---|
353 | { |
---|
354 | SPendingRequest* sentRequest ; |
---|
355 | |
---|
356 | |
---|
357 | for(int i=0; i<nbChild_[lev];i++) |
---|
358 | { |
---|
359 | sentRequest=new SPendingRequest ; |
---|
360 | sentRequest->buffer[0]=timeLine ; |
---|
361 | sentRequest->buffer[1]=contextHashId ; |
---|
362 | sentRequest->buffer[2]=schedulerLevel ; |
---|
363 | sentRequest->buffer[3]=lev+1 ; |
---|
364 | MPI_Isend(sentRequest->buffer,4, MPI_UNSIGNED_LONG, child_[lev][i], 1, communicator_, & sentRequest->request) ; |
---|
365 | pendingSentChildRequest_.push_back(sentRequest) ; |
---|
366 | } |
---|
367 | } |
---|
368 | |
---|
369 | |
---|
370 | } |
---|