source: XIOS3/trunk/src/transport/legacy_context_client.cpp @ 2592

Last change on this file since 2592 was 2592, checked in by jderouillat, 7 months ago

Free communicators in legacy and one_sided transport layer

  • Property svn:eol-style set to native
  • Property svn:executable set to *
File size: 16.4 KB
Line 
1#include "xios_spl.hpp"
2#include "legacy_context_client.hpp"
3#include "context_server.hpp"
4#include "event_client.hpp"
5#include "buffer_out.hpp"
6#include "buffer_client.hpp"
7#include "type.hpp"
8#include "event_client.hpp"
9#include "context.hpp"
10#include "mpi.hpp"
11#include "timer.hpp"
12#include "cxios.hpp"
13#include "server.hpp"
14#include "services.hpp"
15#include "ressources_manager.hpp"
16#include <boost/functional/hash.hpp>
17#include <random>
18#include <chrono>
19
20namespace xios
21{
22    /*!
23    \param [in] parent Pointer to context on client side
24    \param [in] intraComm_ communicator of group client
25    \param [in] interComm_ communicator of group server
26    \cxtSer [in] cxtSer Pointer to context of server side. (It is only used in case of attached mode --> obsolete).
27    */
28    CLegacyContextClient::CLegacyContextClient(CContext* parent, MPI_Comm intraComm_, MPI_Comm interComm_, CContext* cxtSer)
29                         : CContextClient(parent, intraComm_, interComm_, cxtSer),
30                           mapBufferSize_(),  maxBufferedEvents(4)
31    {
32      pureOneSided=CXios::getin<bool>("pure_one_sided",false); // pure one sided communication (for test)
33      xios::MPI_Intercomm_merge(interComm_,false, &interCommMerged_) ;
34      CXios::getMpiGarbageCollector().registerCommunicator(interCommMerged_) ;
35      xios::MPI_Comm_split(intraComm_,clientRank,clientRank, &commSelf_) ; // for windows
36      CXios::getMpiGarbageCollector().registerCommunicator(commSelf_) ;
37      eventScheduler_ = parent->getEventScheduler() ; 
38      timeLine = 1;
39    }
40
41    CContextClient::ETransport getType(void) {return CContextClient::legacy ;}
42
43    /*!
44    \param [in] event Event sent to server
45    */
46    void CLegacyContextClient::sendEvent(CEventClient& event)
47    {
48      list<int> ranks = event.getRanks();
49 
50//      ostringstream str ;
51//      for(auto& rank : ranks) str<<rank<<" ; " ;
52//      info(100)<<"Event "<<timeLine<<" of context "<<context_->getId()<<"  for ranks : "<<str.str()<<endl ;
53
54      if (CXios::checkEventSync)
55      {
56        int typeId, classId, typeId_in, classId_in;
57        long long timeLine_out;
58        long long timeLine_in( timeLine );
59        typeId_in=event.getTypeId() ;
60        classId_in=event.getClassId() ;
61//        MPI_Allreduce(&timeLine,&timeLine_out, 1, MPI_UINT64_T, MPI_SUM, intraComm) ; // MPI_UINT64_T standardized by MPI 3
62        MPI_Allreduce(&timeLine_in,&timeLine_out, 1, MPI_LONG_LONG_INT, MPI_SUM, intraComm) ; 
63        MPI_Allreduce(&typeId_in,&typeId, 1, MPI_INT, MPI_SUM, intraComm) ;
64        MPI_Allreduce(&classId_in,&classId, 1, MPI_INT, MPI_SUM, intraComm) ;
65        if (typeId/clientSize!=event.getTypeId() || classId/clientSize!=event.getClassId() || timeLine_out/clientSize!=timeLine)
66        {
67           ERROR("void CLegacyContextClient::sendEvent(CEventClient& event)",
68               << "Event are not coherent between client for timeline = "<<timeLine);
69        }
70       
71        vector<int> servers(serverSize,0) ;
72        auto ranks=event.getRanks() ;
73        for(auto& rank : ranks) servers[rank]=1 ;
74        MPI_Allreduce(MPI_IN_PLACE, servers.data(), serverSize,MPI_INT,MPI_SUM,intraComm) ;
75        ostringstream osstr ;
76        for(int i=0;i<serverSize;i++)  if (servers[i]==0) osstr<<i<<" , " ;
77        if (!osstr.str().empty())
78        {
79          ERROR("void CLegacyContextClient::sendEvent(CEventClient& event)",
80                 <<" Some servers will not receive the message for timeline = "<<timeLine<<endl
81                 <<"Servers are : "<<osstr.str()) ;
82        }
83
84
85      }
86
87      if (!event.isEmpty())
88      {
89        list<int> sizes = event.getSizes();
90
91         // We force the getBuffers call to be non-blocking on classical servers
92        list<CBufferOut*> buffList;
93        getBuffers(timeLine, ranks, sizes, buffList) ;
94
95        event.send(timeLine, sizes, buffList);
96       
97        //for (auto itRank = ranks.begin(); itRank != ranks.end(); itRank++) buffers[*itRank]->infoBuffer() ;
98
99        unlockBuffers(ranks) ;
100        checkBuffers(ranks);
101       
102      }
103     
104      synchronize() ;
105      timeLine++;
106    }
107
108
109    /*!
110     * Get buffers for each connection to the servers. This function blocks until there is enough room in the buffers unless
111     * it is explicitly requested to be non-blocking.
112     *
113     *
114     * \param [in] timeLine time line of the event which will be sent to servers
115     * \param [in] serverList list of rank of connected server
116     * \param [in] sizeList size of message corresponding to each connection
117     * \param [out] retBuffers list of buffers that can be used to store an event
118     * \param [in] nonBlocking whether this function should be non-blocking
119     * \return whether the already allocated buffers could be used
120    */
121    void CLegacyContextClient::getBuffers(const size_t timeLine, const list<int>& serverList, const list<int>& sizeList, list<CBufferOut*>& retBuffers)
122    {
123      list<int>::const_iterator itServer, itSize;
124      list<CClientBuffer*> bufferList;
125      map<int,CClientBuffer*>::const_iterator it;
126      list<CClientBuffer*>::iterator itBuffer;
127      bool areBuffersFree;
128/*     
129      for (itServer = serverList.begin(); itServer != serverList.end(); itServer++)
130      {
131        it = buffers.find(*itServer);
132        if (it == buffers.end())
133        {
134          CTokenManager* tokenManager = CXios::getRessourcesManager()->getTokenManager() ;
135          size_t token = tokenManager->getToken() ;
136          while (!tokenManager->checkToken(token)) callGlobalEventLoop() ;
137          newBuffer(*itServer);
138          it = buffers.find(*itServer);
139          checkAttachWindows(it->second,it->first) ;
140          tokenManager->updateToken(token) ;
141        }
142        bufferList.push_back(it->second);
143      }
144*/
145      map<int,MPI_Request> attachList ;
146     
147      for (itServer = serverList.begin(); itServer != serverList.end(); itServer++)
148      {
149        it = buffers.find(*itServer);
150        if (it == buffers.end())
151        {
152          newBuffer(*itServer);
153          it = buffers.find(*itServer);
154          checkAttachWindows(it->second, it->first, attachList) ;
155        }
156        bufferList.push_back(it->second);
157      }
158     
159      while(!attachList.empty())
160      {
161        auto it = attachList.begin() ;
162        while(it!=attachList.end())
163        {
164          if (checkAttachWindows(buffers[it->first], it->first, attachList)) it=attachList.erase(it) ;
165          else ++it ;
166        }
167
168        yield() ;
169      }
170
171
172      double lastTimeBuffersNotFree=0. ;
173      double time ;
174      bool doUnlockBuffers ;
175      CTimer::get("Blocking time").resume();
176      do
177      {
178        areBuffersFree = true;
179        doUnlockBuffers=false ;
180        time=MPI_Wtime() ;
181        if (time-lastTimeBuffersNotFree > latency_)
182        {
183          for (itBuffer = bufferList.begin(), itSize = sizeList.begin(); itBuffer != bufferList.end(); itBuffer++, itSize++)
184          {
185            areBuffersFree &= (*itBuffer)->isBufferFree(*itSize);
186          }
187          if (!areBuffersFree)
188          {
189            lastTimeBuffersNotFree = time ;
190            doUnlockBuffers=true ;
191          }         
192        }
193        else areBuffersFree = false ;
194
195        if (!areBuffersFree)
196        {
197          if (doUnlockBuffers) for (itBuffer = bufferList.begin(); itBuffer != bufferList.end(); itBuffer++) (*itBuffer)->unlockBuffer();
198          checkBuffers();
199
200          yield() ;
201        }
202
203      } while (!areBuffersFree);
204      CTimer::get("Blocking time").suspend();
205
206      for (itBuffer = bufferList.begin(), itSize = sizeList.begin(); itBuffer != bufferList.end(); itBuffer++, itSize++)
207        retBuffers.push_back((*itBuffer)->getBuffer(timeLine, *itSize));
208   }
209
210
211   bool CLegacyContextClient::checkAttachWindows(CClientBuffer* buffer, int rank, map<int, MPI_Request>& attachList)
212   {
213      int dummy;
214      bool ret=true; 
215
216      if (!buffer->isAttachedWindows())
217      {
218           // create windows dynamically for one-sided
219          /*
220          CTimer::get("create Windows").resume() ;
221          MPI_Comm interComm ;
222          int tag = 0 ;
223          xios::MPI_Intercomm_create(commSelf_, 0, interCommMerged_, clientSize+rank, tag, &interComm) ;
224          xios::MPI_Intercomm_merge(interComm, false, &winComm_[rank]) ;
225          xios::MPI_Comm_free(&interComm) ;
226               
227          buffer->attachWindows(winComm_[rank]) ;
228          CXios::getMpiGarbageCollector().registerCommunicator(winComm_[rank]) ;
229          MPI_Barrier(winComm_[rank]) ;
230        */
231        if (attachList.count(rank)==0) 
232        {
233          MPI_Irecv(&dummy,0,MPI_INT,clientSize+rank, 21, interCommMerged_, &attachList[rank]) ;
234          ret = false ;
235        }
236        else
237        {
238          MPI_Status status ;
239          int flag ;
240          MPI_Test(&attachList[rank],&flag, &status) ;
241          if (flag)
242          {
243            CTimer::get("create Windows").resume() ;
244            MPI_Comm interComm ;
245            int tag = 0 ;
246            xios::MPI_Intercomm_create(commSelf_, 0, interCommMerged_, clientSize+rank, tag, &interComm) ;
247            xios::MPI_Intercomm_merge(interComm, false, &winComm_[rank]) ;
248            xios::MPI_Comm_free(&interComm) ;
249             
250            buffer->attachWindows(winComm_[rank]) ;
251            CXios::getMpiGarbageCollector().registerCommunicator(winComm_[rank]) ;
252            MPI_Barrier(winComm_[rank]) ;
253            ret = true ;
254          }
255          else ret=false ;
256        }
257      }
258      return ret ;
259    }
260
261
262   void CLegacyContextClient::eventLoop(void)
263   {
264      if (!locked_) checkBuffers() ;
265   }
266
267   void CLegacyContextClient::callGlobalEventLoop(void)
268   {
269     locked_=true ;
270     context_->yield() ;
271     locked_=false ;
272   }
273
274   void CLegacyContextClient::yield(void)
275   {
276     locked_=true ;
277     context_->yield() ;
278     locked_=false ;
279   }
280
281   void CLegacyContextClient::synchronize(void)
282   {
283     if (context_->getServiceType()!=CServicesManager::CLIENT)
284     {
285       locked_=true ;
286       context_->synchronize() ;
287       locked_=false ;
288     }   
289   }
290   /*!
291   Make a new buffer for a certain connection to server with specific rank
292   \param [in] rank rank of connected server
293   */
294   void CLegacyContextClient::newBuffer(int rank)
295   {
296      if (!mapBufferSize_.count(rank))
297      {
298        error(0) << "WARNING: Unexpected request for buffer to communicate with server " << rank << std::endl;
299        mapBufferSize_[rank] = CXios::minBufferSize;
300        maxEventSizes[rank] = CXios::minBufferSize;
301      }
302      bool hasWindows = true ;
303      CClientBuffer* buffer = buffers[rank] = new CClientBuffer(interCommMerged_, clientSize+rank, mapBufferSize_[rank], hasWindows);
304      if (isGrowableBuffer_) buffer->setGrowableBuffer(1.2) ;
305      else buffer->fixBuffer() ;
306      // Notify the server
307     
308      CBufferOut* bufOut = buffer->getBuffer(0, 4*sizeof(MPI_Aint));
309      MPI_Aint sendBuff[4] ;
310      sendBuff[0]=hashId_;
311      sendBuff[1]=mapBufferSize_[rank];
312      sendBuff[2]=buffers[rank]->getWinBufferAddress(0); 
313      sendBuff[3]=buffers[rank]->getWinBufferAddress(1); 
314      info(100)<<"CLegacyContextClient::newBuffer : rank "<<rank<<" winAdress[0] "<<buffers[rank]->getWinBufferAddress(0)<<" winAdress[1] "<<buffers[rank]->getWinBufferAddress(1)<<endl;
315      bufOut->put(sendBuff,4); 
316      buffer->checkBuffer(true);
317
318   }
319
320 
321 
322   /*!
323   Verify state of buffers. Buffer is under pending state if there is no message on it
324   \return state of buffers, pending(true), ready(false)
325   */
326   bool CLegacyContextClient::checkBuffers(void)
327   {
328      map<int,CClientBuffer*>::iterator itBuff;
329      bool pending = false;
330      for (itBuff = buffers.begin(); itBuff != buffers.end(); itBuff++)
331        pending |= itBuff->second->checkBuffer(!pureOneSided);
332      return pending;
333   }
334
335   //! Release all buffers
336   void CLegacyContextClient::releaseBuffers()
337   {
338      map<int,CClientBuffer*>::iterator itBuff;
339      for (itBuff = buffers.begin(); itBuff != buffers.end(); itBuff++)
340      {
341         delete itBuff->second;
342      }
343      buffers.clear();
344
345      for(auto& it : winComm_)
346      {
347        int rank = it.first ;
348      }
349   }
350
351     
352  /*!
353   Lock the buffers for one sided communications
354   \param [in] ranks list rank of server to which client connects to
355   */
356   void CLegacyContextClient::lockBuffers(list<int>& ranks)
357   {
358      list<int>::iterator it;
359      for (it = ranks.begin(); it != ranks.end(); it++) buffers[*it]->lockBuffer();
360   }
361
362  /*!
363   Unlock the buffers for one sided communications
364   \param [in] ranks list rank of server to which client connects to
365   */
366   void CLegacyContextClient::unlockBuffers(list<int>& ranks)
367   {
368      list<int>::iterator it;
369      for (it = ranks.begin(); it != ranks.end(); it++) buffers[*it]->unlockBuffer();
370   }
371     
372   /*!
373   Verify state of buffers corresponding to a connection
374   \param [in] ranks list rank of server to which client connects to
375   \return state of buffers, pending(true), ready(false)
376   */
377   bool CLegacyContextClient::checkBuffers(list<int>& ranks)
378   {
379      list<int>::iterator it;
380      bool pending = false;
381      for (it = ranks.begin(); it != ranks.end(); it++) pending |= buffers[*it]->checkBuffer(!pureOneSided);
382      return pending;
383   }
384
385   /*!
386    * Set the buffer size for each connection. Warning: This function is collective.
387    *
388    * \param [in] mapSize maps the rank of the connected servers to the size of the correspoinding buffer
389    * \param [in] maxEventSize maps the rank of the connected servers to the size of the biggest event
390   */
391   void CLegacyContextClient::setBufferSize(const std::map<int,StdSize>& mapSize)
392   {
393     setFixedBuffer() ;
394     for(auto& it : mapSize)
395     {
396      size_t size=std::max(CXios::minBufferSize*1.0,std::min(it.second*CXios::bufferSizeFactor*1.01,CXios::maxBufferSize*1.0)) ;
397      mapBufferSize_[it.first]=size ;
398      if (buffers.count(it.first)>0) buffers[it.first]->fixBufferSize(size);
399     }
400   }
401
402   /*!
403   * Finalize context client and do some reports. Function is non-blocking.
404   */
405  void CLegacyContextClient::finalize(void)
406  {
407    map<int,CClientBuffer*>::iterator itBuff;
408    std::list<int>::iterator ItServerLeader; 
409   
410    bool stop = false;
411
412    int* nbServerConnectionLocal  = new int[serverSize] ;
413    int* nbServerConnectionGlobal  = new int[serverSize] ;
414    for(int i=0;i<serverSize;++i) nbServerConnectionLocal[i]=0 ;
415    for (itBuff = buffers.begin(); itBuff != buffers.end(); itBuff++)  nbServerConnectionLocal[itBuff->first]=1 ;
416    for (ItServerLeader = ranksServerLeader.begin(); ItServerLeader != ranksServerLeader.end(); ItServerLeader++)  nbServerConnectionLocal[*ItServerLeader]=1 ;
417   
418    MPI_Allreduce(nbServerConnectionLocal, nbServerConnectionGlobal, serverSize, MPI_INT, MPI_SUM, intraComm);
419   
420    CEventClient event(CContext::GetType(), CContext::EVENT_ID_CONTEXT_FINALIZE);
421    CMessage msg;
422
423    for (int i=0;i<serverSize;++i) if (nbServerConnectionLocal[i]==1) event.push(i, nbServerConnectionGlobal[i], msg) ;
424    sendEvent(event);
425
426    delete[] nbServerConnectionLocal ;
427    delete[] nbServerConnectionGlobal ;
428
429
430    CTimer::get("Blocking time").resume();
431    checkBuffers();
432    CTimer::get("Blocking time").suspend();
433
434    std::map<int,StdSize>::const_iterator itbMap = mapBufferSize_.begin(),
435                                          iteMap = mapBufferSize_.end(), itMap;
436
437    StdSize totalBuf = 0;
438    for (itMap = itbMap; itMap != iteMap; ++itMap)
439    {
440      report(10) << " Memory report : Context <" << context_->getId() << "> : client side : memory used for buffer of each connection to server" << endl
441                 << "  +) To server with rank " << itMap->first << " : " << itMap->second << " bytes " << endl;
442      totalBuf += itMap->second;
443    }
444    report(0) << " Memory report : Context <" << context_->getId() << "> : client side : total memory used for buffer " << totalBuf << " bytes" << endl;
445
446  }
447
448
449  /*!
450  */
451  bool CLegacyContextClient::havePendingRequests(void)
452  {
453    bool pending = false;
454    map<int,CClientBuffer*>::iterator itBuff;
455    for (itBuff = buffers.begin(); itBuff != buffers.end(); itBuff++)
456      pending |= itBuff->second->hasPendingRequest();
457    return pending;
458  }
459 
460  bool CLegacyContextClient::havePendingRequests(list<int>& ranks)
461  {
462      list<int>::iterator it;
463      bool pending = false;
464      for (it = ranks.begin(); it != ranks.end(); it++) pending |= buffers[*it]->hasPendingRequest();
465      return pending;
466  }
467
468  bool CLegacyContextClient::isNotifiedFinalized(void)
469  {
470    bool finalized = true;
471    map<int,CClientBuffer*>::iterator itBuff;
472    for (itBuff = buffers.begin(); itBuff != buffers.end(); itBuff++)
473      finalized &= itBuff->second->isNotifiedFinalized();
474    return finalized;
475  }
476
477}
Note: See TracBrowser for help on using the repository browser.