source: XIOS/dev/dev_ym/XIOS_COUPLING/src/context_client.cpp @ 2222

Last change on this file since 2222 was 2222, checked in by ymipsl, 5 months ago

Fix a problem at MPI dynamic window creation with intelMPI:
Freeing the communicator before using the windows causes a crash. Is this a bug in the intelMPI library or not? It works with OpenMPI.

YM

  • Property copyright set to
    Software name : XIOS (Xml I/O Server)
    http://forge.ipsl.jussieu.fr/ioserver
    Creation date : January 2009
    Licence : CeCILL version 2
    see license file in root directory : Licence_CeCILL_V2-en.txt
    or http://www.cecill.info/licences/Licence_CeCILL_V2-en.html
    Holder : CEA/LSCE (Laboratoire des Sciences du CLimat et de l'Environnement)
    CNRS/IPSL (Institut Pierre Simon Laplace)
    Project Manager : Yann Meurdesoif
    yann.meurdesoif@cea.fr
  • Property svn:eol-style set to native
File size: 17.2 KB
Line 
1#include "xios_spl.hpp"
2#include "context_client.hpp"
3#include "context_server.hpp"
4#include "event_client.hpp"
5#include "buffer_out.hpp"
6#include "buffer_client.hpp"
7#include "type.hpp"
8#include "event_client.hpp"
9#include "context.hpp"
10#include "mpi.hpp"
11#include "timer.hpp"
12#include "cxios.hpp"
13#include "server.hpp"
14#include "services.hpp"
15#include <boost/functional/hash.hpp>
16#include <random>
17#include <chrono>
18
19namespace xios
20{
21    /*!
22    \param [in] parent Pointer to context on client side
23    \param [in] intraComm_ communicator of group client
24    \param [in] interComm_ communicator of group server
25    \cxtSer [in] cxtSer Pointer to context of server side. (It is only used in case of attached mode).
26    */
27    CContextClient::CContextClient(CContext* parent, MPI_Comm intraComm_, MPI_Comm interComm_, CContext* cxtSer)
28     : mapBufferSize_(), parentServer(cxtSer), maxBufferedEvents(4), associatedServer_(nullptr)
29    {
30     
31      context_ = parent;
32      intraComm = intraComm_;
33      interComm = interComm_;
34      MPI_Comm_rank(intraComm, &clientRank);
35      MPI_Comm_size(intraComm, &clientSize);
36
37      int flag;
38      MPI_Comm_test_inter(interComm, &flag);
39      if (flag) isAttached_=false ;
40      else  isAttached_=true ;
41
42      pureOneSided=CXios::getin<bool>("pure_one_sided",false); // pure one sided communication (for test)
43      if (isAttachedModeEnabled()) pureOneSided=false ; // no one sided in attach mode
44     
45
46
47      if (flag) MPI_Comm_remote_size(interComm, &serverSize);
48      else  MPI_Comm_size(interComm, &serverSize);
49
50      computeLeader(clientRank, clientSize, serverSize, ranksServerLeader, ranksServerNotLeader);
51
52      if (flag) MPI_Intercomm_merge(interComm_,false,&interCommMerged) ;
53     
54      if (!isAttachedModeEnabled())
55      { 
56        windows.resize(serverSize) ;
57        MPI_Comm winComm ;
58        for(int rank=0; rank<serverSize; rank++)
59        {
60          windows[rank].resize(2) ;
61          MPI_Comm_split(interCommMerged, rank, clientRank, &winComm);
62          int myRank ;
63          MPI_Comm_rank(winComm,&myRank);
64          MPI_Win_create_dynamic(MPI_INFO_NULL, winComm, &windows[rank][0]);
65          MPI_Win_create_dynamic(MPI_INFO_NULL, winComm, &windows[rank][1]);
66//       ym : Warning : intelMPI doesn't support that communicator of windows be deallocated before the windows deallocation, crash at MPI_Win_lock
67//            Bug or not ?         
68//        MPI_Comm_free(&winComm) ;
69        }
70      }
71
72      MPI_Comm_split(intraComm_,clientRank,clientRank, &commSelf) ;
73
74      auto time=chrono::system_clock::now().time_since_epoch().count() ;
75      std::default_random_engine rd(time); // not reproducible from a run to another
76      std::uniform_int_distribution<size_t> dist;
77      hashId_=dist(rd) ;
78      MPI_Bcast(&hashId_,1,MPI_SIZE_T,0,intraComm) ; // Bcast to all server of the context
79
80      timeLine = 1;
81    }
82
83    void CContextClient::computeLeader(int clientRank, int clientSize, int serverSize,
84                                       std::list<int>& rankRecvLeader,
85                                       std::list<int>& rankRecvNotLeader)
86    {
87      if ((0 == clientSize) || (0 == serverSize)) return;
88
89      if (clientSize < serverSize)
90      {
91        int serverByClient = serverSize / clientSize;
92        int remain = serverSize % clientSize;
93        int rankStart = serverByClient * clientRank;
94
95        if (clientRank < remain)
96        {
97          serverByClient++;
98          rankStart += clientRank;
99        }
100        else
101          rankStart += remain;
102
103        for (int i = 0; i < serverByClient; i++)
104          rankRecvLeader.push_back(rankStart + i);
105
106        rankRecvNotLeader.resize(0);
107      }
108      else
109      {
110        int clientByServer = clientSize / serverSize;
111        int remain = clientSize % serverSize;
112
113        if (clientRank < (clientByServer + 1) * remain)
114        {
115          if (clientRank % (clientByServer + 1) == 0)
116            rankRecvLeader.push_back(clientRank / (clientByServer + 1));
117          else
118            rankRecvNotLeader.push_back(clientRank / (clientByServer + 1));
119        }
120        else
121        {
122          int rank = clientRank - (clientByServer + 1) * remain;
123          if (rank % clientByServer == 0)
124            rankRecvLeader.push_back(remain + rank / clientByServer);
125          else
126            rankRecvNotLeader.push_back(remain + rank / clientByServer);
127        }
128      }
129    }
130
131    /*!
132    In case of attached mode, the current context must be reset to context for client
133    \param [in] event Event sent to server
134    */
135    void CContextClient::sendEvent(CEventClient& event)
136    {
137      list<int> ranks = event.getRanks();
138      info(100)<<"Event "<<timeLine<<" of context "<<context_->getId()<<endl ;
139      if (CXios::checkEventSync)
140      {
141        int typeId, classId, typeId_in, classId_in;
142        long long timeLine_out;
143        long long timeLine_in( timeLine );
144        typeId_in=event.getTypeId() ;
145        classId_in=event.getClassId() ;
146//        MPI_Allreduce(&timeLine,&timeLine_out, 1, MPI_UINT64_T, MPI_SUM, intraComm) ; // MPI_UINT64_T standardized by MPI 3
147        MPI_Allreduce(&timeLine_in,&timeLine_out, 1, MPI_LONG_LONG_INT, MPI_SUM, intraComm) ; 
148        MPI_Allreduce(&typeId_in,&typeId, 1, MPI_INT, MPI_SUM, intraComm) ;
149        MPI_Allreduce(&classId_in,&classId, 1, MPI_INT, MPI_SUM, intraComm) ;
150        if (typeId/clientSize!=event.getTypeId() || classId/clientSize!=event.getClassId() || timeLine_out/clientSize!=timeLine)
151        {
152           ERROR("void CContextClient::sendEvent(CEventClient& event)",
153               << "Event are not coherent between client.");
154        }
155      }
156
157      if (!event.isEmpty())
158      {
159        list<int> sizes = event.getSizes();
160
161         // We force the getBuffers call to be non-blocking on classical servers
162        list<CBufferOut*> buffList;
163        getBuffers(timeLine, ranks, sizes, buffList) ;
164
165        event.send(timeLine, sizes, buffList);
166       
167        //for (auto itRank = ranks.begin(); itRank != ranks.end(); itRank++) buffers[*itRank]->infoBuffer() ;
168
169        unlockBuffers(ranks) ;
170        info(100)<<"Event "<<timeLine<<" of context "<<context_->getId()<<"  sent"<<endl ;
171         
172        checkBuffers(ranks);
173      }
174     
175      if (isAttachedModeEnabled()) // couldBuffer is always true in attached mode
176      {
177        while (checkBuffers(ranks)) context_->globalEventLoop() ;
178     
179        CXios::getDaemonsManager()->scheduleContext(hashId_) ;
180        while (CXios::getDaemonsManager()->isScheduledContext(hashId_)) context_->globalEventLoop() ;
181      }
182     
183      timeLine++;
184    }
185
186    /*!
187    If client is also server (attached mode), after sending event, it should process right away
188    the incoming event.
189    \param [in] ranks list rank of server connected this client
190    */
191    void CContextClient::waitEvent(list<int>& ranks)
192    {
193      while (checkBuffers(ranks))
194      {
195        context_->eventLoop() ;
196      }
197
198      MPI_Request req ;
199      MPI_Status status ;
200
201      MPI_Ibarrier(intraComm,&req) ;
202      int flag=false ;
203
204      do 
205      {
206        CXios::getDaemonsManager()->eventLoop() ;
207        MPI_Test(&req,&flag,&status) ;
208      } while (!flag) ;
209
210
211    }
212
213
214    void CContextClient::waitEvent_old(list<int>& ranks)
215    {
216      parentServer->server->setPendingEvent();
217      while (checkBuffers(ranks))
218      {
219        parentServer->server->listen();
220        parentServer->server->checkPendingRequest();
221      }
222
223      while (parentServer->server->hasPendingEvent())
224      {
225       parentServer->server->eventLoop();
226      }
227    }
228
229    /*!
230     * Get buffers for each connection to the servers. This function blocks until there is enough room in the buffers unless
231     * it is explicitly requested to be non-blocking.
232     *
233     *
234     * \param [in] timeLine time line of the event which will be sent to servers
235     * \param [in] serverList list of rank of connected server
236     * \param [in] sizeList size of message corresponding to each connection
237     * \param [out] retBuffers list of buffers that can be used to store an event
238     * \param [in] nonBlocking whether this function should be non-blocking
239     * \return whether the already allocated buffers could be used
240    */
241    bool CContextClient::getBuffers(const size_t timeLine, const list<int>& serverList, const list<int>& sizeList, list<CBufferOut*>& retBuffers,
242                                    bool nonBlocking /*= false*/)
243    {
244      list<int>::const_iterator itServer, itSize;
245      list<CClientBuffer*> bufferList;
246      map<int,CClientBuffer*>::const_iterator it;
247      list<CClientBuffer*>::iterator itBuffer;
248      bool areBuffersFree;
249
250      for (itServer = serverList.begin(); itServer != serverList.end(); itServer++)
251      {
252        it = buffers.find(*itServer);
253        if (it == buffers.end())
254        {
255          newBuffer(*itServer);
256          it = buffers.find(*itServer);
257        }
258        bufferList.push_back(it->second);
259      }
260
261      CTimer::get("Blocking time").resume();
262      do
263      {
264        areBuffersFree = true;
265        for (itBuffer = bufferList.begin(), itSize = sizeList.begin(); itBuffer != bufferList.end(); itBuffer++, itSize++)
266        {
267          areBuffersFree &= (*itBuffer)->isBufferFree(*itSize);
268        }
269
270        if (!areBuffersFree)
271        {
272          for (itBuffer = bufferList.begin(); itBuffer != bufferList.end(); itBuffer++) (*itBuffer)->unlockBuffer();
273          checkBuffers();
274/*         
275          context->server->listen();
276
277          if (context->serverPrimServer.size()>0)
278          {
279            for (int i = 0; i < context->serverPrimServer.size(); ++i)  context->serverPrimServer[i]->listen();
280 //ym           CServer::contextEventLoop(false) ; // avoid dead-lock at finalize...
281            context->globalEventLoop() ;
282          }
283*/
284           context_->globalEventLoop() ;
285        }
286
287      } while (!areBuffersFree && !nonBlocking);
288      CTimer::get("Blocking time").suspend();
289
290      if (areBuffersFree)
291      {
292        for (itBuffer = bufferList.begin(), itSize = sizeList.begin(); itBuffer != bufferList.end(); itBuffer++, itSize++)
293          retBuffers.push_back((*itBuffer)->getBuffer(timeLine, *itSize));
294      }
295      return areBuffersFree;
296   }
297
298   /*!
299   Make a new buffer for a certain connection to server with specific rank
300   \param [in] rank rank of connected server
301   */
302   void CContextClient::newBuffer(int rank)
303   {
304      if (!mapBufferSize_.count(rank))
305      {
306        error(0) << "WARNING: Unexpected request for buffer to communicate with server " << rank << std::endl;
307        mapBufferSize_[rank] = CXios::minBufferSize;
308        maxEventSizes[rank] = CXios::minBufferSize;
309      }
310     
311      vector<MPI_Win> Wins(2,MPI_WIN_NULL) ;
312      if (!isAttachedModeEnabled()) Wins=windows[rank] ;
313 
314      CClientBuffer* buffer = buffers[rank] = new CClientBuffer(interComm, Wins, clientRank, rank, mapBufferSize_[rank], maxEventSizes[rank]);
315      if (isGrowableBuffer_) buffer->setGrowableBuffer(1.2) ;
316      else buffer->fixBuffer() ;
317      // Notify the server
318      CBufferOut* bufOut = buffer->getBuffer(0, 4*sizeof(MPI_Aint));
319      MPI_Aint sendBuff[4] ;
320      sendBuff[0]=hashId_;
321      sendBuff[1]=mapBufferSize_[rank];
322      sendBuff[2]=buffers[rank]->getWinAddress(0); 
323      sendBuff[3]=buffers[rank]->getWinAddress(1); 
324      info(100)<<"CContextClient::newBuffer : rank "<<rank<<" winAdress[0] "<<buffers[rank]->getWinAddress(0)<<" winAdress[1] "<<buffers[rank]->getWinAddress(1)<<endl;
325      bufOut->put(sendBuff, 4); 
326      buffer->checkBuffer(true);
327
328   }
329
330   /*!
331   Verify state of buffers. Buffer is under pending state if there is no message on it
332   \return state of buffers, pending(true), ready(false)
333   */
334   bool CContextClient::checkBuffers(void)
335   {
336      map<int,CClientBuffer*>::iterator itBuff;
337      bool pending = false;
338      for (itBuff = buffers.begin(); itBuff != buffers.end(); itBuff++)
339        pending |= itBuff->second->checkBuffer(!pureOneSided);
340      return pending;
341   }
342
343   //! Release all buffers
344   void CContextClient::releaseBuffers()
345   {
346      map<int,CClientBuffer*>::iterator itBuff;
347      for (itBuff = buffers.begin(); itBuff != buffers.end(); itBuff++)
348      {
349         delete itBuff->second;
350      }
351      buffers.clear();
352
353/* don't know when release windows
354
355      if (!isAttachedModeEnabled())
356      { 
357        for(int rank=0; rank<serverSize; rank++)
358        {
359          MPI_Win_free(&windows[rank][0]);
360          MPI_Win_free(&windows[rank][1]);
361        }
362      }
363*/
364   }
365
366     
367  /*!
368   Lock the buffers for one sided communications
369   \param [in] ranks list rank of server to which client connects to
370   */
371   void CContextClient::lockBuffers(list<int>& ranks)
372   {
373      list<int>::iterator it;
374      for (it = ranks.begin(); it != ranks.end(); it++) buffers[*it]->lockBuffer();
375   }
376
377  /*!
378   Unlock the buffers for one sided communications
379   \param [in] ranks list rank of server to which client connects to
380   */
381   void CContextClient::unlockBuffers(list<int>& ranks)
382   {
383      list<int>::iterator it;
384      for (it = ranks.begin(); it != ranks.end(); it++) buffers[*it]->unlockBuffer();
385   }
386     
387   /*!
388   Verify state of buffers corresponding to a connection
389   \param [in] ranks list rank of server to which client connects to
390   \return state of buffers, pending(true), ready(false)
391   */
392   bool CContextClient::checkBuffers(list<int>& ranks)
393   {
394      list<int>::iterator it;
395      bool pending = false;
396      for (it = ranks.begin(); it != ranks.end(); it++) pending |= buffers[*it]->checkBuffer(!pureOneSided);
397      return pending;
398   }
399
400   /*!
401    * Set the buffer size for each connection. Warning: This function is collective.
402    *
403    * \param [in] mapSize maps the rank of the connected servers to the size of the correspoinding buffer
404    * \param [in] maxEventSize maps the rank of the connected servers to the size of the biggest event
405   */
406   void CContextClient::setBufferSize(const std::map<int,StdSize>& mapSize)
407   {
408     for(auto& it : mapSize) 
409      buffers[it.first]->fixBufferSize(std::max(CXios::minBufferSize*1.0,std::min(it.second*CXios::bufferSizeFactor*1.01,CXios::maxBufferSize*1.0)));
410   }
411
412  /*!
413  Get leading server in the group of connected server
414  \return ranks of leading servers
415  */
416  const std::list<int>& CContextClient::getRanksServerNotLeader(void) const
417  {
418    return ranksServerNotLeader;
419  }
420
421  /*!
422  Check if client connects to leading server
423  \return connected(true), not connected (false)
424  */
425  bool CContextClient::isServerNotLeader(void) const
426  {
427    return !ranksServerNotLeader.empty();
428  }
429
430  /*!
431  Get leading server in the group of connected server
432  \return ranks of leading servers
433  */
434  const std::list<int>& CContextClient::getRanksServerLeader(void) const
435  {
436    return ranksServerLeader;
437  }
438
439  /*!
440  Check if client connects to leading server
441  \return connected(true), not connected (false)
442  */
443  bool CContextClient::isServerLeader(void) const
444  {
445    return !ranksServerLeader.empty();
446  }
447
448   /*!
449   * Finalize context client and do some reports. Function is non-blocking.
450   */
451  void CContextClient::finalize(void)
452  {
453    map<int,CClientBuffer*>::iterator itBuff;
454    std::list<int>::iterator ItServerLeader; 
455   
456    bool stop = false;
457
458    int* nbServerConnectionLocal  = new int[serverSize] ;
459    int* nbServerConnectionGlobal  = new int[serverSize] ;
460    for(int i=0;i<serverSize;++i) nbServerConnectionLocal[i]=0 ;
461    for (itBuff = buffers.begin(); itBuff != buffers.end(); itBuff++)  nbServerConnectionLocal[itBuff->first]=1 ;
462    for (ItServerLeader = ranksServerLeader.begin(); ItServerLeader != ranksServerLeader.end(); ItServerLeader++)  nbServerConnectionLocal[*ItServerLeader]=1 ;
463   
464    MPI_Allreduce(nbServerConnectionLocal, nbServerConnectionGlobal, serverSize, MPI_INT, MPI_SUM, intraComm);
465   
466    CEventClient event(CContext::GetType(), CContext::EVENT_ID_CONTEXT_FINALIZE);
467    CMessage msg;
468
469    for (int i=0;i<serverSize;++i) if (nbServerConnectionLocal[i]==1) event.push(i, nbServerConnectionGlobal[i], msg) ;
470    sendEvent(event);
471
472    delete[] nbServerConnectionLocal ;
473    delete[] nbServerConnectionGlobal ;
474
475
476    CTimer::get("Blocking time").resume();
477    checkBuffers();
478    CTimer::get("Blocking time").suspend();
479
480    std::map<int,StdSize>::const_iterator itbMap = mapBufferSize_.begin(),
481                                          iteMap = mapBufferSize_.end(), itMap;
482
483    StdSize totalBuf = 0;
484    for (itMap = itbMap; itMap != iteMap; ++itMap)
485    {
486      report(10) << " Memory report : Context <" << context_->getId() << "> : client side : memory used for buffer of each connection to server" << endl
487                 << "  +) To server with rank " << itMap->first << " : " << itMap->second << " bytes " << endl;
488      totalBuf += itMap->second;
489    }
490    report(0) << " Memory report : Context <" << context_->getId() << "> : client side : total memory used for buffer " << totalBuf << " bytes" << endl;
491
492  }
493
494
495  /*!
496  */
497  bool CContextClient::havePendingRequests(void)
498  {
499    bool pending = false;
500    map<int,CClientBuffer*>::iterator itBuff;
501    for (itBuff = buffers.begin(); itBuff != buffers.end(); itBuff++)
502      pending |= itBuff->second->hasPendingRequest();
503    return pending;
504  }
505 
506  bool CContextClient::isNotifiedFinalized(void)
507  {
508    if (isAttachedModeEnabled()) return true ;
509
510    bool finalized = true;
511    map<int,CClientBuffer*>::iterator itBuff;
512    for (itBuff = buffers.begin(); itBuff != buffers.end(); itBuff++)
513      finalized &= itBuff->second->isNotifiedFinalized();
514    return finalized;
515  }
516
517}
Note: See TracBrowser for help on using the repository browser.