source: XIOS/dev/dev_ym/XIOS_COUPLING/src/context_client.cpp @ 2173

Last change on this file since 2173 was 2130, checked in by ymipsl, 3 years ago

New management of client-server buffers.

  • buffers can grow automatically in the initialization phase
  • buffer size is evaluated after the close context definition phase and fixed at its optimal value.

YM

  • Property copyright set to
    Software name : XIOS (Xml I/O Server)
    http://forge.ipsl.jussieu.fr/ioserver
    Creation date : January 2009
    Licence : CeCCIL version2
    see license file in root directory : Licence_CeCILL_V2-en.txt
    or http://www.cecill.info/licences/Licence_CeCILL_V2-en.html
    Holder : CEA/LSCE (Laboratoire des Sciences du CLimat et de l'Environnement)
    CNRS/IPSL (Institut Pierre Simon Laplace)
    Project Manager : Yann Meurdesoif
    yann.meurdesoif@cea.fr
  • Property svn:eol-style set to native
File size: 17.0 KB
RevLine 
[591]1#include "xios_spl.hpp"
[300]2#include "context_client.hpp"
3#include "context_server.hpp"
4#include "event_client.hpp"
5#include "buffer_out.hpp"
6#include "buffer_client.hpp"
7#include "type.hpp"
8#include "event_client.hpp"
9#include "context.hpp"
[382]10#include "mpi.hpp"
[347]11#include "timer.hpp"
[401]12#include "cxios.hpp"
[1130]13#include "server.hpp"
[2130]14#include "services.hpp"
15#include <boost/functional/hash.hpp>
16#include <random>
17#include <chrono>
[300]18
[335]19namespace xios
[300]20{
[512]21    /*!
22    \param [in] parent Pointer to context on client side
23    \param [in] intraComm_ communicator of group client
24    \param [in] interComm_ communicator of group server
[983]25    \cxtSer [in] cxtSer Pointer to context of server side. (It is only used in case of attached mode).
[512]26    */
[1639]27    CContextClient::CContextClient(CContext* parent, MPI_Comm intraComm_, MPI_Comm interComm_, CContext* cxtSer)
[1853]28     : mapBufferSize_(), parentServer(cxtSer), maxBufferedEvents(4), associatedServer_(nullptr)
[300]29    {
[1757]30     
[2130]31      context_ = parent;
[595]32      intraComm = intraComm_;
33      interComm = interComm_;
[1639]34      MPI_Comm_rank(intraComm, &clientRank);
35      MPI_Comm_size(intraComm, &clientSize);
[509]36
[595]37      int flag;
[1639]38      MPI_Comm_test_inter(interComm, &flag);
[1761]39      if (flag) isAttached_=false ;
40      else  isAttached_=true ;
41
42      pureOneSided=CXios::getin<bool>("pure_one_sided",false); // pure one sided communication (for test)
43      if (isAttachedModeEnabled()) pureOneSided=false ; // no one sided in attach mode
44     
45
46
[1639]47      if (flag) MPI_Comm_remote_size(interComm, &serverSize);
48      else  MPI_Comm_size(interComm, &serverSize);
[509]49
[1232]50      computeLeader(clientRank, clientSize, serverSize, ranksServerLeader, ranksServerNotLeader);
51
[1757]52      if (flag) MPI_Intercomm_merge(interComm_,false,&interCommMerged) ;
53     
54      if (!isAttachedModeEnabled())
55      { 
56        windows.resize(serverSize) ;
57        MPI_Comm winComm ;
58        for(int rank=0; rank<serverSize; rank++)
59        {
60          windows[rank].resize(2) ;
61          MPI_Comm_split(interCommMerged, rank, clientRank, &winComm);
62          int myRank ;
63          MPI_Comm_rank(winComm,&myRank);
64          MPI_Win_create_dynamic(MPI_INFO_NULL, winComm, &windows[rank][0]);
65          MPI_Win_create_dynamic(MPI_INFO_NULL, winComm, &windows[rank][1]);
66          MPI_Comm_free(&winComm) ;
67        }
68      }
69
70      MPI_Comm_split(intraComm_,clientRank,clientRank, &commSelf) ;
71
[2130]72      auto time=chrono::system_clock::now().time_since_epoch().count() ;
73      std::default_random_engine rd(time); // not reproducible from a run to another
74      std::uniform_int_distribution<size_t> dist;
75      hashId_=dist(rd) ;
76      MPI_Bcast(&hashId_,1,MPI_SIZE_T,0,intraComm) ; // Bcast to all server of the context
77
[1757]78      timeLine = 1;
[1232]79    }
80
81    void CContextClient::computeLeader(int clientRank, int clientSize, int serverSize,
82                                       std::list<int>& rankRecvLeader,
83                                       std::list<int>& rankRecvNotLeader)
84    {
85      if ((0 == clientSize) || (0 == serverSize)) return;
86
[595]87      if (clientSize < serverSize)
88      {
89        int serverByClient = serverSize / clientSize;
90        int remain = serverSize % clientSize;
91        int rankStart = serverByClient * clientRank;
[300]92
[595]93        if (clientRank < remain)
94        {
95          serverByClient++;
96          rankStart += clientRank;
97        }
98        else
99          rankStart += remain;
100
101        for (int i = 0; i < serverByClient; i++)
[1232]102          rankRecvLeader.push_back(rankStart + i);
[1021]103
[1232]104        rankRecvNotLeader.resize(0);
[1158]105      }
[595]106      else
107      {
108        int clientByServer = clientSize / serverSize;
109        int remain = clientSize % serverSize;
110
111        if (clientRank < (clientByServer + 1) * remain)
112        {
113          if (clientRank % (clientByServer + 1) == 0)
[1232]114            rankRecvLeader.push_back(clientRank / (clientByServer + 1));
[1021]115          else
[1232]116            rankRecvNotLeader.push_back(clientRank / (clientByServer + 1));
[595]117        }
118        else
119        {
120          int rank = clientRank - (clientByServer + 1) * remain;
121          if (rank % clientByServer == 0)
[1232]122            rankRecvLeader.push_back(remain + rank / clientByServer);
[1021]123          else
[1232]124            rankRecvNotLeader.push_back(remain + rank / clientByServer);
[595]125        }
126      }
[300]127    }
128
[512]129    /*!
130    In case of attached mode, the current context must be reset to context for client
131    \param [in] event Event sent to server
132    */
[300]133    void CContextClient::sendEvent(CEventClient& event)
134    {
[731]135      list<int> ranks = event.getRanks();
[2130]136      info(100)<<"Event "<<timeLine<<" of context "<<context_->getId()<<endl ;
[1377]137      if (CXios::checkEventSync)
138      {
139        int typeId, classId, typeId_in, classId_in, timeLine_out;
140        typeId_in=event.getTypeId() ;
141        classId_in=event.getClassId() ;
[1475]142//        MPI_Allreduce(&timeLine,&timeLine_out, 1, MPI_UINT64_T, MPI_SUM, intraComm) ; // MPI_UINT64_T standardized by MPI 3
[1639]143        MPI_Allreduce(&timeLine,&timeLine_out, 1, MPI_LONG_LONG_INT, MPI_SUM, intraComm) ; 
144        MPI_Allreduce(&typeId_in,&typeId, 1, MPI_INT, MPI_SUM, intraComm) ;
145        MPI_Allreduce(&classId_in,&classId, 1, MPI_INT, MPI_SUM, intraComm) ;
[1377]146        if (typeId/clientSize!=event.getTypeId() || classId/clientSize!=event.getClassId() || timeLine_out/clientSize!=timeLine)
147        {
148           ERROR("void CContextClient::sendEvent(CEventClient& event)",
149               << "Event are not coherent between client.");
150        }
151      }
152
[595]153      if (!event.isEmpty())
[300]154      {
[731]155        list<int> sizes = event.getSizes();
[300]156
[1757]157         // We force the getBuffers call to be non-blocking on classical servers
[1054]158        list<CBufferOut*> buffList;
[1757]159        getBuffers(timeLine, ranks, sizes, buffList) ;
[509]160
[1757]161        event.send(timeLine, sizes, buffList);
162       
163        //for (auto itRank = ranks.begin(); itRank != ranks.end(); itRank++) buffers[*itRank]->infoBuffer() ;
[731]164
[1757]165        unlockBuffers(ranks) ;
[2130]166        info(100)<<"Event "<<timeLine<<" of context "<<context_->getId()<<"  sent"<<endl ;
[1757]167         
168        checkBuffers(ranks);
[300]169      }
[1761]170     
171      if (isAttachedModeEnabled()) // couldBuffer is always true in attached mode
172      {
[2130]173        while (checkBuffers(ranks)) context_->globalEventLoop() ;
174     
175        CXios::getDaemonsManager()->scheduleContext(hashId_) ;
176        while (CXios::getDaemonsManager()->isScheduledContext(hashId_)) context_->globalEventLoop() ;
[1761]177      }
178     
[1054]179      timeLine++;
180    }
181
182    /*!
[512]183    If client is also server (attached mode), after sending event, it should process right away
184    the incoming event.
185    \param [in] ranks list rank of server connected this client
186    */
[300]187    void CContextClient::waitEvent(list<int>& ranks)
188    {
[1761]189      while (checkBuffers(ranks))
190      {
[2130]191        context_->eventLoop() ;
[1761]192      }
193
194      MPI_Request req ;
195      MPI_Status status ;
196
197      MPI_Ibarrier(intraComm,&req) ;
198      int flag=false ;
199
200      do 
201      {
202        CXios::getDaemonsManager()->eventLoop() ;
203        MPI_Test(&req,&flag,&status) ;
204      } while (!flag) ;
205
206
207    }
208
209
210    void CContextClient::waitEvent_old(list<int>& ranks)
211    {
[595]212      parentServer->server->setPendingEvent();
213      while (checkBuffers(ranks))
[300]214      {
[595]215        parentServer->server->listen();
216        parentServer->server->checkPendingRequest();
[300]217      }
[386]218
[595]219      while (parentServer->server->hasPendingEvent())
[386]220      {
[595]221       parentServer->server->eventLoop();
[386]222      }
[300]223    }
224
[512]225    /*!
[1054]226     * Get buffers for each connection to the servers. This function blocks until there is enough room in the buffers unless
227     * it is explicitly requested to be non-blocking.
228     *
[1757]229     *
230     * \param [in] timeLine time line of the event which will be sent to servers
[1054]231     * \param [in] serverList list of rank of connected server
232     * \param [in] sizeList size of message corresponding to each connection
233     * \param [out] retBuffers list of buffers that can be used to store an event
234     * \param [in] nonBlocking whether this function should be non-blocking
235     * \return whether the already allocated buffers could be used
[512]236    */
[1757]237    bool CContextClient::getBuffers(const size_t timeLine, const list<int>& serverList, const list<int>& sizeList, list<CBufferOut*>& retBuffers,
[1071]238                                    bool nonBlocking /*= false*/)
[300]239    {
[1054]240      list<int>::const_iterator itServer, itSize;
[595]241      list<CClientBuffer*> bufferList;
[1054]242      map<int,CClientBuffer*>::const_iterator it;
[595]243      list<CClientBuffer*>::iterator itBuffer;
[884]244      bool areBuffersFree;
[300]245
[595]246      for (itServer = serverList.begin(); itServer != serverList.end(); itServer++)
[300]247      {
[595]248        it = buffers.find(*itServer);
249        if (it == buffers.end())
[300]250        {
[595]251          newBuffer(*itServer);
252          it = buffers.find(*itServer);
[509]253        }
[595]254        bufferList.push_back(it->second);
[300]255      }
[347]256
257      CTimer::get("Blocking time").resume();
[884]258      do
[300]259      {
[884]260        areBuffersFree = true;
[595]261        for (itBuffer = bufferList.begin(), itSize = sizeList.begin(); itBuffer != bufferList.end(); itBuffer++, itSize++)
[1757]262        {
[884]263          areBuffersFree &= (*itBuffer)->isBufferFree(*itSize);
[1757]264        }
[884]265
266        if (!areBuffersFree)
[300]267        {
[1757]268          for (itBuffer = bufferList.begin(); itBuffer != bufferList.end(); itBuffer++) (*itBuffer)->unlockBuffer();
[884]269          checkBuffers();
[2130]270/*         
[1761]271          context->server->listen();
272
273          if (context->serverPrimServer.size()>0)
[1130]274          {
[1757]275            for (int i = 0; i < context->serverPrimServer.size(); ++i)  context->serverPrimServer[i]->listen();
[1764]276 //ym           CServer::contextEventLoop(false) ; // avoid dead-lock at finalize...
277            context->globalEventLoop() ;
[1130]278          }
[2130]279*/
280           context_->globalEventLoop() ;
281        }
[1130]282
[1054]283      } while (!areBuffersFree && !nonBlocking);
[347]284      CTimer::get("Blocking time").suspend();
285
[1054]286      if (areBuffersFree)
[300]287      {
[1054]288        for (itBuffer = bufferList.begin(), itSize = sizeList.begin(); itBuffer != bufferList.end(); itBuffer++, itSize++)
[1757]289          retBuffers.push_back((*itBuffer)->getBuffer(timeLine, *itSize));
[300]290      }
[1054]291      return areBuffersFree;
[300]292   }
[509]293
[512]294   /*!
295   Make a new buffer for a certain connection to server with specific rank
296   \param [in] rank rank of connected server
297   */
[300]298   void CContextClient::newBuffer(int rank)
299   {
[1201]300      if (!mapBufferSize_.count(rank))
301      {
302        error(0) << "WARNING: Unexpected request for buffer to communicate with server " << rank << std::endl;
303        mapBufferSize_[rank] = CXios::minBufferSize;
304        maxEventSizes[rank] = CXios::minBufferSize;
305      }
[1757]306     
307      vector<MPI_Win> Wins(2,MPI_WIN_NULL) ;
308      if (!isAttachedModeEnabled()) Wins=windows[rank] ;
309 
310      CClientBuffer* buffer = buffers[rank] = new CClientBuffer(interComm, Wins, clientRank, rank, mapBufferSize_[rank], maxEventSizes[rank]);
[2130]311      if (isGrowableBuffer_) buffer->setGrowableBuffer(1.2) ;
312      else buffer->fixBuffer() ;
[1201]313      // Notify the server
[2130]314      CBufferOut* bufOut = buffer->getBuffer(0, 4*sizeof(MPI_Aint));
315      MPI_Aint sendBuff[4] ;
316      sendBuff[0]=hashId_;
317      sendBuff[1]=mapBufferSize_[rank];
318      sendBuff[2]=buffers[rank]->getWinAddress(0); 
319      sendBuff[3]=buffers[rank]->getWinAddress(1); 
[1757]320      info(100)<<"CContextClient::newBuffer : rank "<<rank<<" winAdress[0] "<<buffers[rank]->getWinAddress(0)<<" winAdress[1] "<<buffers[rank]->getWinAddress(1)<<endl;
[2130]321      bufOut->put(sendBuff, 4); 
[1757]322      buffer->checkBuffer(true);
323
[509]324   }
[300]325
[512]326   /*!
327   Verify state of buffers. Buffer is under pending state if there is no message on it
328   \return state of buffers, pending(true), ready(false)
329   */
[300]330   bool CContextClient::checkBuffers(void)
331   {
[595]332      map<int,CClientBuffer*>::iterator itBuff;
333      bool pending = false;
[1130]334      for (itBuff = buffers.begin(); itBuff != buffers.end(); itBuff++)
[1757]335        pending |= itBuff->second->checkBuffer(!pureOneSided);
[595]336      return pending;
[509]337   }
[300]338
[512]339   //! Release all buffers
[1071]340   void CContextClient::releaseBuffers()
[300]341   {
[595]342      map<int,CClientBuffer*>::iterator itBuff;
[1077]343      for (itBuff = buffers.begin(); itBuff != buffers.end(); itBuff++)
[1139]344      {
[1757]345         delete itBuff->second;
[1139]346      }
[1077]347      buffers.clear();
[1757]348
349/* don't know when release windows
350
351      if (!isAttachedModeEnabled())
352      { 
353        for(int rank=0; rank<serverSize; rank++)
354        {
355          MPI_Win_free(&windows[rank][0]);
356          MPI_Win_free(&windows[rank][1]);
357        }
358      }
[1761]359*/
[509]360   }
[1761]361
[1757]362     
363  /*!
364   Lock the buffers for one sided communications
365   \param [in] ranks list rank of server to which client connects to
366   */
367   void CContextClient::lockBuffers(list<int>& ranks)
368   {
369      list<int>::iterator it;
370      for (it = ranks.begin(); it != ranks.end(); it++) buffers[*it]->lockBuffer();
371   }
[300]372
[1757]373  /*!
374   Unlock the buffers for one sided communications
375   \param [in] ranks list rank of server to which client connects to
376   */
377   void CContextClient::unlockBuffers(list<int>& ranks)
378   {
379      list<int>::iterator it;
380      for (it = ranks.begin(); it != ranks.end(); it++) buffers[*it]->unlockBuffer();
381   }
382     
[512]383   /*!
384   Verify state of buffers corresponding to a connection
385   \param [in] ranks list rank of server to which client connects to
386   \return state of buffers, pending(true), ready(false)
387   */
[300]388   bool CContextClient::checkBuffers(list<int>& ranks)
389   {
[595]390      list<int>::iterator it;
391      bool pending = false;
[1757]392      for (it = ranks.begin(); it != ranks.end(); it++) pending |= buffers[*it]->checkBuffer(!pureOneSided);
[595]393      return pending;
[509]394   }
[300]395
[512]396   /*!
[917]397    * Set the buffer size for each connection. Warning: This function is collective.
398    *
399    * \param [in] mapSize maps the rank of the connected servers to the size of the correspoinding buffer
400    * \param [in] maxEventSize maps the rank of the connected servers to the size of the biggest event
[512]401   */
[2130]402   void CContextClient::setBufferSize(const std::map<int,StdSize>& mapSize)
[509]403   {
[2130]404     for(auto& it : mapSize) {buffers[it.first]->fixBufferSize(std::min(it.second*CXios::bufferSizeFactor*1.01,CXios::maxBufferSize*1.0));}
[509]405   }
406
[1158]407  /*!
408  Get leading server in the group of connected server
409  \return ranks of leading servers
410  */
411  const std::list<int>& CContextClient::getRanksServerNotLeader(void) const
412  {
413    return ranksServerNotLeader;
414  }
[1021]415
[1158]416  /*!
417  Check if client connects to leading server
418  \return connected(true), not connected (false)
419  */
420  bool CContextClient::isServerNotLeader(void) const
421  {
422    return !ranksServerNotLeader.empty();
423  }
[1021]424
[595]425  /*!
426  Get leading server in the group of connected server
427  \return ranks of leading servers
428  */
429  const std::list<int>& CContextClient::getRanksServerLeader(void) const
430  {
431    return ranksServerLeader;
432  }
[509]433
[595]434  /*!
435  Check if client connects to leading server
436  \return connected(true), not connected (false)
437  */
438  bool CContextClient::isServerLeader(void) const
439  {
440    return !ranksServerLeader.empty();
441  }
[300]442
[512]443   /*!
[1130]444   * Finalize context client and do some reports. Function is non-blocking.
[512]445   */
[1130]446  void CContextClient::finalize(void)
[1054]447  {
448    map<int,CClientBuffer*>::iterator itBuff;
[1757]449    std::list<int>::iterator ItServerLeader; 
450   
[1054]451    bool stop = false;
[731]452
[1757]453    int* nbServerConnectionLocal  = new int[serverSize] ;
454    int* nbServerConnectionGlobal  = new int[serverSize] ;
455    for(int i=0;i<serverSize;++i) nbServerConnectionLocal[i]=0 ;
456    for (itBuff = buffers.begin(); itBuff != buffers.end(); itBuff++)  nbServerConnectionLocal[itBuff->first]=1 ;
457    for (ItServerLeader = ranksServerLeader.begin(); ItServerLeader != ranksServerLeader.end(); ItServerLeader++)  nbServerConnectionLocal[*ItServerLeader]=1 ;
458   
459    MPI_Allreduce(nbServerConnectionLocal, nbServerConnectionGlobal, serverSize, MPI_INT, MPI_SUM, intraComm);
460   
461    CEventClient event(CContext::GetType(), CContext::EVENT_ID_CONTEXT_FINALIZE);
462    CMessage msg;
[509]463
[1757]464    for (int i=0;i<serverSize;++i) if (nbServerConnectionLocal[i]==1) event.push(i, nbServerConnectionGlobal[i], msg) ;
465    sendEvent(event);
466
467    delete[] nbServerConnectionLocal ;
468    delete[] nbServerConnectionGlobal ;
[509]469
[1765]470
[1054]471    CTimer::get("Blocking time").resume();
[1757]472    checkBuffers();
[1054]473    CTimer::get("Blocking time").suspend();
474
475    std::map<int,StdSize>::const_iterator itbMap = mapBufferSize_.begin(),
476                                          iteMap = mapBufferSize_.end(), itMap;
[1071]477
[1054]478    StdSize totalBuf = 0;
479    for (itMap = itbMap; itMap != iteMap; ++itMap)
480    {
[2130]481      report(10) << " Memory report : Context <" << context_->getId() << "> : client side : memory used for buffer of each connection to server" << endl
[1054]482                 << "  +) To server with rank " << itMap->first << " : " << itMap->second << " bytes " << endl;
483      totalBuf += itMap->second;
484    }
[2130]485    report(0) << " Memory report : Context <" << context_->getId() << "> : client side : total memory used for buffer " << totalBuf << " bytes" << endl;
[1054]486
487  }
[1130]488
[1139]489
490  /*!
491  */
[1130]492  bool CContextClient::havePendingRequests(void)
493  {
494    bool pending = false;
495    map<int,CClientBuffer*>::iterator itBuff;
496    for (itBuff = buffers.begin(); itBuff != buffers.end(); itBuff++)
497      pending |= itBuff->second->hasPendingRequest();
498    return pending;
499  }
[1757]500 
501  bool CContextClient::isNotifiedFinalized(void)
502  {
503    if (isAttachedModeEnabled()) return true ;
[1130]504
[1757]505    bool finalized = true;
506    map<int,CClientBuffer*>::iterator itBuff;
507    for (itBuff = buffers.begin(); itBuff != buffers.end(); itBuff++)
508      finalized &= itBuff->second->isNotifiedFinalized();
509    return finalized;
510  }
[1130]511
[509]512}
Note: See TracBrowser for help on using the repository browser.