source: XIOS/trunk/src/context_client.cpp @ 2195

Last change on this file since 2195 was 2078, checked in by ymipsl, 3 years ago

bug fix when checking event synchronisation
YM

  • Property copyright set to
    Software name : XIOS (Xml I/O Server)
    http://forge.ipsl.jussieu.fr/ioserver
    Creation date : January 2009
    Licence : CeCILL version 2
    see license file in root directory : Licence_CeCILL_V2-en.txt
    or http://www.cecill.info/licences/Licence_CeCILL_V2-en.html
    Holder : CEA/LSCE (Laboratoire des Sciences du Climat et de l'Environnement)
    CNRS/IPSL (Institut Pierre Simon Laplace)
    Project Manager : Yann Meurdesoif
    yann.meurdesoif@cea.fr
  • Property svn:eol-style set to native
File size: 16.2 KB
RevLine 
[591]1#include "xios_spl.hpp"
[300]2#include "context_client.hpp"
3#include "context_server.hpp"
4#include "event_client.hpp"
5#include "buffer_out.hpp"
6#include "buffer_client.hpp"
7#include "type.hpp"
8#include "event_client.hpp"
9#include "context.hpp"
[382]10#include "mpi.hpp"
[347]11#include "timer.hpp"
[401]12#include "cxios.hpp"
[1130]13#include "server.hpp"
[300]14
[335]15namespace xios
[300]16{
[512]17    /*!
18    \param [in] parent Pointer to context on client side
19    \param [in] intraComm_ communicator of group client
20    \param [in] interComm_ communicator of group server
[983]21    \cxtSer [in] cxtSer Pointer to context of server side. (It is only used in case of attached mode).
[512]22    */
[1639]23    CContextClient::CContextClient(CContext* parent, MPI_Comm intraComm_, MPI_Comm interComm_, CContext* cxtSer)
[917]24     : mapBufferSize_(), parentServer(cxtSer), maxBufferedEvents(4)
[300]25    {
[595]26      context = parent;
27      intraComm = intraComm_;
28      interComm = interComm_;
[1639]29      MPI_Comm_rank(intraComm, &clientRank);
30      MPI_Comm_size(intraComm, &clientSize);
[509]31
[595]32      int flag;
[1639]33      MPI_Comm_test_inter(interComm, &flag);
34      if (flag) MPI_Comm_remote_size(interComm, &serverSize);
35      else  MPI_Comm_size(interComm, &serverSize);
[509]36
[1232]37      computeLeader(clientRank, clientSize, serverSize, ranksServerLeader, ranksServerNotLeader);
38
39      timeLine = 0;
40    }
41
42    void CContextClient::computeLeader(int clientRank, int clientSize, int serverSize,
43                                       std::list<int>& rankRecvLeader,
44                                       std::list<int>& rankRecvNotLeader)
45    {
46      if ((0 == clientSize) || (0 == serverSize)) return;
47
[595]48      if (clientSize < serverSize)
49      {
50        int serverByClient = serverSize / clientSize;
51        int remain = serverSize % clientSize;
52        int rankStart = serverByClient * clientRank;
[300]53
[595]54        if (clientRank < remain)
55        {
56          serverByClient++;
57          rankStart += clientRank;
58        }
59        else
60          rankStart += remain;
61
62        for (int i = 0; i < serverByClient; i++)
[1232]63          rankRecvLeader.push_back(rankStart + i);
[1021]64
[1232]65        rankRecvNotLeader.resize(0);
[1158]66      }
[595]67      else
68      {
69        int clientByServer = clientSize / serverSize;
70        int remain = clientSize % serverSize;
71
72        if (clientRank < (clientByServer + 1) * remain)
73        {
74          if (clientRank % (clientByServer + 1) == 0)
[1232]75            rankRecvLeader.push_back(clientRank / (clientByServer + 1));
[1021]76          else
[1232]77            rankRecvNotLeader.push_back(clientRank / (clientByServer + 1));
[595]78        }
79        else
80        {
81          int rank = clientRank - (clientByServer + 1) * remain;
82          if (rank % clientByServer == 0)
[1232]83            rankRecvLeader.push_back(remain + rank / clientByServer);
[1021]84          else
[1232]85            rankRecvNotLeader.push_back(remain + rank / clientByServer);
[595]86        }
87      }
[300]88    }
89
[512]90    /*!
91    In case of attached mode, the current context must be reset to context for client
92    \param [in] event Event sent to server
93    */
[300]94    void CContextClient::sendEvent(CEventClient& event)
95    {
[731]96      list<int> ranks = event.getRanks();
[1615]97      info(100)<<"Event "<<timeLine<<" of context "<<context->getId()<<endl ;
[1377]98      if (CXios::checkEventSync)
99      {
[2078]100        int typeId, classId, typeId_in, classId_in ;
101        size_t timeLine_out;
[1377]102        typeId_in=event.getTypeId() ;
103        classId_in=event.getClassId() ;
[1475]104//        MPI_Allreduce(&timeLine,&timeLine_out, 1, MPI_UINT64_T, MPI_SUM, intraComm) ; // MPI_UINT64_T standardized by MPI 3
[2078]105        MPI_Allreduce(&timeLine,&timeLine_out, 1, MPI_UNSIGNED_LONG_LONG, MPI_SUM, intraComm) ; 
[1639]106        MPI_Allreduce(&typeId_in,&typeId, 1, MPI_INT, MPI_SUM, intraComm) ;
107        MPI_Allreduce(&classId_in,&classId, 1, MPI_INT, MPI_SUM, intraComm) ;
[1377]108        if (typeId/clientSize!=event.getTypeId() || classId/clientSize!=event.getClassId() || timeLine_out/clientSize!=timeLine)
109        {
110           ERROR("void CContextClient::sendEvent(CEventClient& event)",
111               << "Event are not coherent between client.");
112        }
113      }
114
[595]115      if (!event.isEmpty())
[300]116      {
[731]117        list<int> sizes = event.getSizes();
[300]118
[1130]119        // We force the getBuffers call to be non-blocking on classical servers
[1054]120        list<CBufferOut*> buffList;
[1130]121        bool couldBuffer = getBuffers(ranks, sizes, buffList, (!CXios::isClient && (CServer::serverLevel == 0) ));
[1184]122//        bool couldBuffer = getBuffers(ranks, sizes, buffList, CXios::isServer );
[509]123
[1054]124        if (couldBuffer)
125        {
126          event.send(timeLine, sizes, buffList);
[1615]127          info(100)<<"Event "<<timeLine<<" of context "<<context->getId()<<"  sent"<<endl ;
[731]128
[1054]129          checkBuffers(ranks);
130
131          if (isAttachedModeEnabled()) // couldBuffer is always true in attached mode
132          {
133            waitEvent(ranks);
134            CContext::setCurrent(context->getId());
135          }
136        }
137        else
138        {
139          tmpBufferedEvent.ranks = ranks;
140          tmpBufferedEvent.sizes = sizes;
141
142          for (list<int>::const_iterator it = sizes.begin(); it != sizes.end(); it++)
143            tmpBufferedEvent.buffers.push_back(new CBufferOut(*it));
[1377]144          info(100)<<"DEBUG : temporaly event created : timeline "<<timeLine<<endl ;
[1054]145          event.send(timeLine, tmpBufferedEvent.sizes, tmpBufferedEvent.buffers);
[1615]146          info(100)<<"Event "<<timeLine<<" of context "<<context->getId()<<"  sent"<<endl ;
[1054]147        }
[300]148      }
149
[1054]150      timeLine++;
151    }
152
153    /*!
154     * Send the temporarily buffered event (if any).
155     *
[1158]156     * \return true if a temporarily buffered event could be sent, false otherwise
[1054]157     */
158    bool CContextClient::sendTemporarilyBufferedEvent()
159    {
160      bool couldSendTmpBufferedEvent = false;
161
162      if (hasTemporarilyBufferedEvent())
[511]163      {
[1054]164        list<CBufferOut*> buffList;
165        if (getBuffers(tmpBufferedEvent.ranks, tmpBufferedEvent.sizes, buffList, true)) // Non-blocking call
166        {
167          list<CBufferOut*>::iterator it, itBuffer;
168
169          for (it = tmpBufferedEvent.buffers.begin(), itBuffer = buffList.begin(); it != tmpBufferedEvent.buffers.end(); it++, itBuffer++)
170            (*itBuffer)->put((char*)(*it)->start(), (*it)->count());
171
[1377]172          info(100)<<"DEBUG : temporaly event sent "<<endl ;
[1054]173          checkBuffers(tmpBufferedEvent.ranks);
174
175          tmpBufferedEvent.clear();
176
177          couldSendTmpBufferedEvent = true;
178        }
[511]179      }
180
[1054]181      return couldSendTmpBufferedEvent;
[300]182    }
[509]183
[512]184    /*!
185    If client is also server (attached mode), after sending event, it should process right away
186    the incoming event.
187    \param [in] ranks list rank of server connected this client
188    */
[300]189    void CContextClient::waitEvent(list<int>& ranks)
190    {
[595]191      parentServer->server->setPendingEvent();
192      while (checkBuffers(ranks))
[300]193      {
[595]194        parentServer->server->listen();
195        parentServer->server->checkPendingRequest();
[300]196      }
[386]197
[595]198      while (parentServer->server->hasPendingEvent())
[386]199      {
[595]200       parentServer->server->eventLoop();
[386]201      }
[300]202    }
203
[512]204    /*!
[1054]205     * Get buffers for each connection to the servers. This function blocks until there is enough room in the buffers unless
206     * it is explicitly requested to be non-blocking.
207     *
208     * \param [in] serverList list of rank of connected server
209     * \param [in] sizeList size of message corresponding to each connection
210     * \param [out] retBuffers list of buffers that can be used to store an event
211     * \param [in] nonBlocking whether this function should be non-blocking
212     * \return whether the already allocated buffers could be used
[512]213    */
[1071]214    bool CContextClient::getBuffers(const list<int>& serverList, const list<int>& sizeList, list<CBufferOut*>& retBuffers,
215                                    bool nonBlocking /*= false*/)
[300]216    {
[1054]217      list<int>::const_iterator itServer, itSize;
[595]218      list<CClientBuffer*> bufferList;
[1054]219      map<int,CClientBuffer*>::const_iterator it;
[595]220      list<CClientBuffer*>::iterator itBuffer;
[884]221      bool areBuffersFree;
[300]222
[595]223      for (itServer = serverList.begin(); itServer != serverList.end(); itServer++)
[300]224      {
[595]225        it = buffers.find(*itServer);
226        if (it == buffers.end())
[300]227        {
[595]228          newBuffer(*itServer);
229          it = buffers.find(*itServer);
[509]230        }
[595]231        bufferList.push_back(it->second);
[300]232      }
[347]233
234      CTimer::get("Blocking time").resume();
[884]235      do
[300]236      {
[884]237        areBuffersFree = true;
[595]238        for (itBuffer = bufferList.begin(), itSize = sizeList.begin(); itBuffer != bufferList.end(); itBuffer++, itSize++)
[884]239          areBuffersFree &= (*itBuffer)->isBufferFree(*itSize);
240
241        if (!areBuffersFree)
[300]242        {
[884]243          checkBuffers();
[1130]244          if (CServer::serverLevel == 0)
245            context->server->listen();
[1071]246
[1130]247          else if (CServer::serverLevel == 1)
248          {
249            context->server->listen();
250            for (int i = 0; i < context->serverPrimServer.size(); ++i)
251              context->serverPrimServer[i]->listen();
[1378]252            CServer::contextEventLoop(false) ; // avoid dead-lock at finalize...
[1130]253          }
254
255          else if (CServer::serverLevel == 2)
256            context->server->listen();
257
[300]258        }
[1054]259      } while (!areBuffersFree && !nonBlocking);
[1130]260
[347]261      CTimer::get("Blocking time").suspend();
262
[1054]263      if (areBuffersFree)
[300]264      {
[1054]265        for (itBuffer = bufferList.begin(), itSize = sizeList.begin(); itBuffer != bufferList.end(); itBuffer++, itSize++)
266          retBuffers.push_back((*itBuffer)->getBuffer(*itSize));
[300]267      }
[1054]268
269      return areBuffersFree;
[300]270   }
[509]271
[512]272   /*!
273   Make a new buffer for a certain connection to server with specific rank
274   \param [in] rank rank of connected server
275   */
[300]276   void CContextClient::newBuffer(int rank)
277   {
[1201]278      if (!mapBufferSize_.count(rank))
279      {
280        error(0) << "WARNING: Unexpected request for buffer to communicate with server " << rank << std::endl;
281        mapBufferSize_[rank] = CXios::minBufferSize;
282        maxEventSizes[rank] = CXios::minBufferSize;
283      }
284      CClientBuffer* buffer = buffers[rank] = new CClientBuffer(interComm, rank, mapBufferSize_[rank], maxEventSizes[rank], maxBufferedEvents);
285      // Notify the server
286      CBufferOut* bufOut = buffer->getBuffer(sizeof(StdSize));
287      bufOut->put(mapBufferSize_[rank]); // Stupid C++
288      buffer->checkBuffer();
[509]289   }
[300]290
[512]291   /*!
292   Verify state of buffers. Buffer is under pending state if there is no message on it
293   \return state of buffers, pending(true), ready(false)
294   */
[300]295   bool CContextClient::checkBuffers(void)
296   {
[595]297      map<int,CClientBuffer*>::iterator itBuff;
298      bool pending = false;
[1130]299      for (itBuff = buffers.begin(); itBuff != buffers.end(); itBuff++)
300        pending |= itBuff->second->checkBuffer();
[595]301      return pending;
[509]302   }
[300]303
[512]304   //! Release all buffers
[1071]305   void CContextClient::releaseBuffers()
[300]306   {
[595]307      map<int,CClientBuffer*>::iterator itBuff;
[1077]308      for (itBuff = buffers.begin(); itBuff != buffers.end(); itBuff++)
[1139]309      {
[1077]310          delete itBuff->second;
[1139]311      }
[1077]312      buffers.clear();
[509]313   }
[300]314
[512]315   /*!
316   Verify state of buffers corresponding to a connection
317   \param [in] ranks list rank of server to which client connects to
318   \return state of buffers, pending(true), ready(false)
319   */
[300]320   bool CContextClient::checkBuffers(list<int>& ranks)
321   {
[595]322      list<int>::iterator it;
323      bool pending = false;
324      for (it = ranks.begin(); it != ranks.end(); it++) pending |= buffers[*it]->checkBuffer();
325      return pending;
[509]326   }
[300]327
[512]328   /*!
[917]329    * Set the buffer size for each connection. Warning: This function is collective.
330    *
331    * \param [in] mapSize maps the rank of the connected servers to the size of the correspoinding buffer
332    * \param [in] maxEventSize maps the rank of the connected servers to the size of the biggest event
[512]333   */
[917]334   void CContextClient::setBufferSize(const std::map<int,StdSize>& mapSize, const std::map<int,StdSize>& maxEventSize)
[509]335   {
336     mapBufferSize_ = mapSize;
[1201]337     maxEventSizes = maxEventSize;
[917]338
339     // Compute the maximum number of events that can be safely buffered.
340     double minBufferSizeEventSizeRatio = std::numeric_limits<double>::max();
341     for (std::map<int,StdSize>::const_iterator it = mapSize.begin(), ite = mapSize.end(); it != ite; ++it)
342     {
[1474]343       double ratio = double(it->second) / maxEventSizes[it->first];
[917]344       if (ratio < minBufferSizeEventSizeRatio) minBufferSizeEventSizeRatio = ratio;
345     }
[1639]346     MPI_Allreduce(MPI_IN_PLACE, &minBufferSizeEventSizeRatio, 1, MPI_DOUBLE, MPI_MIN, intraComm);
[917]347
348     if (minBufferSizeEventSizeRatio < 1.0)
[1201]349     {
[917]350       ERROR("void CContextClient::setBufferSize(const std::map<int,StdSize>& mapSize, const std::map<int,StdSize>& maxEventSize)",
351             << "The buffer sizes and the maximum events sizes are incoherent.");
[1201]352     }
353     else if (minBufferSizeEventSizeRatio == std::numeric_limits<double>::max())
354       minBufferSizeEventSizeRatio = 1.0; // In this case, maxBufferedEvents will never be used but we want to avoid any floating point exception
[917]355
356     maxBufferedEvents = size_t(2 * minBufferSizeEventSizeRatio) // there is room for two local buffers on the server
357                          + size_t(minBufferSizeEventSizeRatio)  // one local buffer can always be fully used
358                          + 1;                                   // the other local buffer might contain only one event
[509]359   }
360
[1158]361  /*!
362  Get leading server in the group of connected server
363  \return ranks of leading servers
364  */
365  const std::list<int>& CContextClient::getRanksServerNotLeader(void) const
366  {
367    return ranksServerNotLeader;
368  }
[1021]369
[1158]370  /*!
371  Check if client connects to leading server
372  \return connected(true), not connected (false)
373  */
374  bool CContextClient::isServerNotLeader(void) const
375  {
376    return !ranksServerNotLeader.empty();
377  }
[1021]378
[595]379  /*!
380  Get leading server in the group of connected server
381  \return ranks of leading servers
382  */
383  const std::list<int>& CContextClient::getRanksServerLeader(void) const
384  {
385    return ranksServerLeader;
386  }
[509]387
[595]388  /*!
389  Check if client connects to leading server
390  \return connected(true), not connected (false)
391  */
392  bool CContextClient::isServerLeader(void) const
393  {
394    return !ranksServerLeader.empty();
395  }
[300]396
[704]397  /*!
398   * Check if the attached mode is used.
399   *
400   * \return true if and only if attached mode is used
401   */
402  bool CContextClient::isAttachedModeEnabled() const
403  {
404    return (parentServer != 0);
405  }
[697]406
[512]407   /*!
[1130]408   * Finalize context client and do some reports. Function is non-blocking.
[512]409   */
[1130]410  void CContextClient::finalize(void)
[1054]411  {
412    map<int,CClientBuffer*>::iterator itBuff;
413    bool stop = false;
[731]414
[1054]415    CTimer::get("Blocking time").resume();
416    while (hasTemporarilyBufferedEvent())
417    {
418      checkBuffers();
419      sendTemporarilyBufferedEvent();
420    }
421    CTimer::get("Blocking time").suspend();
[509]422
[1054]423    CEventClient event(CContext::GetType(), CContext::EVENT_ID_CONTEXT_FINALIZE);
424    if (isServerLeader())
425    {
426      CMessage msg;
427      const std::list<int>& ranks = getRanksServerLeader();
428      for (std::list<int>::const_iterator itRank = ranks.begin(), itRankEnd = ranks.end(); itRank != itRankEnd; ++itRank)
[1377]429      {
430        info(100)<<"DEBUG : Sent context Finalize event to rank "<<*itRank<<endl ;
[1054]431        event.push(*itRank, 1, msg);
[1377]432      }
[1054]433      sendEvent(event);
434    }
435    else sendEvent(event);
[509]436
[1054]437    CTimer::get("Blocking time").resume();
[1130]438//    while (!stop)
[1054]439    {
440      checkBuffers();
441      if (hasTemporarilyBufferedEvent())
442        sendTemporarilyBufferedEvent();
[511]443
[1054]444      stop = true;
[1130]445//      for (itBuff = buffers.begin(); itBuff != buffers.end(); itBuff++) stop &= !itBuff->second->hasPendingRequest();
[1054]446    }
447    CTimer::get("Blocking time").suspend();
448
449    std::map<int,StdSize>::const_iterator itbMap = mapBufferSize_.begin(),
450                                          iteMap = mapBufferSize_.end(), itMap;
[1071]451
[1054]452    StdSize totalBuf = 0;
453    for (itMap = itbMap; itMap != iteMap; ++itMap)
454    {
455      report(10) << " Memory report : Context <" << context->getId() << "> : client side : memory used for buffer of each connection to server" << endl
456                 << "  +) To server with rank " << itMap->first << " : " << itMap->second << " bytes " << endl;
457      totalBuf += itMap->second;
458    }
459    report(0) << " Memory report : Context <" << context->getId() << "> : client side : total memory used for buffer " << totalBuf << " bytes" << endl;
460
[1130]461    //releaseBuffers(); // moved to CContext::finalize()
[1054]462  }
[1130]463
[1139]464
465  /*!
466  */
[1130]467  bool CContextClient::havePendingRequests(void)
468  {
469    bool pending = false;
470    map<int,CClientBuffer*>::iterator itBuff;
471    for (itBuff = buffers.begin(); itBuff != buffers.end(); itBuff++)
472      pending |= itBuff->second->hasPendingRequest();
473    return pending;
474  }
475
476
[509]477}
Note: See TracBrowser for help on using the repository browser.