source: XIOS/trunk/src/context_client.cpp @ 1059

Last change on this file since 1059 was 1033, checked in by rlacroix, 7 years ago

Make the XIOS server(s) completely non-blocking.

This fixes some deadlocks caused by bugs in the communication protocol when using inputs and multiple contexts.

  • Property copyright set to
    Software name : XIOS (Xml I/O Server)
    http://forge.ipsl.jussieu.fr/ioserver
    Creation date : January 2009
    Licence : CeCILL version 2
    see license file in root directory : Licence_CeCILL_V2-en.txt
    or http://www.cecill.info/licences/Licence_CeCILL_V2-en.html
    Holder : CEA/LSCE (Laboratoire des Sciences du Climat et de l'Environnement)
    CNRS/IPSL (Institut Pierre Simon Laplace)
    Project Manager : Yann Meurdesoif
    yann.meurdesoif@cea.fr
  • Property svn:eol-style set to native
File size: 13.2 KB
RevLine 
[591]1#include "xios_spl.hpp"
[300]2#include "context_client.hpp"
3#include "context_server.hpp"
4#include "event_client.hpp"
5#include "buffer_out.hpp"
6#include "buffer_client.hpp"
7#include "type.hpp"
8#include "event_client.hpp"
9#include "context.hpp"
[382]10#include "mpi.hpp"
[347]11#include "timer.hpp"
[401]12#include "cxios.hpp"
[300]13
[335]14namespace xios
[300]15{
[512]16    /*!
17    \param [in] parent Pointer to context on client side
18    \param [in] intraComm_ communicator of group client
19    \param [in] interComm_ communicator of group server
20    \cxtSer [in] cxtSer Pointer to context of server side. (It is only used on case of attached mode)
21    */
[595]22    CContextClient::CContextClient(CContext* parent, MPI_Comm intraComm_, MPI_Comm interComm_, CContext* cxtSer)
[917]23     : mapBufferSize_(), parentServer(cxtSer), maxBufferedEvents(4)
[300]24    {
[595]25      context = parent;
26      intraComm = intraComm_;
27      interComm = interComm_;
28      MPI_Comm_rank(intraComm, &clientRank);
29      MPI_Comm_size(intraComm, &clientSize);
[509]30
[595]31      int flag;
32      MPI_Comm_test_inter(interComm, &flag);
33      if (flag) MPI_Comm_remote_size(interComm, &serverSize);
34      else  MPI_Comm_size(interComm, &serverSize);
[509]35
[595]36      if (clientSize < serverSize)
37      {
38        int serverByClient = serverSize / clientSize;
39        int remain = serverSize % clientSize;
40        int rankStart = serverByClient * clientRank;
[300]41
[595]42        if (clientRank < remain)
43        {
44          serverByClient++;
45          rankStart += clientRank;
46        }
47        else
48          rankStart += remain;
49
50        for (int i = 0; i < serverByClient; i++)
51          ranksServerLeader.push_back(rankStart + i);
[988]52
53        ranksServerNotLeader.resize(0);
[595]54      }
55      else
56      {
57        int clientByServer = clientSize / serverSize;
58        int remain = clientSize % serverSize;
59
60        if (clientRank < (clientByServer + 1) * remain)
61        {
62          if (clientRank % (clientByServer + 1) == 0)
63            ranksServerLeader.push_back(clientRank / (clientByServer + 1));
[988]64          else
65            ranksServerNotLeader.push_back(clientRank / (clientByServer + 1));
[595]66        }
67        else
68        {
69          int rank = clientRank - (clientByServer + 1) * remain;
70          if (rank % clientByServer == 0)
71            ranksServerLeader.push_back(remain + rank / clientByServer);
[988]72          else
73            ranksServerNotLeader.push_back(remain + rank / clientByServer);
74        }       
[595]75      }
76
77      timeLine = 0;
[300]78    }
79
[512]80    /*!
81    In case of attached mode, the current context must be reset to context for client
82    \param [in] event Event sent to server
83    */
[300]84    void CContextClient::sendEvent(CEventClient& event)
85    {
[731]86      list<int> ranks = event.getRanks();
[1033]87
[595]88      if (!event.isEmpty())
[300]89      {
[731]90        list<int> sizes = event.getSizes();
[300]91
[1033]92        // We force the getBuffers call to be non-blocking on the servers
93        list<CBufferOut*> buffList;
94        bool couldBuffer = getBuffers(ranks, sizes, buffList, !CXios::isClient);
[509]95
[1033]96        if (couldBuffer)
97        {
98          event.send(timeLine, sizes, buffList);
[731]99
[1033]100          checkBuffers(ranks);
101
102          if (isAttachedModeEnabled()) // couldBuffer is always true in attached mode
103          {
104            waitEvent(ranks);
105            CContext::setCurrent(context->getId());
106          }
107        }
108        else
109        {
110          tmpBufferedEvent.ranks = ranks;
111          tmpBufferedEvent.sizes = sizes;
112
113          for (list<int>::const_iterator it = sizes.begin(); it != sizes.end(); it++)
114            tmpBufferedEvent.buffers.push_back(new CBufferOut(*it));
115
116          event.send(timeLine, tmpBufferedEvent.sizes, tmpBufferedEvent.buffers);
117        }
[300]118      }
119
[1033]120      timeLine++;
121    }
122
123    /*!
124     * Send the temporarily buffered event (if any).
125     *
126     * \return true if a temporarily buffered event could be sent, false otherwise
127     */
128    bool CContextClient::sendTemporarilyBufferedEvent()
129    {
130      bool couldSendTmpBufferedEvent = false;
131
132      if (hasTemporarilyBufferedEvent())
[511]133      {
[1033]134        list<CBufferOut*> buffList;
135        if (getBuffers(tmpBufferedEvent.ranks, tmpBufferedEvent.sizes, buffList, true)) // Non-blocking call
136        {
137          list<CBufferOut*>::iterator it, itBuffer;
138
139          for (it = tmpBufferedEvent.buffers.begin(), itBuffer = buffList.begin(); it != tmpBufferedEvent.buffers.end(); it++, itBuffer++)
140            (*itBuffer)->put((char*)(*it)->start(), (*it)->count());
141
142          checkBuffers(tmpBufferedEvent.ranks);
143
144          tmpBufferedEvent.clear();
145
146          couldSendTmpBufferedEvent = true;
147        }
[511]148      }
149
[1033]150      return couldSendTmpBufferedEvent;
[300]151    }
[509]152
[512]153    /*!
154    If client is also server (attached mode), after sending event, it should process right away
155    the incoming event.
156    \param [in] ranks list rank of server connected this client
157    */
[300]158    void CContextClient::waitEvent(list<int>& ranks)
159    {
[595]160      parentServer->server->setPendingEvent();
161      while (checkBuffers(ranks))
[300]162      {
[595]163        parentServer->server->listen();
164        parentServer->server->checkPendingRequest();
[300]165      }
[386]166
[595]167      while (parentServer->server->hasPendingEvent())
[386]168      {
[595]169       parentServer->server->eventLoop();
[386]170      }
[300]171    }
172
[512]173    /*!
[1033]174     * Get buffers for each connection to the servers. This function blocks until there is enough room in the buffers unless
175     * it is explicitly requested to be non-blocking.
176     *
177     * \param [in] serverList list of rank of connected server
178     * \param [in] sizeList size of message corresponding to each connection
179     * \param [out] retBuffers list of buffers that can be used to store an event
180     * \param [in] nonBlocking whether this function should be non-blocking
181     * \return whether the already allocated buffers could be used
[512]182    */
[1033]183    bool CContextClient::getBuffers(const list<int>& serverList, const list<int>& sizeList, list<CBufferOut*>& retBuffers, bool nonBlocking /*= false*/)
[300]184    {
[1033]185      list<int>::const_iterator itServer, itSize;
[595]186      list<CClientBuffer*> bufferList;
[1033]187      map<int,CClientBuffer*>::const_iterator it;
[595]188      list<CClientBuffer*>::iterator itBuffer;
[884]189      bool areBuffersFree;
[300]190
[595]191      for (itServer = serverList.begin(); itServer != serverList.end(); itServer++)
[300]192      {
[595]193        it = buffers.find(*itServer);
194        if (it == buffers.end())
[300]195        {
[595]196          newBuffer(*itServer);
197          it = buffers.find(*itServer);
[509]198        }
[595]199        bufferList.push_back(it->second);
[300]200      }
[347]201
202      CTimer::get("Blocking time").resume();
[884]203      do
[300]204      {
[884]205        areBuffersFree = true;
[595]206        for (itBuffer = bufferList.begin(), itSize = sizeList.begin(); itBuffer != bufferList.end(); itBuffer++, itSize++)
[884]207          areBuffersFree &= (*itBuffer)->isBufferFree(*itSize);
208
209        if (!areBuffersFree)
[300]210        {
[884]211          checkBuffers();
212          context->server->listen();
[300]213        }
[1033]214      } while (!areBuffersFree && !nonBlocking);
[347]215      CTimer::get("Blocking time").suspend();
216
[1033]217      if (areBuffersFree)
[300]218      {
[1033]219        for (itBuffer = bufferList.begin(), itSize = sizeList.begin(); itBuffer != bufferList.end(); itBuffer++, itSize++)
220          retBuffers.push_back((*itBuffer)->getBuffer(*itSize));
[300]221      }
[1033]222
223      return areBuffersFree;
[300]224   }
[509]225
[512]226   /*!
227   Make a new buffer for a certain connection to server with specific rank
228   \param [in] rank rank of connected server
229   */
[300]230   void CContextClient::newBuffer(int rank)
231   {
[724]232      if (!mapBufferSize_.count(rank))
233      {
234        error(0) << "WARNING: Unexpected request for buffer to communicate with server " << rank << std::endl;
235        mapBufferSize_[rank] = CXios::minBufferSize;
236      }
[917]237      CClientBuffer* buffer = buffers[rank] = new CClientBuffer(interComm, rank, mapBufferSize_[rank], maxBufferedEvents);
[725]238      // Notify the server
239      CBufferOut* bufOut = buffer->getBuffer(sizeof(StdSize));
240      bufOut->put(mapBufferSize_[rank]); // Stupid C++
241      buffer->checkBuffer();
[509]242   }
[300]243
[512]244   /*!
245   Verify state of buffers. Buffer is under pending state if there is no message on it
246   \return state of buffers, pending(true), ready(false)
247   */
[300]248   bool CContextClient::checkBuffers(void)
249   {
[595]250      map<int,CClientBuffer*>::iterator itBuff;
251      bool pending = false;
252      for (itBuff = buffers.begin(); itBuff != buffers.end(); itBuff++) pending |= itBuff->second->checkBuffer();
253      return pending;
[509]254   }
[300]255
[512]256   //! Release all buffers
[300]257   void CContextClient::releaseBuffers(void)
258   {
[595]259      map<int,CClientBuffer*>::iterator itBuff;
260      for (itBuff = buffers.begin(); itBuff != buffers.end(); itBuff++) delete itBuff->second;
[509]261   }
[300]262
[512]263   /*!
264   Verify state of buffers corresponding to a connection
265   \param [in] ranks list rank of server to which client connects to
266   \return state of buffers, pending(true), ready(false)
267   */
[300]268   bool CContextClient::checkBuffers(list<int>& ranks)
269   {
[595]270      list<int>::iterator it;
271      bool pending = false;
272      for (it = ranks.begin(); it != ranks.end(); it++) pending |= buffers[*it]->checkBuffer();
273      return pending;
[509]274   }
[300]275
[512]276   /*!
[917]277    * Set the buffer size for each connection. Warning: This function is collective.
278    *
279    * \param [in] mapSize maps the rank of the connected servers to the size of the correspoinding buffer
280    * \param [in] maxEventSize maps the rank of the connected servers to the size of the biggest event
[512]281   */
[917]282   void CContextClient::setBufferSize(const std::map<int,StdSize>& mapSize, const std::map<int,StdSize>& maxEventSize)
[509]283   {
284     mapBufferSize_ = mapSize;
[917]285
286     // Compute the maximum number of events that can be safely buffered.
287     double minBufferSizeEventSizeRatio = std::numeric_limits<double>::max();
288     for (std::map<int,StdSize>::const_iterator it = mapSize.begin(), ite = mapSize.end(); it != ite; ++it)
289     {
290       double ratio = double(it->second) / maxEventSize.at(it->first);
291       if (ratio < minBufferSizeEventSizeRatio) minBufferSizeEventSizeRatio = ratio;
292     }
293     MPI_Allreduce(MPI_IN_PLACE, &minBufferSizeEventSizeRatio, 1, MPI_DOUBLE, MPI_MIN, intraComm);
294
295     if (minBufferSizeEventSizeRatio < 1.0)
296       ERROR("void CContextClient::setBufferSize(const std::map<int,StdSize>& mapSize, const std::map<int,StdSize>& maxEventSize)",
297             << "The buffer sizes and the maximum events sizes are incoherent.");
298
299     maxBufferedEvents = size_t(2 * minBufferSizeEventSizeRatio) // there is room for two local buffers on the server
300                          + size_t(minBufferSizeEventSizeRatio)  // one local buffer can always be fully used
301                          + 1;                                   // the other local buffer might contain only one event
[509]302   }
303
[595]304  /*!
305  Get leading server in the group of connected server
306  \return ranks of leading servers
307  */
[988]308  const std::list<int>& CContextClient::getRanksServerNotLeader(void) const
309  {
310    return ranksServerNotLeader;
311  }
312
313  /*!
314  Check if client connects to leading server
315  \return connected(true), not connected (false)
316  */
317  bool CContextClient::isServerNotLeader(void) const
318  {
319    return !ranksServerNotLeader.empty();
320  }
321
322  /*!
323  Get leading server in the group of connected server
324  \return ranks of leading servers
325  */
[595]326  const std::list<int>& CContextClient::getRanksServerLeader(void) const
327  {
328    return ranksServerLeader;
329  }
[509]330
[595]331  /*!
332  Check if client connects to leading server
333  \return connected(true), not connected (false)
334  */
335  bool CContextClient::isServerLeader(void) const
336  {
337    return !ranksServerLeader.empty();
338  }
[300]339
[704]340  /*!
341   * Check if the attached mode is used.
342   *
343   * \return true if and only if attached mode is used
344   */
345  bool CContextClient::isAttachedModeEnabled() const
346  {
347    return (parentServer != 0);
348  }
[697]349
[512]350   /*!
351   Finalize context client and do some reports
352   */
[300]353   void CContextClient::finalize(void)
354   {
[595]355     map<int,CClientBuffer*>::iterator itBuff;
[1033]356     bool stop = false;
[731]357
[1033]358     CTimer::get("Blocking time").resume();
359     while (hasTemporarilyBufferedEvent())
360     {
361       checkBuffers();
362       sendTemporarilyBufferedEvent();
363     }
364     CTimer::get("Blocking time").suspend();
365
[595]366     CEventClient event(CContext::GetType(), CContext::EVENT_ID_CONTEXT_FINALIZE);
[300]367     if (isServerLeader())
368     {
[595]369       CMessage msg;
370       const std::list<int>& ranks = getRanksServerLeader();
371       for (std::list<int>::const_iterator itRank = ranks.begin(), itRankEnd = ranks.end(); itRank != itRankEnd; ++itRank)
372         event.push(*itRank, 1, msg);
373       sendEvent(event);
[300]374     }
[595]375     else sendEvent(event);
[509]376
[347]377     CTimer::get("Blocking time").resume();
[1033]378     while (!stop)
[300]379     {
[595]380       checkBuffers();
[1033]381       if (hasTemporarilyBufferedEvent())
382         sendTemporarilyBufferedEvent();
383
384       stop = true;
385       for (itBuff = buffers.begin(); itBuff != buffers.end(); itBuff++) stop &= !itBuff->second->hasPendingRequest();
[300]386     }
[347]387     CTimer::get("Blocking time").suspend();
[509]388
[595]389     std::map<int,StdSize>::const_iterator itbMap = mapBufferSize_.begin(),
390                                           iteMap = mapBufferSize_.end(), itMap;
[511]391     StdSize totalBuf = 0;
392     for (itMap = itbMap; itMap != iteMap; ++itMap)
393     {
[595]394       report(10) << " Memory report : Context <" << context->getId() << "> : client side : memory used for buffer of each connection to server" << endl
395                  << "  +) To server with rank " << itMap->first << " : " << itMap->second << " bytes " << endl;
[511]396       totalBuf += itMap->second;
397     }
[595]398     report(0) << " Memory report : Context <" << context->getId() << "> : client side : total memory used for buffer " << totalBuf << " bytes" << endl;
[511]399
[595]400     releaseBuffers();
[300]401   }
[509]402}
Note: See TracBrowser for help on using the repository browser.