Changeset 917


Ignore:
Timestamp:
08/04/16 16:24:20 (5 years ago)
Author:
rlacroix
Message:

Fix the client/server communication protocol.

In some extreme cases, a deadlock could occur. To fix this, the number of buffered events must be properly limited.

If you notice decreased performance due to this commit, please let us know.

Fixes ticket #91.

Location:
XIOS/trunk/src
Files:
6 edited

Legend:

Unmodified
Added
Removed
  • XIOS/trunk/src/buffer_client.cpp

    r732 r917  
    1212  size_t CClientBuffer::maxRequestSize = 0; 
    1313 
    14   CClientBuffer::CClientBuffer(MPI_Comm interComm_, int serverRank_, StdSize bfSize) 
     14  CClientBuffer::CClientBuffer(MPI_Comm interComm, int serverRank, StdSize bufferSize, StdSize maxBufferedEvents) 
     15    : interComm(interComm) 
     16    , serverRank(serverRank) 
     17    , bufferSize(bufferSize) 
     18    , current(0) 
     19    , count(0) 
     20    , bufferedEvents(0) 
     21    , maxBufferedEvents(maxBufferedEvents) 
     22    , pending(false) 
    1523  { 
    16     interComm = interComm_; 
    17     serverRank = serverRank_; 
    18     bufferSize = bfSize; 
    1924    buffer[0] = new char[bufferSize]; // transform it with MPI_ALLOC_MEM later 
    2025    buffer[1] = new char[bufferSize]; 
    21     current = 0; 
    22     count = 0; 
    23     pending = false; 
    2426    retBuffer = new CBufferOut(buffer[current], bufferSize); 
    25     info(10) << "CClientBuffer: allocated " << bufferSize << " bytes for server " << serverRank_ << endl; 
     27    info(10) << "CClientBuffer: allocated 2 x " << bufferSize << " bytes for server " << serverRank << " with a maximum of " << maxBufferedEvents << " buffered events" << endl; 
    2628  } 
    2729 
     
    4648            << "The requested size (" << size << " bytes) is too big to fit the buffer (" << bufferSize << " bytes), please increase the client buffer size." << endl); 
    4749 
    48     return (size <= remain()); 
     50    return (size <= remain() && bufferedEvents < maxBufferedEvents); 
    4951  } 
    5052 
     
    5658      retBuffer->realloc(buffer[current] + count, size); 
    5759      count += size; 
     60      bufferedEvents++; 
    5861      return retBuffer; 
    5962    } 
     
    8891        else current = 1; 
    8992        count = 0; 
     93        bufferedEvents = 0; 
    9094      } 
    9195    } 
  • XIOS/trunk/src/buffer_client.hpp

    r732 r917  
    1414      static size_t maxRequestSize; 
    1515 
    16       CClientBuffer(MPI_Comm intercomm,int serverRank, StdSize bfSize = 0); 
     16      CClientBuffer(MPI_Comm intercomm, int serverRank, StdSize bufferSize, StdSize maxBufferedEvents); 
    1717      ~CClientBuffer(); 
    1818 
     
    2828      int current; 
    2929      int count; 
    30       int bufferSize; 
    31       int serverRank; 
     30      int bufferedEvents; 
     31      const int maxBufferedEvents; 
     32      const int bufferSize; 
     33      const int serverRank; 
    3234      bool pending; 
    3335 
     
    3537 
    3638      CBufferOut* retBuffer; 
    37       MPI_Comm interComm; 
     39      const MPI_Comm interComm; 
    3840  }; 
    3941} 
    40  
    4142#endif 
  • XIOS/trunk/src/context_client.cpp

    r884 r917  
    2121    */ 
    2222    CContextClient::CContextClient(CContext* parent, MPI_Comm intraComm_, MPI_Comm interComm_, CContext* cxtSer) 
    23      : mapBufferSize_(), parentServer(cxtSer) 
     23     : mapBufferSize_(), parentServer(cxtSer), maxBufferedEvents(4) 
    2424    { 
    2525      context = parent; 
     
    7070 
    7171      timeLine = 0; 
    72  
    7372    } 
    7473 
     
    179178        mapBufferSize_[rank] = CXios::minBufferSize; 
    180179      } 
    181       CClientBuffer* buffer = buffers[rank] = new CClientBuffer(interComm, rank, mapBufferSize_[rank]); 
     180      CClientBuffer* buffer = buffers[rank] = new CClientBuffer(interComm, rank, mapBufferSize_[rank], maxBufferedEvents); 
    182181      // Notify the server 
    183182      CBufferOut* bufOut = buffer->getBuffer(sizeof(StdSize)); 
     
    219218 
    220219   /*! 
    221    Set buffer size for each connection 
    222    \param [in] mapSize mapping rank of connected server to size of allocated buffer 
    223    */ 
    224    void CContextClient::setBufferSize(const std::map<int,StdSize>& mapSize) 
     220    * Set the buffer size for each connection. Warning: This function is collective. 
     221    * 
     222    * \param [in] mapSize maps the rank of the connected servers to the size of the corresponding buffer 
     223    * \param [in] maxEventSize maps the rank of the connected servers to the size of the biggest event 
     224   */ 
     225   void CContextClient::setBufferSize(const std::map<int,StdSize>& mapSize, const std::map<int,StdSize>& maxEventSize) 
    225226   { 
    226227     mapBufferSize_ = mapSize; 
     228 
     229     // Compute the maximum number of events that can be safely buffered. 
     230     double minBufferSizeEventSizeRatio = std::numeric_limits<double>::max(); 
     231     for (std::map<int,StdSize>::const_iterator it = mapSize.begin(), ite = mapSize.end(); it != ite; ++it) 
     232     { 
     233       double ratio = double(it->second) / maxEventSize.at(it->first); 
     234       if (ratio < minBufferSizeEventSizeRatio) minBufferSizeEventSizeRatio = ratio; 
     235     } 
     236     MPI_Allreduce(MPI_IN_PLACE, &minBufferSizeEventSizeRatio, 1, MPI_DOUBLE, MPI_MIN, intraComm); 
     237 
     238     if (minBufferSizeEventSizeRatio < 1.0) 
     239       ERROR("void CContextClient::setBufferSize(const std::map<int,StdSize>& mapSize, const std::map<int,StdSize>& maxEventSize)", 
     240             << "The buffer sizes and the maximum events sizes are incoherent."); 
     241 
     242     maxBufferedEvents = size_t(2 * minBufferSizeEventSizeRatio) // there is room for two local buffers on the server 
     243                          + size_t(minBufferSizeEventSizeRatio)  // one local buffer can always be fully used 
     244                          + 1;                                   // the other local buffer might contain only one event 
    227245   } 
    228246 
  • XIOS/trunk/src/context_client.hpp

    r726 r917  
    4949      void finalize(void); 
    5050 
    51       void setBufferSize(const std::map<int,StdSize>& mapSize); 
     51      void setBufferSize(const std::map<int,StdSize>& mapSize, const std::map<int,StdSize>& maxEventSize); 
    5252 
    5353    public: 
     
    7171      //! Mapping of server and buffer size for each connection to server 
    7272      std::map<int,StdSize> mapBufferSize_; 
     73      //! Maximum number of events that can be buffered 
     74      StdSize maxBufferedEvents; 
    7375 
    7476      //! Context for server (Only used in attached mode) 
  • XIOS/trunk/src/node/context.cpp

    r909 r917  
    274274#undef DECLARE_NODE_PAR 
    275275 
    276      std::map<int, StdSize> bufferSize = getAttributesBufferSize(); 
    277      std::map<int, StdSize> dataBufferSize = getDataBufferSize(); 
     276     std::map<int, StdSize> maxEventSize; 
     277     std::map<int, StdSize> bufferSize = getAttributesBufferSize(maxEventSize); 
     278     std::map<int, StdSize> dataBufferSize = getDataBufferSize(maxEventSize); 
    278279 
    279280     std::map<int, StdSize>::iterator it, ite = dataBufferSize.end(); 
     
    288289     } 
    289290 
     291     // We consider that the minimum buffer size is also the minimum event size 
     292     ite = maxEventSize.end(); 
     293     for (it = maxEventSize.begin(); it != ite; ++it) 
     294       if (it->second < minBufferSize) it->second = minBufferSize; 
     295 
    290296     if (client->isServerLeader()) 
    291297     { 
    292298       const std::list<int>& ranks = client->getRanksServerLeader(); 
    293299       for (std::list<int>::const_iterator itRank = ranks.begin(), itRankEnd = ranks.end(); itRank != itRankEnd; ++itRank) 
    294          if (!bufferSize.count(*itRank)) bufferSize[*itRank] = minBufferSize; 
    295      } 
    296  
    297      if (!bufferSize.empty()) 
    298        client->setBufferSize(bufferSize); 
     300         if (!bufferSize.count(*itRank)) bufferSize[*itRank] = maxEventSize[*itRank] = minBufferSize; 
     301     } 
     302 
     303     client->setBufferSize(bufferSize, maxEventSize); 
    299304   } 
    300305 
     
    837842   } 
    838843 
    839    std::map<int, StdSize> CContext::getAttributesBufferSize() 
     844   /*! 
     845    * Compute the required buffer size to send the attributes (mostly those grid related). 
     846    * 
     847    * \param maxEventSize [in/out] the size of the bigger event for each connected server 
     848    */ 
     849   std::map<int, StdSize> CContext::getAttributesBufferSize(std::map<int, StdSize>& maxEventSize) 
    840850   { 
    841851     std::map<int, StdSize> attributesSize; 
     
    860870             if (attributesSize[it->first] < it->second) 
    861871               attributesSize[it->first] = it->second; 
     872 
     873             if (maxEventSize[it->first] < it->second) 
     874               maxEventSize[it->first] = it->second; 
    862875           } 
    863876         } 
     
    868881   } 
    869882 
    870    std::map<int, StdSize> CContext::getDataBufferSize() 
     883   /*! 
     884    * Compute the required buffer size to send the fields data. 
     885    * 
     886    * \param maxEventSize [in/out] the size of the bigger event for each connected server 
     887    */ 
     888   std::map<int, StdSize> CContext::getDataBufferSize(std::map<int, StdSize>& maxEventSize) 
    871889   { 
    872890     CFile::mode_attr::t_enum mode = hasClient ? CFile::mode_attr::write : CFile::mode_attr::read; 
     
    897915             else if (dataSize[it->first] < it->second) 
    898916               dataSize[it->first] = it->second; 
     917 
     918             if (maxEventSize[it->first] < it->second) 
     919               maxEventSize[it->first] = it->second; 
    899920           } 
    900921         } 
  • XIOS/trunk/src/node/context.hpp

    r823 r917  
    123123         void postProcessing(); 
    124124 
    125          std::map<int, StdSize> getAttributesBufferSize(); 
    126          std::map<int, StdSize> getDataBufferSize(); 
     125         std::map<int, StdSize> getAttributesBufferSize(std::map<int, StdSize>& maxEventSize); 
     126         std::map<int, StdSize> getDataBufferSize(std::map<int, StdSize>& maxEventSize); 
    127127         void setClientServerBuffer(); 
    128128 
Note: See TracChangeset for help on using the changeset viewer.