source: XIOS/trunk/src/context_server.cpp @ 1114

Last change on this file since 1114 was 1033, checked in by rlacroix, 7 years ago

Make the XIOS server(s) completely non-blocking.

This fixes some deadlocks caused by bugs in the communication protocol when using inputs and multiple contexts.

  • Property copyright set to
    Software name : XIOS (Xml I/O Server)
    http://forge.ipsl.jussieu.fr/ioserver
    Creation date : January 2009
    Licence : CeCILL version 2
    see license file in root directory : Licence_CeCILL_V2-en.txt
    or http://www.cecill.info/licences/Licence_CeCILL_V2-en.html
    Holder : CEA/LSCE (Laboratoire des Sciences du CLimat et de l'Environnement)
    CNRS/IPSL (Institut Pierre Simon Laplace)
    Project Manager : Yann Meurdesoif
    yann.meurdesoif@cea.fr
  • Property svn:eol-style set to native
File size: 7.9 KB
RevLine 
[300]1#include "context_server.hpp"
2#include "buffer_in.hpp"
3#include "type.hpp"
4#include "context.hpp"
[352]5#include "object_template.hpp"
6#include "group_template.hpp"
7#include "attribute_template.hpp"
[300]8#include "domain.hpp"
[352]9#include "field.hpp"
10#include "file.hpp"
11#include "grid.hpp"
[382]12#include "mpi.hpp"
[347]13#include "tracer.hpp"
14#include "timer.hpp"
[401]15#include "cxios.hpp"
[492]16#include "event_scheduler.hpp"
17#include "server.hpp"
18#include <boost/functional/hash.hpp>
[300]19
20
21
[335]22namespace xios
[300]23{
24
[345]25  CContextServer::CContextServer(CContext* parent,MPI_Comm intraComm_,MPI_Comm interComm_)
[300]26  {
[549]27    context=parent;
28    intraComm=intraComm_;
29    MPI_Comm_size(intraComm,&intraCommSize);
30    MPI_Comm_rank(intraComm,&intraCommRank);
31    interComm=interComm_;
32    int flag;
33    MPI_Comm_test_inter(interComm,&flag);
[300]34    if (flag) MPI_Comm_remote_size(interComm,&commSize);
[549]35    else  MPI_Comm_size(interComm,&commSize);
36    currentTimeLine=0;
37    scheduled=false;
38    finished=false;
[509]39
[549]40    boost::hash<string> hashString;
41    hashId=hashString(context->getId());
[492]42
[300]43  }
44  void CContextServer::setPendingEvent(void)
45  {
[549]46    pendingEvent=true;
[300]47  }
[489]48
[300]49  bool CContextServer::hasPendingEvent(void)
50  {
[549]51    return pendingEvent;
[300]52  }
[489]53
[597]54  bool CContextServer::hasFinished(void)
55  {
56    return finished;
57  }
58
[1033]59  bool CContextServer::eventLoop(bool enableEventsProcessing /*= true*/)
[300]60  {
[549]61    listen();
62    checkPendingRequest();
[1033]63    if (enableEventsProcessing)
64      processEvents();
[549]65    return finished;
[300]66  }
67
68  void CContextServer::listen(void)
69  {
70    int rank;
[549]71    int flag;
72    int count;
73    char * addr;
[489]74    MPI_Status status;
[300]75    map<int,CServerBuffer*>::iterator it;
[489]76
[300]77    for(rank=0;rank<commSize;rank++)
78    {
79      if (pendingRequest.find(rank)==pendingRequest.end())
80      {
[549]81        traceOff();
[489]82        MPI_Iprobe(rank,20,interComm,&flag,&status);
[549]83        traceOn();
[300]84        if (flag==true)
85        {
[549]86          it=buffers.find(rank);
[597]87          if (it==buffers.end()) // Receive the buffer size and allocate the buffer
[300]88          {
[509]89            StdSize buffSize = 0;
90            MPI_Recv(&buffSize, 1, MPI_LONG, rank, 20, interComm, &status);
[511]91            mapBufferSize_.insert(std::make_pair(rank, buffSize));
[549]92            it=(buffers.insert(pair<int,CServerBuffer*>(rank,new CServerBuffer(buffSize)))).first;
[300]93          }
[509]94          else
95          {
[549]96            MPI_Get_count(&status,MPI_CHAR,&count);
[509]97            if (it->second->isBufferFree(count))
98            {
[549]99              addr=(char*)it->second->getBuffer(count);
100              MPI_Irecv(addr,count,MPI_CHAR,rank,20,interComm,&pendingRequest[rank]);
101              bufferRequest[rank]=addr;
[509]102            }
103          }
[300]104        }
105      }
106    }
107  }
[489]108
[300]109  void CContextServer::checkPendingRequest(void)
110  {
111    map<int,MPI_Request>::iterator it;
[549]112    list<int> recvRequest;
[300]113    list<int>::iterator itRecv;
[549]114    int rank;
115    int flag;
116    int count;
117    MPI_Status status;
[489]118
[300]119    for(it=pendingRequest.begin();it!=pendingRequest.end();it++)
120    {
[549]121      rank=it->first;
122      traceOff();
123      MPI_Test(& it->second, &flag, &status);
124      traceOn();
[300]125      if (flag==true)
126      {
[549]127        recvRequest.push_back(rank);
128        MPI_Get_count(&status,MPI_CHAR,&count);
129        processRequest(rank,bufferRequest[rank],count);
[300]130      }
131    }
[489]132
133    for(itRecv=recvRequest.begin();itRecv!=recvRequest.end();itRecv++)
[300]134    {
[549]135      pendingRequest.erase(*itRecv);
136      bufferRequest.erase(*itRecv);
[300]137    }
138  }
[489]139
[300]140  void CContextServer::processRequest(int rank, char* buff,int count)
141  {
[489]142
[549]143    CBufferIn buffer(buff,count);
144    char* startBuffer,endBuffer;
145    int size, offset;
146    size_t timeLine;
147    map<size_t,CEventServer*>::iterator it;
[489]148
[300]149    while(count>0)
150    {
[549]151      char* startBuffer=(char*)buffer.ptr();
152      CBufferIn newBuffer(startBuffer,buffer.remain());
153      newBuffer>>size>>timeLine;
[300]154
[549]155      it=events.find(timeLine);
156      if (it==events.end()) it=events.insert(pair<int,CEventServer*>(timeLine,new CEventServer)).first;
157      it->second->push(rank,buffers[rank],startBuffer,size);
[300]158
[549]159      buffer.advance(size);
160      count=buffer.remain();
[489]161    }
162
[300]163  }
[489]164
[300]165  void CContextServer::processEvents(void)
166  {
[549]167    map<size_t,CEventServer*>::iterator it;
168    CEventServer* event;
[489]169
[549]170    it=events.find(currentTimeLine);
[489]171    if (it!=events.end())
[300]172    {
[549]173      event=it->second;
[509]174
[300]175      if (event->isFull())
176      {
[597]177        if (!scheduled && CServer::eventScheduler) // Skip event scheduling for attached mode and reception on client side
[492]178        {
[549]179          CServer::eventScheduler->registerEvent(currentTimeLine,hashId);
180          scheduled=true;
[492]181        }
[597]182        else if (!CServer::eventScheduler || CServer::eventScheduler->queryEvent(currentTimeLine,hashId) )
[492]183        {
[851]184         // When using attached mode, synchronise the processes to avoid that differents event be scheduled by differents processes
185         // The best way to properly solve this problem will be to use the event scheduler also in attached mode
186         // for now just set up a MPI barrier
[998]187         if (!CServer::eventScheduler && CXios::isServer) MPI_Barrier(intraComm) ;
[851]188
[549]189         CTimer::get("Process events").resume();
190         dispatchEvent(*event);
191         CTimer::get("Process events").suspend();
192         pendingEvent=false;
193         delete event;
194         events.erase(it);
195         currentTimeLine++;
196         scheduled = false;
[492]197        }
198      }
199    }
200  }
[489]201
[300]202  CContextServer::~CContextServer()
203  {
[549]204    map<int,CServerBuffer*>::iterator it;
205    for(it=buffers.begin();it!=buffers.end();++it) delete it->second;
[489]206  }
[300]207
208
209  void CContextServer::dispatchEvent(CEventServer& event)
210  {
[549]211    string contextName;
212    string buff;
213    int MsgSize;
214    int rank;
215    list<CEventServer::SSubEvent>::iterator it;
216    CContext::setCurrent(context->getId());
[489]217
[300]218    if (event.classId==CContext::GetType() && event.type==CContext::EVENT_ID_CONTEXT_FINALIZE)
219    {
[597]220      finished=true;
[549]221      info(20)<<"Server Side context <"<<context->getId()<<"> finalized"<<endl;
[511]222      std::map<int, StdSize>::const_iterator itbMap = mapBufferSize_.begin(),
223                                             iteMap = mapBufferSize_.end(), itMap;
224      StdSize totalBuf = 0;
225      for (itMap = itbMap; itMap != iteMap; ++itMap)
226      {
227        report(10)<< " Memory report : Context <"<<context->getId()<<"> : server side : memory used for buffer of each connection to client" << endl
228                  << "  +) With client of rank " << itMap->first << " : " << itMap->second << " bytes " << endl;
229        totalBuf += itMap->second;
230      }
[549]231      context->finalize();
232      report(0)<< " Memory report : Context <"<<context->getId()<<"> : server side : total memory used for buffer "<<totalBuf<<" bytes"<<endl;
[300]233    }
[549]234    else if (event.classId==CContext::GetType()) CContext::dispatchEvent(event);
235    else if (event.classId==CContextGroup::GetType()) CContextGroup::dispatchEvent(event);
236    else if (event.classId==CCalendarWrapper::GetType()) CCalendarWrapper::dispatchEvent(event);
237    else if (event.classId==CDomain::GetType()) CDomain::dispatchEvent(event);
238    else if (event.classId==CDomainGroup::GetType()) CDomainGroup::dispatchEvent(event);
239    else if (event.classId==CAxis::GetType()) CAxis::dispatchEvent(event);
240    else if (event.classId==CAxisGroup::GetType()) CAxisGroup::dispatchEvent(event);
[887]241    else if (event.classId==CScalar::GetType()) CScalar::dispatchEvent(event);
242    else if (event.classId==CScalarGroup::GetType()) CScalarGroup::dispatchEvent(event);
[549]243    else if (event.classId==CGrid::GetType()) CGrid::dispatchEvent(event);
244    else if (event.classId==CGridGroup::GetType()) CGridGroup::dispatchEvent(event);
245    else if (event.classId==CField::GetType()) CField::dispatchEvent(event);
246    else if (event.classId==CFieldGroup::GetType()) CFieldGroup::dispatchEvent(event);
247    else if (event.classId==CFile::GetType()) CFile::dispatchEvent(event);
248    else if (event.classId==CFileGroup::GetType()) CFileGroup::dispatchEvent(event);
249    else if (event.classId==CVariable::GetType()) CVariable::dispatchEvent(event);
[300]250    else
251    {
[549]252      ERROR("void CContextServer::dispatchEvent(CEventServer& event)",<<" Bad event class Id"<<endl);
[300]253    }
254  }
255}
Note: See TracBrowser for help on using the repository browser.