Ignore:
Timestamp:
05/31/17 10:36:33 (7 years ago)
Author:
oabramkina
Message:

Fixing a bug in context initialization. Context registration is now scheduled by the event scheduler.
Tested on Curie with test_complete and test_xios2_cmip6.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • XIOS/dev/dev_olga/src/server.cpp

    r1142 r1148  
    2828    int CServer::nbPools = 0; 
    2929    int CServer::poolId = 0; 
    30     int CServer::nbContexts_ = 0; 
     30    int CServer::nbContexts = 0; 
    3131    bool CServer::isRoot = false ; 
    3232    int CServer::rank_ = INVALID_RANK; 
     
    4444 * In case of secondary servers intraComm is created for each secondary server pool. 
    4545 * (For now the assumption is that there is one proc per pool.) 
    46  * Creates the following lists of interComms: 
     46 * Creates interComm and stores them into the following lists: 
    4747 *   classical server -- interCommLeft 
    4848 *   primary server -- interCommLeft and interCommRight 
    49  *   secondary server -- interComm for each pool. 
     49 *   secondary server -- interCommLeft for each pool. 
    5050 */ 
    5151    void CServer::initialize(void) 
     
    8585        map<unsigned long, int> colors ; 
    8686        map<unsigned long, int> leaders ; 
    87         map<unsigned long, int> lastProcesses ;  // needed in case of two server levels 
    8887        map<unsigned long, int>::iterator it ; 
    8988 
     
    9796            c++ ; 
    9897          } 
    99           if (hashAll[i] == hashServer) ++nbSrv; 
    100           //if (hashAll[i+1] != hashAll[i])  // Potential bug here! 
    101           //  lastProcesses[hashAll[i]]=i ; // It seems that lastprocesses is only used for calculating the server size. Can we count server size directly? 
     98          if (hashAll[i] == hashServer) ++serverSize_; 
    10299        } 
    103100 
     
    107104        { 
    108105          int serverRank = rank_ - leaders[hashServer]; // server proc rank starting 0 
    109           serverSize_ = nbSrv; //lastProcesses[hashServer] - leaders[hashServer] + 1; 
    110 //          serverSize_ = lastProcesses - leaders[hashServer]; 
    111106          nbPools = serverSize_ * CXios::ratioServer2 / 100; 
    112107          if ( serverRank < (serverSize_ - nbPools) ) 
     
    324319         { 
    325320           listenContext(); 
     321           listenRootContext(); 
    326322           if (!finished) listenFinalize() ; 
    327323         } 
     
    467463         int size ; 
    468464         MPI_Comm_size(intraComm,&size) ; 
    469          MPI_Request* requests= new MPI_Request[size-1] ; 
    470          MPI_Status* status= new MPI_Status[size-1] ; 
    471  
    472          for(int i=1;i<size;i++) 
    473          { 
    474             MPI_Isend(buff,count,MPI_CHAR,i,2,intraComm,&requests[i-1]) ; 
    475          } 
    476          MPI_Waitall(size-1,requests,status) ; 
    477          registerContext(buff,count,it->second.leaderRank) ; 
     465//         MPI_Request* requests= new MPI_Request[size-1] ; 
     466//         MPI_Status* status= new MPI_Status[size-1] ; 
     467         MPI_Request* requests= new MPI_Request[size] ; 
     468         MPI_Status* status= new MPI_Status[size] ; 
     469 
     470         CMessage msg ; 
     471         msg<<id<<it->second.leaderRank; 
     472         int messageSize=msg.size() ; 
     473         void * sendBuff = new char[messageSize] ; 
     474         CBufferOut sendBuffer(sendBuff,messageSize) ; 
     475         sendBuffer<<msg ; 
     476 
     477         // Include root itself in order not to have a divergence 
     478         for(int i=0; i<size; i++) 
     479         { 
     480           MPI_Isend(sendBuff,count,MPI_CHAR,i,2,intraComm,&requests[i]) ; 
     481         } 
     482 
     483//         for(int i=1;i<size;i++) 
     484//         { 
     485//            MPI_Isend(buff,count,MPI_CHAR,i,2,intraComm,&requests[i-1]) ; 
     486//         } 
     487//         MPI_Waitall(size-1,requests,status) ; 
     488//         registerContext(buff,count,it->second.leaderRank) ; 
    478489 
    479490         recvContextId.erase(it) ; 
     
    492503       static bool recept=false ; 
    493504       int rank ; 
    494        int count ; 
     505//       int count ; 
     506       static int count ; 
    495507       const int root=0 ; 
    496  
     508       boost::hash<string> hashString; 
     509       size_t hashId = hashString("RegisterContext"); 
     510 
     511       // (1) Receive context id from the root 
    497512       if (recept==false) 
    498513       { 
     
    508523         } 
    509524       } 
     525       // (2) If context id is received, save it into a buffer and register an event 
    510526       else 
    511527       { 
     
    514530         { 
    515531           MPI_Get_count(&status,MPI_CHAR,&count) ; 
    516            registerContext(buffer,count) ; 
    517            delete [] buffer ; 
     532           eventScheduler->registerEvent(nbContexts,hashId); 
     533//           registerContext(buffer,count) ; 
     534//           delete [] buffer ; 
    518535           recept=false ; 
    519536         } 
     537       } 
     538       // (3) If event has been scheduled, call register context 
     539       if (eventScheduler->queryEvent(nbContexts,hashId)) 
     540       { 
     541         registerContext(buffer,count) ; 
     542         ++nbContexts; 
     543         delete [] buffer ; 
    520544       } 
    521545     } 
     
    525549       string contextId; 
    526550       CBufferIn buffer(buff, count); 
    527        buffer >> contextId; 
     551//       buffer >> contextId; 
     552       buffer >> contextId>>leaderRank; 
    528553       CContext* context; 
    529554 
     
    537562       contextList[contextId]=context; 
    538563 
    539        // Primary or classical server: initialize its own server (CContextServer) 
     564       // Primary or classical server: create communication channel with a client 
     565       // (1) create interComm (with a client) 
     566       // (2) initialize client and server (contextClient and contextServer) 
    540567       MPI_Comm inter; 
    541568       if (serverLevel < 2) 
     
    550577 
    551578       } 
    552        // Secondary server: initialize its own server (CContextServer) 
     579       // Secondary server: create communication channel with a primary server 
     580       // (1) duplicate interComm with a primary server 
     581       // (2) initialize client and server (contextClient and contextServer) 
     582       // Remark: in the case of the secondary server there is no need to create an interComm calling MPI_Intercomm_create, 
     583       //         because interComm of CContext is defined on the same processes as the interComm of CServer. 
     584       //         So just duplicate it. 
    553585       else if (serverLevel == 2) 
    554586       { 
     
    558590       } 
    559591 
    560        // Primary server: send create context message to secondary servers and initialize its own client (CContextClient) 
     592       // Primary server: 
     593       // (1) send create context message to secondary servers 
     594       // (2) initialize communication channels with secondary servers (create contextClient and contextServer) 
    561595       if (serverLevel == 1) 
    562596       { 
     
    582616           delete [] buff ; 
    583617         } 
    584          ++nbContexts_; 
    585618       } 
    586619     } 
     
    596629         if (isFinalized) 
    597630         { 
    598 //           it->second->postFinalize(); 
    599631           contextList.erase(it) ; 
    600632           break ; 
     
    605637     } 
    606638 
    607      //! Get rank of the current process 
     639     //! Get rank of the current process in the intraComm 
    608640     int CServer::getRank() 
    609641     { 
Note: See TracChangeset for help on using the changeset viewer.