Changeset 869


Timestamp: 06/09/16 11:33:30
Author: mhnguyen
Message:

Removing the use of blocking MPI in transformations

+) Use non-blocking MPI for axis inversion (see the sketch after this message)

Test
+) On Curie
+) Tests pass
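
The substance of the change: matched blocking MPI_Send/MPI_Recv loops (tag 14 in the old code) are replaced by MPI_Irecv/MPI_Isend posted up front and completed with MPI_Waitall. A minimal sketch of that pattern, assuming nothing XIOS-specific (exchangeWithPeers, the peer list, and the one-value-per-peer buffers are illustrative, not from the changeset):

    #include <mpi.h>
    #include <vector>

    // Exchange one double with each peer: post every receive, then every
    // send, then complete all of them with a single MPI_Waitall. No rank
    // blocks inside MPI_Recv while its partner is still busy, which is the
    // serialization/deadlock risk the blocking version carried.
    void exchangeWithPeers(MPI_Comm comm, const std::vector<int>& peers,
                           std::vector<double>& sendBuf, std::vector<double>& recvBuf)
    {
      std::vector<MPI_Request> requests(2 * peers.size());
      int r = 0;
      for (std::size_t i = 0; i < peers.size(); ++i)   // receives first
        MPI_Irecv(&recvBuf[i], 1, MPI_DOUBLE, peers[i], 48, comm, &requests[r++]);
      for (std::size_t i = 0; i < peers.size(); ++i)   // then sends
        MPI_Isend(&sendBuf[i], 1, MPI_DOUBLE, peers[i], 48, comm, &requests[r++]);
      if (!requests.empty())
      {
        std::vector<MPI_Status> status(requests.size());
        MPI_Waitall(static_cast<int>(requests.size()), &requests[0], &status[0]);
      }
    }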

Location: XIOS/trunk/src
Files: 3 edited

Legend:

In the diffs below, lines prefixed with + were added, lines prefixed with - were removed, and unprefixed lines are unchanged context.
  • XIOS/trunk/src/client_client_dht_template.hpp

    r867 r869

       const Index2VectorInfoTypeMap& getInfoIndexMap() const {return indexToInfoMappingLevel_; }
    +  Index2VectorInfoTypeMap& getInfoIndexMap() {return indexToInfoMappingLevel_; }
       int getNbClient() { return nbClient_; }
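
    The added non-const overload exists so that callers owning a non-const DHT object can bind a mutable reference to the computed index-to-info map instead of copying it; axis_algorithm_inverse.cpp below does exactly that with computedGlobalIndexOnProc. A generic sketch of the const/non-const accessor pair (CLookupTable and table_ are illustrative names):

    #include <map>

    class CLookupTable
    {
      std::map<int, int> table_;
    public:
      // Read-only view, callable on const objects.
      const std::map<int, int>& getTable() const { return table_; }
      // Mutable view, added for non-const callers who want to avoid a copy.
      std::map<int, int>& getTable() { return table_; }
    };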
  • XIOS/trunk/src/transformation/axis_algorithm_inverse.cpp

    r842 r869

       */
       #include "axis_algorithm_inverse.hpp"
    -  #include "transformation_mapping.hpp"
       #include "context.hpp"
       #include "context_client.hpp"
    +  #include "axis.hpp"
    +  #include "client_client_dht_template.hpp"

       namespace xios {
     
                << "Two axis have different global size"
                << "Size of axis source " <<axisSource->getId() << " is " << axisSource->n_glo.getValue()  << std::endl
    -           << "Size of axis destionation " <<axisDestination->getId() << " is " << axisDestination->n_glo.getValue());
    +           << "Size of axis destination " <<axisDestination->getId() << " is " << axisDestination->n_glo.getValue());
       }
     }
     
       CContext* context = CContext::getCurrent();
       CContextClient* client=context->client;
    +  int clientRank = client->clientRank;
    +  int nbClient = client->clientSize;

       int niSrc     = axisSrc_->n.getValue();
       int ibeginSrc = axisSrc_->begin.getValue();
    - 
    -  CTransformationMapping transformationMap(axisDest_, axisSrc_);
    - 
    +  int nSrc = axisSrc_->index.numElements();
    + 
    +  CClientClientDHTInt::Index2VectorInfoTypeMap globalIndex2ProcRank;
    +  for (int idx = 0; idx < nSrc; ++idx)
    +  {
    +    if ((axisSrc_->mask)(idx))
    +    {
    +      globalIndex2ProcRank[(axisSrc_->index)(idx)].resize(1);
    +      globalIndex2ProcRank[(axisSrc_->index)(idx)][0] = clientRank;
    +    }
    +  }
    + 
    +  typedef boost::unordered_map<size_t, std::vector<double> > GlobalIndexMapFromSrcToDest;
    +  GlobalIndexMapFromSrcToDest globalIndexMapFromSrcToDest;
       TransformationIndexMap& transMap = this->transformationMapping_[0];
    -  TransformationWeightMap& transWeight = this->transformationWeight_[0];
    - 
    -  CTransformationMapping::DestinationIndexMap globaIndexMapFromDestToSource;
    -  TransformationIndexMap::const_iterator it = transMap.begin(), ite = transMap.end();
    +  TransformationIndexMap::const_iterator itb = transMap.begin(), ite = transMap.end(), it;
    +  CArray<size_t,1> globalSrcIndex(transMap.size());
       int localIndex = 0;
    -  for (; it != ite; ++it)
    -  {
    -    globaIndexMapFromDestToSource[it->first].push_back(make_pair(localIndex,make_pair((it->second)[0], (transWeight[it->first])[0])));
    +  for (it = itb; it != ite; ++it)
    +  {
    +    size_t srcIndex = it->second[0];
    +    globalIndexMapFromSrcToDest[srcIndex].resize(1);
    +    globalIndexMapFromSrcToDest[srcIndex][0] = it->first;
    +    globalSrcIndex(localIndex) = srcIndex;
         ++localIndex;
       }

    -  transformationMap.computeTransformationMapping(globaIndexMapFromDestToSource);
    - 
    -  const CTransformationMapping::ReceivedIndexMap& globalIndexToReceive = transformationMap.getGlobalIndexReceivedOnGridDestMapping();
    -  const CTransformationMapping::SentIndexMap& globalIndexToSend = transformationMap.getGlobalIndexSendToGridDestMapping();
    - 
    -  // Sending global index of original grid source
    -  CTransformationMapping::SentIndexMap::const_iterator itbSend = globalIndexToSend.begin(), itSend,
    -                                                       iteSend = globalIndexToSend.end();
    -  int sendBuffSize = 0;
    -  for (itSend = itbSend; itSend != iteSend; ++itSend) sendBuffSize += (itSend->second).size();
    - 
    -  typedef double Scalar;
    -  Scalar* sendBuff, *currentSendBuff;
    -  if (0 != sendBuffSize) sendBuff = new Scalar[sendBuffSize];
    -  for (StdSize idx = 0; idx < sendBuffSize; ++idx) sendBuff[idx] = NumTraits<Scalar>::sfmax();
    - 
    -  int currentBuffPosition = 0;
    -  for (itSend = itbSend; itSend != iteSend; ++itSend)
    -  {
    -    int destRank = itSend->first;
    -    const std::vector<std::pair<int,size_t> >& globalIndexOfCurrentGridSourceToSend = itSend->second;
    -    int countSize = globalIndexOfCurrentGridSourceToSend.size();
    -    for (int idx = 0; idx < (countSize); ++idx)
    -    {
    -      int index = globalIndexOfCurrentGridSourceToSend[idx].first;
    -      sendBuff[idx+currentBuffPosition] = (axisSrc_->value)(index);
    -    }
    -    currentSendBuff = sendBuff + currentBuffPosition;
    -    MPI_Send(currentSendBuff, countSize, MPI_DOUBLE, destRank, 14, client->intraComm);
    -    currentBuffPosition += countSize;
    -  }
    - 
    -  // Receiving global index of grid source sending from current grid source
    -  CTransformationMapping::ReceivedIndexMap::const_iterator itbRecv = globalIndexToReceive.begin(), itRecv,
    -                                                           iteRecv = globalIndexToReceive.end();
    -  int recvBuffSize = 0;
    -  for (itRecv = itbRecv; itRecv != iteRecv; ++itRecv) recvBuffSize += (itRecv->second).size();
    - 
    -  Scalar* recvBuff, *currentRecvBuff;
    -  if (0 != recvBuffSize) recvBuff = new Scalar [recvBuffSize];
    -  for (StdSize idx = 0; idx < recvBuffSize; ++idx) recvBuff[idx] = NumTraits<Scalar>::sfmax();
    - 
    -  int currentRecvBuffPosition = 0;
    -  for (itRecv = itbRecv; itRecv != iteRecv; ++itRecv)
    -  {
    -    MPI_Status status;
    -    int srcRank = itRecv->first;
    -    int countSize = (itRecv->second).size();
    -    currentRecvBuff = recvBuff + currentRecvBuffPosition;
    -    MPI_Recv(currentRecvBuff, countSize, MPI_DOUBLE, srcRank, 14, client->intraComm, &status);
    -    currentRecvBuffPosition += countSize;
    -  }
    - 
    -  int ibeginDest = axisDest_->begin.getValue();
    -  currentRecvBuff = recvBuff;
    -  for (itRecv = itbRecv; itRecv != iteRecv; ++itRecv)
    -  {
    -    int countSize = (itRecv->second).size();
    -    for (int idx = 0; idx < countSize; ++idx, ++currentRecvBuff)
    -    {
    -      int index = ((itRecv->second)[idx]).localIndex - ibeginDest;
    -      (axisDest_->value)(index) = *currentRecvBuff;
    - //     int ssize = (itRecv->second)[idx].size();
    - //     for (int i = 0; i < ssize; ++i)
    - //     {
    - //       int index = ((itRecv->second)[idx][i]).first - ibeginDest;
    - //       (axisDest_->value)(index) = *currentRecvBuff;
    - //     }
    -    }
    -  }
    - 
    -  if (0 != sendBuffSize) delete [] sendBuff;
    -  if (0 != recvBuffSize) delete [] recvBuff;
    - }
    - 
    - }
    +  CClientClientDHTInt dhtIndexProcRank(globalIndex2ProcRank, client->intraComm);
    +  dhtIndexProcRank.computeIndexInfoMapping(globalSrcIndex);
    +  CClientClientDHTInt::Index2VectorInfoTypeMap& computedGlobalIndexOnProc = dhtIndexProcRank.getInfoIndexMap();
    +  boost::unordered_map<int, std::vector<size_t> > globalSrcIndexSendToProc;
    +  for (int idx = 0; idx < localIndex; ++idx)
    +  {
    +    size_t tmpIndex = globalSrcIndex(idx);
    +    if (1 == computedGlobalIndexOnProc.count(tmpIndex))
    +    {
    +      std::vector<int>& tmpVec = computedGlobalIndexOnProc[tmpIndex];
    +      globalSrcIndexSendToProc[tmpVec[0]].push_back(tmpIndex);
    +    }
    +  }
    + 
    + 
    +  boost::unordered_map<int, std::vector<size_t> >::const_iterator itbIndex = globalSrcIndexSendToProc.begin(), itIndex,
    +                                                                  iteIndex = globalSrcIndexSendToProc.end();
    +  std::map<int,int> sendRankSizeMap,recvRankSizeMap;
    +  int connectedClient = globalSrcIndexSendToProc.size();
    +  int* recvCount=new int[nbClient];
    +  int* displ=new int[nbClient];
    +  int* sendRankBuff=new int[connectedClient];
    +  int* sendSizeBuff=new int[connectedClient];
    +  int n = 0;
    +  for (itIndex = itbIndex; itIndex != iteIndex; ++itIndex, ++n)
    +  {
    +    sendRankBuff[n] = itIndex->first;
    +    int sendSize = itIndex->second.size();
    +    sendSizeBuff[n] = sendSize;
    +    sendRankSizeMap[itIndex->first] = sendSize;
    +  }
    +  MPI_Allgather(&connectedClient,1,MPI_INT,recvCount,1,MPI_INT,client->intraComm);
    + 
    +  displ[0]=0 ;
    +  for(int n=1;n<nbClient;n++) displ[n]=displ[n-1]+recvCount[n-1];
    +  int recvSize=displ[nbClient-1]+recvCount[nbClient-1];
    +  int* recvRankBuff=new int[recvSize];
    +  int* recvSizeBuff=new int[recvSize];
    +  MPI_Allgatherv(sendRankBuff,connectedClient,MPI_INT,recvRankBuff,recvCount,displ,MPI_INT,client->intraComm);
    +  MPI_Allgatherv(sendSizeBuff,connectedClient,MPI_INT,recvSizeBuff,recvCount,displ,MPI_INT,client->intraComm);
    +  for (int i = 0; i < nbClient; ++i)
    +  {
    +    int currentPos = displ[i];
    +    for (int j = 0; j < recvCount[i]; ++j)
    +      if (recvRankBuff[currentPos+j] == clientRank)
    +      {
    +        recvRankSizeMap[i] = recvSizeBuff[currentPos+j];
    +      }
    +  }
    + 
    +  // Sending global index of grid source to corresponding process as well as the corresponding mask
    +  std::vector<MPI_Request> requests;
    +  std::vector<MPI_Status> status;
    +  boost::unordered_map<int, unsigned long* > recvGlobalIndexSrc;
    +  boost::unordered_map<int, double* > sendValueToDest;
    +  for (std::map<int,int>::const_iterator itRecv = recvRankSizeMap.begin(); itRecv != recvRankSizeMap.end(); ++itRecv)
    +  {
    +    int recvRank = itRecv->first;
    +    int recvSize = itRecv->second;
    +    recvGlobalIndexSrc[recvRank] = new unsigned long [recvSize];
    +    sendValueToDest[recvRank] = new double [recvSize];
    + 
    +    requests.push_back(MPI_Request());
    +    MPI_Irecv(recvGlobalIndexSrc[recvRank], recvSize, MPI_UNSIGNED_LONG, recvRank, 46, client->intraComm, &requests.back());
    +  }
    + 
    +  boost::unordered_map<int, unsigned long* > sendGlobalIndexSrc;
    +  boost::unordered_map<int, double* > recvValueFromSrc;
    +  for (itIndex = itbIndex; itIndex != iteIndex; ++itIndex)
    +  {
    +    int sendRank = itIndex->first;
    +    int sendSize = sendRankSizeMap[sendRank];
    +    const std::vector<size_t>& sendIndexMap = itIndex->second;
    +    std::vector<size_t>::const_iterator itbSend = sendIndexMap.begin(), iteSend = sendIndexMap.end(), itSend;
    +    sendGlobalIndexSrc[sendRank] = new unsigned long [sendSize];
    +    recvValueFromSrc[sendRank] = new double [sendSize];
    +    int countIndex = 0;
    +    for (itSend = itbSend; itSend != iteSend; ++itSend)
    +    {
    +      sendGlobalIndexSrc[sendRank][countIndex] = *itSend;
    +      ++countIndex;
    +    }
    + 
    +    // Send global index source and mask
    +    requests.push_back(MPI_Request());
    +    MPI_Isend(sendGlobalIndexSrc[sendRank], sendSize, MPI_UNSIGNED_LONG, sendRank, 46, client->intraComm, &requests.back());
    +  }
    + 
    +  status.resize(requests.size());
    +  MPI_Waitall(requests.size(), &requests[0], &status[0]);
    + 
    + 
    +  std::vector<MPI_Request>().swap(requests);
    +  std::vector<MPI_Status>().swap(status);
    + 
    +  // Okie, on destination side, we will wait for information of masked index of source
    +  for (std::map<int,int>::const_iterator itSend = sendRankSizeMap.begin(); itSend != sendRankSizeMap.end(); ++itSend)
    +  {
    +    int recvRank = itSend->first;
    +    int recvSize = itSend->second;
    + 
    +    requests.push_back(MPI_Request());
    +    MPI_Irecv(recvValueFromSrc[recvRank], recvSize, MPI_DOUBLE, recvRank, 48, client->intraComm, &requests.back());
    +  }
    + 
    +  for (std::map<int,int>::const_iterator itRecv = recvRankSizeMap.begin(); itRecv != recvRankSizeMap.end(); ++itRecv)
    +  {
    +    int recvRank = itRecv->first;
    +    int recvSize = itRecv->second;
    +    double* sendValue = sendValueToDest[recvRank];
    +    unsigned long* recvIndexSrc = recvGlobalIndexSrc[recvRank];
    +    int realSendSize = 0;
    +    for (int idx = 0; idx < recvSize; ++idx)
    +    {
    +      size_t globalIndex = *(recvIndexSrc+idx);
    +      int localIndex = globalIndex - ibeginSrc;
    +      *(sendValue + idx) = axisSrc_->value(localIndex);
    +    }
    +    // Okie, now inform the destination which source index are masked
    +    requests.push_back(MPI_Request());
    +    MPI_Isend(sendValueToDest[recvRank], recvSize, MPI_DOUBLE, recvRank, 48, client->intraComm, &requests.back());
    +  }
    +  status.resize(requests.size());
    +  MPI_Waitall(requests.size(), &requests[0], &status[0]);
    + 
    + 
    +  for (std::map<int,int>::const_iterator itSend = sendRankSizeMap.begin(); itSend != sendRankSizeMap.end(); ++itSend)
    +  {
    +    int recvRank = itSend->first;
    +    int recvSize = itSend->second;
    + 
    +    double* recvValue = recvValueFromSrc[recvRank];
    +    unsigned long* recvIndex = sendGlobalIndexSrc[recvRank];
    +    for (int idx = 0; idx < recvSize; ++idx)
    +    {
    +      size_t globalIndex = *(recvIndex+idx);
    +      int localIndex = globalIndex - axisDest_->begin;
    +      axisDest_->value(localIndex) = *(recvValue + idx);
    +    }
    +  }
    + 
    +  delete [] recvCount;
    +  delete [] displ;
    +  delete [] sendRankBuff;
    +  delete [] recvRankBuff;
    +  delete [] sendSizeBuff;
    +  delete [] recvSizeBuff;
    + 
    +  boost::unordered_map<int, double* >::const_iterator itChar;
    +  for (itChar = sendValueToDest.begin(); itChar != sendValueToDest.end(); ++itChar)
    +    delete [] itChar->second;
    +  for (itChar = recvValueFromSrc.begin(); itChar != recvValueFromSrc.end(); ++itChar)
    +    delete [] itChar->second;
    +  boost::unordered_map<int, unsigned long* >::const_iterator itLong;
    +  for (itLong = sendGlobalIndexSrc.begin(); itLong != sendGlobalIndexSrc.end(); ++itLong)
    +    delete [] itLong->second;
    +  for (itLong = recvGlobalIndexSrc.begin(); itLong != recvGlobalIndexSrc.end(); ++itLong)
    +    delete [] itLong->second;
    + }
    + 
    + }
  • XIOS/trunk/src/transformation/axis_algorithm_inverse.hpp

    r827 r869

       #include "axis_algorithm_transformation.hpp"
    -  #include "axis.hpp"

       namespace xios {
    + 
    +   class CAxis;
       /*!
         \class CAxisAlgorithmInverse
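
    The header change swaps #include "axis.hpp" for a forward declaration: the header only names CAxis through pointers and references, so the full definition is needed only in axis_algorithm_inverse.cpp, which now includes axis.hpp itself. A minimal sketch of the idiom (all names except CAxis are illustrative):

    // some_algorithm.hpp -- needs only the name CAxis, not its layout
    class CAxis;                   // forward declaration replaces the #include

    class CSomeAlgorithm
    {
    public:
      void apply(CAxis* axisSrc);  // pointer parameters compile against the declaration
    };

    // some_algorithm.cpp -- member access requires the complete type
    #include "some_algorithm.hpp"
    #include "axis.hpp"
    void CSomeAlgorithm::apply(CAxis* axisSrc) { /* ... axisSrc->... */ }

    This trims the include graph, so translation units that pull in the algorithm header no longer recompile whenever axis.hpp changes.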