Changeset 1856 for XIOS


Ignore:
Timestamp:
02/05/20 16:02:18 (4 years ago)
Author:
ymipsl
Message:

Impose reproducibility when interpolation weights are read from file. MPI messages must be received in a deterministic order.

YM

Location:
XIOS/trunk/src
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • XIOS/trunk/src/mpi_tag.hpp

    r833 r1856  
    4444#define MPI_DOMAIN_INTERPOLATION_WEIGHT 9 
    4545 
    46  
     46/* Tag for mpi communication to send and receive info of DOMAIN in domain interpolation*/ 
     47#define MPI_DOMAIN_INTERPOLATION_SOURCE_RANK 10 
    4748 
    4849#endif 
  • XIOS/trunk/src/transformation/domain_algorithm_interpolate.cpp

    r1852 r1856  
    609609  int* sendBuff = new int[nbClient]; 
    610610  int* recvBuff = new int[nbClient]; 
     611 
     612  int* sendParticipants = new int[nbClient]; 
     613  int* recvParticipants = new int[nbClient]; 
     614   
    611615  for (int i = 0; i < nbClient; ++i) 
    612616  { 
    613617    sendBuff[i] = 0; 
    614618    recvBuff[i] = 0; 
     619    sendParticipants[i]=0 ; 
     620    recvParticipants[i]=0 ; 
    615621  } 
    616622  int sendBuffSize = 0; 
     
    627633    } 
    628634    sendBuff[itMap->first] = sizeIndex; 
     635    sendParticipants[itMap->first] = 1 ; 
    629636    sendBuffSize += sizeIndex; 
    630637  } 
    631638 
    632  
    633639  MPI_Allreduce(sendBuff, recvBuff, nbClient, MPI_INT, MPI_SUM, client->intraComm); 
     640  MPI_Allreduce(sendParticipants, recvParticipants, nbClient, MPI_INT, MPI_SUM, client->intraComm); 
    634641 
    635642  int* sendIndexDestBuff = new int [sendBuffSize]; 
     
    657664      } 
    658665    } 
     666     
     667    sendRequest.push_back(MPI_Request()); 
     668    MPI_Isend(&clientRank, 
     669             1, 
     670             MPI_INT, 
     671             itMap->first, 
     672             MPI_DOMAIN_INTERPOLATION_SOURCE_RANK, 
     673             client->intraComm, 
     674             &sendRequest.back()); 
    659675 
    660676    sendRequest.push_back(MPI_Request()); 
     
    686702 
    687703  int recvBuffSize = recvBuff[clientRank]; 
     704  int numberOfParticipants = recvParticipants[clientRank] ; 
     705 
    688706  int* recvIndexDestBuff = new int [recvBuffSize]; 
    689707  int* recvIndexSrcBuff  = new int [recvBuffSize]; 
     
    691709  int receivedSize = 0; 
    692710  int clientSrcRank; 
    693   while (receivedSize < recvBuffSize) 
     711  
     712  
     713 // this part is done to impose a specific order for reception to retrieve reproducibility  
     714  set<int> rankOrder ;  
     715  for (int np=0 ; np < numberOfParticipants; ++np) 
     716  { 
     717    MPI_Status recvStatus; 
     718    int rank ; 
     719    MPI_Recv(&rank, 
     720             1, 
     721             MPI_INT, 
     722             MPI_ANY_SOURCE, 
     723             MPI_DOMAIN_INTERPOLATION_SOURCE_RANK, 
     724             client->intraComm, 
     725             &recvStatus); 
     726    rankOrder.insert(rank) ; 
     727  } 
     728   
     729  for (auto  clientSrcRank : rankOrder) 
    694730  { 
    695731    MPI_Status recvStatus; 
     
    697733             recvBuffSize, 
    698734             MPI_INT, 
    699              MPI_ANY_SOURCE, 
     735             clientSrcRank, 
    700736             MPI_DOMAIN_INTERPOLATION_DEST_INDEX, 
    701737             client->intraComm, 
     
    704740    int countBuff = 0; 
    705741    MPI_Get_count(&recvStatus, MPI_INT, &countBuff); 
    706     clientSrcRank = recvStatus.MPI_SOURCE; 
     742//    clientSrcRank = recvStatus.MPI_SOURCE; 
    707743 
    708744    MPI_Recv((recvIndexSrcBuff + receivedSize), 
     
    729765    receivedSize += countBuff; 
    730766  } 
    731  
    732   std::vector<MPI_Status> requestStatus(sendRequest.size()); 
     767   
    733768  MPI_Waitall(sendRequest.size(), &sendRequest[0], MPI_STATUS_IGNORE); 
    734769 
     770  delete [] sendParticipants ; 
     771  delete [] recvParticipants ; 
    735772  delete [] sendIndexDestBuff; 
    736773  delete [] sendIndexSrcBuff; 
Note: See TracChangeset for help on using the changeset viewer.