source: XIOS/dev/XIOS_DEV_CMIP6/src/server_distribution_description.cpp @ 1232

Last change on this file since 1232 was 1232, checked in by mhnguyen, 7 years ago

Fixing the blocking problem where there are more servers than the number of grid band distribution

+) Correct this problem not only for writing but also for reading
+) Allow "zero-size" domain, axis (i.e: domain, axis with ni = 0, and/or nj=0)

Test
+) On Curie
+) Work in both cases: Read and Write data

File size: 13.6 KB
Line 
1/*!
2   \file server_distribution_description.cpp
3   \author Ha NGUYEN
4   \since 04 Jan 2015
5   \date 11 Jan 2016
6
7   \brief Description of index distribution on server(s).
8 */
9
10#include "server_distribution_description.hpp"
11#include "exception.hpp"
12
13namespace xios
14{
15  /*!
16  \param [in] globalDimensionSize global dimension of grid
17  \param [in] nServer number of server
18  \param [in] serType type of server distribution. For now, we can distribute server by band or plan
19  */
20CServerDistributionDescription::CServerDistributionDescription(const std::vector<int>& globalDimensionSize,
21                                                               int nServer,
22                                                               ServerDistributionType serType)
23  : nGlobal_(globalDimensionSize), indexBegin_(), dimensionSizes_(), globalIndex_(),
24    vecGlobalIndex_(), serverType_(serType), nServer_(nServer), positionDimensionDistributed_(1)
25{
26}
27
28CServerDistributionDescription::~CServerDistributionDescription()
29{ /* Nothing to do */ }
30
31/*!
32  Compute pre-defined global index distribution of server(s).
33  \param [in] doComputeGlobalIndex flag to compute global index on each server. By default, false
34
35*/
36void CServerDistributionDescription::computeServerDistribution(bool doComputeGlobalIndex,
37                                                               int positionDimensionDistributed)
38{
39  switch (serverType_) {
40    case BAND_DISTRIBUTION:
41      computeBandDistribution(nServer_, positionDimensionDistributed);
42      break;
43    default:
44      break;
45  }
46
47  if (doComputeGlobalIndex)
48  {
49    vecGlobalIndex_.resize(nServer_);
50    int dim = nGlobal_.size();
51    std::vector<int> currentIndex(dim);
52
53    for (int idxServer = 0; idxServer < nServer_; ++idxServer)
54    {
55      size_t ssize = 1, idx = 0;
56      for (int j = 0; j < dim; ++j) ssize *= dimensionSizes_[idxServer][j];
57      vecGlobalIndex_[idxServer].resize(ssize);
58
59      std::vector<int> idxLoop(dim,0);
60
61      int innerLoopSize = dimensionSizes_[idxServer][0];
62
63      while (idx<ssize)
64      {
65        for (int idxDim = 0; idxDim < dim-1; ++idxDim)
66        {
67          if (idxLoop[idxDim] == dimensionSizes_[idxServer][idxDim])
68          {
69            idxLoop[idxDim] = 0;
70            ++idxLoop[idxDim+1];
71          }
72        }
73
74        for (int idxDim = 1; idxDim < dim; ++idxDim)  currentIndex[idxDim] = idxLoop[idxDim] + indexBegin_[idxServer][idxDim];
75
76        size_t mulDim, globalIndex;
77        for (int j = 0; j < innerLoopSize; ++j)
78        {
79          mulDim = 1;
80          globalIndex = j + indexBegin_[idxServer][0];
81
82          for (int k = 1; k < dim; ++k)
83          {
84            mulDim *= nGlobal_[k-1];
85            globalIndex += currentIndex[k] * mulDim;
86          }
87          vecGlobalIndex_[idxServer](idx) = globalIndex;
88          ++idx;
89        }
90        idxLoop[0] += innerLoopSize;
91      }
92    }
93  }
94}
95
96/*!
97  Compute global index assigned to a server with a range.E.g: if a grid has 100 points and
98  there are 2 servers, the first one takes index from 0 to 49, the second has index from 50 to 99
99  \param [in] indexBeginEnd begining and ending index of range
100  \param [in] positionDimensionDistributed dimension of server on which we make the cut.
101*/
102std::vector<int> CServerDistributionDescription::computeServerGlobalIndexInRange(const std::pair<size_t, size_t>& indexBeginEnd,
103                                                                     int positionDimensionDistributed)
104{
105  int nBand  = 0;
106  switch (serverType_) {
107    case BAND_DISTRIBUTION:
108      nBand = computeBandDistribution(nServer_, positionDimensionDistributed);
109      break;
110    default:
111      break;
112  }
113
114  size_t indexBegin = indexBeginEnd.first;
115  size_t indexEnd   = indexBeginEnd.second;
116  if (indexBegin > indexEnd)
117     ERROR("CServerDistributionDescription::computeServerGlobalIndexInRange",
118           << "Index begin is larger than index end");
119
120  globalIndex_.rehash(std::ceil((indexEnd-indexBegin+1)/globalIndex_.max_load_factor()));
121
122  int dim = nGlobal_.size();
123  std::vector<int> currentIndex(dim);
124
125  for (int idxServer = 0; idxServer < nBand; ++idxServer)
126  {
127    size_t ssize = 1, idx = 0;
128    for (int j = 0; j < dim; ++j) ssize *= dimensionSizes_[idxServer][j];
129
130    std::vector<int> idxLoop(dim,0);
131    int innerLoopSize = dimensionSizes_[idxServer][0];
132
133    while (idx<ssize)
134    {
135      for (int idxDim = 0; idxDim < dim-1; ++idxDim)
136      {
137        if (idxLoop[idxDim] == dimensionSizes_[idxServer][idxDim])
138        {
139          idxLoop[idxDim] = 0;
140          ++idxLoop[idxDim+1];
141        }
142      }
143
144      for (int idxDim = 1; idxDim < dim; ++idxDim)  currentIndex[idxDim] = idxLoop[idxDim] + indexBegin_[idxServer][idxDim];
145
146      size_t mulDim, globalIndex;
147      for (int j = 0; j < innerLoopSize; ++j)
148      {
149        mulDim = 1;
150        globalIndex = j + indexBegin_[idxServer][0];
151
152        for (int k = 1; k < dim; ++k)
153        {
154          mulDim *= nGlobal_[k-1];
155          globalIndex += (currentIndex[k])*mulDim;
156        }
157        if ((indexBegin <= globalIndex) && (globalIndex <= indexEnd))
158          globalIndex_[globalIndex] = idxServer;
159        ++idx;
160      }
161      idxLoop[0] += innerLoopSize;
162    }
163  }
164
165    // List of servers without distribution (cause total number of server is greater than number of bands, for example)
166  std::vector<int> zeroIndexServer(nServer_-nBand); 
167  for (int idxServer = nBand; idxServer < nServer_; ++idxServer)
168    zeroIndexServer[idxServer-nBand] = idxServer;
169
170  return zeroIndexServer;
171}
172
173/*!
174  Compute the global index of grid elements (domain, axis) and their associated server rank.
175  Each client knows the general distribution of servers and from which they can compute the pieces of information to hold
176  \param [out] indexServerOnElement global index of each element as well as the corresponding server which contains these indices
177  \param [in] clientRank rank of client
178  \param [in] clientSize number of client
179  \param [in] axisDomainOrder the order of element in grid (2 for domain, 1 for axis, 0 for scalar)
180  \param [in] positionDimensionDistributed dimension of server on which we make the cut.
181*/
182std::vector<int> CServerDistributionDescription::computeServerGlobalByElement(std::vector<boost::unordered_map<size_t,std::vector<int> > >& indexServerOnElement,
183                                                                              int clientRank,
184                                                                              int clientSize,
185                                                                              const CArray<int,1>& axisDomainOrder,
186                                                                              int positionDimensionDistributed)
187{
188  int nBand  = 0;
189  switch (serverType_) {
190    case BAND_DISTRIBUTION:
191      nBand = computeBandDistribution(nServer_, positionDimensionDistributed);
192      break;
193    default:
194      break;
195  }
196
197  int nbElement = axisDomainOrder.numElements();
198  indexServerOnElement.resize(nbElement);
199  int idx = 0;
200  std::vector<int> idxMap(nbElement);
201  for (int i = 0; i < nbElement; ++i)
202  {
203    idxMap[i] = idx;
204    if (2 == axisDomainOrder(i)) idx += 2;
205    else ++idx;
206  }
207
208  for (int idxServer = 0; idxServer < nBand; ++idxServer)
209  {
210    std::vector<int> elementDimension(4);
211    for (int i = 0; i < nbElement; ++i)
212    {
213      int elementSize = 1;
214      if (2 == axisDomainOrder(i))
215      {
216        elementSize *= dimensionSizes_[idxServer][idxMap[i]] * dimensionSizes_[idxServer][idxMap[i]+1];
217        elementDimension[0] = indexBegin_[idxServer][idxMap[i]];
218        elementDimension[1] = indexBegin_[idxServer][idxMap[i]+1];
219        elementDimension[2] = dimensionSizes_[idxServer][idxMap[i]];
220        elementDimension[3] = dimensionSizes_[idxServer][idxMap[i]+1];
221      }
222
223      else if (1 == axisDomainOrder(i))
224      {
225        elementSize *= dimensionSizes_[idxServer][idxMap[i]];
226        elementDimension[0] = indexBegin_[idxServer][idxMap[i]];
227        elementDimension[1] = 0;
228        elementDimension[2] = dimensionSizes_[idxServer][idxMap[i]];
229        elementDimension[3] = 1;
230      }
231      else
232      {
233        elementSize *= dimensionSizes_[idxServer][idxMap[i]];
234        elementDimension[0] = 0;
235        elementDimension[1] = 0;
236        elementDimension[2] = 1;
237        elementDimension[3] = 1;
238      }
239
240      int rangeBegin, rangeSize;
241      computeRangeProcIndex(clientRank, clientSize, elementSize, rangeBegin, rangeSize);
242
243      size_t globalIndexElement;
244      idx = 0; int idxRange = 0;
245      for (int k = 0; k < elementDimension[3]; ++k)
246        for (int l = 0; l < elementDimension[2]; ++l)
247        {
248          globalIndexElement = (l+elementDimension[0]) + (k+elementDimension[1])*elementDimension[2];
249          if ((rangeBegin <= idx) && (idxRange < rangeSize))
250          {
251            indexServerOnElement[i][globalIndexElement].push_back(idxServer);
252            ++idxRange;
253          }
254          ++idx;
255        }
256    }
257  }
258
259  // List of servers without distribution (cause total number of server is greater than number of bands, for example)
260  std::vector<int> zeroIndexServer(nServer_-nBand); 
261  for (int idxServer = nBand; idxServer < nServer_; ++idxServer)
262    zeroIndexServer[idxServer-nBand] = idxServer;
263
264  return zeroIndexServer;
265}
266
267/*!
268  Compute a range of index on server which a client holds
269  For a range of index on a specific server, each client can hold a piece of the index range
270  If the range size is smaller than the number of client, there are some clients holding the same index
271  \param [in] clientRank rank of client
272  \param [in] clientSize number of client
273  \param [in] rangeProcSize index range size
274  \param [out] rangeBegin begin of range index a client holds
275  \param [out] rangeSize size of range index a client holds
276*/
277void CServerDistributionDescription::computeRangeProcIndex(int clientRank,
278                                                           int clientSize,
279                                                           int rangeProcSize,
280                                                           int& rangeBegin,
281                                                           int& rangeSize)
282{
283  if (rangeProcSize < clientSize)
284  {
285    int rangeIndex = 0;
286    for (int idx = 0; idx < clientSize; ++idx)
287    {
288      if (idx == clientRank)
289      {
290        rangeBegin = rangeIndex;
291        rangeSize = 1;
292      }
293      ++rangeIndex;
294      if (rangeIndex == rangeProcSize) rangeIndex = 0;
295    }
296    return;
297  }
298
299  int range, indexBegin = 0;
300  for (int i = 0; i < clientSize; ++i)
301  {
302    range = rangeProcSize / clientSize;
303    if (i < (rangeProcSize%clientSize)) ++range;
304    if (i == clientRank) break;
305    indexBegin += range;
306  }
307  rangeBegin = indexBegin;
308  rangeSize = range;
309}
310
311/*!
312  Compute global index of servers with band distribution
313  \param [in] nServer number of server
314*/
315int CServerDistributionDescription::computeBandDistribution(int nServer, int positionDimensionDistributed)
316{
317  int dim = nGlobal_.size();
318  positionDimensionDistributed_ = positionDimensionDistributed;
319  if (1 == dim) positionDimensionDistributed_ = 0;
320  if (positionDimensionDistributed_ > dim)
321    ERROR("CServerDistributionDescription::computeBandDistribution(int nServer, int positionDimensionDistributed)",
322          << "Position of distributed dimension is invalid" << std::endl
323          << "Position of distributed dimension is " << positionDimensionDistributed_
324          << "Dimension " << dim)
325
326  indexBegin_.resize(nServer);
327  dimensionSizes_.resize(nServer);
328
329  for (int i = 0; i< nServer; ++i)
330  {
331    indexBegin_[i].resize(dim);
332    dimensionSizes_[i].resize(dim);
333  }
334
335  int njRangeSize;
336  int nGlobTemp = 0;
337  std::vector<int> njRangeBegin(nServer,0);
338  std::vector<int> njRangeEnd(nServer,0);
339
340  int positionDistributed = (1<dim) ? positionDimensionDistributed_ : 0;
341  nGlobTemp = nGlobal_[positionDistributed];
342  int nbBand = std::min(nGlobTemp, nServer);
343
344  for (int i = 0; i < nbBand; ++i)
345  {
346    if (0 < i) njRangeBegin[i] = njRangeEnd[i-1];
347    njRangeSize = nGlobTemp / nbBand;
348    if (i < nGlobTemp%nbBand) ++njRangeSize;
349    njRangeEnd[i] = njRangeSize + njRangeBegin[i];
350  }
351  njRangeEnd[nbBand-1] = nGlobTemp;
352
353  for (int i = nbBand; i < nServer; ++i)
354  {
355    njRangeBegin[i] = njRangeEnd[i] = 0;
356  }
357
358  for (int i = 0; i < nServer; ++i)
359  {
360    for (int j = 0; j < dim; ++j)
361    {
362      if (positionDistributed != j)
363      {
364        if (1 == dim)
365        {
366          indexBegin_[i][j] = njRangeBegin[i];
367          dimensionSizes_[i][j] = njRangeEnd[i] - njRangeBegin[i];
368        }
369        else
370        {
371          indexBegin_[i][j] = 0;
372          dimensionSizes_[i][j] = nGlobal_[j];
373        }
374      }
375      else
376      {
377        indexBegin_[i][j] = njRangeBegin[i];
378        dimensionSizes_[i][j] = njRangeEnd[i] - njRangeBegin[i];
379      }
380    }
381  }
382
383  return nbBand;
384}
385
386/*!
387  Get size of each dimension on distributed server
388  \return size of dimensions on server(s)
389*/
390std::vector<std::vector<int> > CServerDistributionDescription::getServerDimensionSizes() const
391{
392  return dimensionSizes_;
393}
394
395/*!
396  Get index begin of each dimension on distributed server
397  \return index begin of dimensions on server(s)
398*/
399std::vector<std::vector<int> > CServerDistributionDescription::getServerIndexBegin() const
400{
401  return indexBegin_;
402}
403
404/*!
405  Get global index on distributed server
406  \return global index on server(s)
407*/
408const std::vector<CArray<size_t,1> >& CServerDistributionDescription::getGlobalIndex() const
409{
410  return vecGlobalIndex_;
411}
412
413/*!
414  Get global index calculated by computeServerGlobalIndexInRange
415*/
416const boost::unordered_map<size_t,int>& CServerDistributionDescription::getGlobalIndexRange() const
417{
418  return globalIndex_;
419}
420
421int CServerDistributionDescription::getDimensionDistributed()
422{
423  return ((1<nGlobal_.size()) ? positionDimensionDistributed_ : 0);
424}
425
426} // namespace xios
Note: See TracBrowser for help on using the repository browser.