Changeset 1460 for XIOS/dev/branch_openmp/extern

- Timestamp: 03/22/18 10:43:20
- Location: XIOS/dev/branch_openmp/extern
- Files: 8 edited
XIOS/dev/branch_openmp/extern/remap/src/gridRemap.hpp
(r1335 -> r1460) One line is added near the end of the header: next to the existing commented-out declaration //extern CRemapGrid srcGrid;, the revision inserts a second commented-out declaration //extern CRemapGrid tgtGrid; just before the closing brace.
XIOS/dev/branch_openmp/extern/remap/src/mapper.cpp
(r1341 -> r1460) Most of this change is a re-indentation of whole function bodies: the old lines are removed and re-added with different whitespace but identical statements. The functions touched in the first part of the file are the following.

cptOffsetsFromLengths computes a prefix sum (offsets[0] = 0; offsets[i] = offsets[i-1] + lengths[i-1]) and is unchanged apart from indentation.

Mapper::setSourceMesh stores the pole, queries the MPI rank and size on the communicator, reserves sourceElements and sourceMesh, and resizes sourceGlobalId. For each of the nbCells cells it builds an Elt from the nVertex bounds in boundsLon/boundsLat, fills its src_id (rank, local index, global id), appends a bounding Node(elt.x, cptRadius(elt), &sourceElements.back()) to sourceMesh, and calls cptEltGeom with the pole. Mapper::setTargetMesh performs the same construction for targetElements and targetMesh (its Node back-pointer still references sourceElements.back(), unchanged from r1341).

Mapper::computeWeights builds the SS-tree over sourceMesh and targetMesh, times computeIntersection over all target elements, and for interpOrder == 2 additionally builds the mesh topology and computes the gradients. It then counts the intersections (which in the first-order case equals the number of edges in the remap matrix), overallocates remapMatrix, srcAddress, srcRank, dstAddress, sourceWeightId and targetWeightId with nIntersections*NMAX entries each (up to NMAX neighbours per element), times the remap() call, frees the per-element intersections and returns the timing vector. The weights themselves are left in those triplet-style arrays; a sketch of how such triplets can be applied follows.
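The triplet arrays produced by computeWeights/remap (dstAddress, srcAddress, srcRank, remapMatrix) describe a sparse interpolation operator. Below is a minimal, hypothetical sketch of how such triplets could be applied to a field once all referenced source values are available locally; applyWeights and its arguments are illustrative names, not part of the XIOS remapper API.

```cpp
#include <vector>

// Hypothetical local application of triplet-form remap weights:
// tgt[dst[k]] += w[k] * src[srcAddr[k]].  Assumes every referenced source
// value is already available on this rank (no MPI exchange shown).
std::vector<double> applyWeights(const std::vector<double>& srcField,
                                 const std::vector<int>& srcAddress,
                                 const std::vector<int>& dstAddress,
                                 const std::vector<double>& weights,
                                 int nbTargetCells)
{
  std::vector<double> tgtField(nbTargetCells, 0.0);
  for (std::size_t k = 0; k < weights.size(); ++k)
    tgtField[dstAddress[k]] += weights[k] * srcField[srcAddress[k]];
  return tgtField;
}
```

Each weight adds a scaled source value into its target cell, which is how a first-order conservative remapping acts once the weights have been normalized by the target cell area.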
Mapper::remap(Elt *elements, int nbElements, int order, bool renormalize, bool quantity) is likewise re-indented throughout; the visible functional change in this revision is in its message tags. The routine:

- builds, for every rank, a multimap of the local intersections keyed by the index of the intersected source element on its owning rank (a commented-out debug cout of the elementList sizes is dropped);
- for each rank, renumbers the distinct requested source elements compactly, allocates sendElement, recvValue and recvArea (plus recvGrad and (NMAX+1)-padded recvNeighIds when order == 2, or plain recvNeighIds otherwise) and fills sendElement with the requested indices;
- exchanges the per-rank counts with MPI_Alltoall, then posts MPI_Issend/MPI_Irecv pairs for the index lists and waits on them;
- packs, for every requested element, its value and area and, when order == 2, its gradient, the gradients of its NMAX neighbours and the corresponding neighbour ids, then posts the return sends and the matching receives (the "ym --> attention taille GloId" remarks about the GloId message size are kept, and a commented-out cout of the packed gradients is added).

In r1341 the value, area, gradient and neighbour-id messages used distinct tags (0, 1, 2, 3 and 4); in r1460 every MPI_Issend and MPI_Irecv of this exchange uses tag 0, so messages between a given pair of ranks are matched purely by the order in which they are posted.
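The count-then-payload structure of this exchange is a standard MPI idiom: an MPI_Alltoall distributes the per-rank message sizes, after which matched nonblocking sends and receives move the variable-length lists. Here is a stripped-down sketch of that pattern with plain MPI (no ep_lib wrappers) and a single tag, as in this revision; exchangeIndexLists is an illustrative helper, not code from the changeset.

```cpp
#include <mpi.h>
#include <vector>

// Exchange variable-length integer lists between all ranks:
// phase 1 - every rank learns how much it will receive from every other rank,
// phase 2 - matched nonblocking send/recv pairs, all using the same tag.
void exchangeIndexLists(MPI_Comm comm,
                        const std::vector<std::vector<int> >& sendLists,
                        std::vector<std::vector<int> >& recvLists)
{
  int size;
  MPI_Comm_size(comm, &size);

  std::vector<int> nbSend(size), nbRecv(size);
  for (int r = 0; r < size; ++r) nbSend[r] = (int)sendLists[r].size();

  MPI_Alltoall(&nbSend[0], 1, MPI_INT, &nbRecv[0], 1, MPI_INT, comm);

  recvLists.assign(size, std::vector<int>());
  std::vector<MPI_Request> requests;
  requests.reserve(2 * size);

  for (int r = 0; r < size; ++r)
  {
    if (nbRecv[r] > 0)
    {
      recvLists[r].resize(nbRecv[r]);
      requests.push_back(MPI_REQUEST_NULL);
      MPI_Irecv(&recvLists[r][0], nbRecv[r], MPI_INT, r, /*tag*/ 0, comm, &requests.back());
    }
    if (nbSend[r] > 0)
    {
      requests.push_back(MPI_REQUEST_NULL);
      MPI_Isend((void*)&sendLists[r][0], nbSend[r], MPI_INT, r, /*tag*/ 0, comm, &requests.back());
    }
  }
  if (!requests.empty())
    MPI_Waitall((int)requests.size(), &requests[0], MPI_STATUSES_IGNORE);
}
```

Using one tag relies on MPI's non-overtaking guarantee: between a fixed pair of ranks, messages with the same tag match in posting order, which is why the send and receive sequences on both sides must stay symmetric.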
Once all values and gradients have arrived, the weights are accumulated per target element: contributions from the same source element that arrive through several intersections (and, at second order, through each neighbour's gradient) are summed into a map<GloId,double> wgt_map, so that only one final weight remains per source/target pair. The first-order contribution of an intersection is its supermesh area w (divided by the source area when quantity is set); at second order, w * scalarprod(recvGrad, barycentre of the intersection) is added for every neighbour id that is present. The accumulated weights are then renormalized, divided by the target element area unless quantity is set, and written to remapMatrix, srcAddress, srcRank and dstAddress together with the source and target global weight ids; all per-rank buffers are freed and the number of weights is returned. This whole block is re-indented only.

In the remainder of the file (the neighbour search and the parallel intersection routines) the changes are limited to whitespace and commented-out diagnostics: a commented-out cout reporting nRecv next to mpiRoute.getTotalSourceElement(), a commented-out check that every neighbour of an element was found (error_exit("neighbour not found")), and re-indented MPI_Waitall pairs; the intersect_ym driven exchange of packed intersections is otherwise untouched.
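As a complement to the accumulation step summarized above, here is a simplified, first-order-only sketch of merging several intersections of the same source cell into a single normalized weight; Intersection and firstOrderWeights are illustrative stand-ins, not the actual Polyg/GloId structures.

```cpp
#include <map>
#include <vector>

// Illustrative stand-in for the intersection data of one target cell.
struct Intersection { long srcGlobalId; double area; };

// Accumulate first-order weights: each source cell's weight is its total
// intersected area, divided by the target cell area (conservative average).
std::map<long, double> firstOrderWeights(const std::vector<Intersection>& is,
                                         double targetArea)
{
  std::map<long, double> wgt;
  for (std::size_t k = 0; k < is.size(); ++k)
    wgt[is[k].srcGlobalId] += is[k].area;      // merge multiple intersections
  for (std::map<long, double>::iterator it = wgt.begin(); it != wgt.end(); ++it)
    it->second /= targetArea;                  // normalize by target cell area
  return wgt;
}
```

The second-order path would add one gradient term per stored neighbour id on top of these area weights.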
XIOS/dev/branch_openmp/extern/remap/src/node.cpp
(r1328 -> r1460) Whitespace only: a blank line is inserted before the final return q; at the end of the function.
XIOS/dev/branch_openmp/extern/remap/src/parallel_tree.cpp
(r1355 -> r1460) The changes consist of commented-out code and whitespace. A commented-out "unpack" block is inserted after delete[] displs (it would resize randomArray and tree.leafs to nrecv and fill each leaf's centre and radius from recvBuffer through the randomized index array). In the error path that prints "PROBLEM: (node assign)" when tree.levelSize[assignLevel] differs from comm.group_size, a commented-out collective alternative (MPI_Allreduce of a local ok flag with MPI_PROD followed by MPI_Finalize and exit(1)) is added ahead of the live MPI_Abort(MPI_COMM_WORLD, -1). Two blank lines are removed after the MPI_Allreduce that sums nb into nbTot, and two further commented-out variants are added before the buildParallelSampleTree timer: one computing the per-grid sample counts s1/s2 from node.size() and nbSampleNodes, and one building sampleNodes by alternating between the two node lists.
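The commented-out alternative to MPI_Abort is the usual collective consensus idiom: every rank contributes a local ok flag, and an MPI_Allreduce with MPI_PROD yields zero as soon as any rank failed, so all ranks take the same shutdown path. A minimal standalone sketch of that idiom, independent of the tree code; checkAllOk is an illustrative name.

```cpp
#include <mpi.h>
#include <cstdlib>

// Collective failure check: allok is 1 only if every rank reports ok == 1.
// All ranks then take the same exit path, unlike a one-sided MPI_Abort.
void checkAllOk(MPI_Comm comm, int ok)
{
  int allok = 0;
  MPI_Allreduce(&ok, &allok, 1, MPI_INT, MPI_PROD, comm);
  if (!allok)
  {
    MPI_Finalize();
    std::exit(1);
  }
}
```

Compared with MPI_Abort, this lets every rank flush its own diagnostics before terminating, at the price of requiring all ranks to reach the check.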
XIOS/dev/branch_openmp/extern/remap/src/parallel_tree.hpp
(r1328 -> r1460) Whitespace only: a blank line is added between #include "mpi.hpp" and the opening of namespace sphereRemap.
XIOS/dev/branch_openmp/extern/src_ep_dev/ep_declaration.cpp
(r1373 -> r1460) Two more predefined MPI handles are wired into the endpoint (EP) layer. The standard MPI_UINT64_T datatype and MPI_LOR operation are captured into ::MPI_Datatype MPI_UINT64_T_STD and ::MPI_Op MPI_LOR_STD while the original macros are still visible, the corresponding #undef directives are added, matching extern declarations are introduced, and the endpoint handles ep_lib::MPI_Datatype MPI_UINT64_T = &MPI_UINT64_T_STD; and ep_lib::MPI_Op MPI_LOR = &MPI_LOR_STD; are exported next to the existing ones. A stale commented-out block that duplicated the #undef list (datatypes, ops, MPI_COMM_WORLD, MPI_COMM_NULL, MPI_STATUS_IGNORE, MPI_REQUEST_NULL, MPI_INFO_NULL) is deleted.
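The file follows a capture-then-shadow pattern: save the real MPI handle into a *_STD global while the standard macro still expands, #undef the macro, and re-export the familiar name as an ep_lib handle pointing at the saved value. A condensed, self-contained sketch of that pattern for a single datatype; the wrapper struct and the _sketch names are illustrative, the real ep_lib types are richer.

```cpp
#include <mpi.h>

// Illustrative stand-in for an ep_lib handle: it only records a pointer
// to the underlying standard MPI handle, as the XIOS EP handles do.
namespace ep_sketch
{
  struct Datatype
  {
    ::MPI_Datatype* std_handle;
    explicit Datatype(::MPI_Datatype* h) : std_handle(h) {}
  };
}

// 1) Capture the standard handle while the MPI name is still usable.
::MPI_Datatype MPI_UINT64_T_STD_sketch = MPI_UINT64_T;

// 2) In the real code the name is now shadowed (#undef MPI_UINT64_T),
//    so later uses of it resolve to the endpoint handle instead.

// 3) Re-export the name as an endpoint-level handle wrapping the saved value.
ep_sketch::Datatype MPI_UINT64_T_EP(&MPI_UINT64_T_STD_sketch);
```

The same three steps are repeated for every datatype, operation and communicator that the EP layer intercepts.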
XIOS/dev/branch_openmp/extern/src_ep_dev/ep_declaration.hpp
(r1369 -> r1460) The header is kept in sync with ep_declaration.cpp: #undef MPI_UINT64_T and #undef MPI_LOR are added to the existing #undef lists, and extern ep_lib::MPI_Datatype MPI_UINT64_T; and extern ep_lib::MPI_Op MPI_LOR; are declared alongside the other endpoint handles.
XIOS/dev/branch_openmp/extern/src_ep_dev/ep_reduce.cpp
(r1365 -> r1460) The endpoint reduction gains logical-or and 64-bit unsigned support. A templated helper lor_op(a, b) { return a || b; } is added next to the existing ones, together with reduce_lor(buffer, recvbuf, count), which folds one buffer into another elementwise with std::transform, mirroring reduce_max, reduce_min and reduce_sum. In the MPI_MAX, MPI_MIN and MPI_SUM branches of the reduction an else-if case for MPI_UINT64_T is added: it asserts datasize == sizeof(uint64_t) and applies reduce_max<uint64_t>, reduce_min<uint64_t> or reduce_sum<uint64_t> over the buffers of all endpoints (comm.my_buffer->void_buffer[i]) before the fallback "datatype Error" printf. Finally, a new branch handles the logical-or reduction: it accepts only MPI_INT ("datatype Error, must be MPI_INT"), asserts datasize == sizeof(int) and applies reduce_lor<int> over the endpoint buffers; note that the guard of this branch in the committed code still reads if (op == MPI_SUM) rather than MPI_LOR.
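The new reduce_lor follows the same shape as the existing helpers: an elementwise std::transform that folds one endpoint's buffer into the accumulation buffer. A small standalone sketch of that technique with ordinary vectors in place of the ep_lib internal buffers; the main() is only a usage check.

```cpp
#include <algorithm>
#include <cassert>
#include <vector>

template<typename T>
T lor_op(T a, T b) { return a || b; }

// Fold 'contribution' into 'accum' elementwise with logical OR, as the
// endpoint-level MPI_LOR reduction does for each thread's buffer.
template<typename T>
void reduce_lor(const T* contribution, T* accum, int count)
{
  std::transform(contribution, contribution + count, accum, accum, lor_op<T>);
}

int main()
{
  std::vector<int> accum(4, 0), contrib(4);
  contrib[0] = 1; contrib[1] = 0; contrib[2] = 3; contrib[3] = 0;
  reduce_lor(&contrib[0], &accum[0], 4);
  assert(accum[0] == 1 && accum[1] == 0 && accum[2] == 1 && accum[3] == 0);
  return 0;
}
```

Any nonzero contribution forces the corresponding accumulated element to 1, so applying the fold once per endpoint yields the logical OR across all endpoints of the communicator.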