Changeset 1338
- Timestamp: 11/21/17 10:47:57 (6 years ago)
- Location: XIOS/dev/branch_openmp
- Files: 14 edited
XIOS/dev/branch_openmp/extern/remap/src/mapper.cpp
(r1335 → r1338)

Most of this diff is re-indentation: the weight-preparation code (timing of the gradient computation, counting of intersections, allocation of remapMatrix/srcAddress/srcRank/dstAddress/sourceWeightId/targetWeightId, and the call to remap()) and the body of Mapper::remap() itself (building the per-rank intersection lists, the MPI_Alltoall of element counts, the MPI_Issend/MPI_Irecv/MPI_Waitall exchange of indices, values, areas, gradients and neighbour ids, and the final accumulation of weights into wgt_map and the remap arrays) are unchanged apart from whitespace and line wrapping.

The substantive changes are:

1. In the loop that fills elementList in Mapper::remap(), the body gains braces and a commented-out debug trace:

      for (list<Polyg *>::iterator it = e.is.begin(); it != e.is.end(); it++)
      {
        elementList[(*it)->id.rank].insert(pair<int, Polyg *>((*it)->id.ind, *it));
        //std::cout<<"elementList["<<(*it)->id.rank<<"].size = "<< elementList[(*it)->id.rank].size()<<std::endl;
      }

2. The clean-up block at the end of Mapper::remap() is commented out, so the buffers allocated in this function are no longer freed before the function returns i:

      /* free all memory allocated in this function */
      /* for (int rank = 0; rank < mpiSize; rank++)
      {
        if (nbSendElement[rank] > 0)
        {
          delete[] sendElement[rank];
          delete[] recvValue[rank];
          delete[] recvArea[rank];
          if (order == 2)
          {
            delete[] recvGrad[rank];
          }
          delete[] recvNeighIds[rank];
        }
        if (nbRecvElement[rank] > 0)
        {
          delete[] recvElement[rank];
          delete[] sendValue[rank];
          delete[] sendArea[rank];
          if (order == 2)
            delete[] sendGrad[rank];
          delete[] sendNeighIds[rank];
        }
      }
      delete[] status;
      delete[] sendRequest;
      delete[] recvRequest;
      delete[] elementList;
      delete[] nbSendElement;
      delete[] nbRecvElement;
      delete[] sendElement;
      delete[] recvElement;
      delete[] sendValue;
      delete[] recvValue;
      delete[] sendGrad;
      delete[] recvGrad;
      delete[] sendNeighIds;
      delete[] recvNeighIds;*/
      return i;
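As background for the exchange that Mapper::remap() performs, the communication follows the standard nonblocking MPI pattern: post an MPI_Issend and an MPI_Irecv per peer that has data, then complete both request arrays with MPI_Waitall. The following is a minimal, self-contained sketch of that pattern with generic integer buffers; it is an illustration only, not the XIOS code (sendBuf/recvBuf and the one-int-per-peer payload are hypothetical):

    #include <mpi.h>
    #include <vector>

    // Minimal sketch: every rank exchanges one int with every other rank
    // using the same Issend/Irecv + Waitall structure as Mapper::remap().
    int main(int argc, char** argv)
    {
      MPI_Init(&argc, &argv);
      int size, rank;
      MPI_Comm_size(MPI_COMM_WORLD, &size);
      MPI_Comm_rank(MPI_COMM_WORLD, &rank);

      std::vector<int> sendBuf(size, rank), recvBuf(size, -1);   // hypothetical payload
      std::vector<MPI_Request> sendReq, recvReq;
      sendReq.reserve(size);
      recvReq.reserve(size);

      for (int peer = 0; peer < size; ++peer)
      {
        if (peer == rank) continue;
        sendReq.push_back(MPI_REQUEST_NULL);
        MPI_Issend(&sendBuf[peer], 1, MPI_INT, peer, 0, MPI_COMM_WORLD, &sendReq.back());
        recvReq.push_back(MPI_REQUEST_NULL);
        MPI_Irecv(&recvBuf[peer], 1, MPI_INT, peer, 0, MPI_COMM_WORLD, &recvReq.back());
      }

      // Nothing may be read from recvBuf (or reused in sendBuf) until all
      // outstanding requests have completed.
      MPI_Waitall((int)sendReq.size(), sendReq.data(), MPI_STATUSES_IGNORE);
      MPI_Waitall((int)recvReq.size(), recvReq.data(), MPI_STATUSES_IGNORE);

      MPI_Finalize();
      return 0;
    }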
XIOS/dev/branch_openmp/extern/src_ep_dev/ep_split.cpp
(r1295 → r1338)

A std::sort of the gathered key list is inserted after the broadcast:

      MPI_Gather_local(&key, 1, MPI_INT, key_loc.data(), 0, comm);
      MPI_Bcast_local(key_loc.data(), num_ep, MPI_INT, 0, comm);
  +   std::sort(key_loc.begin(), key_loc.end());

      for(int i=0; i<num_ep; i++)
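For context, sorting the gathered keys gives every endpoint the same canonical ordering, from which each endpoint can derive its position inside the new sub-communicator. A small illustration of that idea with hypothetical key values (plain C++, not the ep_lib calls); note that real MPI_Comm_split semantics also break ties between equal keys using the rank in the old communicator, which this sketch ignores:

    #include <algorithm>
    #include <iostream>
    #include <vector>

    int main()
    {
      // Keys gathered from four endpoints (hypothetical values).
      std::vector<int> key_loc = {7, 2, 5, 2};
      int myKey = 5;                               // this endpoint's key

      std::sort(key_loc.begin(), key_loc.end());   // -> {2, 2, 5, 7}

      // Position of the first key not less than myKey gives the tentative
      // rank of this endpoint inside its colour group.
      int newRank = int(std::lower_bound(key_loc.begin(), key_loc.end(), myKey) - key_loc.begin());
      std::cout << "new rank for key " << myKey << " = " << newRank << std::endl;   // prints 2
      return 0;
    }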
XIOS/dev/branch_openmp/inputs/REMAP/iodef.xml
(r1334 → r1338)

Two extract fields are disabled in the output file definitions:

        <field field_ref="dst_field_2D_regular_pole" name="field_dst_regular_2" enabled=".TRUE."/>
        <field field_ref="dst_field_2D_clone" name="field_dst_regular_3" detect_missing_value=".false." default_value="100000" enabled=".FALSE."/>
  -     <field field_ref="dst_field_2D_extract" name="field_dst_regular_4" enabled=".TRUE."/>
  +     <field field_ref="dst_field_2D_extract" name="field_dst_regular_4" enabled=".FALSE."/>
      </file>
      ...
      <file id="output_4D" name="output_4D" enabled=".TRUE.">
        <field field_ref="src_field_4D" name="field_4D" />
  -     <field field_ref="dst_field_4D_extract" name="field_4D_extract" enabled=".TRUE."/>
  +     <field field_ref="dst_field_4D_extract" name="field_4D_extract" enabled=".FALSE."/>
      </file>
XIOS/dev/branch_openmp/src/buffer_client.cpp
(r1328 → r1338)

The buffer-allocation log message is commented out:

      buffer[1] = new char[bufferSize];
      retBuffer = new CBufferOut(buffer[current], bufferSize);
  -   info(10) << "CClientBuffer: allocated 2 x " << bufferSize << " bytes for server " << serverRank << " with a maximum of " << maxBufferedEvents << " buffered events" << endl;
  +   //info(10) << "CClientBuffer: allocated 2 x " << bufferSize << " bytes for server " << serverRank << " with a maximum of " << maxBufferedEvents << " buffered events" << endl;
    }
XIOS/dev/branch_openmp/src/client.cpp
(r1334 → r1338)

The client-side performance and memory reports printed at finalization are commented out:

      //info(20) << "Client side context is finalized"<<endl ;
      //report(0) <<" Performance report : Whole time from XIOS init and finalize: "<< CTimer::get("XIOS init/finalize").getCumulatedTime()<<" s"<<endl ;
      //report(0) <<" Performance report : total time spent for XIOS : "<< CTimer::get("XIOS").getCumulatedTime()<<" s"<<endl ;
      //report(0)<< " Performance report : time spent for waiting free buffer : "<< CTimer::get("Blocking time").getCumulatedTime()<<" s"<<endl ;
      //report(0)<< " Performance report : Ratio : "<< CTimer::get("Blocking time").getCumulatedTime()/CTimer::get("XIOS init/finalize").getCumulatedTime()*100.<<" %"<<endl ;
      //report(0)<< " Performance report : This ratio must be close to zero. Otherwise it may be usefull to increase buffer size or numbers of server"<<endl ;
      // report(0)<< " Memory report : Current buffer_size : "<<CXios::bufferSize<<endl ;
      //report(0)<< " Memory report : Minimum buffer size required : " << CClientBuffer::maxRequestSize << " bytes" << endl ;
      //report(0)<< " Memory report : increasing it by a factor will increase performance, depending of the volume of data wrote in file at each time step of the file"<<endl ;
      //report(100)<<CTimer::getAllCumulatedTime()<<endl ;
    }
XIOS/dev/branch_openmp/src/context_client.cpp
(r1328 → r1338)

The per-connection and total buffer-memory reports on the client side are commented out:

      for (itMap = itbMap; itMap != iteMap; ++itMap)
      {
        //report(10) << " Memory report : Context <" << context->getId() << "> : client side : memory used for buffer of each connection to server" << endl
        //           << "  +) To server with rank " << itMap->first << " : " << itMap->second << " bytes " << endl;
        totalBuf += itMap->second;
      }
      //report(0) << " Memory report : Context <" << context->getId() << "> : client side : total memory used for buffer " << totalBuf << " bytes" << endl;

      releaseBuffers();
XIOS/dev/branch_openmp/src/context_server.cpp
(r1328 → r1338)

The server-side finalization message and buffer-memory reports are commented out:

      {
        finished=true;
        //info(20)<<"Server Side context <"<<context->getId()<<"> finalized"<<endl;
        std::map<int, StdSize>::const_iterator itbMap = mapBufferSize_.begin(),
                                               iteMap = mapBufferSize_.end(), itMap;
        ...
        for (itMap = itbMap; itMap != iteMap; ++itMap)
        {
          //report(10)<< " Memory report : Context <"<<context->getId()<<"> : server side : memory used for buffer of each connection to client" << endl
          //          << "  +) With client of rank " << itMap->first << " : " << itMap->second << " bytes " << endl;
          totalBuf += itMap->second;
        }
        context->finalize();
        //report(0)<< " Memory report : Context <"<<context->getId()<<"> : server side : total memory used for buffer "<<totalBuf<<" bytes"<<endl;
      }
      else if (event.classId==CContext::GetType()) CContext::dispatchEvent(event);
XIOS/dev/branch_openmp/src/io/nc4_data_input.cpp
(r1328 → r1338)

Under the endpoint (EP) build, the input type is forced to ONE_FILE before the mode switch:

      CArray<double,1> fieldData(grid->getWrittenDataSize());
      if (!field->default_value.isEmpty()) fieldData = field->default_value;
  +   #ifdef _usingEP
  +     SuperClass::type = ONE_FILE;
  +     printf("SuperClass::type = %d\n", SuperClass::type);
  +   #endif

      switch (SuperClass::type)
      {
XIOS/dev/branch_openmp/src/io/netCdfInterface.cpp
(r1334 → r1338)

In CNetCdfInterface::close(), the call to nc_close() and its error handling (previously executed unconditionally) are now wrapped in an OpenMP master region, so only the master thread closes the file:

    int CNetCdfInterface::close(int ncId)
    {
      int status = NC_NOERR;
      #pragma omp master
      {
        status = nc_close(ncId);
        if (NC_NOERR != status)
        {
          StdString errormsg(nc_strerror(status));
          StdStringStream sstr;
          sstr << "Error when calling function nc_close(ncId)" << std::endl
               << errormsg << std::endl
               << "Unable to close file, given its id: " << ncId << std::endl;
          StdString e = sstr.str();
          throw CNetCdfException(e);
        }
      }
      return status;
    }
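Guarding nc_close() with #pragma omp master is the usual way to serialize a call that should happen once per team (or that is not thread-safe) when several OpenMP threads share the same netCDF file id. A minimal sketch of the pattern with a generic resource (the closeSharedResource helper is hypothetical, not the XIOS/netCDF API); note that omp master has no implied barrier, so an explicit one is needed if other threads must not run ahead of the close:

    #include <cstdio>

    // Sketch: only the master thread of the current team performs the close;
    // the explicit barrier keeps the other threads from racing past it.
    // Compile with OpenMP enabled (e.g. -fopenmp).
    void closeSharedResource(std::FILE*& fp)
    {
      #pragma omp master
      {
        if (fp)
        {
          std::fclose(fp);
          fp = nullptr;
        }
      }
      #pragma omp barrier   // omp master alone does not synchronize the team
    }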
XIOS/dev/branch_openmp/src/node/context.cpp
(r1334 → r1338)

The before/after calendar traces in CContext::updateCalendar() are commented out:

    void CContext::updateCalendar(int step)
    {
      //info(50) << "updateCalendar : before : " << calendar->getCurrentDate() << endl;
      calendar->update(step);
      //info(50) << "updateCalendar : after : " << calendar->getCurrentDate() << endl;
    #ifdef XIOS_MEMTRACK_LIGHT
      info(50) << " Current memory used by XIOS : "<< MemTrack::getCurrentMemorySize()*1.0/(1024*1024)<<" Mbyte, at timestep "<<step<<" of context "<<this->getId()<<endl ;
XIOS/dev/branch_openmp/src/node/field.cpp
(r1328 → r1338)

Debug traces in the data-request loop and the "late data" error in CField::checkForLateDataFromServer() are commented out:

      while (currentDate >= lastDataRequestedFromServer)
      {
        //info(20) << "currentDate : " << currentDate << endl ;
        //info(20) << "lastDataRequestedFromServer : " << lastDataRequestedFromServer << endl ;
        //info(20) << "file->output_freq.getValue() : " << file->output_freq.getValue() << endl ;
        //info(20) << "lastDataRequestedFromServer + file->output_freq.getValue() : " << lastDataRequestedFromServer + file->output_freq << endl ;

        dataRequested |= sendReadDataRequest(lastDataRequestedFromServer + file->output_freq);
      ...
      while (isDataLate && timer.getCumulatedTime() < CXios::recvFieldTimeout);

      //if (isDataLate)
      //  ERROR("void CField::checkForLateDataFromServer(void)",
      //        << "Late data at timestep = " << currentDate);
    }
XIOS/dev/branch_openmp/src/node/file.cpp
(r1334 → r1338)

The block that (re)opens the input file is duplicated under a preprocessor switch between the endpoint (EP) and classic MPI builds, and the final close() call is commented out:

      bool isCollective = par_access.isEmpty() || par_access == par_access_attr::collective;
  #ifdef _usingEP
      //printf("multifile was %d\n", multifile);
      //multifile = true;
      if (isOpen) data_out->closeFile();
      if (time_counter_name.isEmpty()) data_in = boost::shared_ptr<CDataInput>(new CNc4DataInput(oss.str(), fileComm, multifile, isCollective));
      else data_in = boost::shared_ptr<CDataInput>(new CNc4DataInput(oss.str(), fileComm, multifile, isCollective, time_counter_name));
      isOpen = true;
  #elif _usingMPI
      if (isOpen) data_out->closeFile();
      if (time_counter_name.isEmpty()) data_in = boost::shared_ptr<CDataInput>(new CNc4DataInput(oss.str(), fileComm, multifile, isCollective));
      else data_in = boost::shared_ptr<CDataInput>(new CNc4DataInput(oss.str(), fileComm, multifile, isCollective, time_counter_name));
      isOpen = true;
  #endif
    }
  }
  ...
      // Now everything is ok, close it
      //close();
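The _usingEP / _usingMPI switch above selects the communication backend at compile time; both branches are currently identical apart from the commented-out debug lines, but the split leaves room for EP-specific handling later. A minimal sketch of this compile-time selection pattern (the openInput helper is hypothetical, not the real CFile code):

    #include <iostream>
    #include <string>

    // Sketch: choose the backend-specific open path at compile time.
    void openInput(const std::string& name)
    {
    #ifdef _usingEP
      // endpoint (thread-level MPI) build
      std::cout << "opening " << name << " through the EP layer\n";
    #elif defined(_usingMPI)
      // classic MPI build
      std::cout << "opening " << name << " with plain MPI\n";
    #else
      std::cout << "opening " << name << " serially\n";
    #endif
    }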
XIOS/dev/branch_openmp/src/test/test_remap_omp.f90
(r1334 → r1338)

The allocated work arrays are zero-initialized after allocation:

        ALLOCATE(lval1(interpolatedLlm))
        ALLOCATE(lval2(llm2))
  +     lval2 = 0
  +     lval=0
  +     lval1=0

        ierr=NF90_INQ_VARID(ncid,"lon",varid)
XIOS/dev/branch_openmp/src/transformation/domain_algorithm_interpolate.cpp
(r1334 → r1338)

The pole communicator is now split using the caller's rank in the intra-communicator as the ordering key (previously the key was 0 for every process):

      CContext* context = CContext::getCurrent();
      CContextClient* client=context->client;
  +   int mykey;
  +   ep_lib::MPI_Comm_rank(client->intraComm, &mykey);

      ep_lib::MPI_Comm poleComme;
  -   ep_lib::MPI_Comm_split(client->intraComm, interMapValuePole.empty() ? 0 : 1, 0, &poleComme);
  +   //ep_lib::MPI_Comm_split(client->intraComm, interMapValuePole.empty() ? 0 : 1, 0, &poleComme);
  +   ep_lib::MPI_Comm_split(client->intraComm, interMapValuePole.empty() ? 0 : 1, mykey, &poleComme);
      if (!poleComme.is_null())
      {
      ...
        std::vector<int> displ(nbClientPole,0);
        ep_lib::MPI_Allgather(&nbWeight,1,MPI_INT,&recvCount[0],1,MPI_INT,poleComme) ;
        displ[0]=0;
        for(int n=1;n<nbClientPole;++n) displ[n]=displ[n-1]+recvCount[n-1] ;
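Using the caller's rank as the key makes the ordering of processes inside poleComme explicit and deterministic; with a constant key, standard MPI breaks ties by the rank in the old communicator, but that guarantee may not carry over to the ep_lib implementation, hence the change. A minimal plain-MPI sketch of colour/key splitting, independent of XIOS:

    #include <mpi.h>
    #include <cstdio>

    // Sketch: split MPI_COMM_WORLD into "even" and "odd" groups, using the
    // old rank as the key so each group keeps the original relative order.
    int main(int argc, char** argv)
    {
      MPI_Init(&argc, &argv);
      int rank;
      MPI_Comm_rank(MPI_COMM_WORLD, &rank);

      int colour = rank % 2;   // which sub-communicator this process joins
      int key    = rank;       // ordering inside that sub-communicator

      MPI_Comm subComm;
      MPI_Comm_split(MPI_COMM_WORLD, colour, key, &subComm);

      int subRank;
      MPI_Comm_rank(subComm, &subRank);
      std::printf("world rank %d -> colour %d, sub-communicator rank %d\n", rank, colour, subRank);

      MPI_Comm_free(&subComm);
      MPI_Finalize();
      return 0;
    }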