Context Navigation

← Previous Change
Next Change →

extern

Timestamp:

11/15/17 12:14:34 (7 years ago)

Author:

yushan

Message:

dev_omp

Location:

XIOS/dev/branch_openmp/extern

Files:

: 28 edited

remap/src/clipper.cpp (modified) (1 diff)
remap/src/cputime.cpp (modified) (1 diff)
remap/src/elt.hpp (modified) (2 diffs)
remap/src/gridRemap.cpp (modified) (1 diff)
remap/src/gridRemap.hpp (modified) (1 diff)
remap/src/intersect.cpp (modified) (8 diffs)
remap/src/intersection_ym.cpp (modified) (1 diff)
remap/src/libmapper.cpp (modified) (5 diffs)
remap/src/mapper.cpp (modified) (4 diffs)
remap/src/mapper.hpp (modified) (2 diffs)
remap/src/mpi_cascade.cpp (modified) (1 diff)
remap/src/mpi_cascade.hpp (modified) (2 diffs)
remap/src/mpi_routing.cpp (modified) (4 diffs)
remap/src/mpi_routing.hpp (modified) (3 diffs)
remap/src/node.cpp (modified) (1 diff)
remap/src/node.hpp (modified) (3 diffs)
remap/src/parallel_tree.cpp (modified) (5 diffs)
remap/src/parallel_tree.hpp (modified) (2 diffs)
remap/src/polyg.cpp (modified) (5 diffs)
remap/src/timerRemap.cpp (modified) (3 diffs)
remap/src/timerRemap.hpp (modified) (1 diff)
remap/src/tree.cpp (modified) (3 diffs)
remap/src/tree.hpp (modified) (3 diffs)
src_ep_dev/ep_fortran.cpp (modified) (2 diffs)
src_ep_dev/ep_intercomm.cpp (modified) (1 diff)
src_ep_dev/ep_lib.cpp (modified) (1 diff)
src_ep_dev/ep_send.cpp (modified) (1 diff)
src_ep_dev/ep_type.hpp (modified) (1 diff)

Legend:

: Unmodified
: Added
: Removed

XIOS/dev/branch_openmp/extern/remap/src/clipper.cpp

-                      r1205
+                      r1328
+{
   //The equation of a line in general form (Ax + By + C = 0)
   //given 2 points (x¹,y¹) & (x²,y²) is ...
   //(y¹ - y²)x + (x² - x¹)y + (y² - y¹)x¹ - (x² - x¹)y¹ = 0
   //A = (y¹ - y²); B = (x² - x¹); C = (y² - y¹)x¹ - (x² - x¹)y¹
   //perpendicular distance of point (x³,y³) = (Ax³ + By³ + C)/Sqrt(A² + B²)
+  //given 2 points (x¹,y¹) & (x²,y²) is ...
+  //(y¹ - y²)x + (x² - x¹)y + (y² - y¹)x¹ - (x² - x¹)y¹ = 0
+  //A = (y¹ - y²); B = (x² - x¹); C = (y² - y¹)x¹ - (x² - x¹)y¹
+  //perpendicular distance of point (x³,y³) = (Ax³ + By³ + C)/Sqrt(A² + B²)
   //see http://en.wikipedia.org/wiki/Perpendicular_distance
   double A = double(ln1.Y - ln2.Y);

XIOS/dev/branch_openmp/extern/remap/src/cputime.cpp

r694	r1328
1	1	#include "mpi.hpp"
	2	using namespace ep_lib;
2	3
3	4	namespace sphereRemap {

XIOS/dev/branch_openmp/extern/remap/src/elt.hpp

-                      r1172
+                      r1328
 struct Elt : Polyg
+{
+  Elt() {}
+  Elt(const double *bounds_lon, const double *bounds_lat, int max_num_vert)
+  {
+    int k = 0;
+    vertex[k++] = xyz(bounds_lon[0], bounds_lat[0]);
+    for (int i = 1; i < max_num_vert; i++)
+    {
+      vertex[k] = xyz(bounds_lon[i], bounds_lat[i]);
+      /* netCDF convention: if first vertex repeats element is finished (at least three vertices == triangle) */
+      if (k >= 3 && squaredist(vertex[k], vertex[0]) < EPS*EPS)
+        break;
+      /* eliminate zero edges: move to next vertex only if it is different */
+      if (squaredist(vertex[k], vertex[k-1]) > EPS*EPS)
+        k++;
+      //else cout << "Removed edge " << k << " due to zero length (coinciding endpoints)." << endl ;
+    }
+    n = k;
+    x = barycentre(vertex, n);
+  }
+        Elt() {}
+        Elt(const double *bounds_lon, const double *bounds_lat, int max_num_vert)
+        {
+                int k = 0;
+                vertex[k++] = xyz(bounds_lon[0], bounds_lat[0]);
+                for (int i = 1; i < max_num_vert; i++)
+                {
+                        vertex[k] = xyz(bounds_lon[i], bounds_lat[i]);
+                        /* netCDF convention: if first vertex repeats element is finished (at least three vertices == triangle) */
+                        if (k >= 3 && squaredist(vertex[k], vertex[0]) < EPS*EPS)
+                                break;
+                        /* eliminate zero edges: move to next vertex only if it is different */
+                        if (squaredist(vertex[k], vertex[k-1]) > EPS*EPS)
+                                k++;
+                        else
+                                /* cout << "Removed edge " << k << " due to zero length (coinciding endpoints)." << endl */ ;
+                }
+                n = k;
+                x = barycentre(vertex, n);
+        }
         Elt& operator=(const Elt& rhs)
 …
+        }
   void delete_intersections()
+  {
     for (list<Polyg*>::iterator it = this->is.begin(); it != this->is.end(); it++)
+    {
       Polyg* poly = *it;
       delete poly;
+    }
+  }
+        void delete_intersections()
+        {
+                for (list<Polyg*>::iterator it = this->is.begin(); it != this->is.end(); it++)
+                {
+                        Polyg* poly = *it;
+                        delete poly;
+                }
+        }
   void insert_vertex(int i, const Coord& v)

XIOS/dev/branch_openmp/extern/remap/src/gridRemap.cpp

-                      r1155
+                      r1328
 CRemapGrid srcGrid;
-#pragma omp threadprivate(srcGrid)
 CRemapGrid tgtGrid;
-#pragma omp threadprivate(tgtGrid)
+}

XIOS/dev/branch_openmp/extern/remap/src/gridRemap.hpp

-                      r1220
+                      r1328
 Coord readPole(std::istream&);
+extern CRemapGrid srcGrid;
+extern CRemapGrid tgtGrid;
+}

XIOS/dev/branch_openmp/extern/remap/src/intersect.cpp

-                      r1220
+                      r1328
 namespace sphereRemap {
-extern CRemapGrid srcGrid;
-#pragma omp threadprivate(srcGrid)
-extern CRemapGrid tgtGrid;
-#pragma omp threadprivate(tgtGrid)
 using namespace std;
 …
 int neighbour_idx(const Elt& a, const Elt& b)
+{
   for (int i = 0; i < a.n; i++)
+  {
     for (int j = 0; j < b.n; j++)
+    {
       assert(squaredist(a.vertex[ i       ], b.vertex[ j       ]) > EPS*EPS ||
              squaredist(a.vertex[(i+1)%a.n], b.vertex[(j+1)%b.n]) > EPS*EPS);
       if (   squaredist(a.vertex[ i       ], b.vertex[ j           ]) < 1e-13*1e-13 &&
              squaredist(a.vertex[(i+1)%a.n], b.vertex[(j+b.n-1)%b.n]) < 1e-13*1e-13)
+      {
         return i;
+      }
+    }
+  }
   return NOT_FOUND;
+        for (int i = 0; i < a.n; i++)
+        {
+                for (int j = 0; j < b.n; j++)
+                {
+                        assert(squaredist(a.vertex[ i       ], b.vertex[ j       ]) > EPS*EPS ||
+                               squaredist(a.vertex[(i+1)%a.n], b.vertex[(j+1)%b.n]) > EPS*EPS);
+                        if (   squaredist(a.vertex[ i       ], b.vertex[ j           ]) < 1e-13*1e-13 &&
+                               squaredist(a.vertex[(i+1)%a.n], b.vertex[(j+b.n-1)%b.n]) < 1e-13*1e-13)
+                        {
+                                return i;
+                        }
+                }
+        }
+        return NOT_FOUND;
+}
 …
 bool isNeighbour(Elt& a, const Elt& b)
+{
   // return neighbour_idx(a, b) != NOT_FOUND;
+        // return neighbour_idx(a, b) != NOT_FOUND;
   return insertNeighbour(a,b,false) ;
+}
 …
 void intersect(Elt *a, Elt *b)
+{
   int na = a->n; /* vertices of a */
   int nb = b->n; /* vertices of b */
   Coord *c   = new Coord[na+nb];
   Coord *c2  = new Coord[na+nb];
   Coord *xc  = new Coord[na+nb];
   Coord *xc2 = new Coord[na+nb];
   Coord gc, gc2;
   double *d = new double[na+nb];
   double *d2 = new double[na+nb];
   double are, are2;
   Ipt ipt[NMAX*NMAX];
   Ipt ipt2[NMAX*NMAX];
   ptsec(a, b, ipt);
   /* make ipt2 transpose of ipt */
   for (int ii = 0; ii < na; ii++)
     for (int jj = 0; jj < nb; jj++)
       ipt2[jj*na+ii] = ipt[ii*nb+jj];
   list<Sgm> iscot;
   recense(a, b, ipt, iscot, 0);
   recense(b, a, ipt2, iscot, 1);
   int nseg = iscot.size();
   int nc = 0;
   int nc2 = 0;
   while (iscot.size() && nc < 2)
     nc = assemble(iscot, c, d, xc);
   while (iscot.size() && nc2 < 2)
     nc2 = assemble(iscot, c2, d2, xc2);
 //  assert(nseg == nc + nc2 || nseg == 1); // unused segment
+        int na = a->n; /* vertices of a */
+        int nb = b->n; /* vertices of b */
+        Coord *c   = new Coord[na+nb];
+        Coord *c2  = new Coord[na+nb];
+        Coord *xc  = new Coord[na+nb];
+        Coord *xc2 = new Coord[na+nb];
+        Coord gc, gc2;
+        double *d = new double[na+nb];
+        double *d2 = new double[na+nb];
+        double are, are2;
+        Ipt ipt[NMAX*NMAX];
+        Ipt ipt2[NMAX*NMAX];
+        ptsec(a, b, ipt);
+        /* make ipt2 transpose of ipt */
+        for (int ii = 0; ii < na; ii++)
+                for (int jj = 0; jj < nb; jj++)
+                        ipt2[jj*na+ii] = ipt[ii*nb+jj];
+        list<Sgm> iscot;
+        recense(a, b, ipt, iscot, 0);
+        recense(b, a, ipt2, iscot, 1);
+        int nseg = iscot.size();
+        int nc = 0;
+        int nc2 = 0;
+        while (iscot.size() && nc < 2)
+                nc = assemble(iscot, c, d, xc);
+        while (iscot.size() && nc2 < 2)
+                nc2 = assemble(iscot, c2, d2, xc2);
+//      assert(nseg == nc + nc2 || nseg == 1); // unused segment
         if (!(nseg == nc + nc2 || nseg == 1))
 …
 //    intersect_ym(a,b) ;
   if (nc == 1) nc = 0;
   if (nc2 == 1) nc2 = 0;
   gc = barycentre(xc, nc);
   gc2 = barycentre(xc2, nc2);
   orient(nc, xc, c, d, gc);
   Coord pole = srcGrid.pole;
   if (pole == ORIGIN) pole = tgtGrid.pole;
   const double MINBASE = 1e-11;
   if (nc == 2) /* nc is the number of vertices of super mesh element */
+  {
     double base = arcdist(xc[0], xc[1]);
+        if (nc == 1) nc = 0;
+        if (nc2 == 1) nc2 = 0;
+        gc = barycentre(xc, nc);
+        gc2 = barycentre(xc2, nc2);
+        orient(nc, xc, c, d, gc);
+        Coord pole = srcGrid.pole;
+        if (pole == ORIGIN) pole = tgtGrid.pole;
+        const double MINBASE = 1e-11;
+        if (nc == 2) /* nc is the number of vertices of super mesh element */
+        {
+                double base = arcdist(xc[0], xc[1]);
 cerr << "DID ARRIVE " << base << xc[0] << xc[1] << endl;
     gc = midpoint(gc, midpointSC(xc[0], xc[1]));
     /* intersection area `are` must be zero here unless we have one great and one small circle */
     are = alun(base, fabs(scalarprod(xc[0], pole)));
+  }
   else
+  {
     are = airbar(nc, xc, c, d, pole, gc);
+  }
   if (nc2 == 2)
+  {
     double base = arcdist(xc2[0], xc2[1]);
+                gc = midpoint(gc, midpointSC(xc[0], xc[1]));
+                /* intersection area `are` must be zero here unless we have one great and one small circle */
+                are = alun(base, fabs(scalarprod(xc[0], pole)));
+        }
+        else
+        {
+                are = airbar(nc, xc, c, d, pole, gc);
+        }
+        if (nc2 == 2)
+        {
+                double base = arcdist(xc2[0], xc2[1]);
 cerr << "DID ARRIVE " << base << xc2[0] << xc2[1] << endl;
     assert(base > MINBASE);
     gc2 = midpoint(gc2, midpointSC(xc2[0], xc2[1]));
     are2 = alun(base, fabs(scalarprod(xc2[0], pole))); // 0
+  }
   else
+  {
     are2 = airbar(nc2, xc2, c2, d2, pole, gc2);
+  }
+                assert(base > MINBASE);
+                gc2 = midpoint(gc2, midpointSC(xc2[0], xc2[1]));
+                are2 = alun(base, fabs(scalarprod(xc2[0], pole))); // 0
+        }
+        else
+        {
+                are2 = airbar(nc2, xc2, c2, d2, pole, gc2);
+        }
 //  double ym_area=intersect_ym(a,b) ;
   if (nc > 1)
+  {
     /* create one super mesh polygon that src and dest point to */
     Polyg *is = new Polyg;
     is->x = gc;
     is->area = are;
     is->id = b->id;
     is->src_id = b->src_id;
     is->n = nc;
     (a->is).push_back(is);
     (b->is).push_back(is);
+        {
+                /* create one super mesh polygon that src and dest point to */
+                Polyg *is = new Polyg;
+                is->x = gc;
+                is->area = are;
+                is->id = b->id;
+                is->src_id = b->src_id;
+                is->n = nc;
+                (a->is).push_back(is);
+                (b->is).push_back(is);
 /*
     if (  2*fabs(are-ym_area)/(are+ym_area) > 1.1 && ym_area>1e-8)
+          if (  2*fabs(are-ym_area)/(are+ym_area) > 1.1 && ym_area>1e-8)
+    {
       cout<<"Big area difference : "<<are<<"  "<<ym_area<<endl ;
 …
+    }
 */
 //    cout<<"intersection : "<<are<<" "<< ym_area<<"  diff : "<<fabs(are-ym_area)<<"  ratio : "<<fabs(are-ym_area)/(0.5*(are+ym_area))<<endl ;
+  }
   if (nc2 > 1)
+  {
     Polyg *is = new Polyg;
     is->x = gc2;
     is->area = are2;
     is->id = b->id; /* intersection holds id of corresponding source element (see Elt class definition for details about id) */
     is->src_id = b->src_id;
     is->n = nc2;
     (a->is).push_back(is);
     (b->is).push_back(is);
+//              cout<<"intersection : "<<are<<" "<< ym_area<<"  diff : "<<fabs(are-ym_area)<<"  ratio : "<<fabs(are-ym_area)/(0.5*(are+ym_area))<<endl ;
+        }
+        if (nc2 > 1)
+        {
+                Polyg *is = new Polyg;
+                is->x = gc2;
+                is->area = are2;
+                is->id = b->id; /* intersection holds id of corresponding source element (see Elt class definition for details about id) */
+                is->src_id = b->src_id;
+                is->n = nc2;
+                (a->is).push_back(is);
+                (b->is).push_back(is);
 /*
     if (  2*fabs(are-ym_area)/(are+ym_area) > 1.1 && ym_area>1e-8 )
+    if (        2*fabs(are-ym_area)/(are+ym_area) > 1.1 && ym_area>1e-8 )
+    {
       cout<<"Big area difference : "<<are<<"  "<<ym_area<<endl ;
 …
+    }
 */
 //    cout<<"intersection : "<<are2<<" "<< ym_area<<"  diff : "<<fabs(are-ym_area)<<"  ratio : "<<fabs(are-ym_area)/(0.5*(are+ym_area))<<endl ;
+  }
+//              cout<<"intersection : "<<are2<<" "<< ym_area<<"  diff : "<<fabs(are-ym_area)<<"  ratio : "<<fabs(are-ym_area)/(0.5*(are+ym_area))<<endl ;
+        }
 /*
   if (nc<=1 && nc2<=1)
 …
+  }
 */
   delete [] c;
   delete [] c2;
   delete [] xc;
   delete [] xc2;
   delete [] d;
   delete [] d2;
+}
+}
+        delete [] c;
+        delete [] c2;
+        delete [] xc;
+        delete [] xc2;
+        delete [] d;
+        delete [] d2;
+}
+}

XIOS/dev/branch_openmp/extern/remap/src/intersection_ym.cpp

-                      r1220
+                      r1328
 namespace sphereRemap {
-extern CRemapGrid srcGrid;
-#pragma omp threadprivate(srcGrid)
-extern CRemapGrid tgtGrid;
-#pragma omp threadprivate(tgtGrid)
 using namespace std;

XIOS/dev/branch_openmp/extern/remap/src/libmapper.cpp

-                      r1205
+                      r1328
 #include "mapper.hpp"
 #include "cputime.hpp" // cputime
+#include <stdio.h>
+using namespace ep_lib;
 using namespace sphereRemap ;
 …
    and deallocated during the second step (computing the weights) */
 Mapper *mapper;
+#pragma omp threadprivate(mapper)
 /** xxx_bounds_yyy is of length n_vert_per_cell_xxx*n_cell_xxx
 …
                      int order, int* n_weights)
+{
   assert(src_bounds_lon);
   assert(src_bounds_lat);
   assert(n_vert_per_cell_src >= 3);
   assert(n_cell_src >= 4);
   assert(dst_bounds_lon);
   assert(dst_bounds_lat);
   assert(n_vert_per_cell_dst >= 3);
   assert(n_cell_dst >= 4);
   assert(1 <= order && order <= 2);
+        assert(src_bounds_lon);
+        assert(src_bounds_lat);
+        assert(n_vert_per_cell_src >= 3);
+        assert(n_cell_src >= 4);
+        assert(dst_bounds_lon);
+        assert(dst_bounds_lat);
+        assert(n_vert_per_cell_dst >= 3);
+        assert(n_cell_dst >= 4);
+        assert(1 <= order && order <= 2);
   mapper = new Mapper(MPI_COMM_WORLD);
 …
         double tic = cputime();
         mapper = new Mapper(MPI_COMM_WORLD);
         mapper->setVerbosity(PROGRESS) ;
+  mapper->setVerbosity(PROGRESS) ;
         mapper->buildSSTree(src_msh, dst_msh);
         double tac = cputime();
 …
         char **argv = NULL;
         MPI_Init(&argc, &argv);*/
+        //MPI_Init(NULL, NULL);
+        int provided;
+        MPI_Init_thread(NULL, NULL, 3, &provided);
+        assert(provided >= 3);
+        MPI_Init(NULL, NULL);
+}

XIOS/dev/branch_openmp/extern/remap/src/mapper.cpp

-                      r1220
+                      r1328
 #include "mapper.hpp"
+using namespace ep_lib;
 namespace sphereRemap {
-extern CRemapGrid srcGrid;
-#pragma omp threadprivate(srcGrid)
-extern CRemapGrid tgtGrid;
-#pragma omp threadprivate(tgtGrid)
 /* A subdivision of an array into N sub-arrays
 …
 void Mapper::setTargetMesh(const double* boundsLon, const double* boundsLat, int nVertex, int nbCells, const double* pole, const long int* globalId)
+{
     tgtGrid.pole = Coord(pole[0], pole[1], pole[2]);
     int mpiRank, mpiSize;
     MPI_Comm_rank(communicator, &mpiRank);
     MPI_Comm_size(communicator, &mpiSize);
     targetElements.reserve(nbCells);
     targetMesh.reserve(nbCells);
     targetGlobalId.resize(nbCells) ;
     if (globalId==NULL)
+    {
         long int offset ;
         long int nb=nbCells ;
         MPI_Scan(&nb,&offset,1,MPI_LONG,MPI_SUM,communicator) ;
         offset=offset-nb ;
         for(int i=0;i<nbCells;i++) targetGlobalId[i]=offset+i ;
+    }
     else targetGlobalId.assign(globalId,globalId+nbCells);
     for (int i = 0; i < nbCells; i++)
+    {
         int offs = i*nVertex;
         Elt elt(boundsLon + offs, boundsLat + offs, nVertex);
         targetElements.push_back(elt);
         targetMesh.push_back(Node(elt.x, cptRadius(elt), &sourceElements.back()));
         cptEltGeom(targetElements[i], Coord(pole[0], pole[1], pole[2]));
+    }
+  tgtGrid.pole = Coord(pole[0], pole[1], pole[2]);
+  int mpiRank, mpiSize;
+  MPI_Comm_rank(communicator, &mpiRank);
+  MPI_Comm_size(communicator, &mpiSize);
+  targetElements.reserve(nbCells);
+  targetMesh.reserve(nbCells);
+  targetGlobalId.resize(nbCells) ;
+  if (globalId==NULL)
+  {
+    long int offset ;
+    long int nb=nbCells ;
+    MPI_Scan(&nb,&offset,1,MPI_LONG,MPI_SUM,communicator) ;
+    offset=offset-nb ;
+    for(int i=0;i<nbCells;i++) targetGlobalId[i]=offset+i ;
+  }
+  else targetGlobalId.assign(globalId,globalId+nbCells);
+  for (int i = 0; i < nbCells; i++)
+  {
+    int offs = i*nVertex;
+    Elt elt(boundsLon + offs, boundsLat + offs, nVertex);
+    targetElements.push_back(elt);
+    targetMesh.push_back(Node(elt.x, cptRadius(elt), &sourceElements.back()));
+    cptEltGeom(targetElements[i], Coord(pole[0], pole[1], pole[2]));
+  }
 …
 void Mapper::setSourceValue(const double* val)
+{
     int size=sourceElements.size() ;
     for(int i=0;i<size;++i) sourceElements[i].val=val[i] ;
+  int size=sourceElements.size() ;
+  for(int i=0;i<size;++i) sourceElements[i].val=val[i] ;
+}
 void Mapper::getTargetValue(double* val)
+{
     int size=targetElements.size() ;
     for(int i=0;i<size;++i) val[i]=targetElements[i].val ;
+  int size=targetElements.size() ;
+  for(int i=0;i<size;++i) val[i]=targetElements[i].val ;
+}
 vector<double> Mapper::computeWeights(int interpOrder, bool renormalize, bool quantity)
+{
+    vector<double> timings;
+    int mpiSize, mpiRank;
+    MPI_Comm_size(communicator, &mpiSize);
+    MPI_Comm_rank(communicator, &mpiRank);
+    this->buildSSTree(sourceMesh, targetMesh);
+    if (mpiRank == 0 && verbose) cout << "Computing intersections ..." << endl;
+    double tic = cputime();
+    computeIntersection(&targetElements[0], targetElements.size());
+    timings.push_back(cputime() - tic);
+    tic = cputime();
+    if (interpOrder == 2) {
+        if (mpiRank == 0 && verbose) cout << "Computing grads ..." << endl;
+        buildMeshTopology();
+        computeGrads();
+    }
+    timings.push_back(cputime() - tic);
+    /* Prepare computation of weights */
+    /* compute number of intersections which for the first order case
+       corresponds to the number of edges in the remap matrix */
+    int nIntersections = 0;
+    for (int j = 0; j < targetElements.size(); j++)
+    {
+        Elt &elt = targetElements[j];
+        for (list<Polyg*>::iterator it = elt.is.begin(); it != elt.is.end(); it++)
+            nIntersections++;
+    }
+    /* overallocate for NMAX neighbours for each elements */
+    remapMatrix = new double[nIntersections*NMAX];
+    srcAddress = new int[nIntersections*NMAX];
+    srcRank = new int[nIntersections*NMAX];
+    dstAddress = new int[nIntersections*NMAX];
+    sourceWeightId =new long[nIntersections*NMAX];
+    targetWeightId =new long[nIntersections*NMAX];
+    if (mpiRank == 0 && verbose) cout << "Remapping..." << endl;
+    tic = cputime();
+    nWeights = remap(&targetElements[0], targetElements.size(), interpOrder, renormalize, quantity);
+    timings.push_back(cputime() - tic);
+    for (int i = 0; i < targetElements.size(); i++) targetElements[i].delete_intersections();
+    return timings;
+  vector<double> timings;
+  int mpiSize, mpiRank;
+  MPI_Comm_size(communicator, &mpiSize);
+  MPI_Comm_rank(communicator, &mpiRank);
+  this->buildSSTree(sourceMesh, targetMesh);
+  if (mpiRank == 0 && verbose) cout << "Computing intersections ..." << endl;
+  double tic = cputime();
+  computeIntersection(&targetElements[0], targetElements.size());
+  timings.push_back(cputime() - tic);
+        tic = cputime();
+        if (interpOrder == 2) {
+                if (mpiRank == 0 && verbose) cout << "Computing grads ..." << endl;
+                buildMeshTopology();
+                computeGrads();
+        }
+        timings.push_back(cputime() - tic);
+        /* Prepare computation of weights */
+        /* compute number of intersections which for the first order case
+           corresponds to the number of edges in the remap matrix */
+        int nIntersections = 0;
+        for (int j = 0; j < targetElements.size(); j++)
+        {
+                Elt &elt = targetElements[j];
+                for (list<Polyg*>::iterator it = elt.is.begin(); it != elt.is.end(); it++)
+                        nIntersections++;
+        }
+        /* overallocate for NMAX neighbours for each elements */
+        remapMatrix = new double[nIntersections*NMAX];
+        srcAddress = new int[nIntersections*NMAX];
+        srcRank = new int[nIntersections*NMAX];
+        dstAddress = new int[nIntersections*NMAX];
+  sourceWeightId =new long[nIntersections*NMAX];
+  targetWeightId =new long[nIntersections*NMAX];
+        if (mpiRank == 0 && verbose) cout << "Remapping..." << endl;
+        tic = cputime();
+        nWeights = remap(&targetElements[0], targetElements.size(), interpOrder, renormalize, quantity);
+        timings.push_back(cputime() - tic);
+  for (int i = 0; i < targetElements.size(); i++) targetElements[i].delete_intersections();
+        return timings;
+}
 /**
   @param elements are cells of the target grid that are distributed over CPUs
   independently of the distribution of the SS-tree.
   @param nbElements is the size of the elements array.
   @param order is the order of interpolation (must be 1 or 2).
   */
+   @param elements are cells of the target grid that are distributed over CPUs
+          independently of the distribution of the SS-tree.
+   @param nbElements is the size of the elements array.
+   @param order is the order of interpolation (must be 1 or 2).
+*/
 int Mapper::remap(Elt *elements, int nbElements, int order, bool renormalize, bool quantity)
+{
+    int mpiSize, mpiRank;
+    MPI_Comm_size(communicator, &mpiSize);
+    MPI_Comm_rank(communicator, &mpiRank);
+    /* create list of intersections (super mesh elements) for each rank */
+    multimap<int, Polyg *> *elementList = new multimap<int, Polyg *>[mpiSize];
+    for (int j = 0; j < nbElements; j++)
+    {
+        Elt& e = elements[j];
+        for (list<Polyg *>::iterator it = e.is.begin(); it != e.is.end(); it++)
+            elementList[(*it)->id.rank].insert(pair<int, Polyg *>((*it)->id.ind, *it));
+    }
+    int *nbSendElement = new int[mpiSize];
+    int **sendElement = new int*[mpiSize]; /* indices of elements required from other rank */
+    double **recvValue = new double*[mpiSize];
+    double **recvArea = new double*[mpiSize];
+    Coord **recvGrad = new Coord*[mpiSize];
+    GloId **recvNeighIds = new GloId*[mpiSize]; /* ids of the of the source neighbours which also contribute through gradient */
+    for (int rank = 0; rank < mpiSize; rank++)
+    {
+        /* get size for allocation */
+        int last = -1; /* compares unequal to any index */
+        int index = -1; /* increased to starting index 0 in first iteration */
+        for (multimap<int, Polyg *>::iterator it = elementList[rank].begin(); it != elementList[rank].end(); ++it)
+        {
+            if (last != it->first)
+                index++;
+            (it->second)->id.ind = index;
+            last = it->first;
+        }
+        nbSendElement[rank] = index + 1;
+        /* if size is non-zero allocate and collect indices of elements on other ranks that we intersect */
+        if (nbSendElement[rank] > 0)
+        {
+            sendElement[rank] = new int[nbSendElement[rank]];
+            recvValue[rank]   = new double[nbSendElement[rank]];
+            recvArea[rank]    = new double[nbSendElement[rank]];
+            if (order == 2)
+            {
+                recvNeighIds[rank] = new GloId[nbSendElement[rank]*(NMAX+1)];
+                recvGrad[rank]    = new Coord[nbSendElement[rank]*(NMAX+1)];
+            }
+            else
+                recvNeighIds[rank] = new GloId[nbSendElement[rank]];
+            last = -1;
+            index = -1;
+            for (multimap<int, Polyg *>::iterator it = elementList[rank].begin(); it != elementList[rank].end(); ++it)
+            {
+                if (last != it->first)
+                    index++;
+                sendElement[rank][index] = it->first;
+                last = it->first;
+            }
+        }
+    }
+    /* communicate sizes of source elements to be sent (index lists and later values and gradients) */
+    int *nbRecvElement = new int[mpiSize];
+    MPI_Alltoall(nbSendElement, 1, MPI_INT, nbRecvElement, 1, MPI_INT, communicator);
+    /* communicate indices of source elements on other ranks whose value and gradient we need (since intersection) */
+    int nbSendRequest = 0;
+    int nbRecvRequest = 0;
+    int **recvElement = new int*[mpiSize];
+    double **sendValue = new double*[mpiSize];
+    double **sendArea = new double*[mpiSize];
+    Coord **sendGrad = new Coord*[mpiSize];
+    GloId **sendNeighIds = new GloId*[mpiSize];
+    MPI_Request *sendRequest = new MPI_Request[4*mpiSize];
+    MPI_Request *recvRequest = new MPI_Request[4*mpiSize];
+    for (int rank = 0; rank < mpiSize; rank++)
+    {
+        if (nbSendElement[rank] > 0)
+        {
+            MPI_Issend(sendElement[rank], nbSendElement[rank], MPI_INT, rank, 0, communicator, &sendRequest[nbSendRequest]);
+            nbSendRequest++;
+        }
+        if (nbRecvElement[rank] > 0)
+        {
+            recvElement[rank] = new int[nbRecvElement[rank]];
+            sendValue[rank]   = new double[nbRecvElement[rank]];
+            sendArea[rank]   = new double[nbRecvElement[rank]];
+            if (order == 2)
+            {
+                sendNeighIds[rank] = new GloId[nbRecvElement[rank]*(NMAX+1)];
+                sendGrad[rank]    = new Coord[nbRecvElement[rank]*(NMAX+1)];
+            }
+            else
+            {
+                sendNeighIds[rank] = new GloId[nbRecvElement[rank]];
+            }
+            MPI_Irecv(recvElement[rank], nbRecvElement[rank], MPI_INT, rank, 0, communicator, &recvRequest[nbRecvRequest]);
+            nbRecvRequest++;
+        }
+    }
+    MPI_Status *status = new MPI_Status[4*mpiSize];
+    MPI_Waitall(nbSendRequest, sendRequest, status);
+    MPI_Waitall(nbRecvRequest, recvRequest, status);
+    /* for all indices that have been received from requesting ranks: pack values and gradients, then send */
+    nbSendRequest = 0;
+    nbRecvRequest = 0;
+    for (int rank = 0; rank < mpiSize; rank++)
+    {
+        if (nbRecvElement[rank] > 0)
+        {
+            int jj = 0; // jj == j if no weight writing
+            for (int j = 0; j < nbRecvElement[rank]; j++)
+            {
+                sendValue[rank][j] = sstree.localElements[recvElement[rank][j]].val;
+                sendArea[rank][j] = sstree.localElements[recvElement[rank][j]].area;
+                if (order == 2)
+                {
+                    sendGrad[rank][jj] = sstree.localElements[recvElement[rank][j]].grad;
+                    sendNeighIds[rank][jj] = sstree.localElements[recvElement[rank][j]].src_id;
+                    jj++;
+                    for (int i = 0; i < NMAX; i++)
+                    {
+                        sendGrad[rank][jj] = sstree.localElements[recvElement[rank][j]].gradNeigh[i];
+                        sendNeighIds[rank][jj] = sstree.localElements[recvElement[rank][j]].neighId[i];
+                        jj++;
+                    }
+                }
+                else
+                    sendNeighIds[rank][j] = sstree.localElements[recvElement[rank][j]].src_id;
+            }
+            MPI_Issend(sendValue[rank], nbRecvElement[rank], MPI_DOUBLE, rank, 0, communicator, &sendRequest[nbSendRequest]);
+            nbSendRequest++;
+            MPI_Issend(sendArea[rank], nbRecvElement[rank], MPI_DOUBLE, rank, 1, communicator, &sendRequest[nbSendRequest]);
+            nbSendRequest++;
+            if (order == 2)
+            {
+                MPI_Issend(sendGrad[rank], 3*nbRecvElement[rank]*(NMAX+1),
+                        MPI_DOUBLE, rank, 2, communicator, &sendRequest[nbSendRequest]);
+                nbSendRequest++;
+                MPI_Issend(sendNeighIds[rank], 4*nbRecvElement[rank]*(NMAX+1), MPI_INT, rank, 3, communicator, &sendRequest[nbSendRequest]);
+                //ym  --> beware of the size of GloId
+                nbSendRequest++;
+            }
+            else
+            {
+                MPI_Issend(sendNeighIds[rank], 4*nbRecvElement[rank], MPI_INT, rank, 4, communicator, &sendRequest[nbSendRequest]);
+                //ym  --> beware of the size of GloId
+                nbSendRequest++;
+            }
+        }
+        if (nbSendElement[rank] > 0)
+        {
+            MPI_Irecv(recvValue[rank],  nbSendElement[rank], MPI_DOUBLE, rank, 0, communicator, &recvRequest[nbRecvRequest]);
+            nbRecvRequest++;
+            MPI_Irecv(recvArea[rank],  nbSendElement[rank], MPI_DOUBLE, rank, 1, communicator, &recvRequest[nbRecvRequest]);
+            nbRecvRequest++;
+            if (order == 2)
+            {
+                MPI_Irecv(recvGrad[rank], 3*nbSendElement[rank]*(NMAX+1),
+                        MPI_DOUBLE, rank, 2, communicator, &recvRequest[nbRecvRequest]);
+                nbRecvRequest++;
+                MPI_Irecv(recvNeighIds[rank], 4*nbSendElement[rank]*(NMAX+1), MPI_INT, rank, 3, communicator, &recvRequest[nbRecvRequest]);
+                //ym  --> beware of the size of GloId
+                nbRecvRequest++;
+            }
+            else
+            {
+                MPI_Irecv(recvNeighIds[rank], 4*nbSendElement[rank], MPI_INT, rank, 4, communicator, &recvRequest[nbRecvRequest]);
+                //ym  --> beware of the size of GloId
+                nbRecvRequest++;
+            }
+        }
+    }
+    MPI_Waitall(nbRecvRequest, recvRequest, status);
+    MPI_Waitall(nbSendRequest, sendRequest, status);
+    /* now that all values and gradients are available use them to compute interpolated values on target
+       and also to compute weights */
+    int i = 0;
+    for (int j = 0; j < nbElements; j++)
+    {
+        Elt& e = elements[j];
+        /* since for the 2nd order case source grid elements can contribute to a destination grid element over several "paths"
+           (step1: gradient is computed using neighbours on same grid, step2: intersection uses several elements on other grid)
+           accumulate them so that there is only one final weight between two elements */
+        map<GloId,double> wgt_map;
+        /* for destination element `e` loop over all intersections/the corresponding source elements */
+        for (list<Polyg *>::iterator it = e.is.begin(); it != e.is.end(); it++)
+        {
+            /* it is the intersection element, so it->x and it->area are barycentre and area of intersection element (super mesh)
+               but it->id is id of the source element that it intersects */
+            int n1 = (*it)->id.ind;
+            int rank = (*it)->id.rank;
+            double fk = recvValue[rank][n1];
+            double srcArea = recvArea[rank][n1];
+            double w = (*it)->area;
+            if (quantity) w/=srcArea ;
+            /* first order: src value times weight (weight = supermesh area), later divide by target area */
+            int kk = (order == 2) ? n1 * (NMAX + 1) : n1;
+            GloId neighID = recvNeighIds[rank][kk];
+            wgt_map[neighID] += w;
+            if (order == 2)
+            {
+                for (int k = 0; k < NMAX+1; k++)
+                {
+                    int kk = n1 * (NMAX + 1) + k;
+                    GloId neighID = recvNeighIds[rank][kk];
+                    if (neighID.ind != -1)  wgt_map[neighID] += w * scalarprod(recvGrad[rank][kk], (*it)->x);
+                }
+            }
+        }
+        double renorm=0;
+        if (renormalize)
+            for (map<GloId,double>::iterator it = wgt_map.begin(); it != wgt_map.end(); it++) renorm+=it->second / e.area;
+        else renorm=1. ;
+        for (map<GloId,double>::iterator it = wgt_map.begin(); it != wgt_map.end(); it++)
+        {
+            if (quantity)  this->remapMatrix[i] = (it->second ) / renorm;
+            else this->remapMatrix[i] = (it->second / e.area) / renorm;
+            this->srcAddress[i] = it->first.ind;
+            this->srcRank[i] = it->first.rank;
+            this->dstAddress[i] = j;
+            this->sourceWeightId[i]= it->first.globalId ;
+            this->targetWeightId[i]= targetGlobalId[j] ;
+            i++;
+        }
+    }
+    /* free all memory allocated in this function */
+    for (int rank = 0; rank < mpiSize; rank++)
+    {
+        if (nbSendElement[rank] > 0)
+        {
+            delete[] sendElement[rank];
+            delete[] recvValue[rank];
+            delete[] recvArea[rank];
+            if (order == 2)
+            {
+                delete[] recvGrad[rank];
+            }
+            delete[] recvNeighIds[rank];
+        }
+        if (nbRecvElement[rank] > 0)
+        {
+            delete[] recvElement[rank];
+            delete[] sendValue[rank];
+            delete[] sendArea[rank];
+            if (order == 2)
+                delete[] sendGrad[rank];
+            delete[] sendNeighIds[rank];
+        }
+    }
+    delete[] status;
+    delete[] sendRequest;
+    delete[] recvRequest;
+    delete[] elementList;
+    delete[] nbSendElement;
+    delete[] nbRecvElement;
+    delete[] sendElement;
+    delete[] recvElement;
+    delete[] sendValue;
+    delete[] recvValue;
+    delete[] sendGrad;
+    delete[] recvGrad;
+    delete[] sendNeighIds;
+    delete[] recvNeighIds;
+    return i;
+        int mpiSize, mpiRank;
+        MPI_Comm_size(communicator, &mpiSize);
+        MPI_Comm_rank(communicator, &mpiRank);
+        /* create list of intersections (super mesh elements) for each rank */
+        multimap<int, Polyg *> *elementList = new multimap<int, Polyg *>[mpiSize];
+        for (int j = 0; j < nbElements; j++)
+        {
+                Elt& e = elements[j];
+                for (list<Polyg *>::iterator it = e.is.begin(); it != e.is.end(); it++)
+                        elementList[(*it)->id.rank].insert(pair<int, Polyg *>((*it)->id.ind, *it));
+        }
+        int *nbSendElement = new int[mpiSize];
+        int **sendElement = new int*[mpiSize]; /* indices of elements required from other rank */
+        double **recvValue = new double*[mpiSize];
+        double **recvArea = new double*[mpiSize];
+        Coord **recvGrad = new Coord*[mpiSize];
+        GloId **recvNeighIds = new GloId*[mpiSize]; /* ids of the of the source neighbours which also contribute through gradient */
+        for (int rank = 0; rank < mpiSize; rank++)
+        {
+                /* get size for allocation */
+                int last = -1; /* compares unequal to any index */
+                int index = -1; /* increased to starting index 0 in first iteration */
+                for (multimap<int, Polyg *>::iterator it = elementList[rank].begin(); it != elementList[rank].end(); ++it)
+                {
+                        if (last != it->first)
+                                index++;
+                        (it->second)->id.ind = index;
+                        last = it->first;
+                }
+                nbSendElement[rank] = index + 1;
+                /* if size is non-zero allocate and collect indices of elements on other ranks that we intersect */
+                if (nbSendElement[rank] > 0)
+                {
+                        sendElement[rank] = new int[nbSendElement[rank]];
+                        recvValue[rank]   = new double[nbSendElement[rank]];
+                        recvArea[rank]    = new double[nbSendElement[rank]];
+                        if (order == 2)
+                        {
+                                recvNeighIds[rank] = new GloId[nbSendElement[rank]*(NMAX+1)];
+                                recvGrad[rank]    = new Coord[nbSendElement[rank]*(NMAX+1)];
+                        }
+                        else
+                                recvNeighIds[rank] = new GloId[nbSendElement[rank]];
+                        last = -1;
+                        index = -1;
+                        for (multimap<int, Polyg *>::iterator it = elementList[rank].begin(); it != elementList[rank].end(); ++it)
+                        {
+                                if (last != it->first)
+                                        index++;
+                                sendElement[rank][index] = it->first;
+                                last = it->first;
+                        }
+                }
+        }
+        /* communicate sizes of source elements to be sent (index lists and later values and gradients) */
+        int *nbRecvElement = new int[mpiSize];
+        MPI_Alltoall(nbSendElement, 1, MPI_INT, nbRecvElement, 1, MPI_INT, communicator);
+        /* communicate indices of source elements on other ranks whoes value and gradient we need (since intersection) */
+        int nbSendRequest = 0;
+        int nbRecvRequest = 0;
+        int **recvElement = new int*[mpiSize];
+        double **sendValue = new double*[mpiSize];
+        double **sendArea = new double*[mpiSize];
+        Coord **sendGrad = new Coord*[mpiSize];
+        GloId **sendNeighIds = new GloId*[mpiSize];
+        MPI_Request *sendRequest = new MPI_Request[4*mpiSize];
+        MPI_Request *recvRequest = new MPI_Request[4*mpiSize];
+        for (int rank = 0; rank < mpiSize; rank++)
+        {
+                if (nbSendElement[rank] > 0)
+                {
+                        MPI_Issend(sendElement[rank], nbSendElement[rank], MPI_INT, rank, 0, communicator, &sendRequest[nbSendRequest]);
+                        nbSendRequest++;
+                }
+                if (nbRecvElement[rank] > 0)
+                {
+                        recvElement[rank] = new int[nbRecvElement[rank]];
+                        sendValue[rank]   = new double[nbRecvElement[rank]];
+                        sendArea[rank]   = new double[nbRecvElement[rank]];
+                        if (order == 2)
+                        {
+                                sendNeighIds[rank] = new GloId[nbRecvElement[rank]*(NMAX+1)];
+                                sendGrad[rank]    = new Coord[nbRecvElement[rank]*(NMAX+1)];
+                        }
+                        else
+                        {
+                                sendNeighIds[rank] = new GloId[nbRecvElement[rank]];
+                        }
+                        MPI_Irecv(recvElement[rank], nbRecvElement[rank], MPI_INT, rank, 0, communicator, &recvRequest[nbRecvRequest]);
+                        nbRecvRequest++;
+                }
+        }
+  MPI_Status *status = new MPI_Status[4*mpiSize];
+  MPI_Waitall(nbSendRequest, sendRequest, status);
+  MPI_Waitall(nbRecvRequest, recvRequest, status);
+        /* for all indices that have been received from requesting ranks: pack values and gradients, then send */
+        nbSendRequest = 0;
+        nbRecvRequest = 0;
+        for (int rank = 0; rank < mpiSize; rank++)
+        {
+                if (nbRecvElement[rank] > 0)
+                {
+                        int jj = 0; // jj == j if no weight writing
+                        for (int j = 0; j < nbRecvElement[rank]; j++)
+                        {
+                                sendValue[rank][j] = sstree.localElements[recvElement[rank][j]].val;
+                                sendArea[rank][j] = sstree.localElements[recvElement[rank][j]].area;
+                                if (order == 2)
+                                {
+                                        sendGrad[rank][jj] = sstree.localElements[recvElement[rank][j]].grad;
+                                        sendNeighIds[rank][jj] = sstree.localElements[recvElement[rank][j]].src_id;
+                                        jj++;
+                                        for (int i = 0; i < NMAX; i++)
+                                        {
+                                                sendGrad[rank][jj] = sstree.localElements[recvElement[rank][j]].gradNeigh[i];
+            sendNeighIds[rank][jj] = sstree.localElements[recvElement[rank][j]].neighId[i];
+                                                jj++;
+                                        }
+                                }
+                                else
+                                        sendNeighIds[rank][j] = sstree.localElements[recvElement[rank][j]].src_id;
+                        }
+                        MPI_Issend(sendValue[rank],  nbRecvElement[rank], MPI_DOUBLE, rank, 0, communicator, &sendRequest[nbSendRequest]);
+                        nbSendRequest++;
+                        MPI_Issend(sendArea[rank],  nbRecvElement[rank], MPI_DOUBLE, rank, 0, communicator, &sendRequest[nbSendRequest]);
+                        nbSendRequest++;
+                        if (order == 2)
+                        {
+                                MPI_Issend(sendGrad[rank], 3*nbRecvElement[rank]*(NMAX+1),
+                                                                MPI_DOUBLE, rank, 0, communicator, &sendRequest[nbSendRequest]);
+                                nbSendRequest++;
+                                MPI_Issend(sendNeighIds[rank], 4*nbRecvElement[rank]*(NMAX+1), MPI_INT, rank, 0, communicator, &sendRequest[nbSendRequest]);
+//ym  --> attention taille GloId
+                                nbSendRequest++;
+                        }
+                        else
+                        {
+                                MPI_Issend(sendNeighIds[rank], 4*nbRecvElement[rank], MPI_INT, rank, 0, communicator, &sendRequest[nbSendRequest]);
+//ym  --> attention taille GloId
+                                nbSendRequest++;
+                        }
+                }
+                if (nbSendElement[rank] > 0)
+                {
+                        MPI_Irecv(recvValue[rank],  nbSendElement[rank], MPI_DOUBLE, rank, 0, communicator, &recvRequest[nbRecvRequest]);
+                        nbRecvRequest++;
+                        MPI_Irecv(recvArea[rank],  nbSendElement[rank], MPI_DOUBLE, rank, 0, communicator, &recvRequest[nbRecvRequest]);
+                        nbRecvRequest++;
+                        if (order == 2)
+                        {
+                                MPI_Irecv(recvGrad[rank], 3*nbSendElement[rank]*(NMAX+1),
+                                                MPI_DOUBLE, rank, 0, communicator, &recvRequest[nbRecvRequest]);
+                                nbRecvRequest++;
+                                MPI_Irecv(recvNeighIds[rank], 4*nbSendElement[rank]*(NMAX+1), MPI_INT, rank, 0, communicator, &recvRequest[nbRecvRequest]);
+//ym  --> attention taille GloId
+                                nbRecvRequest++;
+                        }
+                        else
+                        {
+                                MPI_Irecv(recvNeighIds[rank], 4*nbSendElement[rank], MPI_INT, rank, 0, communicator, &recvRequest[nbRecvRequest]);
+//ym  --> attention taille GloId
+                                nbRecvRequest++;
+                        }
+                }
+        }
+        MPI_Waitall(nbSendRequest, sendRequest, status);
+        MPI_Waitall(nbRecvRequest, recvRequest, status);
+        /* now that all values and gradients are available use them to computed interpolated values on target
+           and also to compute weights */
+        int i = 0;
+        for (int j = 0; j < nbElements; j++)
+        {
+                Elt& e = elements[j];
+                /* since for the 2nd order case source grid elements can contribute to a destination grid element over several "paths"
+                   (step1: gradient is computed using neighbours on same grid, step2: intersection uses several elements on other grid)
+                   accumulate them so that there is only one final weight between two elements */
+                map<GloId,double> wgt_map;
+                /* for destination element `e` loop over all intersetions/the corresponding source elements */
+                for (list<Polyg *>::iterator it = e.is.begin(); it != e.is.end(); it++)
+                {
+                        /* it is the intersection element, so it->x and it->area are barycentre and area of intersection element (super mesh)
+                        but it->id is id of the source element that it intersects */
+                        int n1 = (*it)->id.ind;
+                        int rank = (*it)->id.rank;
+                        double fk = recvValue[rank][n1];
+                        double srcArea = recvArea[rank][n1];
+                        double w = (*it)->area;
+      if (quantity) w/=srcArea ;
+                        /* first order: src value times weight (weight = supermesh area), later divide by target area */
+                        int kk = (order == 2) ? n1 * (NMAX + 1) : n1;
+                        GloId neighID = recvNeighIds[rank][kk];
+                        wgt_map[neighID] += w;
+                        if (order == 2)
+                        {
+                                for (int k = 0; k < NMAX+1; k++)
+                                {
+                                        int kk = n1 * (NMAX + 1) + k;
+                                        GloId neighID = recvNeighIds[rank][kk];
+                                        if (neighID.ind != -1)  wgt_map[neighID] += w * scalarprod(recvGrad[rank][kk], (*it)->x);
+                                }
+                        }
+                }
+    double renorm=0;
+    if (renormalize)
+      for (map<GloId,double>::iterator it = wgt_map.begin(); it != wgt_map.end(); it++) renorm+=it->second / e.area;
+    else renorm=1. ;
+    for (map<GloId,double>::iterator it = wgt_map.begin(); it != wgt_map.end(); it++)
+                {
+      if (quantity)  this->remapMatrix[i] = (it->second ) / renorm;
+                        else this->remapMatrix[i] = (it->second / e.area) / renorm;
+                        this->srcAddress[i] = it->first.ind;
+                        this->srcRank[i] = it->first.rank;
+                        this->dstAddress[i] = j;
+      this->sourceWeightId[i]= it->first.globalId ;
+      this->targetWeightId[i]= targetGlobalId[j] ;
+                        i++;
+                }
+        }
+        /* free all memory allocated in this function */
+        for (int rank = 0; rank < mpiSize; rank++)
+        {
+                if (nbSendElement[rank] > 0)
+                {
+                        delete[] sendElement[rank];
+                        delete[] recvValue[rank];
+                        delete[] recvArea[rank];
+                        if (order == 2)
+                        {
+                                delete[] recvGrad[rank];
+                        }
+                        delete[] recvNeighIds[rank];
+                }
+                if (nbRecvElement[rank] > 0)
+                {
+                        delete[] recvElement[rank];
+                        delete[] sendValue[rank];
+                        delete[] sendArea[rank];
+                        if (order == 2)
+                                delete[] sendGrad[rank];
+                        delete[] sendNeighIds[rank];
+                }
+        }
+        delete[] status;
+        delete[] sendRequest;
+        delete[] recvRequest;
+        delete[] elementList;
+        delete[] nbSendElement;
+        delete[] nbRecvElement;
+        delete[] sendElement;
+        delete[] recvElement;
+        delete[] sendValue;
+        delete[] recvValue;
+        delete[] sendGrad;
+        delete[] recvGrad;
+        delete[] sendNeighIds;
+        delete[] recvNeighIds;
+        return i;
+}
 /** Gathers the elements needed for gradient computation and triggers it.
  *
  *  Builds one array of Elt pointers holding the entries of sstree.localElements
  *  followed by the entries of neighbourElements, calls update_baryc on the local
  *  elements, then calls computeGradients. Takes no arguments and returns nothing.
  *
  *  NOTE(review): in this diff view the body is listed twice (once on unmarked lines,
  *  once on '+' lines); the two copies differ only in indentation. */
 void Mapper::computeGrads()
+{
     /* array of pointers to collect local elements and elements received from other cpu */
     vector<Elt*> globalElements(sstree.nbLocalElements + nbNeighbourElements);
     /* running write position: local elements fill indices [0, nbLocalElements),
        neighbour elements are appended directly after them */
     int index = 0;
     for (int i = 0; i < sstree.nbLocalElements; i++, index++)
         globalElements[index] = &(sstree.localElements[i]);
     for (int i = 0; i < nbNeighbourElements; i++, index++)
         globalElements[index] = &neighbourElements[i];
     /* only the local elements are passed to update_baryc */
     update_baryc(sstree.localElements, sstree.nbLocalElements);
     /* the pointer array spans local + neighbour elements, but the count passed is
        nbLocalElements only -- presumably gradients are evaluated for local elements
        while neighbours are only looked up; TODO confirm against computeGradients.
        NOTE(review): &globalElements[0] indexes an empty vector when both counts are 0. */
     computeGradients(&globalElements[0], sstree.nbLocalElements);
+        /* array of pointers to collect local elements and elements received from other cpu */
+        vector<Elt*> globalElements(sstree.nbLocalElements + nbNeighbourElements);
+        int index = 0;
+        for (int i = 0; i < sstree.nbLocalElements; i++, index++)
+                globalElements[index] = &(sstree.localElements[i]);
+        for (int i = 0; i < nbNeighbourElements; i++, index++)
+                globalElements[index] = &neighbourElements[i];
+        update_baryc(sstree.localElements, sstree.nbLocalElements);
+        computeGradients(&globalElements[0], sstree.nbLocalElements);
+}
 /** for each element of the source grid, finds all the neighbouring elements that share an edge
   (filling array neighbourElements). This is used later to compute gradients */
+    (filling array neighbourElements). This is used later to compute gradients */
 void Mapper::buildMeshTopology()
+{
     int mpiSize, mpiRank;
     MPI_Comm_size(communicator, &mpiSize);
     MPI_Comm_rank(communicator, &mpiRank);
     vector<Node> *routingList = new vector<Node>[mpiSize];
     vector<vector<int> > routes(sstree.localTree.leafs.size());
     sstree.routeIntersections(routes, sstree.localTree.leafs);
     for (int i = 0; i < routes.size(); ++i)
         for (int k = 0; k < routes[i].size(); ++k)
             routingList[routes[i][k]].push_back(sstree.localTree.leafs[i]);
     routingList[mpiRank].clear();
     CMPIRouting mpiRoute(communicator);
     mpiRoute.init(routes);
     int nRecv = mpiRoute.getTotalSourceElement();
     int *nbSendNode = new int[mpiSize];
     int *nbRecvNode = new int[mpiSize];
     int *sendMessageSize = new int[mpiSize];
     int *recvMessageSize = new int[mpiSize];
     for (int rank = 0; rank < mpiSize; rank++)
+    {
         nbSendNode[rank] = routingList[rank].size();
         sendMessageSize[rank] = 0;
         for (size_t j = 0; j < routingList[rank].size(); j++)
+        {
             Elt *elt = (Elt *) (routingList[rank][j].data);
             sendMessageSize[rank] += packedPolygonSize(*elt);
+        }
+    }
     MPI_Alltoall(nbSendNode, 1, MPI_INT, nbRecvNode, 1, MPI_INT, communicator);
     MPI_Alltoall(sendMessageSize, 1, MPI_INT, recvMessageSize, 1, MPI_INT, communicator);
     char **sendBuffer = new char*[mpiSize];
     char **recvBuffer = new char*[mpiSize];
     int *pos = new int[mpiSize];
     for (int rank = 0; rank < mpiSize; rank++)
+    {
         if (nbSendNode[rank] > 0) sendBuffer[rank] = new char[sendMessageSize[rank]];
         if (nbRecvNode[rank] > 0) recvBuffer[rank] = new char[recvMessageSize[rank]];
+    }
     for (int rank = 0; rank < mpiSize; rank++)
+    {
         pos[rank] = 0;
         for (size_t j = 0; j < routingList[rank].size(); j++)
+        {
             Elt *elt = (Elt *) (routingList[rank][j].data);
             packPolygon(*elt, sendBuffer[rank], pos[rank]);
+        }
+    }
     delete [] routingList;
     int nbSendRequest = 0;
     int nbRecvRequest = 0;
     MPI_Request *sendRequest = new MPI_Request[mpiSize];
     MPI_Request *recvRequest = new MPI_Request[mpiSize];
     MPI_Status  *status      = new MPI_Status[mpiSize];
     for (int rank = 0; rank < mpiSize; rank++)
+    {
         if (nbSendNode[rank] > 0)
+        {
             MPI_Issend(sendBuffer[rank], sendMessageSize[rank], MPI_CHAR, rank, 0, communicator, &sendRequest[nbSendRequest]);
             nbSendRequest++;
+        }
         if (nbRecvNode[rank] > 0)
+        {
             MPI_Irecv(recvBuffer[rank], recvMessageSize[rank], MPI_CHAR, rank, 0, communicator, &recvRequest[nbRecvRequest]);
             nbRecvRequest++;
+        }
+    }
     MPI_Waitall(nbRecvRequest, recvRequest, status);
     MPI_Waitall(nbSendRequest, sendRequest, status);
     for (int rank = 0; rank < mpiSize; rank++)
         if (nbSendNode[rank] > 0) delete [] sendBuffer[rank];
     delete [] sendBuffer;
     char **sendBuffer2 = new char*[mpiSize];
     char **recvBuffer2 = new char*[mpiSize];
     for (int rank = 0; rank < mpiSize; rank++)
+    {
         nbSendNode[rank] = 0;
         sendMessageSize[rank] = 0;
         if (nbRecvNode[rank] > 0)
+        {
             set<NodePtr> neighbourList;
             pos[rank] = 0;
             for (int j = 0; j < nbRecvNode[rank]; j++)
+            {
                 Elt elt;
                 unpackPolygon(elt, recvBuffer[rank], pos[rank]);
                 Node node(elt.x, cptRadius(elt), &elt);
                 findNeighbour(sstree.localTree.root, &node, neighbourList);
+            }
             nbSendNode[rank] = neighbourList.size();
             for (set<NodePtr>::iterator it = neighbourList.begin(); it != neighbourList.end(); it++)
+            {
                 Elt *elt = (Elt *) ((*it)->data);
                 sendMessageSize[rank] += packedPolygonSize(*elt);
+            }
             sendBuffer2[rank] = new char[sendMessageSize[rank]];
             pos[rank] = 0;
             for (set<NodePtr>::iterator it = neighbourList.begin(); it != neighbourList.end(); it++)
+            {
                 Elt *elt = (Elt *) ((*it)->data);
                 packPolygon(*elt, sendBuffer2[rank], pos[rank]);
+            }
+        }
+    }
     for (int rank = 0; rank < mpiSize; rank++)
         if (nbRecvNode[rank] > 0) delete [] recvBuffer[rank];
     delete [] recvBuffer;
     MPI_Barrier(communicator);
     MPI_Alltoall(nbSendNode, 1, MPI_INT, nbRecvNode, 1, MPI_INT, communicator);
     MPI_Alltoall(sendMessageSize, 1, MPI_INT, recvMessageSize, 1, MPI_INT, communicator);
     for (int rank = 0; rank < mpiSize; rank++)
         if (nbRecvNode[rank] > 0) recvBuffer2[rank] = new char[recvMessageSize[rank]];
     nbSendRequest = 0;
     nbRecvRequest = 0;
     for (int rank = 0; rank < mpiSize; rank++)
+    {
         if (nbSendNode[rank] > 0)
+        {
             MPI_Issend(sendBuffer2[rank], sendMessageSize[rank], MPI_CHAR, rank, 0, communicator, &sendRequest[nbSendRequest]);
             nbSendRequest++;
+        }
         if (nbRecvNode[rank] > 0)
+        {
             MPI_Irecv(recvBuffer2[rank], recvMessageSize[rank], MPI_CHAR, rank, 0, communicator, &recvRequest[nbRecvRequest]);
             nbRecvRequest++;
+        }
+    }
     MPI_Waitall(nbRecvRequest, recvRequest, status);
     MPI_Waitall(nbSendRequest, sendRequest, status);
     int nbNeighbourNodes = 0;
     for (int rank = 0; rank < mpiSize; rank++)
         nbNeighbourNodes += nbRecvNode[rank];
     neighbourElements = new Elt[nbNeighbourNodes];
     nbNeighbourElements = nbNeighbourNodes;
     int index = 0;
     for (int rank = 0; rank < mpiSize; rank++)
+    {
         pos[rank] = 0;
         for (int j = 0; j < nbRecvNode[rank]; j++)
+        {
             unpackPolygon(neighbourElements[index], recvBuffer2[rank], pos[rank]);
             neighbourElements[index].id.ind = sstree.localTree.leafs.size() + index;
             index++;
+        }
+    }
     for (int rank = 0; rank < mpiSize; rank++)
+    {
         if (nbRecvNode[rank] > 0) delete [] recvBuffer2[rank];
         if (nbSendNode[rank] > 0) delete [] sendBuffer2[rank];
+    }
     delete [] recvBuffer2;
     delete [] sendBuffer2;
     delete [] sendMessageSize;
     delete [] recvMessageSize;
     delete [] nbSendNode;
     delete [] nbRecvNode;
     delete [] sendRequest;
     delete [] recvRequest;
     delete [] status;
     delete [] pos;
     /* re-compute on received elements to avoid having to send this information */
     neighbourNodes.resize(nbNeighbourNodes);
     setCirclesAndLinks(neighbourElements, neighbourNodes);
     cptAllEltsGeom(neighbourElements, nbNeighbourNodes, srcGrid.pole);
     /* the local SS tree must include nodes from other cpus if they are potential
        intersector of a local node */
     sstree.localTree.insertNodes(neighbourNodes);
     /* for every local element,
        use the SS-tree to find all elements (including neighbourElements)
        who are potential neighbours because their circles intersect,
        then check all canditates for common edges to build up connectivity information
        */
     for (int j = 0; j < sstree.localTree.leafs.size(); j++)
+    {
         Node& node = sstree.localTree.leafs[j];
         /* find all leafs whoes circles that intersect node's circle and save into node->intersectors */
         node.search(sstree.localTree.root);
         Elt *elt = (Elt *)(node.data);
         for (int i = 0; i < elt->n; i++) elt->neighbour[i] = NOT_FOUND;
         /* for element `elt` loop through all nodes in the SS-tree
            whoes circles intersect with the circle around `elt` (the SS intersectors)
            and check if they are neighbours in the sense that the two elements share an edge.
            If they do, save this information for elt */
         for (list<NodePtr>::iterator it = (node.intersectors).begin(); it != (node.intersectors).end(); ++it)
+        {
             Elt *elt2 = (Elt *)((*it)->data);
             set_neighbour(*elt, *elt2);
+        }
+    }
+        int mpiSize, mpiRank;
+        MPI_Comm_size(communicator, &mpiSize);
+        MPI_Comm_rank(communicator, &mpiRank);
+        vector<Node> *routingList = new vector<Node>[mpiSize];
+        vector<vector<int> > routes(sstree.localTree.leafs.size());
+        sstree.routeIntersections(routes, sstree.localTree.leafs);
+        for (int i = 0; i < routes.size(); ++i)
+                for (int k = 0; k < routes[i].size(); ++k)
+                        routingList[routes[i][k]].push_back(sstree.localTree.leafs[i]);
+        routingList[mpiRank].clear();
+        CMPIRouting mpiRoute(communicator);
+        mpiRoute.init(routes);
+        int nRecv = mpiRoute.getTotalSourceElement();
+        int *nbSendNode = new int[mpiSize];
+        int *nbRecvNode = new int[mpiSize];
+        int *sendMessageSize = new int[mpiSize];
+        int *recvMessageSize = new int[mpiSize];
+        for (int rank = 0; rank < mpiSize; rank++)
+        {
+                nbSendNode[rank] = routingList[rank].size();
+                sendMessageSize[rank] = 0;
+                for (size_t j = 0; j < routingList[rank].size(); j++)
+                {
+                        Elt *elt = (Elt *) (routingList[rank][j].data);
+                        sendMessageSize[rank] += packedPolygonSize(*elt);
+                }
+        }
+        MPI_Alltoall(nbSendNode, 1, MPI_INT, nbRecvNode, 1, MPI_INT, communicator);
+        MPI_Alltoall(sendMessageSize, 1, MPI_INT, recvMessageSize, 1, MPI_INT, communicator);
+        char **sendBuffer = new char*[mpiSize];
+        char **recvBuffer = new char*[mpiSize];
+        int *pos = new int[mpiSize];
+        for (int rank = 0; rank < mpiSize; rank++)
+        {
+                if (nbSendNode[rank] > 0) sendBuffer[rank] = new char[sendMessageSize[rank]];
+                if (nbRecvNode[rank] > 0) recvBuffer[rank] = new char[recvMessageSize[rank]];
+        }
+        for (int rank = 0; rank < mpiSize; rank++)
+        {
+                pos[rank] = 0;
+                for (size_t j = 0; j < routingList[rank].size(); j++)
+                {
+                        Elt *elt = (Elt *) (routingList[rank][j].data);
+                        packPolygon(*elt, sendBuffer[rank], pos[rank]);
+                }
+        }
+        delete [] routingList;
+        int nbSendRequest = 0;
+        int nbRecvRequest = 0;
+        MPI_Request *sendRequest = new MPI_Request[mpiSize];
+        MPI_Request *recvRequest = new MPI_Request[mpiSize];
+        MPI_Status  *status      = new MPI_Status[mpiSize];
+        for (int rank = 0; rank < mpiSize; rank++)
+        {
+                if (nbSendNode[rank] > 0)
+                {
+                        MPI_Issend(sendBuffer[rank], sendMessageSize[rank], MPI_CHAR, rank, 0, communicator, &sendRequest[nbSendRequest]);
+                        nbSendRequest++;
+                }
+                if (nbRecvNode[rank] > 0)
+                {
+                        MPI_Irecv(recvBuffer[rank], recvMessageSize[rank], MPI_CHAR, rank, 0, communicator, &recvRequest[nbRecvRequest]);
+                        nbRecvRequest++;
+                }
+        }
+  MPI_Waitall(nbRecvRequest, recvRequest, status);
+  MPI_Waitall(nbSendRequest, sendRequest, status);
+        for (int rank = 0; rank < mpiSize; rank++)
+                if (nbSendNode[rank] > 0) delete [] sendBuffer[rank];
+        delete [] sendBuffer;
+        char **sendBuffer2 = new char*[mpiSize];
+        char **recvBuffer2 = new char*[mpiSize];
+        for (int rank = 0; rank < mpiSize; rank++)
+        {
+                nbSendNode[rank] = 0;
+                sendMessageSize[rank] = 0;
+                if (nbRecvNode[rank] > 0)
+                {
+                        set<NodePtr> neighbourList;
+                        pos[rank] = 0;
+                        for (int j = 0; j < nbRecvNode[rank]; j++)
+                        {
+                                Elt elt;
+                                unpackPolygon(elt, recvBuffer[rank], pos[rank]);
+                                Node node(elt.x, cptRadius(elt), &elt);
+                                findNeighbour(sstree.localTree.root, &node, neighbourList);
+                        }
+                        nbSendNode[rank] = neighbourList.size();
+                        for (set<NodePtr>::iterator it = neighbourList.begin(); it != neighbourList.end(); it++)
+                        {
+                                Elt *elt = (Elt *) ((*it)->data);
+                                sendMessageSize[rank] += packedPolygonSize(*elt);
+                        }
+                        sendBuffer2[rank] = new char[sendMessageSize[rank]];
+                        pos[rank] = 0;
+                        for (set<NodePtr>::iterator it = neighbourList.begin(); it != neighbourList.end(); it++)
+                        {
+                                Elt *elt = (Elt *) ((*it)->data);
+                                packPolygon(*elt, sendBuffer2[rank], pos[rank]);
+                        }
+                }
+        }
+        for (int rank = 0; rank < mpiSize; rank++)
+                if (nbRecvNode[rank] > 0) delete [] recvBuffer[rank];
+        delete [] recvBuffer;
+        MPI_Barrier(communicator);
+        MPI_Alltoall(nbSendNode, 1, MPI_INT, nbRecvNode, 1, MPI_INT, communicator);
+        MPI_Alltoall(sendMessageSize, 1, MPI_INT, recvMessageSize, 1, MPI_INT, communicator);
+        for (int rank = 0; rank < mpiSize; rank++)
+                if (nbRecvNode[rank] > 0) recvBuffer2[rank] = new char[recvMessageSize[rank]];
+        nbSendRequest = 0;
+        nbRecvRequest = 0;
+        for (int rank = 0; rank < mpiSize; rank++)
+        {
+                if (nbSendNode[rank] > 0)
+                {
+                        MPI_Issend(sendBuffer2[rank], sendMessageSize[rank], MPI_CHAR, rank, 0, communicator, &sendRequest[nbSendRequest]);
+                        nbSendRequest++;
+                }
+                if (nbRecvNode[rank] > 0)
+                {
+                        MPI_Irecv(recvBuffer2[rank], recvMessageSize[rank], MPI_CHAR, rank, 0, communicator, &recvRequest[nbRecvRequest]);
+                        nbRecvRequest++;
+                }
+        }
+        MPI_Waitall(nbRecvRequest, recvRequest, status);
+        MPI_Waitall(nbSendRequest, sendRequest, status);
+        int nbNeighbourNodes = 0;
+        for (int rank = 0; rank < mpiSize; rank++)
+                nbNeighbourNodes += nbRecvNode[rank];
+        neighbourElements = new Elt[nbNeighbourNodes];
+        nbNeighbourElements = nbNeighbourNodes;
+        int index = 0;
+        for (int rank = 0; rank < mpiSize; rank++)
+        {
+                pos[rank] = 0;
+                for (int j = 0; j < nbRecvNode[rank]; j++)
+                {
+                        unpackPolygon(neighbourElements[index], recvBuffer2[rank], pos[rank]);
+                        neighbourElements[index].id.ind = sstree.localTree.leafs.size() + index;
+                        index++;
+                }
+        }
+        for (int rank = 0; rank < mpiSize; rank++)
+        {
+                if (nbRecvNode[rank] > 0) delete [] recvBuffer2[rank];
+                if (nbSendNode[rank] > 0) delete [] sendBuffer2[rank];
+        }
+        delete [] recvBuffer2;
+        delete [] sendBuffer2;
+        delete [] sendMessageSize;
+        delete [] recvMessageSize;
+        delete [] nbSendNode;
+        delete [] nbRecvNode;
+        delete [] sendRequest;
+        delete [] recvRequest;
+        delete [] status;
+        delete [] pos;
+        /* re-compute on received elements to avoid having to send this information */
+        neighbourNodes.resize(nbNeighbourNodes);
+        setCirclesAndLinks(neighbourElements, neighbourNodes);
+        cptAllEltsGeom(neighbourElements, nbNeighbourNodes, srcGrid.pole);
+        /* the local SS tree must include nodes from other CPUs if they are potential
+           intersectors of a local node */
+        sstree.localTree.insertNodes(neighbourNodes);
+        /* for every local element,
+           use the SS-tree to find all elements (including neighbourElements)
+           that are potential neighbours because their circles intersect,
+           then check all candidates for common edges to build up connectivity information
+        */
+        for (int j = 0; j < sstree.localTree.leafs.size(); j++)
+        {
+                Node& node = sstree.localTree.leafs[j];
+                /* find all leafs whose circles intersect node's circle and save them into node->intersectors */
+                node.search(sstree.localTree.root);
+                Elt *elt = (Elt *)(node.data);
+                for (int i = 0; i < elt->n; i++) elt->neighbour[i] = NOT_FOUND;
+                /* for element `elt` loop through all nodes in the SS-tree
+                   whose circles intersect with the circle around `elt` (the SS intersectors)
+                   and check if they are neighbours in the sense that the two elements share an edge.
+                   If they do, save this information for elt */
+                for (list<NodePtr>::iterator it = (node.intersectors).begin(); it != (node.intersectors).end(); ++it)
+                {
+                        Elt *elt2 = (Elt *)((*it)->data);
+                        set_neighbour(*elt, *elt2);
+                }
+        }
+}
 …
 void Mapper::computeIntersection(Elt *elements, int nbElements)
+{
     int mpiSize, mpiRank;
     MPI_Comm_size(communicator, &mpiSize);
     MPI_Comm_rank(communicator, &mpiRank);
     MPI_Barrier(communicator);
     vector<Node> *routingList = new vector<Node>[mpiSize];
     vector<Node> routeNodes;  routeNodes.reserve(nbElements);
     for (int j = 0; j < nbElements; j++)
+    {
         elements[j].id.ind = j;
         elements[j].id.rank = mpiRank;
         routeNodes.push_back(Node(elements[j].x, cptRadius(elements[j]), &elements[j]));
+    }
     vector<vector<int> > routes(routeNodes.size());
     sstree.routeIntersections(routes, routeNodes);
     for (int i = 0; i < routes.size(); ++i)
         for (int k = 0; k < routes[i].size(); ++k)
             routingList[routes[i][k]].push_back(routeNodes[i]);
     if (verbose >= 2)
+    {
         cout << " --> rank  " << mpiRank << " nbElements " << nbElements << " : ";
         for (int rank = 0; rank < mpiSize; rank++)
             cout << routingList[rank].size() << "   ";
         cout << endl;
+    }
     MPI_Barrier(communicator);
     int *nbSendNode = new int[mpiSize];
     int *nbRecvNode = new int[mpiSize];
     int *sentMessageSize = new int[mpiSize];
     int *recvMessageSize = new int[mpiSize];
     for (int rank = 0; rank < mpiSize; rank++)
+    {
         nbSendNode[rank] = routingList[rank].size();
         sentMessageSize[rank] = 0;
         for (size_t j = 0; j < routingList[rank].size(); j++)
+        {
             Elt *elt = (Elt *) (routingList[rank][j].data);
             sentMessageSize[rank] += packedPolygonSize(*elt);
+        }
+    }
     MPI_Alltoall(nbSendNode, 1, MPI_INT, nbRecvNode, 1, MPI_INT, communicator);
     MPI_Alltoall(sentMessageSize, 1, MPI_INT, recvMessageSize, 1, MPI_INT, communicator);
     int total = 0;
     for (int rank = 0; rank < mpiSize; rank++)
+    {
         total = total + nbRecvNode[rank];
+    }
     if (verbose >= 2) cout << "---> rank " << mpiRank << " : compute intersection : total received nodes  " << total << endl;
     char **sendBuffer = new char*[mpiSize];
     char **recvBuffer = new char*[mpiSize];
     int *pos = new int[mpiSize];
     for (int rank = 0; rank < mpiSize; rank++)
+    {
         if (nbSendNode[rank] > 0) sendBuffer[rank] = new char[sentMessageSize[rank]];
         if (nbRecvNode[rank] > 0) recvBuffer[rank] = new char[recvMessageSize[rank]];
+    }
     for (int rank = 0; rank < mpiSize; rank++)
+    {
         pos[rank] = 0;
         for (size_t j = 0; j < routingList[rank].size(); j++)
+        {
             Elt* elt = (Elt *) (routingList[rank][j].data);
             packPolygon(*elt, sendBuffer[rank], pos[rank]);
+        }
+    }
     delete [] routingList;
     int nbSendRequest = 0;
     int nbRecvRequest = 0;
     MPI_Request *sendRequest = new MPI_Request[mpiSize];
     MPI_Request *recvRequest = new MPI_Request[mpiSize];
     MPI_Status   *status = new MPI_Status[mpiSize];
     for (int rank = 0; rank < mpiSize; rank++)
+    {
         if (nbSendNode[rank] > 0)
+        {
             MPI_Issend(sendBuffer[rank], sentMessageSize[rank], MPI_CHAR, rank, 0, communicator, &sendRequest[nbSendRequest]);
             nbSendRequest++;
+        }
         if (nbRecvNode[rank] > 0)
+        {
             MPI_Irecv(recvBuffer[rank], recvMessageSize[rank], MPI_CHAR, rank, 0, communicator, &recvRequest[nbRecvRequest]);
             nbRecvRequest++;
+        }
+    }
     MPI_Waitall(nbRecvRequest, recvRequest, status);
     MPI_Waitall(nbSendRequest, sendRequest, status);
     char **sendBuffer2 = new char*[mpiSize];
     char **recvBuffer2 = new char*[mpiSize];
     double tic = cputime();
     for (int rank = 0; rank < mpiSize; rank++)
+    {
         sentMessageSize[rank] = 0;
         if (nbRecvNode[rank] > 0)
+        {
             Elt *recvElt = new Elt[nbRecvNode[rank]];
             pos[rank] = 0;
             for (int j = 0; j < nbRecvNode[rank]; j++)
+            {
                 unpackPolygon(recvElt[j], recvBuffer[rank], pos[rank]);
                 cptEltGeom(recvElt[j], tgtGrid.pole);
                 Node recvNode(recvElt[j].x, cptRadius(recvElt[j]), &recvElt[j]);
                 recvNode.search(sstree.localTree.root);
                 /* for a node holding an element of the target, loop through candidates for intersecting source */
                 for (list<NodePtr>::iterator it = (recvNode.intersectors).begin(); it != (recvNode.intersectors).end(); ++it)
+                {
                     Elt *elt2 = (Elt *) ((*it)->data);
                     /* recvElt is target, elt2 is source */
                     intersect(&recvElt[j], elt2);
                     //intersect_ym(&recvElt[j], elt2);
+                }
                 if (recvElt[j].is.size() > 0) sentMessageSize[rank] += packIntersectionSize(recvElt[j]);
                 // here recvNode goes out of scope
+            }
             if (sentMessageSize[rank] > 0)
+            {
                 sentMessageSize[rank] += sizeof(int);
                 sendBuffer2[rank] = new char[sentMessageSize[rank]];
                 *((int *) sendBuffer2[rank]) = 0;
                 pos[rank] = sizeof(int);
                 for (int j = 0; j < nbRecvNode[rank]; j++)
+                {
                     packIntersection(recvElt[j], sendBuffer2[rank], pos[rank]);
                     //FIXME should be deleted: recvElt[j].delete_intersections(); // intersection areas have been packed to buffer and won't be used any more
+                }
+            }
             delete [] recvElt;
+        }
+    }
     delete [] pos;
     for (int rank = 0; rank < mpiSize; rank++)
+    {
         if (nbSendNode[rank] > 0) delete [] sendBuffer[rank];
         if (nbRecvNode[rank] > 0) delete [] recvBuffer[rank];
         nbSendNode[rank] = 0;
+    }
     if (verbose >= 2) cout << "Rank " << mpiRank << "  Compute (internal) intersection " << cputime() - tic << " s" << endl;
     MPI_Alltoall(sentMessageSize, 1, MPI_INT, recvMessageSize, 1, MPI_INT, communicator);
     for (int rank = 0; rank < mpiSize; rank++)
         if (recvMessageSize[rank] > 0)
             recvBuffer2[rank] = new char[recvMessageSize[rank]];
     nbSendRequest = 0;
     nbRecvRequest = 0;
     for (int rank = 0; rank < mpiSize; rank++)
+    {
         if (sentMessageSize[rank] > 0)
+        {
             MPI_Issend(sendBuffer2[rank], sentMessageSize[rank], MPI_CHAR, rank, 0, communicator, &sendRequest[nbSendRequest]);
             nbSendRequest++;
+        }
         if (recvMessageSize[rank] > 0)
+        {
             MPI_Irecv(recvBuffer2[rank], recvMessageSize[rank], MPI_CHAR, rank, 0, communicator, &recvRequest[nbRecvRequest]);
             nbRecvRequest++;
+        }
+    }
     MPI_Waitall(nbRecvRequest, recvRequest, status);
     MPI_Waitall(nbSendRequest, sendRequest, status);
     delete [] sendRequest;
     delete [] recvRequest;
     delete [] status;
     for (int rank = 0; rank < mpiSize; rank++)
+    {
         if (nbRecvNode[rank] > 0)
+        {
             if (sentMessageSize[rank] > 0)
                 delete [] sendBuffer2[rank];
+        }
         if (recvMessageSize[rank] > 0)
+        {
             unpackIntersection(elements, recvBuffer2[rank]);
             delete [] recvBuffer2[rank];
+        }
+    }
     delete [] sendBuffer2;
     delete [] recvBuffer2;
     delete [] sendBuffer;
     delete [] recvBuffer;
     delete [] nbSendNode;
     delete [] nbRecvNode;
     delete [] sentMessageSize;
     delete [] recvMessageSize;
+        int mpiSize, mpiRank;
+        MPI_Comm_size(communicator, &mpiSize);
+        MPI_Comm_rank(communicator, &mpiRank);
+        MPI_Barrier(communicator);
+        vector<Node> *routingList = new vector<Node>[mpiSize];
+        vector<Node> routeNodes;  routeNodes.reserve(nbElements);
+        for (int j = 0; j < nbElements; j++)
+        {
+                elements[j].id.ind = j;
+                elements[j].id.rank = mpiRank;
+                routeNodes.push_back(Node(elements[j].x, cptRadius(elements[j]), &elements[j]));
+        }
+        vector<vector<int> > routes(routeNodes.size());
+        sstree.routeIntersections(routes, routeNodes);
+        for (int i = 0; i < routes.size(); ++i)
+                for (int k = 0; k < routes[i].size(); ++k)
+                        routingList[routes[i][k]].push_back(routeNodes[i]);
+        if (verbose >= 2)
+        {
+                cout << " --> rank  " << mpiRank << " nbElements " << nbElements << " : ";
+                for (int rank = 0; rank < mpiSize; rank++)
+                        cout << routingList[rank].size() << "   ";
+                cout << endl;
+        }
+        MPI_Barrier(communicator);
+        int *nbSendNode = new int[mpiSize];
+        int *nbRecvNode = new int[mpiSize];
+        int *sentMessageSize = new int[mpiSize];
+        int *recvMessageSize = new int[mpiSize];
+        for (int rank = 0; rank < mpiSize; rank++)
+        {
+                nbSendNode[rank] = routingList[rank].size();
+                sentMessageSize[rank] = 0;
+                for (size_t j = 0; j < routingList[rank].size(); j++)
+                {
+                        Elt *elt = (Elt *) (routingList[rank][j].data);
+                        sentMessageSize[rank] += packedPolygonSize(*elt);
+                }
+        }
+        MPI_Alltoall(nbSendNode, 1, MPI_INT, nbRecvNode, 1, MPI_INT, communicator);
+        MPI_Alltoall(sentMessageSize, 1, MPI_INT, recvMessageSize, 1, MPI_INT, communicator);
+        int total = 0;
+        for (int rank = 0; rank < mpiSize; rank++)
+        {
+                total = total + nbRecvNode[rank];
+        }
+        if (verbose >= 2) cout << "---> rank " << mpiRank << " : compute intersection : total received nodes  " << total << endl;
+        char **sendBuffer = new char*[mpiSize];
+        char **recvBuffer = new char*[mpiSize];
+        int *pos = new int[mpiSize];
+        for (int rank = 0; rank < mpiSize; rank++)
+        {
+                if (nbSendNode[rank] > 0) sendBuffer[rank] = new char[sentMessageSize[rank]];
+                if (nbRecvNode[rank] > 0) recvBuffer[rank] = new char[recvMessageSize[rank]];
+        }
+        for (int rank = 0; rank < mpiSize; rank++)
+        {
+                pos[rank] = 0;
+                for (size_t j = 0; j < routingList[rank].size(); j++)
+                {
+                        Elt* elt = (Elt *) (routingList[rank][j].data);
+                        packPolygon(*elt, sendBuffer[rank], pos[rank]);
+                }
+        }
+        delete [] routingList;
+        int nbSendRequest = 0;
+        int nbRecvRequest = 0;
+        MPI_Request *sendRequest = new MPI_Request[mpiSize];
+        MPI_Request *recvRequest = new MPI_Request[mpiSize];
+        MPI_Status   *status = new MPI_Status[mpiSize];
+        for (int rank = 0; rank < mpiSize; rank++)
+        {
+                if (nbSendNode[rank] > 0)
+                {
+                        MPI_Issend(sendBuffer[rank], sentMessageSize[rank], MPI_CHAR, rank, 0, communicator, &sendRequest[nbSendRequest]);
+                        nbSendRequest++;
+                }
+                if (nbRecvNode[rank] > 0)
+                {
+                        MPI_Irecv(recvBuffer[rank], recvMessageSize[rank], MPI_CHAR, rank, 0, communicator, &recvRequest[nbRecvRequest]);
+                        nbRecvRequest++;
+                }
+        }
+        MPI_Waitall(nbRecvRequest, recvRequest, status);
+        MPI_Waitall(nbSendRequest, sendRequest, status);
+        char **sendBuffer2 = new char*[mpiSize];
+        char **recvBuffer2 = new char*[mpiSize];
+        double tic = cputime();
+        for (int rank = 0; rank < mpiSize; rank++)
+        {
+                sentMessageSize[rank] = 0;
+                if (nbRecvNode[rank] > 0)
+                {
+                        Elt *recvElt = new Elt[nbRecvNode[rank]];
+                        pos[rank] = 0;
+                        for (int j = 0; j < nbRecvNode[rank]; j++)
+                        {
+                                unpackPolygon(recvElt[j], recvBuffer[rank], pos[rank]);
+                                cptEltGeom(recvElt[j], tgtGrid.pole);
+                                Node recvNode(recvElt[j].x, cptRadius(recvElt[j]), &recvElt[j]);
+                                recvNode.search(sstree.localTree.root);
+                                /* for a node holding an element of the target, loop through candidates for intersecting source */
+                                for (list<NodePtr>::iterator it = (recvNode.intersectors).begin(); it != (recvNode.intersectors).end(); ++it)
+                                {
+                                        Elt *elt2 = (Elt *) ((*it)->data);
+                                        /* recvElt is target, elt2 is source */
+//                                      intersect(&recvElt[j], elt2);
+                                        intersect_ym(&recvElt[j], elt2);
+                                }
+                                if (recvElt[j].is.size() > 0) sentMessageSize[rank] += packIntersectionSize(recvElt[j]);
+                                // here recvNode goes out of scope
+                        }
+                        if (sentMessageSize[rank] > 0)
+                        {
+                                sentMessageSize[rank] += sizeof(int);
+                                sendBuffer2[rank] = new char[sentMessageSize[rank]];
+                                *((int *) sendBuffer2[rank]) = 0;
+                                pos[rank] = sizeof(int);
+                                for (int j = 0; j < nbRecvNode[rank]; j++)
+                                {
+                                        packIntersection(recvElt[j], sendBuffer2[rank], pos[rank]);
+                                        //FIXME should be deleted: recvElt[j].delete_intersections(); // intersection areas have been packed to buffer and won't be used any more
+                                }
+                        }
+                        delete [] recvElt;
+                }
+        }
+        delete [] pos;
+        for (int rank = 0; rank < mpiSize; rank++)
+        {
+                if (nbSendNode[rank] > 0) delete [] sendBuffer[rank];
+                if (nbRecvNode[rank] > 0) delete [] recvBuffer[rank];
+                nbSendNode[rank] = 0;
+        }
+        if (verbose >= 2) cout << "Rank " << mpiRank << "  Compute (internal) intersection " << cputime() - tic << " s" << endl;
+        MPI_Alltoall(sentMessageSize, 1, MPI_INT, recvMessageSize, 1, MPI_INT, communicator);
+        for (int rank = 0; rank < mpiSize; rank++)
+                if (recvMessageSize[rank] > 0)
+                        recvBuffer2[rank] = new char[recvMessageSize[rank]];
+        nbSendRequest = 0;
+        nbRecvRequest = 0;
+        for (int rank = 0; rank < mpiSize; rank++)
+        {
+                if (sentMessageSize[rank] > 0)
+                {
+                        MPI_Issend(sendBuffer2[rank], sentMessageSize[rank], MPI_CHAR, rank, 0, communicator, &sendRequest[nbSendRequest]);
+                        nbSendRequest++;
+                }
+                if (recvMessageSize[rank] > 0)
+                {
+                        MPI_Irecv(recvBuffer2[rank], recvMessageSize[rank], MPI_CHAR, rank, 0, communicator, &recvRequest[nbRecvRequest]);
+                        nbRecvRequest++;
+                }
+        }
+  MPI_Waitall(nbRecvRequest, recvRequest, status);
+  MPI_Waitall(nbSendRequest, sendRequest, status);
+        delete [] sendRequest;
+        delete [] recvRequest;
+        delete [] status;
+        for (int rank = 0; rank < mpiSize; rank++)
+        {
+                if (nbRecvNode[rank] > 0)
+                {
+                        if (sentMessageSize[rank] > 0)
+                                delete [] sendBuffer2[rank];
+                }
+                if (recvMessageSize[rank] > 0)
+                {
+                        unpackIntersection(elements, recvBuffer2[rank]);
+                        delete [] recvBuffer2[rank];
+                }
+        }
+        delete [] sendBuffer2;
+        delete [] recvBuffer2;
+        delete [] sendBuffer;
+        delete [] recvBuffer;
+        delete [] nbSendNode;
+        delete [] nbRecvNode;
+        delete [] sentMessageSize;
+        delete [] recvMessageSize;
+}
 Mapper::~Mapper()
+{
     delete [] remapMatrix;
     delete [] srcAddress;
     delete [] srcRank;
     delete [] dstAddress;
     if (neighbourElements) delete [] neighbourElements;
+}
+}
+        delete [] remapMatrix;
+        delete [] srcAddress;
+        delete [] srcRank;
+        delete [] dstAddress;
+        if (neighbourElements) delete [] neighbourElements;
+}
+}

XIOS/dev/branch_openmp/extern/remap/src/mapper.hpp

-                      r1134
+                      r1328
 #include "parallel_tree.hpp"
 #include "mpi.hpp"
-#ifdef _usingEP
-#include "ep_declaration.hpp"
-#endif
 namespace sphereRemap {
 …
+{
 public:
        Mapper(ep_lib::MPI_Comm comm=MPI_COMM_WORLD) : communicator(comm), verbose(SILENT), neighbourElements(NULL), sstree(comm) {}
+       Mapper(ep_lib::MPI_Comm comm) : communicator(comm), verbose(SILENT), neighbourElements(NULL), sstree(comm) {}
        ~Mapper();
        void setVerbosity(verbosity v) {verbose=v ;}

XIOS/dev/branch_openmp/extern/remap/src/mpi_cascade.cpp

r688	r1328
1	1	#include "mpi_cascade.hpp"
2	2	#include <iostream>
	3	using namespace ep_lib;
3	4
4	5	namespace sphereRemap {

XIOS/dev/branch_openmp/extern/remap/src/mpi_cascade.hpp

-                      r694
+                      r1328
+{
 public:
         CCascadeLevel(MPI_Comm comm) : comm(comm)
+        {
                 MPI_Comm_size(comm, &size);
                 MPI_Comm_rank(comm, &rank);
+        }
         int colour() const { return rank % group_size; };
         int key() const { return p_colour() + rank/(p_grp_size*group_size)*p_grp_size; }
+  CCascadeLevel(ep_lib::MPI_Comm comm) : comm(comm)
+  {
+    ep_lib::MPI_Comm_size(comm, &size);
+    ep_lib::MPI_Comm_rank(comm, &rank);
+  }
+  int colour() const { return rank % group_size; };
+  int key() const { return p_colour() + rank/(p_grp_size*group_size)*p_grp_size; }
         // perpendicular group
         int p_colour() const { return (rank%group_size + rank/group_size) % p_grp_size; }
         int p_key() const { return colour() + rank/(p_grp_size*group_size)*group_size; }
+  // perpendicular group
+  int p_colour() const { return (rank%group_size + rank/group_size) % p_grp_size; }
+  int p_key() const { return colour() + rank/(p_grp_size*group_size)*group_size; }
         MPI_Comm comm, pg_comm;
         int rank;
         int size;
         int group_size; // group_size and p_grp_size are interchanged?? FIXME
         int p_grp_size;
+  ep_lib::MPI_Comm comm, pg_comm;
+  int rank;
+  int size;
+  int group_size; // group_size and p_grp_size are interchanged?? FIXME
+  int p_grp_size;
 };
 …
+{
 public:
+        //
+        CMPICascade(int nodes_per_level, MPI_Comm comm);
+  CMPICascade(int nodes_per_level, ep_lib::MPI_Comm comm);
         int num_levels;
         std::vector<CCascadeLevel> level;
+  int num_levels;
+  std::vector<CCascadeLevel> level;
 };

XIOS/dev/branch_openmp/extern/remap/src/mpi_routing.cpp

-                      r1289
+                      r1328
 #include "timerRemap.hpp"
 #include <iostream>
+#ifdef _usingEP
+#include "ep_declaration.hpp"
+#endif
+using namespace ep_lib;
 namespace sphereRemap {
 …
         CTimer::get("CMPIRouting::init(reduce_scatter)").print();
-        MPI_Info info_null;
-        // MPI_Alloc_mem(nbTarget *sizeof(int), info_null, &targetRank);
-        // MPI_Alloc_mem(nbSource *sizeof(int), info_null, &sourceRank);
         MPI_Alloc_mem(nbTarget *sizeof(int), MPI_INFO_NULL, &targetRank);
         MPI_Alloc_mem(nbSource *sizeof(int), MPI_INFO_NULL, &sourceRank);
 …
+        {
                 #ifdef _usingEP
                 MPI_Irecv(&sourceRank[i], 1, MPI_INT, -1, 0, communicator, &request[indexRequest]);
+                MPI_Irecv(&sourceRank[i], 1, MPI_INT, -2, 0, communicator, &request[indexRequest]);
                 #else
                 MPI_Irecv(&sourceRank[i], 1, MPI_INT, MPI_ANY_SOURCE, 0, communicator, &request[indexRequest]);
 …
+        {
                 #ifdef _usingEP
                 MPI_Irecv(&sourceRank[i], 1, MPI_INT, -1, 0, communicator, &request[indexRequest]);
+                MPI_Irecv(&sourceRank[i], 1, MPI_INT, -2, 0, communicator, &request[indexRequest]);
                 #else
                 MPI_Irecv(&sourceRank[i], 1, MPI_INT, MPI_ANY_SOURCE, 0, communicator, &request[indexRequest]);

XIOS/dev/branch_openmp/extern/remap/src/mpi_routing.hpp

-                      r694
+                      r1328
+{
         MPI_Comm communicator;
+        ep_lib::MPI_Comm communicator;
         int mpiRank;
         int mpiSize;
 …
 public:
         CMPIRouting(MPI_Comm comm);
+        CMPIRouting(ep_lib::MPI_Comm comm);
         ~CMPIRouting();
         template<typename T> void init(const std::vector<T>& route, CMPICascade *cascade = NULL);
 …
 template <typename T>
 void alltoalls_known(const std::vector<std::vector<T> >& send, std::vector<std::vector<T> >& recv,
                      const std::vector<int>& ranks, MPI_Comm communicator);
+                     const std::vector<int>& ranks, ep_lib::MPI_Comm communicator);
 template <typename T>
 void alltoalls_unknown(const std::vector<std::vector<T> >& send, std::vector<std::vector<T> >& recv,
                        const std::vector<int>& ranks, MPI_Comm communicator);
+                       const std::vector<int>& ranks, ep_lib::MPI_Comm communicator);
+}
 #endif

XIOS/dev/branch_openmp/extern/remap/src/node.cpp

-                      r1205
+                      r1328
 NodePtr insert(NodePtr thIs, NodePtr node)
+{
   int la = thIs->level; // node to be inserted
   int lb = node->level; // node where insertation
   assert(la < lb); // node to be inserted must have lower level then parent
   //if (thIs->parent) assert(find_in_tree1(thIs) == true);
   NodePtr q = NULL;
   NodePtr chd = NULL;
   node->move(thIs);
   if (la == lb - 1)
+  {
+        int la = thIs->level; // node to be inserted
+        int lb = node->level; // node where insertation
+        assert(la < lb); // node to be inserted must have lower level then parent
+        //if (thIs->parent) assert(find_in_tree1(thIs) == true);
+        NodePtr q = NULL;
+        NodePtr chd = NULL;
+        node->move(thIs);
+        if (la == lb - 1)
+        {
     node->child.push_back(thIs);
     thIs->parent = node;
     if (node->child.size() > MAX_NODE_SZ &&  node->tree->canSplit() ) // with us as additional child `node` is now too large :(
     return (node->reinserted || node->parent == NULL) ? split(node) : reinsert(node);
+  }
   else // la < lb - 1
+  {
     chd = thIs->closest(node->child);
     q = insert(thIs, chd);
+  }
   if ((node->updateCount + 1) % UPDATE_EVERY == 0)
     node->update();
   else
+  {
     if (q) node->remove(q);
     node->inflate(chd);
+  }
+                thIs->parent = node;
+                if (node->child.size() > MAX_NODE_SZ &&  node->tree->canSplit() ) // with us as additional child `node` is now too large :(
+                        return (node->reinserted || node->parent == NULL) ? split(node) : reinsert(node);
+        }
+        else // la < lb - 1
+        {
+                chd = thIs->closest(node->child);
+                q = insert(thIs, chd);
+        }
+        if ((node->updateCount + 1) % UPDATE_EVERY == 0)
+                node->update();
+        else
+        {
+                if (q) node->remove(q);
+                node->inflate(chd);
+        }
   return q;

XIOS/dev/branch_openmp/extern/remap/src/node.hpp

-                      r1153
+                      r1328
 struct Circle
+{
   Coord centre;
   double radius;
+        Coord centre;
+        double radius;
 };
 …
 struct Node
+{
   int level; /* FIXME leafs are 0 and root is max level? */
   int leafCount; /* number of leafs that are descendants of this node (the elements in it's cycle) */
   Coord centre;
   double radius;
   NodePtr parent, ref;
   std::vector<NodePtr> child;
   std::list<NodePtr> intersectors;
   bool reinserted;
   int updateCount;  // double var;
   CBasicTree* tree;
   void *data;
   int route;
+        int level; /* FIXME leafs are 0 and root is max level? */
+        int leafCount; /* number of leafs that are descendants of this node (the elements in it's cycle) */
+        Coord centre;
+        double radius;
+        NodePtr parent, ref;
+        std::vector<NodePtr> child;
+        std::list<NodePtr> intersectors;
+        bool reinserted;
+        int updateCount;  // double var;
+        CBasicTree* tree;
+        void *data;
+        int route;
   bool toDelete ;
   Node() : level(0), leafCount(1), centre(ORIGIN), radius(0), reinserted(false), updateCount(0), toDelete(false) {}
   Node(const Coord& centre, double radius, void *data)
     : level(0), leafCount(1), centre(centre), radius(radius), reinserted(false), updateCount(0), data(data), toDelete(false) {}
+        Node() : level(0), leafCount(1), centre(ORIGIN), radius(0), reinserted(false), updateCount(0), toDelete(false) {}
+        Node(const Coord& centre, double radius, void *data)
+                : level(0), leafCount(1), centre(centre), radius(radius), reinserted(false), updateCount(0), data(data), toDelete(false) {}
 //#ifdef DEBUG
 …
 //#endif
   void move(const NodePtr node);
   void remove(const NodePtr node);
   void inflate(const NodePtr node);
   void update();
+        void move(const NodePtr node);
+        void remove(const NodePtr node);
+        void inflate(const NodePtr node);
+        void update();
   void output(std::ostream& flux, int level, int color) ;
   NodePtr closest(std::vector<NodePtr>& list, int n = CLOSEST);
   NodePtr farthest(std::vector<NodePtr>& list);
   void findClosest(int level, NodePtr src, double& minDist, NodePtr &closest);
   void search(NodePtr node);
   bool centreInside(Node &node);
   bool intersects(NodePtr node);
   bool isInside(Node &node);
   int incluCheck();
+        NodePtr closest(std::vector<NodePtr>& list, int n = CLOSEST);
+        NodePtr farthest(std::vector<NodePtr>& list);
+        void findClosest(int level, NodePtr src, double& minDist, NodePtr &closest);
+        void search(NodePtr node);
+        bool centreInside(Node &node);
+        bool intersects(NodePtr node);
+        bool isInside(Node &node);
+        int incluCheck();
   void checkParent(void) ;
   void printChildren();
   void assignRoute(std::vector<int>::iterator& rank, int level);
   void assignCircleAndPropagateUp(Coord *centres, double *radia, int level);
   void printLevel(int level);
   void routeNode(NodePtr node, int level);
   void routingIntersecting(std::vector<Node>* routingList, NodePtr node);
   void routeIntersection(std::vector<int>& routes, NodePtr node);
+        void printChildren();
+        void assignRoute(std::vector<int>::iterator& rank, int level);
+        void assignCircleAndPropagateUp(Coord *centres, double *radia, int level);
+        void printLevel(int level);
+        void routeNode(NodePtr node, int level);
+        void routingIntersecting(std::vector<Node>* routingList, NodePtr node);
+        void routeIntersection(std::vector<int>& routes, NodePtr node);
   void getNodeLevel(int level,std::list<NodePtr>& NodeList) ;
   bool removeDeletedNodes(int assignLevel) ;

XIOS/dev/branch_openmp/extern/remap/src/parallel_tree.cpp

-                      r1295
+                      r1328
 #include "parallel_tree.hpp"
+using namespace ep_lib;
 namespace sphereRemap {
-extern CRemapGrid srcGrid;
-#pragma omp threadprivate(srcGrid)
-extern CRemapGrid tgtGrid;
-#pragma omp threadprivate(tgtGrid)
 static const int assignLevel = 2;
 …
+{
   int assignLevel = 2;
   int nbSampleNodes = 2*ipow(MAX_NODE_SZ + 1, assignLevel);
+        int assignLevel = 2;
+        int nbSampleNodes = 2*ipow(MAX_NODE_SZ + 1, assignLevel);
 …
   MPI_Comm_size(communicator,&commSize) ;
   // make multiple of two
   nbSampleNodes /= 2;
   nbSampleNodes *= 2;
   //assert( nbTot > nbSampleNodes*commSize) ;
+        // make multiple of two
+        nbSampleNodes /= 2;
+        nbSampleNodes *= 2;
+//  assert( nbTot > nbSampleNodes*commSize) ;
   int nbSampleNodes1 = nbSampleNodes * (nb1*commSize)/(1.*nbTot) ;
 …
   //assert(node.size() > nbSampleNodes);
   //assert(node2.size() > nbSampleNodes);
   //assert(node.size() + node2.size() > nbSampleNodes);
   vector<Node> sampleNodes; sampleNodes.reserve(nbSampleNodes1+nbSampleNodes2);
   vector<int> randomArray1(node.size());
   randomizeArray(randomArray1);
   vector<int> randomArray2(node2.size());
   randomizeArray(randomArray2);
   for (int i = 0; i <nbSampleNodes1; i++) sampleNodes.push_back(Node(node[randomArray1[i%nb1]].centre,  node[randomArray1[i%nb1]].radius, NULL));
   for (int i = 0; i <nbSampleNodes2; i++) sampleNodes.push_back(Node(node2[randomArray2[i%nb2]].centre, node2[randomArray2[i%nb2]].radius, NULL));
+//      assert(node.size() > nbSampleNodes);
+//      assert(node2.size() > nbSampleNodes);
+//      assert(node.size() + node2.size() > nbSampleNodes);
+        vector<Node> sampleNodes; sampleNodes.reserve(nbSampleNodes1+nbSampleNodes2);
+        vector<int> randomArray1(node.size());
+        randomizeArray(randomArray1);
+        vector<int> randomArray2(node2.size());
+        randomizeArray(randomArray2);
+        for (int i = 0; i <nbSampleNodes1; i++) sampleNodes.push_back(Node(node[randomArray1[i%nb1]].centre,  node[randomArray1[i%nb1]].radius, NULL));
+        for (int i = 0; i <nbSampleNodes2; i++) sampleNodes.push_back(Node(node2[randomArray2[i%nb2]].centre, node2[randomArray2[i%nb2]].radius, NULL));
         CTimer::get("buildParallelSampleTree").resume();
 …
         CTimer::get("parallelRouteNode").resume();
         vector<int> route(node.size());
+        cout<<"node.size = "<<node.size()<<endl;
         routeNodes(route /*out*/, node);
         CTimer::get("parallelRouteNode").suspend();

XIOS/dev/branch_openmp/extern/remap/src/parallel_tree.hpp

-                      r1134
+                      r1328
 #include "mpi_cascade.hpp"
 #include "mpi.hpp"
-#ifdef _usingEP
-#include "ep_declaration.hpp"
-#endif
 namespace sphereRemap {
 …
+{
 public:
         CParallelTree(ep_lib::MPI_Comm comm);
         ~CParallelTree();
+  CParallelTree(ep_lib::MPI_Comm comm);
+  ~CParallelTree();
         void build(vector<Node>& node, vector<Node>& node2);
+  void build(vector<Node>& node, vector<Node>& node2);
         void routeNodes(vector<int>& route, vector<Node>& nodes, int level = 0);
         void routeIntersections(vector<vector<int> >& route, vector<Node>& nodes, int level = 0);
+  void routeNodes(vector<int>& route, vector<Node>& nodes, int level = 0);
+  void routeIntersections(vector<vector<int> >& route, vector<Node>& nodes, int level = 0);
         int nbLocalElements;
         Elt* localElements;
+  int nbLocalElements;
+  Elt* localElements;
         CTree localTree;
+  CTree localTree;
 private:
         void updateCirclesForRouting(Coord rootCentre, double rootRadius, int level = 0);
         void buildSampleTreeCascade(vector<Node>& sampleNodes, int level = 0);
         void buildLocalTree(const vector<Node>& node, const vector<int>& route);
         void buildRouteTree();
+  void updateCirclesForRouting(Coord rootCentre, double rootRadius, int level = 0);
+  void buildSampleTreeCascade(vector<Node>& sampleNodes, int level = 0);
+  void buildLocalTree(const vector<Node>& node, const vector<int>& route);
+  void buildRouteTree();
         //CSampleTree sampleTree;
         vector<CSampleTree> treeCascade; // first for sample tree, then for routing tree
         CMPICascade cascade;
         ep_lib::MPI_Comm communicator ;
+  //CSampleTree sampleTree;
+  vector<CSampleTree> treeCascade; // first for sample tree, then for routing tree
+  CMPICascade cascade;
+  ep_lib::MPI_Comm communicator ;
 };

XIOS/dev/branch_openmp/extern/remap/src/polyg.cpp

-                      r1289
+                      r1328
 #include "polyg.hpp"
-#include <stdio.h>
 namespace sphereRemap {
 …
 Coord barycentre(const Coord *x, int n)
+{
         if (n == 0) return ORIGIN;
         Coord bc = ORIGIN;
         for (int i = 0; i < n; i++)
+        {
                 bc = bc + x[i];
+        }
         /* both distances can be equal down to roundoff when norm(bc) < mashineepsilon
            which can occur when weighted with tiny area */
+  //assert(squaredist(bc, proj(bc)) <= squaredist(bc, proj(bc * (-1.0))));
+        assert(squaredist(bc, proj(bc)) <= squaredist(bc, proj(bc * (-1.0))));
+        //if (squaredist(bc, proj(bc)) > squaredist(bc, proj(bc * (-1.0)))) return proj(bc * (-1.0));
         return proj(bc);
 …
+{
         if (N < 3)
                 return 0; /* polygons with less than three vertices have zero area */
+          return 0; /* polygons with less than three vertices have zero area */
         Coord t[3];
         t[0] = barycentre(x, N);
 …
                 t[1] = x[i];
                 t[2] = x[ii];
     double sc=scalarprod(crossprod(t[1] - t[0], t[2] - t[0]), t[0]) ;
+                double sc=scalarprod(crossprod(t[1] - t[0], t[2] - t[0]), t[0]) ;
                 assert(sc >= -1e-10); // Error: tri a l'env (wrong orientation)
                 double area_gc = triarea(t[0], t[1], t[2]);
-                //if(area_gc<=0) printf("area_gc = %e\n", area_gc);
                 double area_sc_gc_moon = 0;
                 if (d[i]) /* handle small circle case */
 …
                         char sgl = (mext > 0) ? -1 : 1;
                         area_sc_gc_moon = sgl * alun(arcdist(t[1], t[2]), fabs(scalarprod(t[1], pole)));
-                        //if(area_sc_gc_moon<=0) printf("area_sc_gc_moon = %e\n", area_sc_gc_moon);
                         gg_exact = gg_exact + sc_gc_moon_normalintegral(t[1], t[2], pole);
+                }
                 area += area_gc + area_sc_gc_moon; /* for "spherical circle segment" sum triangular part (at) and "small moon" and => account for small circle */
                 g[i] = barycentre(t, 3) * (area_gc + area_sc_gc_moon);
-                //printf("g[%d] = (%e,%e,%e) * (%e+%e) = (%e,%e,%e) norm = %e\n", i, barycentre(t, 3).x, barycentre(t, 3).y, barycentre(t, 3).z, area_gc,  area_sc_gc_moon, g[i].x, g[i].y, g[i].z, norm(g[i]));
+        }
         gg = barycentre(g, N);

XIOS/dev/branch_openmp/extern/remap/src/timerRemap.cpp

-                      r1146
+                      r1328
 #include <map>
 #include <iostream>
+using namespace ep_lib;
 namespace sphereRemap {
 …
 using namespace std;
+map<string,CTimer*> *CTimer::allTimer = 0;
+//map<string,CTimer*> CTimer::allTimer;
+map<string,CTimer*> *CTimer::allTimer_ptr = 0;
 CTimer::CTimer(const string& name_) : name(name_)
 …
 CTimer& CTimer::get(const string name)
+{
-        if(allTimer == 0) allTimer = new map<string,CTimer*>;
         map<string,CTimer*>::iterator it;
+        it=(*allTimer).find(name);
+        if (it==(*allTimer).end()) it=(*allTimer).insert(pair<string,CTimer*>(name,new CTimer(name))).first;
+        if(allTimer_ptr == 0) allTimer_ptr = new map<string,CTimer*>;
+        //it=allTimer.find(name);
+        it=allTimer_ptr->find(name);
+        //if (it==allTimer.end()) it=allTimer.insert(pair<string,CTimer*>(name,new CTimer(name))).first;
+        if (it==allTimer_ptr->end()) it=allTimer_ptr->insert(pair<string,CTimer*>(name,new CTimer(name))).first;
         return *(it->second);
+}

XIOS/dev/branch_openmp/extern/remap/src/timerRemap.hpp

-                      r1146
+                      r1328
     void print(void);
     //static map<string,CTimer*> allTimer;
+    static map<string,CTimer*> *allTimer;
+    #pragma omp threadprivate(allTimer)
+    static map<string,CTimer*> *allTimer_ptr;
     static double getTime(void);
     static CTimer& get(string name);

XIOS/dev/branch_openmp/extern/remap/src/tree.cpp

-                      r1172
+                      r1328
 void CBasicTree::routeNodes(vector<int>& route, vector<Node>& nodes, int assignLevel)
+{
   for (int i = 0; i < nodes.size(); i++)
+  {
     root->routeNode(&nodes[i], assignLevel);
     route[i] = nodes[i].route;
+  }
+        for (int i = 0; i < nodes.size(); i++)
+        {
+                root->routeNode(&nodes[i], assignLevel);
+                route[i] = nodes[i].route;
+        }
+}
 void CBasicTree::routeIntersections(vector<vector<int> >& routes, vector<Node>& nodes)
+{
   for (int i = 0; i < nodes.size(); i++)
     root->routeIntersection(routes[i], &nodes[i]);
+        for (int i = 0; i < nodes.size(); i++)
+                root->routeIntersection(routes[i], &nodes[i]);
+}
 void CBasicTree::build(vector<Node>& nodes)
+{
   newRoot(1);
   insertNodes(nodes);
+        newRoot(1);
+        insertNodes(nodes);
+}
 void CBasicTree::output(ostream& flux, int level)
+{
   root->output(flux,level,0) ;
+        root->output(flux,level,0) ;
+}
 void CBasicTree::slim(int nbIts)
+{
   for (int i = 0; i < nbIts; i++)
+  {
     for (int level = root->level - 1; level > 0; level--)
+    {
       slim2(root, level);
       ri = 0;
       emptyPool();
+    }
     for (int level = 2; level < root->level; level++)
+    {
       slim2(root, level);
       ri = 0;
       emptyPool();
+    }
+  }
+        for (int i = 0; i < nbIts; i++)
+        {
+                for (int level = root->level - 1; level > 0; level--)
+                {
+                        slim2(root, level);
+                        ri = 0;
+                        emptyPool();
+                }
+                for (int level = 2; level < root->level; level++)
+                {
+                        slim2(root, level);
+                        ri = 0;
+                        emptyPool();
+                }
+        }
+}
 …
 void CBasicTree::insertNode(NodePtr node)
+{
   node->tree = this;
   increaseLevelSize(0);
   push_back(node);
   NodePtr q;
   while (pool.size())
+  {
     q = pool.front();
     pool.pop_front();
     q = insert(q, root);
     if (ri)
+    {
       delete q;
       ri = 0;
+    }
+  }
+        node->tree = this;
+        increaseLevelSize(0);
+        push_back(node);
+        NodePtr q;
+        while (pool.size())
+        {
+                q = pool.front();
+                pool.pop_front();
+                q = insert(q, root);
+                if (ri)
+                {
+                        delete q;
+                        ri = 0;
+                }
+        }
+}
 void CBasicTree::emptyPool(void)
+{
   while (pool.size())
+  {
     NodePtr q = pool.front();
     pool.pop_front();
     q = insert(q, root);
     if (ri)
+    {
       delete q;
       ri = 0;
+    }
+  }
+        while (pool.size())
+        {
+                NodePtr q = pool.front();
+                pool.pop_front();
+                q = insert(q, root);
+                if (ri)
+                {
+                        delete q;
+                        ri = 0;
+                }
+        }
+}
 …
         root->parent = 0;
         root->leafCount = 0;
+        // initialize root node on the sphere
+        root->centre.x=1 ;
+        root->centre.y=0 ;
+        root->centre.z=0 ;
+// initialize root node on the sphere
+  root->centre.x=1 ; root->centre.y=0 ; root->centre.z=0 ;
         root->radius = 0.;
         root->reinserted = false;

XIOS/dev/branch_openmp/extern/remap/src/tree.hpp

-                      r1172
+                      r1328
 class CBasicTree
+{
   public:
+public:
   NodePtr root; /* The main tree is stored as Nodes which can be reached through traversal starting here */
   NodePtr ref; // FIXME this reference, set by a node is odd, try to remove
   int ri; /** this is set to one by a node in case of reinsertion */
   vector<int> levelSize; /** e.g. levelSize[0] == leafs.size() */
   vector<Node> leafs; /** leafs are stored in vector for easy access and rest of the tree nodes as separate allocations, only reachable through tree traversal */
+        NodePtr root; /* The main tree is stored as Nodes which can be reached through traversal starting here */
+        NodePtr ref; // FIXME this reference, set by a node is odd, try to remove
+        int ri; /** this is set to one by a node in case of reinsertion */
+        vector<int> levelSize; /** e.g. levelSize[0] == leafs.size() */
+        vector<Node> leafs; /** leafs are stored in vector for easy access and rest of the tree nodes as separate allocations, only reachable through tree traversal */
   CBasicTree() : ri(0), levelSize(MAX_LEVEL_SIZE), root(NULL), isAssignedLevel(false), okSplit(true), isActiveOkSplit(false) {}
   ~CBasicTree();
   void build(vector<Node>& nodes);
   void slim(int nbIts = 1);
   virtual void insertNodes(vector<Node>& node) = 0;
+        CBasicTree() : ri(0), levelSize(MAX_LEVEL_SIZE), root(NULL), isAssignedLevel(false), okSplit(true), isActiveOkSplit(false) {}
+        ~CBasicTree();
+        void build(vector<Node>& nodes);
+        void slim(int nbIts = 1);
+        virtual void insertNodes(vector<Node>& node) = 0;
   void routeNodes(vector<int>& route, vector<Node>& nodes, int assignLevel);
   void routeIntersections(vector<vector<int> >& route, vector<Node>& nodes);
+        void routeNodes(vector<int>& route, vector<Node>& nodes, int assignLevel);
+        void routeIntersections(vector<vector<int> >& route, vector<Node>& nodes);
   void push_back(NodePtr node);
   void push_front(NodePtr node);
   void increaseLevelSize(int level);
   void decreaseLevelSize(int level);
   void newRoot(int level);
   void insertNode(NodePtr node);
+        void push_back(NodePtr node);
+        void push_front(NodePtr node);
+        void increaseLevelSize(int level);
+        void decreaseLevelSize(int level);
+        void newRoot(int level);
+        void insertNode(NodePtr node);
   void output(ostream& flux, int level) ;
   int keepNodes;
+        int keepNodes;
   bool isAssignedLevel ;
   int assignLevel;
 …
   private:
   deque<NodePtr > pool;
+private:
+        deque<NodePtr > pool;
   bool okSplit ;
   protected:
+protected:
   void emptyPool();
+  CBasicTree(int keepNodes_, int assignLevel_) : ri(0), levelSize(MAX_LEVEL_SIZE), root(NULL), keepNodes(keepNodes_), assignLevel(assignLevel_), isAssignedLevel(true),
+                                                 okSplit(true), isActiveOkSplit(false) {}
+  CBasicTree(int keepNodes_, int assignLevel_) : ri(0), levelSize(MAX_LEVEL_SIZE), root(NULL), keepNodes(keepNodes_), assignLevel(assignLevel_), isAssignedLevel(true), okSplit(true), isActiveOkSplit(false) {}
 };
 class CTree : public CBasicTree
+{
   public:
   void insertNodes(vector<Node>& nodes);
+public:
+        void insertNodes(vector<Node>& nodes);
 };
 …
+{
   public:
   CSampleTree(int keepNodes_, int assignLevel_) : CBasicTree(keepNodes_,assignLevel_) {}
+public:
+        CSampleTree(int keepNodes_, int assignLevel_) : CBasicTree(keepNodes_,assignLevel_) {}
   void slimAssignedLevel() ;
   void removeExtraNode(void) ;
   void insertNodes(vector<Node>& nodes);
+        void insertNodes(vector<Node>& nodes);
 };

XIOS/dev/branch_openmp/extern/src_ep_dev/ep_fortran.cpp

r1287	r1328
31	31	{
32	32	fc_comm_map.insert(std::make_pair( std::make_pair( fint, omp_get_thread_num()) , comm));
33		//printf("EP_Comm_c2f : MAP %p insert: %d, %d, %p\n", &fc_comm_map, fint, omp_get_thread_num(), comm.ep_comm_ptr);
	33	printf("EP_Comm_c2f : MAP %p insert: %d, %d, %p\n", &fc_comm_map, fint, omp_get_thread_num(), comm.ep_comm_ptr);
34	34	}
35	35	}
…	…
54	54	MPI_Comm comm_ptr;
55	55	comm_ptr = it->second;
56		//printf("EP_Comm_f2c : MAP %p find: %d, %d, %p\n", &fc_comm_map, it->first.first, it->first.second, comm_ptr.ep_comm_ptr);
	56	printf("EP_Comm_f2c : MAP %p find: %d, %d, %p\n", &fc_comm_map, it->first.first, it->first.second, comm_ptr.ep_comm_ptr);
57	57	return comm_ptr;
58	58	}

XIOS/dev/branch_openmp/extern/src_ep_dev/ep_intercomm.cpp

-                      r1287
+                      r1328
       MPI_Waitall(2, request, status);
+      //MPI_Send(&leader_ranks[0], 3, static_cast< ::MPI_Datatype>(MPI_INT), remote_leader, tag, peer_comm);
+      //MPI_Recv(&leader_ranks[3], 3, static_cast< ::MPI_Datatype>(MPI_INT), remote_leader, tag, peer_comm, &status[1]);
+    }

XIOS/dev/branch_openmp/extern/src_ep_dev/ep_lib.cpp

-                      r1295
+                      r1328
 #pragma omp threadprivate(EP_PendingRequests)
 namespace ep_lib
+{
+  bool MPI_Comm::is_null()
+  {
+    if(!this->is_intercomm)
+      return this->mpi_comm == MPI_COMM_NULL.mpi_comm;
+    else
+      return this->ep_comm_ptr->intercomm->mpi_inter_comm == MPI_COMM_NULL.mpi_comm;
+  }
   int tag_combine(int real_tag, int src, int dest)

XIOS/dev/branch_openmp/extern/src_ep_dev/ep_send.cpp

-                      r1295
+                      r1328
     if(!comm.is_ep)
       return ::MPI_Send(buf, count, to_mpi_type(datatype), dest, tag, to_mpi_comm(comm.mpi_comm));
+    MPI_Request request;
+    MPI_Status status;
+    MPI_Isend(buf, count, datatype, dest, tag, comm, &request);
+    MPI_Wait(&request, &status);
+    if(comm.is_intercomm)
+    {
+      MPI_Request request;
+      MPI_Status status;
+      MPI_Isend(buf, count, datatype, dest, tag, comm, &request);
+      MPI_Wait(&request, &status);
+    }
+    else
+    {
+      int ep_src_loc  = comm.ep_comm_ptr->size_rank_info[1].first;
+      int ep_dest_loc = comm.ep_comm_ptr->comm_list->rank_map->at(dest).first;
+      int mpi_tag     = tag_combine(tag, ep_src_loc, ep_dest_loc);
+      int mpi_dest    = comm.ep_comm_ptr->comm_list->rank_map->at(dest).second;
+      ::MPI_Send(buf, count, to_mpi_type(datatype), mpi_dest, mpi_tag, to_mpi_comm(comm.mpi_comm));
+      //printf("call mpi_send for intracomm, dest = %d, tag = %d\n", dest, tag);
+    }
     //check_sum_send(buf, count, datatype, dest, tag, comm);

XIOS/dev/branch_openmp/extern/src_ep_dev/ep_type.hpp

r1295	r1328
344	344	}
345	345
	346	bool is_null();
	347
346	348	};
347	349

Note: See TracChangeset for help on using the changeset viewer.

Context Navigation

Changeset 1328 for XIOS/dev/branch_openmp/extern

Legend:

Download in other formats: