Ignore:
Timestamp:
06/01/18 19:36:12 (6 years ago)
Author:
yushan
Message:

Optimisation partially finished. To do: handle the special cases for intercomm_create and intercomm_merge.

Location:
XIOS/dev/branch_openmp/extern/ep_dev
Files:
7 edited

Legend:

Unmodified
Added
Removed
  • XIOS/dev/branch_openmp/extern/ep_dev/ep_dup.cpp

    r1517 r1518  
    8585        printf("new out_comm[%d]->ep_comm_ptr->intercomm = %p\n", i, out_comm[i]->ep_comm_ptr->intercomm); 
    8686#endif 
    87         out_comm[i]->ep_comm_ptr->intercomm->mpi_inter_comm = mpi_inter_comm;       
     87        out_comm[i]->ep_comm_ptr->intercomm->mpi_inter_comm = mpi_inter_comm; 
    8888      } 
    8989 
     
    104104    (*newcomm)->ep_comm_ptr->intercomm->size_rank_info[2] = comm->ep_comm_ptr->intercomm->size_rank_info[2]; 
    105105 
     106    (*newcomm)->ep_comm_ptr->intercomm->intercomm_tag = comm->ep_comm_ptr->intercomm->intercomm_tag; 
     107 
    106108 
    107109    int ep_rank_loc = (*newcomm)->ep_comm_ptr->size_rank_info[1].first; 
     
    109111    if(ep_rank_loc == 0) 
    110112    { 
    111       //printf("in dup , ep_rank_loc = 0 :  ep %d\n", (*newcomm)->ep_comm_ptr->size_rank_info[0].first); 
     113      int world_rank; 
     114      MPI_Comm_rank(MPI_COMM_WORLD, &world_rank); 
     115 
    112116      (*newcomm)->ep_comm_ptr->intercomm->intercomm_rank_map = new INTERCOMM_RANK_MAP; 
    113117      (*newcomm)->ep_comm_ptr->intercomm->local_rank_map = new EP_RANK_MAP; 
    114118 
    115119      *(*newcomm)->ep_comm_ptr->intercomm->intercomm_rank_map = *comm->ep_comm_ptr->intercomm->intercomm_rank_map; 
    116       *(*newcomm)->ep_comm_ptr->intercomm->local_rank_map = *comm->ep_comm_ptr->intercomm->local_rank_map; 
     120      *(*newcomm)->ep_comm_ptr->intercomm->local_rank_map     = *comm->ep_comm_ptr->intercomm->local_rank_map; 
    117121    } 
    118122 
     
    121125    if(ep_rank_loc !=0 ) 
    122126    { 
    123       (*newcomm)->ep_comm_ptr->intercomm->intercomm_rank_map = (*newcomm)->ep_comm_ptr->comm_list[0]->ep_comm_ptr->intercomm->intercomm_rank_map; 
    124       (*newcomm)->ep_comm_ptr->intercomm->local_rank_map = (*newcomm)->ep_comm_ptr->comm_list[0]->ep_comm_ptr->intercomm->local_rank_map; 
     127      int target = (*newcomm)->ep_comm_ptr->intercomm->intercomm_tag; 
     128      (*newcomm)->ep_comm_ptr->intercomm->intercomm_rank_map = (*newcomm)->ep_comm_ptr->comm_list[target]->ep_comm_ptr->intercomm->intercomm_rank_map;  
     129      (*newcomm)->ep_comm_ptr->intercomm->local_rank_map     = (*newcomm)->ep_comm_ptr->comm_list[target]->ep_comm_ptr->intercomm->local_rank_map; 
    125130    } 
    126131 
  • XIOS/dev/branch_openmp/extern/ep_dev/ep_intercomm.hpp

    r1517 r1518  
    3232 
    3333 
    34     int intercomm_tag; 
     34    int intercomm_tag;                       // newcomm_ep_rank_loc of ep with ep_rank_loc==0  
     35                                             // used in comm_dup_intercomm 
    3536     
    3637  }; 
  • XIOS/dev/branch_openmp/extern/ep_dev/ep_intercomm_kernel.cpp

    r1517 r1518  
    554554        { 
    555555          target = i; 
     556          (*newintercomm)->ep_comm_ptr->intercomm->intercomm_tag = target; 
    556557          break; 
    557558        } 
     
    559560      (*newintercomm)->ep_comm_ptr->intercomm->intercomm_rank_map = (*newintercomm)->ep_comm_ptr->comm_list[target]->ep_comm_ptr->intercomm->intercomm_rank_map; 
    560561      (*newintercomm)->ep_comm_ptr->intercomm->local_rank_map     = (*newintercomm)->ep_comm_ptr->comm_list[target]->ep_comm_ptr->intercomm->local_rank_map; 
     562    } 
     563    else 
     564    { 
     565      (*newintercomm)->ep_comm_ptr->intercomm->intercomm_tag = -1; 
    561566    } 
    562567 
  • XIOS/dev/branch_openmp/extern/ep_dev/ep_merge.cpp

    r1513 r1518  
    124124  int MPI_Intercomm_merge(MPI_Comm inter_comm, bool high, MPI_Comm *newintracomm) 
    125125  { 
     126     
    126127 
    127128    assert(inter_comm->is_intercomm); 
    128129 
    129     //if(inter_comm->ep_comm_ptr->intercomm->local_comm->ep_comm_ptr->comm_label == -99) 
    130     //{ 
    131     //    return MPI_Intercomm_merge_unique_leader(inter_comm, high, newintracomm); 
    132     //} 
     130    // determine if only one MPI proc 
     131 
     132        // to be completed ...... 
     133 
     134    // multiple MPI proc and high differs 
     135 
     136    int newcomm_ep_rank = inter_comm->ep_comm_ptr->intercomm->size_rank_info[0].first; 
     137    int newcomm_ep_rank_loc = inter_comm->ep_comm_ptr->intercomm->size_rank_info[1].first; 
     138    int newcomm_num_ep = inter_comm->ep_comm_ptr->intercomm->size_rank_info[1].second; 
     139 
     140    int ep_rank = inter_comm->ep_comm_ptr->size_rank_info[0].first; 
     141    int ep_rank_loc = inter_comm->ep_comm_ptr->size_rank_info[1].first; 
     142    int num_ep = inter_comm->ep_comm_ptr->size_rank_info[1].second; 
     143 
     144    if(newcomm_ep_rank_loc == 0) 
     145    { 
     146      ::MPI_Comm *mpi_intracomm = new ::MPI_Comm; 
     147      ::MPI_Intercomm_merge(to_mpi_comm(inter_comm->ep_comm_ptr->intercomm->mpi_inter_comm), high, mpi_intracomm); 
     148       
     149      MPI_Info info; 
     150      MPI_Comm *ep_comm; 
     151      MPI_Comm_create_endpoints(mpi_intracomm, newcomm_num_ep, info, ep_comm); 
     152 
     153      inter_comm->ep_comm_ptr->comm_list[0]->mem_bridge = ep_comm; 
     154    } 
     155 
     156    MPI_Barrier_local(inter_comm); 
     157 
     158    int remote_num_ep = newcomm_num_ep - num_ep; 
     159 
     160    *newintracomm = inter_comm->ep_comm_ptr->comm_list[0]->mem_bridge[high? remote_num_ep+ep_rank_loc : ep_rank_loc]; 
     161 
     162    int ep_size = inter_comm->ep_comm_ptr->size_rank_info[0].second; 
     163    int remote_ep_size = inter_comm->ep_comm_ptr->intercomm->intercomm_rank_map->size(); 
     164 
     165    //printf("ep_size = %d, remote_ep_size = %d\n", ep_size, remote_ep_size); 
     166 
     167    (*newintracomm)->ep_comm_ptr->size_rank_info[0].first = high? remote_ep_size+ep_rank : ep_rank; 
     168 
     169    int my_triple[3]; 
     170    my_triple[0] = (*newintracomm)->ep_comm_ptr->size_rank_info[0].first; 
     171    my_triple[1] = (*newintracomm)->ep_comm_ptr->size_rank_info[1].first; 
     172    my_triple[2] = (*newintracomm)->ep_comm_ptr->size_rank_info[2].first; 
     173 
     174    int *my_triple_list = new int[3 * (*newintracomm)->ep_comm_ptr->size_rank_info[0].second]; 
     175 
     176 
     177    MPI_Allgather(my_triple, 3, MPI_INT, my_triple_list, 3, MPI_INT, *newintracomm); 
     178 
     179    if((*newintracomm)->ep_comm_ptr->size_rank_info[1].first == 0) 
     180    { 
     181      (*newintracomm)->ep_rank_map->clear(); 
     182      for(int i=0; i<(*newintracomm)->ep_comm_ptr->size_rank_info[0].second; i++) 
     183      { 
     184        (*newintracomm)->ep_rank_map->insert(std::pair< int, std::pair<int,int> >(my_triple_list[3*i], my_triple_list[3*i+1], my_triple_list[3*i+2])); 
     185      } 
     186    } 
     187 
     188#ifdef _showinfo 
     189    MPI_Barrier_local(inter_comm); 
     190    if((*newintracomm)->ep_comm_ptr->size_rank_info[0].first == 15) 
     191    { 
     192      for(std::map<int, std::pair<int, int> >::iterator it = (*newintracomm)->ep_rank_map->begin(); it != (*newintracomm)->ep_rank_map->end(); it++) 
     193      { 
     194        printf("(%d  %d  %d)\n", it->first, it->second.first, it->second.second); 
     195      } 
     196    } 
     197#endif 
     198     
     199    delete my_triple_list; 
     200  
     201 
     202    return 0; 
     203 
    133204 
    134205 
    135206    Debug("intercomm_merge kernel\n"); 
    136207 
    137     int ep_rank_loc; 
    138     int num_ep; 
     208    //int ep_rank_loc; 
     209    //int num_ep; 
    139210 
    140211    ep_rank_loc = inter_comm->ep_comm_ptr->size_rank_info[1].first; 
     
    143214 
    144215 
    145     int remote_ep_size = inter_comm->ep_comm_ptr->intercomm->remote_rank_map->size(); 
     216    //int remote_ep_size = inter_comm->ep_comm_ptr->intercomm->remote_rank_map->size(); 
    146217 
    147218 
  • XIOS/dev/branch_openmp/extern/ep_dev/ep_recv.cpp

    r1503 r1518  
    6565    EP_PendingRequests->push_back(request);   
    6666    int ep_rank = comm->ep_comm_ptr->size_rank_info[0].first; 
    67     memcheck("EP_PendingRequests["<<ep_rank<<"]->size() = " << EP_PendingRequests->size());                                                               
    68                                            
     67    memcheck("EP_PendingRequests["<<ep_rank<<"]->size() = " << EP_PendingRequests->size());     
     68 
     69#ifdef _showinfo 
     70    if(comm->is_intercomm) 
     71    { 
     72      int ep_dest_loc  = comm->ep_rank_map->at(dest_rank).first; 
     73      int ep_src_loc = comm->ep_comm_ptr->intercomm->intercomm_rank_map->at(src).first; 
     74      int mpi_tag     = tag_combine(tag, ep_src_loc, ep_dest_loc); 
     75      int mpi_dest    = comm->ep_comm_ptr->intercomm->intercomm_rank_map->at(src).second.first; 
     76 
     77      printf("Irecv : ep_src_loc = %d, ep_dest_loc = %d, mpi_src = %d, mpi_dest = %d, mpi_tag = %d\n", ep_src_loc, ep_dest_loc, comm->ep_comm_ptr->size_rank_info[2].first, mpi_dest, mpi_tag); 
     78    }                                                           
     79#endif                                       
     80 
    6981    return Request_Check(); 
    7082  } 
  • XIOS/dev/branch_openmp/extern/ep_dev/ep_send.cpp

    r1515 r1518  
    223223 
    224224#ifdef _showinfo 
    225     printf("ep_src_loc = %d, ep_dest_loc = %d, mpi_src = %d, mpi_dest = %d, mpi_tag = %d\n", ep_src_loc, ep_dest_loc, comm->ep_comm_ptr->size_rank_info[2].first, mpi_dest, mpi_tag); 
     225    printf("Send : ep_src_loc = %d, ep_dest_loc = %d, mpi_src = %d, mpi_dest = %d, mpi_tag = %d\n", ep_src_loc, ep_dest_loc, comm->ep_comm_ptr->size_rank_info[2].first, mpi_dest, mpi_tag); 
    226226#endif 
    227227     
  • XIOS/dev/branch_openmp/extern/ep_dev/main.cpp

    r1517 r1518  
    826826      MPI_Barrier(comm); 
    827827 
    828       if(color==2 && split_rank==1) 
     828      if(color==2 && split_rank==0) 
    829829      { 
    830830        double sendbuf[9]={1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8, 9.9}; 
    831831        MPI_Request send_request; 
    832832        MPI_Status send_status; 
    833         MPI_Isend(sendbuf, 9, MPI_DOUBLE, 5, 10, inter_comm, &send_request); 
     833        MPI_Isend(sendbuf, 9, MPI_DOUBLE, 0, 10, inter_comm, &send_request); 
    834834        MPI_Wait(&send_request, &send_status); 
    835835      } 
    836836 
    837       if(color==1 && split_rank==5) 
     837      if(color==1 && split_rank==0) 
    838838      { 
    839839        double recvbuf[9]; 
    840840        MPI_Request recv_request; 
    841841        MPI_Status recv_status; 
    842         MPI_Irecv(recvbuf, 9, MPI_DOUBLE, 1, 10, inter_comm, &recv_request); 
     842        MPI_Irecv(recvbuf, 9, MPI_DOUBLE, 0, 10, inter_comm, &recv_request); 
    843843        MPI_Wait(&recv_request, &recv_status); 
    844844        for(int i=0; i<9; i++) 
     
    867867      MPI_Comm_dup(inter_comm, &inter_comm_dup); 
    868868 
     869      int inter_comm_dup_size; 
     870      int inter_comm_dup_remote_size; 
     871      MPI_Comm_size(inter_comm_dup, &inter_comm_dup_size); 
     872 
     873      MPI_Comm_remote_size(inter_comm_dup, &inter_comm_dup_remote_size); 
     874 
     875      bool high = inter_comm_dup_size>inter_comm_dup_remote_size; 
     876 
     877 
     878      //printf("rank = %d, high = %d, inter_comm_dup_size = %d, inter_comm_dup_remote_size = %d\n", rank, high, inter_comm_dup_size, inter_comm_dup_remote_size); 
     879 
     880      MPI_Comm inter_comm_dup_merged; 
     881 
     882      MPI_Intercomm_merge(inter_comm_dup, high, &inter_comm_dup_merged); 
     883       
     884      int inter_comm_dup_merged_rank; 
     885      MPI_Comm_rank(inter_comm_dup_merged, &inter_comm_dup_merged_rank); 
     886 
     887      int inter_comm_dup_merged_size; 
     888      MPI_Comm_size(inter_comm_dup_merged, &inter_comm_dup_merged_size); 
     889 
     890      printf("rank = %d, inter_comm_dup_merged_rank = %d, inter_comm_dup_merged_size = %d\n", rank, inter_comm_dup_merged_rank, inter_comm_dup_merged_size); 
     891 
     892      // TEST OF GATHER 
     893      { 
     894        int gather_root = 99; 
     895   
     896        if(inter_comm_dup_merged_rank == 0)  
     897        { 
     898          gather_root = rand() % inter_comm_dup_merged_size; 
     899        } 
     900   
     901        MPI_Bcast(&gather_root, 1, MPI_INT, 0, inter_comm_dup_merged); 
     902 
     903        double sendbuf[2]; 
     904        sendbuf[0] = inter_comm_dup_merged_rank * 1.0; 
     905        sendbuf[1] = inter_comm_dup_merged_size * (-1.0); 
     906 
     907        std::vector<double>recvbuf(2*inter_comm_dup_merged_size, 0); 
     908 
     909        MPI_Gather(sendbuf, 2, MPI_DOUBLE, recvbuf.data(), 2, MPI_DOUBLE, gather_root, inter_comm_dup_merged); 
     910 
     911        bool gather_result = true; 
     912 
     913        if(inter_comm_dup_merged_rank == gather_root) 
     914        { 
     915          for(int i=0; i<inter_comm_dup_merged_size; i++) 
     916          { 
     917            if(abs(recvbuf[2*i] - i) > 1.e-10 || abs(recvbuf[2*i+1] + inter_comm_dup_merged_size) > 1.e-10) 
     918            { 
     919              gather_result = false; 
     920              break; 
     921            }   
     922          } 
     923 
     924          if(gather_result) printf("root = %d : \t test MPI_Gather for merged comm\t OK \n", gather_root); 
     925          else              printf("root = %d : \t test MPI_Gather for merged comm\t FAILED\n", gather_root); 
     926        } 
     927      } 
     928 
     929      // TEST OF ALLREDUCE 
     930      { 
     931  
     932        int sendbuf[2]; 
     933        sendbuf[0] = inter_comm_dup_merged_rank; 
     934        sendbuf[1] = -inter_comm_dup_merged_size; 
     935 
     936        std::vector<int>recvbuf(2, 0); 
     937 
     938        MPI_Op op = MPI_MIN; 
     939 
     940        MPI_Allreduce(sendbuf, recvbuf.data(), 2, MPI_INT, op, inter_comm_dup_merged); 
     941 
     942 
     943        int allreduce_test = 1; 
     944 
     945       
     946        if((op == MPI_SUM && (abs(recvbuf[0]-(inter_comm_dup_merged_size-1)*inter_comm_dup_merged_size/2) > 1.e-10 || abs(recvbuf[1] + inter_comm_dup_merged_size * inter_comm_dup_merged_size) > 1.e-10) ) || 
     947           (op == MPI_MAX && (abs(recvbuf[0]-(inter_comm_dup_merged_size-1)) > 1.e-10 || abs(recvbuf[1] + inter_comm_dup_merged_size) > 1.e-10) )               || 
     948           (op == MPI_MIN && (abs(recvbuf[0]) > 1.e-10 || abs(recvbuf[1] + inter_comm_dup_merged_size) > 1.e-10) ) ) 
     949        { 
     950          allreduce_test = 0; printf("%d %d\n", recvbuf[0], recvbuf[1]); 
     951        }   
     952       
     953 
     954        int allreduce_result; 
     955        MPI_Reduce(&allreduce_test, &allreduce_result, 1, MPI_INT, MPI_MIN, 0, inter_comm_dup_merged); 
     956 
     957        if(inter_comm_dup_merged_rank == 0 && allreduce_result)  printf("            \t test MPI_Allreduce for merged comm \t OK\n"); 
     958        if(inter_comm_dup_merged_rank == 0 && !allreduce_result) printf("            \t test MPI_Allreduce for merged comm \t FAILED\n"); 
     959     
     960      } 
     961 
     962      // TEST OF EXSCAN 
     963      { 
     964  
     965        std::vector<int>sendbuf(2, inter_comm_dup_merged_rank); 
     966        std::vector<int>recvbuf(2, -1); 
     967 
     968        MPI_Op op = MPI_SUM; 
     969            
     970 
     971        MPI_Exscan(sendbuf.data(), recvbuf.data(), 2, MPI_INT, op, inter_comm_dup_merged); 
     972 
     973        int exscan_test = 1; 
     974 
     975        if(inter_comm_dup_merged_rank >0) 
     976        if((op == MPI_SUM && (abs(recvbuf[0]-inter_comm_dup_merged_rank*(inter_comm_dup_merged_rank-1)/2) > 1.e-10 || abs(recvbuf[1]-inter_comm_dup_merged_rank*(inter_comm_dup_merged_rank-1)/2) > 1.e-10) ) || 
     977           (op == MPI_MIN && (abs(recvbuf[0] ) > 1.e-10 || abs(recvbuf[1]) > 1.e-10) )  ||  
     978           (op == MPI_MAX && (abs(recvbuf[0] - inter_comm_dup_merged_rank+1) > 1.e-10 || abs(recvbuf[1] - inter_comm_dup_merged_rank+1) > 1.e-10) ) ) 
     979        { 
     980          exscan_test = 0;  
     981        } 
     982 
     983        //printf(" ID=%d : sendbuf = (%d, %d), recvbuf = (%d, %d), exscan_test = %d \n", inter_comm_dup_merged_rank, sendbuf[0], sendbuf[1], recvbuf[0], recvbuf[1], exscan_test);  
     984         
     985        int exscan_result; 
     986        MPI_Reduce(&exscan_test, &exscan_result, 1, MPI_INT, MPI_MIN, 0, inter_comm_dup_merged); 
     987     
     988        if(inter_comm_dup_merged_rank == 0 && exscan_result)  printf("            \t test MPI_Exscan for merged comm \t OK\n"); 
     989        if(inter_comm_dup_merged_rank == 0 && !exscan_result) printf("            \t test MPI_Exscan for merged comm \t FAILED %d\n", exscan_result); 
     990      } 
     991 
     992      // TEST OF SCATTERV 
     993      { 
     994 
     995        int scatterv_root; 
     996   
     997        if(inter_comm_dup_merged_rank == 0) scatterv_root = rand() % inter_comm_dup_merged_size; 
     998   
     999        MPI_Bcast(&scatterv_root, 1, MPI_INT, 0, inter_comm_dup_merged); 
     1000  
     1001        std::vector<int>sendbuf(2*inter_comm_dup_merged_size, inter_comm_dup_merged_rank); 
     1002        std::vector<int>recvbuf(2, -1); 
     1003        std::vector<int>sendcounts(inter_comm_dup_merged_size, 2); 
     1004        std::vector<int>displs(inter_comm_dup_merged_size, 0); 
     1005 
     1006        for(int i=0; i<inter_comm_dup_merged_size; i++) displs[i] = 2*(inter_comm_dup_merged_size-1-i); 
     1007 
     1008        if(inter_comm_dup_merged_rank == scatterv_root)  
     1009        { 
     1010          for(int i=0; i<inter_comm_dup_merged_size; i++)  
     1011          { 
     1012            sendbuf[2*i] = i; 
     1013            sendbuf[2*i+1] = inter_comm_dup_merged_size; 
     1014          } 
     1015        } 
     1016 
     1017 
     1018        MPI_Scatterv(sendbuf.data(), sendcounts.data(), displs.data(), MPI_INT, recvbuf.data(), 2, MPI_INT, scatterv_root, inter_comm_dup_merged); 
     1019 
     1020        //printf("ID = %d : %d %d\n", inter_comm_dup_merged_rank, recvbuf[0], recvbuf[1]); 
     1021 
     1022        int scatterv_test = 1; 
     1023 
     1024      
     1025        if( abs(recvbuf[0]-(inter_comm_dup_merged_size-1-inter_comm_dup_merged_rank)) > 1.e-10 || abs(recvbuf[1]-inter_comm_dup_merged_size) > 1.e-10 ) 
     1026        { 
     1027          scatterv_test = 0; printf("%d %d  id = %d\n", recvbuf[0], recvbuf[1], inter_comm_dup_merged_rank); 
     1028        }   
     1029     
     1030 
     1031        int scatterv_result; 
     1032        MPI_Reduce(&scatterv_test, &scatterv_result, 1, MPI_INT, MPI_MIN, scatterv_root, inter_comm_dup_merged); 
     1033 
     1034        if(inter_comm_dup_merged_rank == scatterv_root && scatterv_result)  printf("root = %d : \t test MPI_Scatterv for merged comm \t OK\n", scatterv_root); 
     1035        if(inter_comm_dup_merged_rank == scatterv_root && !scatterv_result) printf("root = %d : \t test MPI_Scatterv for merged comm \t FAILED\n", scatterv_root); 
     1036      } 
     1037 
     1038 
     1039 
     1040 
    8691041      MPI_Barrier(comm); 
    8701042      MPI_Barrier(comm); 
     
    8721044 
    8731045      MPI_Comm_free(&inter_comm_dup); 
     1046 
     1047      MPI_Barrier(comm); 
     1048      MPI_Barrier(comm); 
     1049 
     1050 
     1051      MPI_Comm_free(&inter_comm_dup_merged); 
     1052 
    8741053 
    8751054 
Note: See TracChangeset for help on using the changeset viewer.