#include "ep_lib.hpp" #include #include #include "ep_declaration.hpp" #include #include /* time */ #include #include #include using namespace ep_lib; using namespace std::chrono; int main(int argc, char **argv) { srand (time(NULL)); printf("Testing ep_lib\n"); int required=3, provided; MPI_Init_thread(&argc, &argv, required, &provided); assert(required==provided); int mpi_rank; int mpi_size; MPI_Comm_rank(MPI_COMM_WORLD, &mpi_rank); MPI_Comm_size(MPI_COMM_WORLD, &mpi_size); #pragma omp parallel default(shared) { MPI_Comm_rank(MPI_COMM_WORLD , &mpi_rank); int num_ep = omp_get_num_threads(); MPI_Info info; //printf("mpi_rank = %d, thread_num = %d\n", mpi_rank, omp_get_thread_num()); MPI_Comm *ep_comm; #pragma omp master { MPI_Comm *ep_comm; MPI_Comm_create_endpoints(MPI_COMM_WORLD->mpi_comm, num_ep, info, ep_comm); passage = ep_comm; } #pragma omp barrier MPI_Comm comm_for_dup; // this should act as EP_COMM_WORLD MPI_Comm comm; // this should act as EP_COMM_WORLD comm_for_dup = passage[omp_get_thread_num()]; MPI_Comm_dup(comm_for_dup, &comm); MPI_Comm_free(&comm_for_dup); MPI_Barrier(comm); int rank, size; MPI_Comm_rank(comm, &rank); MPI_Comm_size(comm, &size); if(rank == 0) printf(" \t test MPI_Comm_dup \t OK \n"); /* // TIMING SYCHRONIZATION { int n=100000; MPI_Barrier(comm); high_resolution_clock::time_point t1 = high_resolution_clock::now(); for(int i=0; i time_span = duration_cast>(t2 - t1); #pragma omp master std::cout << "proc "<< mpi_rank <<" ep_barrier "<< time_span.count() << " seconds."<>(t2 - t1); #pragma omp master std::cout << "proc "<< mpi_rank <<" omp_barrier "<< time_span.count() << " seconds."<>(t2 - t1); MPI_Barrier(comm); #pragma omp master std::cout << "proc "<< mpi_rank <<" for_loop "<< time_span.count() << " seconds."<recvbuf(2*size, 0); MPI_Gather(sendbuf, 2, MPI_DOUBLE, recvbuf.data(), 2, MPI_DOUBLE, gather_root, comm); bool gather_result = true; if(rank == gather_root) { for(int i=0; i 1.e-10 || abs(recvbuf[2*i+1] + size) > 1.e-10) { gather_result = false; break; } } if(gather_result) printf("root = %d : \t test MPI_Gather \t OK \n", gather_root); else printf("root = %d : \t test MPI_Gather \t FAILED\n", gather_root); } } MPI_Barrier(comm); // TEST OF GATHERV FROM A RAMDOM ROOT { int gatherv_root; if(rank == 0) gatherv_root = rand() % size; MPI_Bcast(&gatherv_root, 1, MPI_INT, 0, comm); int sendbuf[2]; sendbuf[0] = rank; sendbuf[1] = -size; std::vectorrecvbuf(2*size, 0); std::vectorrecvcounts(size, 2); std::vectordispls(size, 0); for(int i=0; i 1.e-10 || abs(recvbuf[2*i+1] + size) > 1.e-10) { gatherv_result = false; printf("%lf %lf root = %d, i = %d\n", recvbuf[2*i], recvbuf[2*i+1], gatherv_root, i); break; } } //for(int i=0; irecvbuf(2*size, 0); MPI_Allgather(sendbuf, 2, MPI_DOUBLE, recvbuf.data(), 2, MPI_DOUBLE, comm); int allgather_test = 1; for(int i=0; i 1.e-10 || abs(recvbuf[2*i+1] + size) > 1.e-10) { allgather_test = 0; break; } } int allgather_result; MPI_Reduce(&allgather_test, &allgather_result, 1, MPI_INT, MPI_MIN, 0, comm); if(rank == 0 && allgather_result) printf(" \t test MPI_Allgather \t OK \n"); if(rank == 0 && !allgather_result) printf(" \t test MPI_Allgather \t OK \n"); } MPI_Barrier(comm); // TEST OF ALLGATHERV { int sendbuf[2]; sendbuf[0] = rank; sendbuf[1] = -size; std::vectorrecvbuf(2*size, 0); std::vectorrecvcounts(size, 2); std::vectordispls(size, 0); for(int i=0; i 1.e-10 || abs(recvbuf[2*i+1] + size) > 1.e-10) { allgatherv_test = 0; printf("ID : %d %d %d %d %d\n", rank, recvbuf[2*i], recvbuf[2*i+1] , recvbuf[2*i] - (size-1-i), 
    // TEST OF ALLGATHERV
    {
      int sendbuf[2];
      sendbuf[0] = rank;
      sendbuf[1] = -size;

      std::vector<int> recvbuf(2*size, 0);
      std::vector<int> recvcounts(size, 2);
      std::vector<int> displs(size, 0);

      for(int i=0; i<size; i++) displs[i] = 2*(size-1-i);

      MPI_Allgatherv(sendbuf, 2, MPI_INT, recvbuf.data(), recvcounts.data(), displs.data(), MPI_INT, comm);

      int allgatherv_test = 1;

      for(int i=0; i<size; i++)
      {
        if(abs(recvbuf[2*i] - (size-1-i)) > 1.e-10 || abs(recvbuf[2*i+1] + size) > 1.e-10)
        {
          allgatherv_test = 0;
          printf("ID : %d %d %d %d %d\n", rank, recvbuf[2*i], recvbuf[2*i+1],
                 recvbuf[2*i] - (size-1-i), recvbuf[2*i+1] + size);
          break;
        }
      }

      int allgatherv_result;
      MPI_Reduce(&allgatherv_test, &allgatherv_result, 1, MPI_INT, MPI_MIN, 0, comm);

      if(rank == 0 && allgatherv_result)  printf(" \t test MPI_Allgatherv \t OK \n");
      if(rank == 0 && !allgatherv_result) printf(" \t test MPI_Allgatherv \t FAILED %d\n", allgatherv_result);
    }

    MPI_Barrier(comm);

    // TEST OF REDUCE
    {
      int reduce_root;
      if(rank == 0) reduce_root = rand() % size;
      MPI_Bcast(&reduce_root, 1, MPI_INT, 0, comm);

      int sendbuf[2];
      sendbuf[0] = rank;
      sendbuf[1] = -size;

      std::vector<int> recvbuf(2, 0);

      MPI_Op op = MPI_MIN;

      MPI_Reduce(sendbuf, recvbuf.data(), 2, MPI_INT, op, reduce_root, comm);

      bool reduce_result = true;

      if(rank == reduce_root)
      {
        for(int i=0; i<2; i++)
        {
          // expected results: SUM -> (size-1)*size/2 and -size*size,
          //                   MAX -> size-1 and -size,
          //                   MIN -> 0 and -size
          if((op == MPI_SUM && (abs(recvbuf[0]-(size-1)*size/2) > 1.e-10 || abs(recvbuf[1] + size * size) > 1.e-10) )
          || (op == MPI_MAX && (abs(recvbuf[0]-(size-1)) > 1.e-10 || abs(recvbuf[1] + size) > 1.e-10) )
          || (op == MPI_MIN && (abs(recvbuf[0]) > 1.e-10 || abs(recvbuf[1] + size) > 1.e-10) ) )
          {
            reduce_result = false;
            printf("%d %d root = %d, i = %d\n", recvbuf[0], recvbuf[1], reduce_root, i);
            break;
          }
        }
      }

      if(rank == reduce_root && reduce_result)  printf("root = %d : \t test MPI_Reduce \t OK\n", reduce_root);
      if(rank == reduce_root && !reduce_result) printf("root = %d : \t test MPI_Reduce \t FAILED\n", reduce_root);
    }

    MPI_Barrier(comm);

    // TEST OF ALLREDUCE
    {
      int sendbuf[2];
      sendbuf[0] = rank;
      sendbuf[1] = -size;

      std::vector<int> recvbuf(2, 0);

      MPI_Op op = MPI_MIN;

      MPI_Allreduce(sendbuf, recvbuf.data(), 2, MPI_INT, op, comm);

      int allreduce_test = 1;

      if((op == MPI_SUM && (abs(recvbuf[0]-(size-1)*size/2) > 1.e-10 || abs(recvbuf[1] + size * size) > 1.e-10) )
      || (op == MPI_MAX && (abs(recvbuf[0]-(size-1)) > 1.e-10 || abs(recvbuf[1] + size) > 1.e-10) )
      || (op == MPI_MIN && (abs(recvbuf[0]) > 1.e-10 || abs(recvbuf[1] + size) > 1.e-10) ) )
      {
        allreduce_test = 0;
        printf("%d %d\n", recvbuf[0], recvbuf[1]);
      }

      int allreduce_result;
      MPI_Reduce(&allreduce_test, &allreduce_result, 1, MPI_INT, MPI_MIN, 0, comm);

      if(rank == 0 && allreduce_result)  printf(" \t test MPI_Allreduce \t OK\n");
      if(rank == 0 && !allreduce_result) printf(" \t test MPI_Allreduce \t FAILED\n");
    }

    MPI_Barrier(comm);

    // TEST OF REDUCE_SCATTER
    {
      std::vector<int> sendbuf(2*size, rank);
      std::vector<int> recvbuf(2, -1);
      std::vector<int> recvcounts(size, 2);

      MPI_Op op = MPI_MIN;

      MPI_Reduce_scatter(sendbuf.data(), recvbuf.data(), recvcounts.data(), MPI_INT, op, comm);

      int reduce_scatter_test = 1;

      // each element is reduced over all ranks, then every rank keeps two:
      // SUM -> (size-1)*size/2, MAX -> size-1, MIN -> 0 in both slots
      if((op == MPI_SUM && (abs(recvbuf[0]-(size-1)*size/2) > 1.e-10 || abs(recvbuf[1]-(size-1)*size/2) > 1.e-10) )
      || (op == MPI_MAX && (abs(recvbuf[0]-(size-1)) > 1.e-10 || abs(recvbuf[1]-(size-1)) > 1.e-10) )
      || (op == MPI_MIN && (abs(recvbuf[0]) > 1.e-10 || abs(recvbuf[1]) > 1.e-10) ) )
      {
        reduce_scatter_test = 0;
        //printf("%d %d id = %d\n", recvbuf[0], recvbuf[1], rank);
      }

      int reduce_scatter_result;
      MPI_Reduce(&reduce_scatter_test, &reduce_scatter_result, 1, MPI_INT, MPI_MIN, 0, comm);

      if(rank == 0 && reduce_scatter_result)  printf(" \t test MPI_Reduce_scatter OK\n");
      if(rank == 0 && !reduce_scatter_result) printf(" \t test MPI_Reduce_scatter FAILED\n");
    }

    MPI_Barrier(comm);
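    /*
     * The scatter tests below invert the gather pattern: the root prepares
     * one (value, size) pair per endpoint, each endpoint checks only the
     * pair it receives, and the per-endpoint verdicts are combined with an
     * MPI_MIN-reduction back to the root, so a single failing endpoint is
     * enough to flip the final verdict to FAILED.
     */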
    // TEST OF SCATTER
    {
      int scatter_root;
      if(rank == 0) scatter_root = rand() % size;
      MPI_Bcast(&scatter_root, 1, MPI_INT, 0, comm);

      std::vector<int> sendbuf(2*size, rank);
      std::vector<int> recvbuf(2, -1);
      std::vector<int> recvcounts(size, 2);

      if(rank == scatter_root)
      {
        for(int i=0; i<size; i++)
        {
          sendbuf[2*i]   = i;
          sendbuf[2*i+1] = size;
        }
      }

      MPI_Scatter(sendbuf.data(), 2, MPI_INT, recvbuf.data(), 2, MPI_INT, scatter_root, comm);

      int scatter_test = 1;

      if( abs(recvbuf[0]-rank) > 1.e-10 || abs(recvbuf[1]-size) > 1.e-10 )
      {
        scatter_test = 0;
        //printf("%d %d id = %d\n", recvbuf[0], recvbuf[1], rank);
      }

      int scatter_result;
      MPI_Reduce(&scatter_test, &scatter_result, 1, MPI_INT, MPI_MIN, scatter_root, comm);

      if(rank == scatter_root && scatter_result)  printf("root = %d : \t test MPI_Scatter \t OK\n", scatter_root);
      if(rank == scatter_root && !scatter_result) printf("root = %d : \t test MPI_Scatter \t FAILED\n", scatter_root);
    }

    MPI_Barrier(comm);

    // TEST OF SCATTERV
    {
      int scatterv_root;
      if(rank == 0) scatterv_root = rand() % size;
      MPI_Bcast(&scatterv_root, 1, MPI_INT, 0, comm);

      std::vector<int> sendbuf(2*size, rank);
      std::vector<int> recvbuf(2, -1);
      std::vector<int> sendcounts(size, 2);
      std::vector<int> displs(size, 0);

      for(int i=0; i<size; i++) displs[i] = 2*(size-1-i);

      if(rank == scatterv_root)
      {
        // with the reversed displacements, slot size-1-i feeds rank i,
        // so store rank i's expected pair (i, size) there
        for(int i=0; i<size; i++)
        {
          sendbuf[2*(size-1-i)]   = i;
          sendbuf[2*(size-1-i)+1] = size;
        }
      }

      MPI_Scatterv(sendbuf.data(), sendcounts.data(), displs.data(), MPI_INT, recvbuf.data(), 2, MPI_INT, scatterv_root, comm);

      int scatterv_test = 1;

      if( abs(recvbuf[0]-rank) > 1.e-10 || abs(recvbuf[1]-size) > 1.e-10 )
      {
        scatterv_test = 0;
        printf("%d %d id = %d\n", recvbuf[0], recvbuf[1], rank);
      }

      int scatterv_result;
      MPI_Reduce(&scatterv_test, &scatterv_result, 1, MPI_INT, MPI_MIN, scatterv_root, comm);

      if(rank == scatterv_root && scatterv_result)  printf("root = %d : \t test MPI_Scatterv \t OK\n", scatterv_root);
      if(rank == scatterv_root && !scatterv_result) printf("root = %d : \t test MPI_Scatterv \t FAILED\n", scatterv_root);
    }

    MPI_Barrier(comm);

    // TEST OF ALLTOALL
    {
      std::vector<int> sendbuf(size, rank);
      std::vector<int> recvbuf(size, -1);

      MPI_Alltoall(sendbuf.data(), 1, MPI_INT, recvbuf.data(), 1, MPI_INT, comm);

      int alltoall_result = 1;

      // rank i sends the value i to everyone, so recvbuf[i] must equal i
      for(int i=0; i<size; i++)
        if( abs(recvbuf[i]-i) > 1.e-10 )
        {
          alltoall_result = 0;
          printf("%d id = %d\n", recvbuf[i], rank);
        }

      if(rank == 0 && alltoall_result)  printf(" \t test MPI_Alltoall \t OK\n");
      if(rank == 0 && !alltoall_result) printf(" \t test MPI_Alltoall \t FAILED\n");
    }

    // TEST OF SCAN
    {
      std::vector<int> sendbuf(2, rank);
      std::vector<int> recvbuf(2, -1);

      MPI_Op op = MPI_SUM;

      MPI_Scan(sendbuf.data(), recvbuf.data(), 2, MPI_INT, op, comm);

      int scan_test = 1;

      // printf(" ID=%d : %d %d \n", rank, recvbuf[0], recvbuf[1]);

      // inclusive prefix over ranks 0..rank:
      // SUM -> rank*(rank+1)/2, MIN -> 0, MAX -> rank
      if((op == MPI_SUM && (abs(recvbuf[0]-rank*(rank+1)/2) > 1.e-10 || abs(recvbuf[1]-rank*(rank+1)/2) > 1.e-10) )
      || (op == MPI_MIN && (abs(recvbuf[0]) > 1.e-10 || abs(recvbuf[1]) > 1.e-10) )
      || (op == MPI_MAX && (abs(recvbuf[0] - rank) > 1.e-10 || abs(recvbuf[1] - rank) > 1.e-10) ) )
      {
        scan_test = 0;
        //printf("%d %d id = %d\n", recvbuf[0], recvbuf[1], rank);
      }

      int scan_result;
      MPI_Reduce(&scan_test, &scan_result, 1, MPI_INT, MPI_MIN, 0, comm);

      if(rank == 0 && scan_result)  printf(" \t test MPI_Scan \t\t OK\n");
      if(rank == 0 && !scan_result) printf(" \t test MPI_Scan \t\t FAILED\n");
    }

    // TEST OF EXSCAN
    {
      std::vector<int> sendbuf(2, rank);
      std::vector<int> recvbuf(2, -1);

      MPI_Op op = MPI_SUM;

      MPI_Exscan(sendbuf.data(), recvbuf.data(), 2, MPI_INT, op, comm);

      int exscan_test = 1;

      // printf(" ID=%d : %d %d \n", rank, recvbuf[0], recvbuf[1]);

      // exclusive prefix over ranks 0..rank-1 (undefined on rank 0):
      // SUM -> rank*(rank-1)/2, MIN -> 0, MAX -> rank-1
      if(rank > 0)
        if((op == MPI_SUM && (abs(recvbuf[0]-rank*(rank-1)/2) > 1.e-10 || abs(recvbuf[1]-rank*(rank-1)/2) > 1.e-10) )
        || (op == MPI_MIN && (abs(recvbuf[0]) > 1.e-10 || abs(recvbuf[1]) > 1.e-10) )
        || (op == MPI_MAX && (abs(recvbuf[0] - rank+1) > 1.e-10 || abs(recvbuf[1] - rank+1) > 1.e-10) ) )
        {
          exscan_test = 0;
          //printf("%d %d id = %d\n", recvbuf[0], recvbuf[1], rank);
        }

      int exscan_result;
      MPI_Reduce(&exscan_test, &exscan_result, 1, MPI_INT, MPI_MIN, 0, comm);

      if(rank == 0 && exscan_result)  printf(" \t test MPI_Exscan \t OK\n");
      if(rank == 0 && !exscan_result) printf(" \t test MPI_Exscan \t FAILED\n");
    }

    MPI_Barrier(comm);
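    /*
     * MPI_Comm_split groups endpoints by color and orders each resulting
     * communicator by ascending key, with ties broken by the rank in the
     * parent communicator.  The hard-coded color/key tables below exercise
     * this with a deliberately scrambled ordering.
     */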
    // TEST OF COMM_SPLIT
    {
      int rank, size;
      MPI_Comm_rank(comm, &rank);
      MPI_Comm_size(comm, &size);

      //int color = rank%2;
      int tab_color[16] = {2,2,2,3,0,1,1,3,2,1,3,0,0,2,0,0};
      int tab_key[16]   = {3,11,10,5,6,8,15,7,2,1,9,13,4,14,12,0};

      assert(size <= 16); // the hard-coded tables cover at most 16 endpoints

      int color = tab_color[rank];
      int key   = tab_key[rank];

      MPI_Comm split_comm;
      MPI_Comm_split(comm, color, key, &split_comm);

      int split_rank;
      MPI_Comm_rank(split_comm, &split_rank);

      printf("rank = %d, color = %d, key = %d, split_rank = %d\n", rank, color, key, split_rank);

      MPI_Barrier(comm);
      if(rank == 0) printf("\tMPI_Comm_split OK\n");
      MPI_Barrier(comm);

      /*
      MPI_Comm inter_comm;
      //MPI_Intercomm_create(sub_comm, 0, comm, (color+1)%2, 99, &inter_comm);
      MPI_Intercomm_create(sub_comm, 0, comm, remote_leader, 99, &inter_comm);

      MPI_Barrier(comm);
      if(rank == 0) printf("\tMPI_Intercomm_create OK\n");
      MPI_Barrier(comm);

      int high = color;
      MPI_Comm intra_comm;
      MPI_Intercomm_merge(inter_comm, high, &intra_comm);

      int intra_rank, intra_size;
      MPI_Comm_rank(intra_comm, &intra_rank);
      MPI_Comm_size(intra_comm, &intra_size);

      MPI_Barrier(comm);
      if(rank == 0) printf("\tMPI_Intercomm_merge OK\n");
      MPI_Barrier(comm);

      //check_test_gatherv(comm);
      */

      MPI_Barrier(comm);
      MPI_Comm_free(&split_comm);

      //MPI_Barrier(comm);
      //MPI_Comm_free(&inter_comm);

      MPI_Barrier(comm);
      MPI_Comm_free(&comm);
    }
  }
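  /*
   * The block below is kept for reference but left commented out: it
   * sketches a second phase in which processes with several endpoints act
   * as clients and single-endpoint processes act as servers, and it
   * exercises the one-sided path (MPI_Win_create, MPI_Put, MPI_Get,
   * MPI_Accumulate, MPI_Get_accumulate, MPI_Fetch_and_op,
   * MPI_Compare_and_swap) over endpoint windows.
   */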
  /*
  int num_threads;
  if(mpi_rank < mpi_size-2)
  {
    printf("Proc %d is client\n", mpi_rank);
    num_threads = 2;//+mpi_rank;
  }
  else
  {
    printf("Proc %d is server\n", mpi_rank);
    num_threads = 1;
  }

  omp_set_num_threads(num_threads);

  #pragma omp parallel default(shared) firstprivate(num_threads)
  {
    int num_ep = num_threads;
    MPI_Info info;

    //printf("omp_get_thread_num() = %d, omp_get_num_threads() = %d, num_threads = %d\n", omp_get_thread_num(), omp_get_num_threads(), num_threads);

    #pragma omp master
    {
      MPI_Comm *ep_comm;
      MPI_Comm_create_endpoints(MPI_COMM_WORLD->mpi_comm, num_ep, info, ep_comm);
      passage = ep_comm;
    }

    #pragma omp barrier

    MPI_Comm comm; // this should act as EP_COMM_WORLD
    comm = passage[omp_get_thread_num()];

    int rank, size;
    MPI_Comm_rank(comm, &rank);
    MPI_Comm_size(comm, &size);

    bool isClient = false;
    bool isServer = false;

    if(omp_get_num_threads() > 1) isClient = true;
    else isServer = true;

    printf("mpi_rank = %d, ep_rank = %d, isClient = %d\n", mpi_rank, rank, isClient);

    MPI_Win ep_win;
    MPI_Aint buf_size = 1;
    int buf = rank;
    int local_buf = rank;
    int result_buf = -1;

    MPI_Win_create(&buf, buf_size, sizeof(int), info, comm, &ep_win);

    MPI_Barrier(comm);
    // MPI_Win_fence(MPI_MODE_NOPRECEDE, ep_win);
    MPI_Barrier(comm);
    sleep(0.2);
    MPI_Barrier(comm);

    MPI_Win_fence(0, ep_win);

    if(rank == 0)
    {
      local_buf = 99;
      MPI_Aint displs = 0;
      MPI_Put(&local_buf, 1, MPI_INT, size-1, displs, 1, MPI_INT, ep_win);
    }

    if(rank == size-2)
    {
      MPI_Aint displs(0);
      MPI_Get(&local_buf, 1, MPI_INT, 2, displs, 1, MPI_INT, ep_win);
    }

    MPI_Win_fence(0, ep_win);

    if(rank == 1)
    {
      MPI_Aint displs = 0;
      MPI_Accumulate(&local_buf, 1, MPI_INT, size-1, displs, 1, MPI_INT, MPI_REPLACE, ep_win);
    }

    MPI_Barrier(comm);
    MPI_Win_fence(0, ep_win);

    if(rank == 2)
    {
      MPI_Aint displs = 0;
      MPI_Get_accumulate(&local_buf, 1, MPI_INT, &result_buf, 1, MPI_INT, size-2, displs, 1, MPI_INT, MPI_SUM, ep_win);
    }

    MPI_Win_fence(0, ep_win);

    if(rank == 6)
    {
      MPI_Aint displs = 0;
      MPI_Fetch_and_op(&local_buf, &result_buf, MPI_INT, size-1, displs, MPI_SUM, ep_win);
    }

    MPI_Win_fence(0, ep_win);

    if(rank == 7)
    {
      MPI_Aint displs = 0;
      MPI_Compare_and_swap(&local_buf, &buf, &result_buf, MPI_INT, size-1, displs, ep_win);
    }

    MPI_Win_fence(0, ep_win);

    //::MPI_Compare_and_swap(origin_addr, compare_addr, result_addr, to_mpi_type(datatype), target_mpi_rank, to_mpi_aint(target_disp), to_mpi_win(win.server_win[target_local_rank]));

    MPI_Win ep_win_allocated;
    int* baseptr = new int[10];
    MPI_Aint base_size = 4;

    MPI_Win_allocate(base_size, sizeof(int), info, comm, baseptr, &ep_win_allocated);

    MPI_Win_fence(0, ep_win_allocated);

    MPI_Win_free(&ep_win_allocated);
    delete[] baseptr;

    MPI_Win_free(&ep_win);

    printf("rank = %d, buf = %d, local_buf = %d, result_buf = %d\n", rank, buf, local_buf, result_buf);

    MPI_Comm_free(&comm);
  }
  */

  MPI_Finalize();
}
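/*
 * Build/run sketch (an illustration under assumptions, not the project's
 * actual build line): compile with an MPI C++ wrapper and OpenMP enabled,
 * linking the ep_lib objects, then launch with several OpenMP threads per
 * MPI process so that each thread owns one endpoint, e.g.
 *
 *   mpicxx -fopenmp main.cpp <ep_lib objects> -o test_ep
 *   OMP_NUM_THREADS=4 mpirun -np 4 ./test_ep
 */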