diff --git a/AMSS_NCKU_source/MPatch.C b/AMSS_NCKU_source/MPatch.C index f0deb56..54652a0 100644 --- a/AMSS_NCKU_source/MPatch.C +++ b/AMSS_NCKU_source/MPatch.C @@ -341,8 +341,9 @@ void Patch::Interp_Points(MyList *VarList, double *Shellf, int Symmetry) { // NOTE: we do not Synchnize variables here, make sure of that before calling this routine - int myrank; + int myrank, nprocs; MPI_Comm_rank(MPI_COMM_WORLD, &myrank); + MPI_Comm_size(MPI_COMM_WORLD, &nprocs); int ordn = 2 * ghost_width; MyList *varl; @@ -354,24 +355,18 @@ void Patch::Interp_Points(MyList *VarList, varl = varl->next; } - double *shellf; - shellf = new double[NN * num_var]; - memset(shellf, 0, sizeof(double) * NN * num_var); + memset(Shellf, 0, sizeof(double) * NN * num_var); - // we use weight to monitor code, later some day we can move it for optimization - int *weight; - weight = new int[NN]; - memset(weight, 0, sizeof(int) * NN); - - double *DH, *llb, *uub; - DH = new double[dim]; + // owner_rank[j] records which MPI rank owns point j + // All ranks traverse the same block list so they all agree on ownership + int *owner_rank; + owner_rank = new int[NN]; + for (int j = 0; j < NN; j++) + owner_rank[j] = -1; + double DH[dim], llb[dim], uub[dim]; for (int i = 0; i < dim; i++) - { DH[i] = getdX(i); - } - llb = new double[dim]; - uub = new double[dim]; for (int j = 0; j < NN; j++) // run along points { @@ -403,12 +398,6 @@ void Patch::Interp_Points(MyList *VarList, bool flag = true; for (int i = 0; i < dim; i++) { -// NOTE: our dividing structure is (exclude ghost) -// -1 0 -// 1 2 -// so (0,1) does not belong to any part for vertex structure -// here we put (0,0.5) to left part and (0.5,1) to right part -// BUT for cell structure the bbox is (-1.5,0.5) and (0.5,2.5), there is no missing region at all #ifdef Vertex #ifdef Cell #error Both Cell and Vertex are defined @@ -433,6 +422,7 @@ void Patch::Interp_Points(MyList *VarList, if (flag) { notfind = false; + owner_rank[j] = BP->rank; if (myrank == BP->rank) { //---> interpolation @@ -440,14 +430,11 @@ void Patch::Interp_Points(MyList *VarList, int k = 0; while (varl) // run along variables { - // shellf[j*num_var+k] = Parallel::global_interp(dim,BP->shape,BP->X,BP->fgfs[varl->data->sgfn], - // pox,ordn,varl->data->SoA,Symmetry); - f_global_interp(BP->shape, BP->X[0], BP->X[1], BP->X[2], BP->fgfs[varl->data->sgfn], shellf[j * num_var + k], + f_global_interp(BP->shape, BP->X[0], BP->X[1], BP->X[2], BP->fgfs[varl->data->sgfn], Shellf[j * num_var + k], pox[0], pox[1], pox[2], ordn, varl->data->SoA, Symmetry); varl = varl->next; k++; } - weight[j] = 1; } } if (Bp == ble) @@ -456,103 +443,61 @@ void Patch::Interp_Points(MyList *VarList, } } - MPI_Allreduce(shellf, Shellf, NN * num_var, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); - int *Weight; - Weight = new int[NN]; - MPI_Allreduce(weight, Weight, NN, MPI_INT, MPI_SUM, MPI_COMM_WORLD); - - // misc::tillherecheck("print me"); - - for (int i = 0; i < NN; i++) + // Replace MPI_Allreduce with per-owner MPI_Bcast: + // Group consecutive points by owner rank and broadcast each group. + // Since each point's data is non-zero only on the owner rank, + // Bcast from owner is equivalent to Allreduce(MPI_SUM) but much cheaper. { - if (Weight[i] > 1) + int j = 0; + while (j < NN) { - if (myrank == 0) - cout << "WARNING: Patch::Interp_Points meets multiple weight" << endl; - for (int j = 0; j < num_var; j++) - Shellf[j + i * num_var] = Shellf[j + i * num_var] / Weight[i]; - } - else if (Weight[i] == 0 && myrank == 0) - { - cout << "ERROR: Patch::Interp_Points fails to find point ("; - for (int j = 0; j < dim; j++) + int cur_owner = owner_rank[j]; + if (cur_owner < 0) { - cout << XX[j][i]; - if (j < dim - 1) - cout << ","; - else - cout << ")"; - } - cout << " on Patch ("; - for (int j = 0; j < dim; j++) - { - cout << bbox[j] << "+" << lli[j] * getdX(j); - if (j < dim - 1) - cout << ","; - else - cout << ")--"; - } - cout << "("; - for (int j = 0; j < dim; j++) - { - cout << bbox[dim + j] << "-" << uui[j] * getdX(j); - if (j < dim - 1) - cout << ","; - else - cout << ")" << endl; - } -#if 0 - checkBlock(); -#else - cout << "splited domains:" << endl; - { - MyList *Bp = blb; - while (Bp) + if (myrank == 0) { - Block *BP = Bp->data; - - for (int i = 0; i < dim; i++) + cout << "ERROR: Patch::Interp_Points fails to find point ("; + for (int d = 0; d < dim; d++) { -#ifdef Vertex -#ifdef Cell -#error Both Cell and Vertex are defined -#endif - llb[i] = (feq(BP->bbox[i], bbox[i], DH[i] / 2)) ? BP->bbox[i] + lli[i] * DH[i] : BP->bbox[i] + (ghost_width - 0.5) * DH[i]; - uub[i] = (feq(BP->bbox[dim + i], bbox[dim + i], DH[i] / 2)) ? BP->bbox[dim + i] - uui[i] * DH[i] : BP->bbox[dim + i] - (ghost_width - 0.5) * DH[i]; -#else -#ifdef Cell - llb[i] = (feq(BP->bbox[i], bbox[i], DH[i] / 2)) ? BP->bbox[i] + lli[i] * DH[i] : BP->bbox[i] + ghost_width * DH[i]; - uub[i] = (feq(BP->bbox[dim + i], bbox[dim + i], DH[i] / 2)) ? BP->bbox[dim + i] - uui[i] * DH[i] : BP->bbox[dim + i] - ghost_width * DH[i]; -#else -#error Not define Vertex nor Cell -#endif -#endif + cout << XX[d][j]; + if (d < dim - 1) + cout << ","; + else + cout << ")"; + } + cout << " on Patch ("; + for (int d = 0; d < dim; d++) + { + cout << bbox[d] << "+" << lli[d] * DH[d]; + if (d < dim - 1) + cout << ","; + else + cout << ")--"; } cout << "("; - for (int j = 0; j < dim; j++) + for (int d = 0; d < dim; d++) { - cout << llb[j] << ":" << uub[j]; - if (j < dim - 1) + cout << bbox[dim + d] << "-" << uui[d] * DH[d]; + if (d < dim - 1) cout << ","; else cout << ")" << endl; } - if (Bp == ble) - break; - Bp = Bp->next; + MPI_Abort(MPI_COMM_WORLD, 1); } + j++; + continue; } -#endif - MPI_Abort(MPI_COMM_WORLD, 1); + // Find contiguous run of points with the same owner + int jstart = j; + while (j < NN && owner_rank[j] == cur_owner) + j++; + int count = (j - jstart) * num_var; + MPI_Bcast(Shellf + jstart * num_var, count, MPI_DOUBLE, cur_owner, MPI_COMM_WORLD); } } - delete[] shellf; - delete[] weight; - delete[] Weight; - delete[] DH; - delete[] llb; - delete[] uub; + delete[] owner_rank; } void Patch::Interp_Points(MyList *VarList, int NN, double **XX, @@ -573,24 +518,22 @@ void Patch::Interp_Points(MyList *VarList, varl = varl->next; } - double *shellf; - shellf = new double[NN * num_var]; - memset(shellf, 0, sizeof(double) * NN * num_var); + memset(Shellf, 0, sizeof(double) * NN * num_var); - // we use weight to monitor code, later some day we can move it for optimization - int *weight; - weight = new int[NN]; - memset(weight, 0, sizeof(int) * NN); + // owner_rank[j] stores the global rank that owns point j + int *owner_rank; + owner_rank = new int[NN]; + for (int j = 0; j < NN; j++) + owner_rank[j] = -1; - double *DH, *llb, *uub; - DH = new double[dim]; + // Build global-to-local rank translation for Comm_here + MPI_Group world_group, local_group; + MPI_Comm_group(MPI_COMM_WORLD, &world_group); + MPI_Comm_group(Comm_here, &local_group); + double DH[dim], llb[dim], uub[dim]; for (int i = 0; i < dim; i++) - { DH[i] = getdX(i); - } - llb = new double[dim]; - uub = new double[dim]; for (int j = 0; j < NN; j++) // run along points { @@ -622,12 +565,6 @@ void Patch::Interp_Points(MyList *VarList, bool flag = true; for (int i = 0; i < dim; i++) { -// NOTE: our dividing structure is (exclude ghost) -// -1 0 -// 1 2 -// so (0,1) does not belong to any part for vertex structure -// here we put (0,0.5) to left part and (0.5,1) to right part -// BUT for cell structure the bbox is (-1.5,0.5) and (0.5,2.5), there is no missing region at all #ifdef Vertex #ifdef Cell #error Both Cell and Vertex are defined @@ -652,6 +589,7 @@ void Patch::Interp_Points(MyList *VarList, if (flag) { notfind = false; + owner_rank[j] = BP->rank; if (myrank == BP->rank) { //---> interpolation @@ -659,14 +597,11 @@ void Patch::Interp_Points(MyList *VarList, int k = 0; while (varl) // run along variables { - // shellf[j*num_var+k] = Parallel::global_interp(dim,BP->shape,BP->X,BP->fgfs[varl->data->sgfn], - // pox,ordn,varl->data->SoA,Symmetry); - f_global_interp(BP->shape, BP->X[0], BP->X[1], BP->X[2], BP->fgfs[varl->data->sgfn], shellf[j * num_var + k], + f_global_interp(BP->shape, BP->X[0], BP->X[1], BP->X[2], BP->fgfs[varl->data->sgfn], Shellf[j * num_var + k], pox[0], pox[1], pox[2], ordn, varl->data->SoA, Symmetry); varl = varl->next; k++; } - weight[j] = 1; } } if (Bp == ble) @@ -675,97 +610,35 @@ void Patch::Interp_Points(MyList *VarList, } } - MPI_Allreduce(shellf, Shellf, NN * num_var, MPI_DOUBLE, MPI_SUM, Comm_here); - int *Weight; - Weight = new int[NN]; - MPI_Allreduce(weight, Weight, NN, MPI_INT, MPI_SUM, Comm_here); - - // misc::tillherecheck("print me"); - // if(lmyrank == 0) cout<<"myrank = "< 1) + int j = 0; + while (j < NN) { - if (lmyrank == 0) - cout << "WARNING: Patch::Interp_Points meets multiple weight" << endl; - for (int j = 0; j < num_var; j++) - Shellf[j + i * num_var] = Shellf[j + i * num_var] / Weight[i]; + int cur_owner_global = owner_rank[j]; + if (cur_owner_global < 0) + { + // Point not found — skip (error check disabled for sub-communicator levels) + j++; + continue; + } + // Translate global rank to local rank in Comm_here + int cur_owner_local; + MPI_Group_translate_ranks(world_group, 1, &cur_owner_global, local_group, &cur_owner_local); + + // Find contiguous run of points with the same owner + int jstart = j; + while (j < NN && owner_rank[j] == cur_owner_global) + j++; + int count = (j - jstart) * num_var; + MPI_Bcast(Shellf + jstart * num_var, count, MPI_DOUBLE, cur_owner_local, Comm_here); } -#if 0 // for not involved levels, this may fail - else if(Weight[i] == 0 && lmyrank == 0) - { - cout<<"ERROR: Patch::Interp_Points fails to find point ("; - for(int j=0;j *Bp=blb; - while(Bp) - { - Block *BP=Bp->data; - - for(int i=0;ibbox[i] ,bbox[i] ,DH[i]/2)) ? BP->bbox[i]+lli[i]*DH[i] : BP->bbox[i] +(ghost_width-0.5)*DH[i]; - uub[i] = (feq(BP->bbox[dim+i],bbox[dim+i],DH[i]/2)) ? BP->bbox[dim+i]-uui[i]*DH[i] : BP->bbox[dim+i]-(ghost_width-0.5)*DH[i]; -#else -#ifdef Cell - llb[i] = (feq(BP->bbox[i] ,bbox[i] ,DH[i]/2)) ? BP->bbox[i]+lli[i]*DH[i] : BP->bbox[i] +ghost_width*DH[i]; - uub[i] = (feq(BP->bbox[dim+i],bbox[dim+i],DH[i]/2)) ? BP->bbox[dim+i]-uui[i]*DH[i] : BP->bbox[dim+i]-ghost_width*DH[i]; -#else -#error Not define Vertex nor Cell -#endif -#endif - } - cout<<"("; - for(int j=0;jnext; - } - } -#endif - MPI_Abort(MPI_COMM_WORLD,1); - } -#endif } - delete[] shellf; - delete[] weight; - delete[] Weight; - delete[] DH; - delete[] llb; - delete[] uub; + MPI_Group_free(&world_group); + MPI_Group_free(&local_group); + delete[] owner_rank; } void Patch::checkBlock() {