diff --git a/AMSS_NCKU_source/MPatch.C b/AMSS_NCKU_source/MPatch.C index 54652a0..91ead8a 100644 --- a/AMSS_NCKU_source/MPatch.C +++ b/AMSS_NCKU_source/MPatch.C @@ -499,6 +499,272 @@ void Patch::Interp_Points(MyList *VarList, delete[] owner_rank; } +void Patch::Interp_Points(MyList *VarList, + int NN, double **XX, + double *Shellf, int Symmetry, + int Nmin_consumer, int Nmax_consumer) +{ + // Targeted point-to-point overload: each owner sends each point only to + // the one rank that needs it for integration (consumer), reducing + // communication volume by ~nprocs times compared to the Bcast version. + int myrank, nprocs; + MPI_Comm_rank(MPI_COMM_WORLD, &myrank); + MPI_Comm_size(MPI_COMM_WORLD, &nprocs); + + int ordn = 2 * ghost_width; + MyList *varl; + int num_var = 0; + varl = VarList; + while (varl) + { + num_var++; + varl = varl->next; + } + + memset(Shellf, 0, sizeof(double) * NN * num_var); + + // owner_rank[j] records which MPI rank owns point j + int *owner_rank; + owner_rank = new int[NN]; + for (int j = 0; j < NN; j++) + owner_rank[j] = -1; + + double DH[dim], llb[dim], uub[dim]; + for (int i = 0; i < dim; i++) + DH[i] = getdX(i); + + // --- Interpolation phase (identical to original) --- + for (int j = 0; j < NN; j++) + { + double pox[dim]; + for (int i = 0; i < dim; i++) + { + pox[i] = XX[i][j]; + if (myrank == 0 && (XX[i][j] < bbox[i] + lli[i] * DH[i] || XX[i][j] > bbox[dim + i] - uui[i] * DH[i])) + { + cout << "Patch::Interp_Points: point ("; + for (int k = 0; k < dim; k++) + { + cout << XX[k][j]; + if (k < dim - 1) + cout << ","; + else + cout << ") is out of current Patch." << endl; + } + MPI_Abort(MPI_COMM_WORLD, 1); + } + } + + MyList *Bp = blb; + bool notfind = true; + while (notfind && Bp) + { + Block *BP = Bp->data; + + bool flag = true; + for (int i = 0; i < dim; i++) + { +#ifdef Vertex +#ifdef Cell +#error Both Cell and Vertex are defined +#endif + llb[i] = (feq(BP->bbox[i], bbox[i], DH[i] / 2)) ? BP->bbox[i] + lli[i] * DH[i] : BP->bbox[i] + (ghost_width - 0.5) * DH[i]; + uub[i] = (feq(BP->bbox[dim + i], bbox[dim + i], DH[i] / 2)) ? BP->bbox[dim + i] - uui[i] * DH[i] : BP->bbox[dim + i] - (ghost_width - 0.5) * DH[i]; +#else +#ifdef Cell + llb[i] = (feq(BP->bbox[i], bbox[i], DH[i] / 2)) ? BP->bbox[i] + lli[i] * DH[i] : BP->bbox[i] + ghost_width * DH[i]; + uub[i] = (feq(BP->bbox[dim + i], bbox[dim + i], DH[i] / 2)) ? BP->bbox[dim + i] - uui[i] * DH[i] : BP->bbox[dim + i] - ghost_width * DH[i]; +#else +#error Not define Vertex nor Cell +#endif +#endif + if (XX[i][j] - llb[i] < -DH[i] / 2 || XX[i][j] - uub[i] > DH[i] / 2) + { + flag = false; + break; + } + } + + if (flag) + { + notfind = false; + owner_rank[j] = BP->rank; + if (myrank == BP->rank) + { + varl = VarList; + int k = 0; + while (varl) + { + f_global_interp(BP->shape, BP->X[0], BP->X[1], BP->X[2], BP->fgfs[varl->data->sgfn], Shellf[j * num_var + k], + pox[0], pox[1], pox[2], ordn, varl->data->SoA, Symmetry); + varl = varl->next; + k++; + } + } + } + if (Bp == ble) + break; + Bp = Bp->next; + } + } + + // --- Error check for unfound points --- + for (int j = 0; j < NN; j++) + { + if (owner_rank[j] < 0 && myrank == 0) + { + cout << "ERROR: Patch::Interp_Points fails to find point ("; + for (int d = 0; d < dim; d++) + { + cout << XX[d][j]; + if (d < dim - 1) + cout << ","; + else + cout << ")"; + } + cout << " on Patch ("; + for (int d = 0; d < dim; d++) + { + cout << bbox[d] << "+" << lli[d] * DH[d]; + if (d < dim - 1) + cout << ","; + else + cout << ")--"; + } + cout << "("; + for (int d = 0; d < dim; d++) + { + cout << bbox[dim + d] << "-" << uui[d] * DH[d]; + if (d < dim - 1) + cout << ","; + else + cout << ")" << endl; + } + MPI_Abort(MPI_COMM_WORLD, 1); + } + } + + // --- Targeted point-to-point communication phase --- + // Compute consumer_rank[j] using the same deterministic formula as surface_integral + int *consumer_rank = new int[NN]; + { + int mp = NN / nprocs; + int Lp = NN - nprocs * mp; + for (int j = 0; j < NN; j++) + { + if (j < Lp * (mp + 1)) + consumer_rank[j] = j / (mp + 1); + else + consumer_rank[j] = Lp + (j - Lp * (mp + 1)) / mp; + } + } + + // Count sends and recvs per rank + int *send_count = new int[nprocs]; + int *recv_count = new int[nprocs]; + memset(send_count, 0, sizeof(int) * nprocs); + memset(recv_count, 0, sizeof(int) * nprocs); + + for (int j = 0; j < NN; j++) + { + int own = owner_rank[j]; + int con = consumer_rank[j]; + if (own == con) + continue; // local — no communication needed + if (own == myrank) + send_count[con]++; + if (con == myrank) + recv_count[own]++; + } + + // Build send buffers: for each destination rank, pack (index, data) pairs + // Each entry: 1 int (point index j) + num_var doubles + int total_send = 0, total_recv = 0; + int *send_offset = new int[nprocs]; + int *recv_offset = new int[nprocs]; + for (int r = 0; r < nprocs; r++) + { + send_offset[r] = total_send; + total_send += send_count[r]; + recv_offset[r] = total_recv; + total_recv += recv_count[r]; + } + + // Pack send buffers: each message contains (j, data[0..num_var-1]) per point + int stride = 1 + num_var; // 1 double for index + num_var doubles for data + double *sendbuf = new double[total_send * stride]; + double *recvbuf = new double[total_recv * stride]; + + // Temporary counters for packing + int *pack_pos = new int[nprocs]; + memset(pack_pos, 0, sizeof(int) * nprocs); + + for (int j = 0; j < NN; j++) + { + int own = owner_rank[j]; + int con = consumer_rank[j]; + if (own != myrank || con == myrank) + continue; + int pos = (send_offset[con] + pack_pos[con]) * stride; + sendbuf[pos] = (double)j; // point index + for (int v = 0; v < num_var; v++) + sendbuf[pos + 1 + v] = Shellf[j * num_var + v]; + pack_pos[con]++; + } + + // Post non-blocking recvs and sends + int n_req = 0; + for (int r = 0; r < nprocs; r++) + { + if (recv_count[r] > 0) n_req++; + if (send_count[r] > 0) n_req++; + } + + MPI_Request *reqs = new MPI_Request[n_req]; + int req_idx = 0; + + for (int r = 0; r < nprocs; r++) + { + if (recv_count[r] > 0) + { + MPI_Irecv(recvbuf + recv_offset[r] * stride, + recv_count[r] * stride, MPI_DOUBLE, + r, 0, MPI_COMM_WORLD, &reqs[req_idx++]); + } + } + for (int r = 0; r < nprocs; r++) + { + if (send_count[r] > 0) + { + MPI_Isend(sendbuf + send_offset[r] * stride, + send_count[r] * stride, MPI_DOUBLE, + r, 0, MPI_COMM_WORLD, &reqs[req_idx++]); + } + } + + if (n_req > 0) + MPI_Waitall(n_req, reqs, MPI_STATUSES_IGNORE); + + // Unpack recv buffers into Shellf + for (int i = 0; i < total_recv; i++) + { + int pos = i * stride; + int j = (int)recvbuf[pos]; + for (int v = 0; v < num_var; v++) + Shellf[j * num_var + v] = recvbuf[pos + 1 + v]; + } + + delete[] reqs; + delete[] sendbuf; + delete[] recvbuf; + delete[] pack_pos; + delete[] send_offset; + delete[] recv_offset; + delete[] send_count; + delete[] recv_count; + delete[] consumer_rank; + delete[] owner_rank; +} void Patch::Interp_Points(MyList *VarList, int NN, double **XX, double *Shellf, int Symmetry, MPI_Comm Comm_here) diff --git a/AMSS_NCKU_source/MPatch.h b/AMSS_NCKU_source/MPatch.h index ca19ca5..b993be6 100644 --- a/AMSS_NCKU_source/MPatch.h +++ b/AMSS_NCKU_source/MPatch.h @@ -39,6 +39,10 @@ public: bool Find_Point(double *XX); + void Interp_Points(MyList *VarList, + int NN, double **XX, + double *Shellf, int Symmetry, + int Nmin_consumer, int Nmax_consumer); void Interp_Points(MyList *VarList, int NN, double **XX, double *Shellf, int Symmetry, MPI_Comm Comm_here); diff --git a/AMSS_NCKU_source/surface_integral.C b/AMSS_NCKU_source/surface_integral.C index e725ae0..c2b7b67 100644 --- a/AMSS_NCKU_source/surface_integral.C +++ b/AMSS_NCKU_source/surface_integral.C @@ -220,16 +220,9 @@ void surface_integral::surf_Wave(double rex, int lev, cgh *GH, var *Rpsi4, var * pox[2][n] = rex * nz_g[n]; } - double *shellf; - shellf = new double[n_tot * InList]; - - GH->PatL[lev]->data->Interp_Points(DG_List, n_tot, pox, shellf, Symmetry); - int mp, Lp, Nmin, Nmax; - mp = n_tot / cpusize; Lp = n_tot - cpusize * mp; - if (Lp > myrank) { Nmin = myrank * mp + myrank; @@ -241,6 +234,11 @@ void surface_integral::surf_Wave(double rex, int lev, cgh *GH, var *Rpsi4, var * Nmax = Nmin + mp - 1; } + double *shellf; + shellf = new double[n_tot * InList]; + + GH->PatL[lev]->data->Interp_Points(DG_List, n_tot, pox, shellf, Symmetry, Nmin, Nmax); + //|~~~~~> Integrate the dot product of Dphi with the surface normal. double *RP_out, *IP_out; @@ -2386,25 +2384,9 @@ void surface_integral::surf_MassPAng(double rex, int lev, cgh *GH, var *chi, var pox[2][n] = rex * nz_g[n]; } - double *shellf; - shellf = new double[n_tot * InList]; - - // we have assumed there is only one box on this level, - // so we do not need loop boxes - GH->PatL[lev]->data->Interp_Points(DG_List, n_tot, pox, shellf, Symmetry); - - double Mass_out = 0; - double ang_outx, ang_outy, ang_outz; - double p_outx, p_outy, p_outz; - ang_outx = ang_outy = ang_outz = 0.0; - p_outx = p_outy = p_outz = 0.0; - const double f1o8 = 0.125; - int mp, Lp, Nmin, Nmax; - mp = n_tot / cpusize; Lp = n_tot - cpusize * mp; - if (Lp > myrank) { Nmin = myrank * mp + myrank; @@ -2416,6 +2398,20 @@ void surface_integral::surf_MassPAng(double rex, int lev, cgh *GH, var *chi, var Nmax = Nmin + mp - 1; } + double *shellf; + shellf = new double[n_tot * InList]; + + // we have assumed there is only one box on this level, + // so we do not need loop boxes + GH->PatL[lev]->data->Interp_Points(DG_List, n_tot, pox, shellf, Symmetry, Nmin, Nmax); + + double Mass_out = 0; + double ang_outx, ang_outy, ang_outz; + double p_outx, p_outy, p_outz; + ang_outx = ang_outy = ang_outz = 0.0; + p_outx = p_outy = p_outz = 0.0; + const double f1o8 = 0.125; + double Chi, Psi; double Gxx, Gxy, Gxz, Gyy, Gyz, Gzz; double gupxx, gupxy, gupxz, gupyy, gupyz, gupzz;