From d06d5b4db80660e3e277667f08b93f6149af1c7f Mon Sep 17 00:00:00 2001 From: CGH0S7 <776459475@qq.com> Date: Tue, 10 Feb 2026 19:18:56 +0800 Subject: [PATCH] Add targeted point-to-point Interp_Points overload for surface_integral Instead of broadcasting all interpolated point data to every MPI rank, the new overload sends each point only to the one rank that needs it for integration, reducing communication volume by ~nprocs times. The consumer rank is computed deterministically using the same Nmin/Nmax work distribution formula used by surface_integral callers. Two active call sites (surf_Wave and surf_MassPAng with MPI_COMM_WORLD) now use the new overload. Other callers (ShellPatch, Comm_here variants, etc.) remain unchanged. Co-Authored-By: Claude Opus 4.6 (1M context) --- AMSS_NCKU_source/MPatch.C | 266 ++++++++++++++++++++++++++++ AMSS_NCKU_source/MPatch.h | 4 + AMSS_NCKU_source/surface_integral.C | 42 ++--- 3 files changed, 289 insertions(+), 23 deletions(-) diff --git a/AMSS_NCKU_source/MPatch.C b/AMSS_NCKU_source/MPatch.C index 54652a0..91ead8a 100644 --- a/AMSS_NCKU_source/MPatch.C +++ b/AMSS_NCKU_source/MPatch.C @@ -499,6 +499,272 @@ void Patch::Interp_Points(MyList *VarList, delete[] owner_rank; } +void Patch::Interp_Points(MyList *VarList, + int NN, double **XX, + double *Shellf, int Symmetry, + int Nmin_consumer, int Nmax_consumer) +{ + // Targeted point-to-point overload: each owner sends each point only to + // the one rank that needs it for integration (consumer), reducing + // communication volume by ~nprocs times compared to the Bcast version. + int myrank, nprocs; + MPI_Comm_rank(MPI_COMM_WORLD, &myrank); + MPI_Comm_size(MPI_COMM_WORLD, &nprocs); + + int ordn = 2 * ghost_width; + MyList *varl; + int num_var = 0; + varl = VarList; + while (varl) + { + num_var++; + varl = varl->next; + } + + memset(Shellf, 0, sizeof(double) * NN * num_var); + + // owner_rank[j] records which MPI rank owns point j + int *owner_rank; + owner_rank = new int[NN]; + for (int j = 0; j < NN; j++) + owner_rank[j] = -1; + + double DH[dim], llb[dim], uub[dim]; + for (int i = 0; i < dim; i++) + DH[i] = getdX(i); + + // --- Interpolation phase (identical to original) --- + for (int j = 0; j < NN; j++) + { + double pox[dim]; + for (int i = 0; i < dim; i++) + { + pox[i] = XX[i][j]; + if (myrank == 0 && (XX[i][j] < bbox[i] + lli[i] * DH[i] || XX[i][j] > bbox[dim + i] - uui[i] * DH[i])) + { + cout << "Patch::Interp_Points: point ("; + for (int k = 0; k < dim; k++) + { + cout << XX[k][j]; + if (k < dim - 1) + cout << ","; + else + cout << ") is out of current Patch." << endl; + } + MPI_Abort(MPI_COMM_WORLD, 1); + } + } + + MyList *Bp = blb; + bool notfind = true; + while (notfind && Bp) + { + Block *BP = Bp->data; + + bool flag = true; + for (int i = 0; i < dim; i++) + { +#ifdef Vertex +#ifdef Cell +#error Both Cell and Vertex are defined +#endif + llb[i] = (feq(BP->bbox[i], bbox[i], DH[i] / 2)) ? BP->bbox[i] + lli[i] * DH[i] : BP->bbox[i] + (ghost_width - 0.5) * DH[i]; + uub[i] = (feq(BP->bbox[dim + i], bbox[dim + i], DH[i] / 2)) ? BP->bbox[dim + i] - uui[i] * DH[i] : BP->bbox[dim + i] - (ghost_width - 0.5) * DH[i]; +#else +#ifdef Cell + llb[i] = (feq(BP->bbox[i], bbox[i], DH[i] / 2)) ? BP->bbox[i] + lli[i] * DH[i] : BP->bbox[i] + ghost_width * DH[i]; + uub[i] = (feq(BP->bbox[dim + i], bbox[dim + i], DH[i] / 2)) ? BP->bbox[dim + i] - uui[i] * DH[i] : BP->bbox[dim + i] - ghost_width * DH[i]; +#else +#error Not define Vertex nor Cell +#endif +#endif + if (XX[i][j] - llb[i] < -DH[i] / 2 || XX[i][j] - uub[i] > DH[i] / 2) + { + flag = false; + break; + } + } + + if (flag) + { + notfind = false; + owner_rank[j] = BP->rank; + if (myrank == BP->rank) + { + varl = VarList; + int k = 0; + while (varl) + { + f_global_interp(BP->shape, BP->X[0], BP->X[1], BP->X[2], BP->fgfs[varl->data->sgfn], Shellf[j * num_var + k], + pox[0], pox[1], pox[2], ordn, varl->data->SoA, Symmetry); + varl = varl->next; + k++; + } + } + } + if (Bp == ble) + break; + Bp = Bp->next; + } + } + + // --- Error check for unfound points --- + for (int j = 0; j < NN; j++) + { + if (owner_rank[j] < 0 && myrank == 0) + { + cout << "ERROR: Patch::Interp_Points fails to find point ("; + for (int d = 0; d < dim; d++) + { + cout << XX[d][j]; + if (d < dim - 1) + cout << ","; + else + cout << ")"; + } + cout << " on Patch ("; + for (int d = 0; d < dim; d++) + { + cout << bbox[d] << "+" << lli[d] * DH[d]; + if (d < dim - 1) + cout << ","; + else + cout << ")--"; + } + cout << "("; + for (int d = 0; d < dim; d++) + { + cout << bbox[dim + d] << "-" << uui[d] * DH[d]; + if (d < dim - 1) + cout << ","; + else + cout << ")" << endl; + } + MPI_Abort(MPI_COMM_WORLD, 1); + } + } + + // --- Targeted point-to-point communication phase --- + // Compute consumer_rank[j] using the same deterministic formula as surface_integral + int *consumer_rank = new int[NN]; + { + int mp = NN / nprocs; + int Lp = NN - nprocs * mp; + for (int j = 0; j < NN; j++) + { + if (j < Lp * (mp + 1)) + consumer_rank[j] = j / (mp + 1); + else + consumer_rank[j] = Lp + (j - Lp * (mp + 1)) / mp; + } + } + + // Count sends and recvs per rank + int *send_count = new int[nprocs]; + int *recv_count = new int[nprocs]; + memset(send_count, 0, sizeof(int) * nprocs); + memset(recv_count, 0, sizeof(int) * nprocs); + + for (int j = 0; j < NN; j++) + { + int own = owner_rank[j]; + int con = consumer_rank[j]; + if (own == con) + continue; // local — no communication needed + if (own == myrank) + send_count[con]++; + if (con == myrank) + recv_count[own]++; + } + + // Build send buffers: for each destination rank, pack (index, data) pairs + // Each entry: 1 int (point index j) + num_var doubles + int total_send = 0, total_recv = 0; + int *send_offset = new int[nprocs]; + int *recv_offset = new int[nprocs]; + for (int r = 0; r < nprocs; r++) + { + send_offset[r] = total_send; + total_send += send_count[r]; + recv_offset[r] = total_recv; + total_recv += recv_count[r]; + } + + // Pack send buffers: each message contains (j, data[0..num_var-1]) per point + int stride = 1 + num_var; // 1 double for index + num_var doubles for data + double *sendbuf = new double[total_send * stride]; + double *recvbuf = new double[total_recv * stride]; + + // Temporary counters for packing + int *pack_pos = new int[nprocs]; + memset(pack_pos, 0, sizeof(int) * nprocs); + + for (int j = 0; j < NN; j++) + { + int own = owner_rank[j]; + int con = consumer_rank[j]; + if (own != myrank || con == myrank) + continue; + int pos = (send_offset[con] + pack_pos[con]) * stride; + sendbuf[pos] = (double)j; // point index + for (int v = 0; v < num_var; v++) + sendbuf[pos + 1 + v] = Shellf[j * num_var + v]; + pack_pos[con]++; + } + + // Post non-blocking recvs and sends + int n_req = 0; + for (int r = 0; r < nprocs; r++) + { + if (recv_count[r] > 0) n_req++; + if (send_count[r] > 0) n_req++; + } + + MPI_Request *reqs = new MPI_Request[n_req]; + int req_idx = 0; + + for (int r = 0; r < nprocs; r++) + { + if (recv_count[r] > 0) + { + MPI_Irecv(recvbuf + recv_offset[r] * stride, + recv_count[r] * stride, MPI_DOUBLE, + r, 0, MPI_COMM_WORLD, &reqs[req_idx++]); + } + } + for (int r = 0; r < nprocs; r++) + { + if (send_count[r] > 0) + { + MPI_Isend(sendbuf + send_offset[r] * stride, + send_count[r] * stride, MPI_DOUBLE, + r, 0, MPI_COMM_WORLD, &reqs[req_idx++]); + } + } + + if (n_req > 0) + MPI_Waitall(n_req, reqs, MPI_STATUSES_IGNORE); + + // Unpack recv buffers into Shellf + for (int i = 0; i < total_recv; i++) + { + int pos = i * stride; + int j = (int)recvbuf[pos]; + for (int v = 0; v < num_var; v++) + Shellf[j * num_var + v] = recvbuf[pos + 1 + v]; + } + + delete[] reqs; + delete[] sendbuf; + delete[] recvbuf; + delete[] pack_pos; + delete[] send_offset; + delete[] recv_offset; + delete[] send_count; + delete[] recv_count; + delete[] consumer_rank; + delete[] owner_rank; +} void Patch::Interp_Points(MyList *VarList, int NN, double **XX, double *Shellf, int Symmetry, MPI_Comm Comm_here) diff --git a/AMSS_NCKU_source/MPatch.h b/AMSS_NCKU_source/MPatch.h index ca19ca5..b993be6 100644 --- a/AMSS_NCKU_source/MPatch.h +++ b/AMSS_NCKU_source/MPatch.h @@ -39,6 +39,10 @@ public: bool Find_Point(double *XX); + void Interp_Points(MyList *VarList, + int NN, double **XX, + double *Shellf, int Symmetry, + int Nmin_consumer, int Nmax_consumer); void Interp_Points(MyList *VarList, int NN, double **XX, double *Shellf, int Symmetry, MPI_Comm Comm_here); diff --git a/AMSS_NCKU_source/surface_integral.C b/AMSS_NCKU_source/surface_integral.C index e725ae0..c2b7b67 100644 --- a/AMSS_NCKU_source/surface_integral.C +++ b/AMSS_NCKU_source/surface_integral.C @@ -220,16 +220,9 @@ void surface_integral::surf_Wave(double rex, int lev, cgh *GH, var *Rpsi4, var * pox[2][n] = rex * nz_g[n]; } - double *shellf; - shellf = new double[n_tot * InList]; - - GH->PatL[lev]->data->Interp_Points(DG_List, n_tot, pox, shellf, Symmetry); - int mp, Lp, Nmin, Nmax; - mp = n_tot / cpusize; Lp = n_tot - cpusize * mp; - if (Lp > myrank) { Nmin = myrank * mp + myrank; @@ -241,6 +234,11 @@ void surface_integral::surf_Wave(double rex, int lev, cgh *GH, var *Rpsi4, var * Nmax = Nmin + mp - 1; } + double *shellf; + shellf = new double[n_tot * InList]; + + GH->PatL[lev]->data->Interp_Points(DG_List, n_tot, pox, shellf, Symmetry, Nmin, Nmax); + //|~~~~~> Integrate the dot product of Dphi with the surface normal. double *RP_out, *IP_out; @@ -2386,25 +2384,9 @@ void surface_integral::surf_MassPAng(double rex, int lev, cgh *GH, var *chi, var pox[2][n] = rex * nz_g[n]; } - double *shellf; - shellf = new double[n_tot * InList]; - - // we have assumed there is only one box on this level, - // so we do not need loop boxes - GH->PatL[lev]->data->Interp_Points(DG_List, n_tot, pox, shellf, Symmetry); - - double Mass_out = 0; - double ang_outx, ang_outy, ang_outz; - double p_outx, p_outy, p_outz; - ang_outx = ang_outy = ang_outz = 0.0; - p_outx = p_outy = p_outz = 0.0; - const double f1o8 = 0.125; - int mp, Lp, Nmin, Nmax; - mp = n_tot / cpusize; Lp = n_tot - cpusize * mp; - if (Lp > myrank) { Nmin = myrank * mp + myrank; @@ -2416,6 +2398,20 @@ void surface_integral::surf_MassPAng(double rex, int lev, cgh *GH, var *chi, var Nmax = Nmin + mp - 1; } + double *shellf; + shellf = new double[n_tot * InList]; + + // we have assumed there is only one box on this level, + // so we do not need loop boxes + GH->PatL[lev]->data->Interp_Points(DG_List, n_tot, pox, shellf, Symmetry, Nmin, Nmax); + + double Mass_out = 0; + double ang_outx, ang_outy, ang_outz; + double p_outx, p_outy, p_outz; + ang_outx = ang_outy = ang_outz = 0.0; + p_outx = p_outy = p_outz = 0.0; + const double f1o8 = 0.125; + double Chi, Psi; double Gxx, Gxy, Gxz, Gyy, Gyz, Gzz; double gupxx, gupxy, gupxz, gupyy, gupyz, gupzz;