Add targeted point-to-point Interp_Points overload for surface_integral

Instead of broadcasting all interpolated point data to every MPI rank,
the new overload sends each point only to the one rank that needs it
for integration, reducing communication volume by ~nprocs times.

The consumer rank is computed deterministically using the same Nmin/Nmax
work distribution formula used by surface_integral callers. Two active
call sites (surf_Wave and surf_MassPAng with MPI_COMM_WORLD) now use
the new overload. Other callers (ShellPatch, Comm_here variants, etc.)
remain unchanged.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-02-10 19:18:56 +08:00
parent 50e2a845f8
commit d06d5b4db8
3 changed files with 289 additions and 23 deletions

View File

@@ -499,6 +499,272 @@ void Patch::Interp_Points(MyList<var> *VarList,
delete[] owner_rank; delete[] owner_rank;
} }
void Patch::Interp_Points(MyList<var> *VarList,
int NN, double **XX,
double *Shellf, int Symmetry,
int Nmin_consumer, int Nmax_consumer)
{
// Targeted point-to-point overload: each owner sends each point only to
// the one rank that needs it for integration (consumer), reducing
// communication volume by ~nprocs times compared to the Bcast version.
int myrank, nprocs;
MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
int ordn = 2 * ghost_width;
MyList<var> *varl;
int num_var = 0;
varl = VarList;
while (varl)
{
num_var++;
varl = varl->next;
}
memset(Shellf, 0, sizeof(double) * NN * num_var);
// owner_rank[j] records which MPI rank owns point j
int *owner_rank;
owner_rank = new int[NN];
for (int j = 0; j < NN; j++)
owner_rank[j] = -1;
double DH[dim], llb[dim], uub[dim];
for (int i = 0; i < dim; i++)
DH[i] = getdX(i);
// --- Interpolation phase (identical to original) ---
for (int j = 0; j < NN; j++)
{
double pox[dim];
for (int i = 0; i < dim; i++)
{
pox[i] = XX[i][j];
if (myrank == 0 && (XX[i][j] < bbox[i] + lli[i] * DH[i] || XX[i][j] > bbox[dim + i] - uui[i] * DH[i]))
{
cout << "Patch::Interp_Points: point (";
for (int k = 0; k < dim; k++)
{
cout << XX[k][j];
if (k < dim - 1)
cout << ",";
else
cout << ") is out of current Patch." << endl;
}
MPI_Abort(MPI_COMM_WORLD, 1);
}
}
MyList<Block> *Bp = blb;
bool notfind = true;
while (notfind && Bp)
{
Block *BP = Bp->data;
bool flag = true;
for (int i = 0; i < dim; i++)
{
#ifdef Vertex
#ifdef Cell
#error Both Cell and Vertex are defined
#endif
llb[i] = (feq(BP->bbox[i], bbox[i], DH[i] / 2)) ? BP->bbox[i] + lli[i] * DH[i] : BP->bbox[i] + (ghost_width - 0.5) * DH[i];
uub[i] = (feq(BP->bbox[dim + i], bbox[dim + i], DH[i] / 2)) ? BP->bbox[dim + i] - uui[i] * DH[i] : BP->bbox[dim + i] - (ghost_width - 0.5) * DH[i];
#else
#ifdef Cell
llb[i] = (feq(BP->bbox[i], bbox[i], DH[i] / 2)) ? BP->bbox[i] + lli[i] * DH[i] : BP->bbox[i] + ghost_width * DH[i];
uub[i] = (feq(BP->bbox[dim + i], bbox[dim + i], DH[i] / 2)) ? BP->bbox[dim + i] - uui[i] * DH[i] : BP->bbox[dim + i] - ghost_width * DH[i];
#else
#error Not define Vertex nor Cell
#endif
#endif
if (XX[i][j] - llb[i] < -DH[i] / 2 || XX[i][j] - uub[i] > DH[i] / 2)
{
flag = false;
break;
}
}
if (flag)
{
notfind = false;
owner_rank[j] = BP->rank;
if (myrank == BP->rank)
{
varl = VarList;
int k = 0;
while (varl)
{
f_global_interp(BP->shape, BP->X[0], BP->X[1], BP->X[2], BP->fgfs[varl->data->sgfn], Shellf[j * num_var + k],
pox[0], pox[1], pox[2], ordn, varl->data->SoA, Symmetry);
varl = varl->next;
k++;
}
}
}
if (Bp == ble)
break;
Bp = Bp->next;
}
}
// --- Error check for unfound points ---
for (int j = 0; j < NN; j++)
{
if (owner_rank[j] < 0 && myrank == 0)
{
cout << "ERROR: Patch::Interp_Points fails to find point (";
for (int d = 0; d < dim; d++)
{
cout << XX[d][j];
if (d < dim - 1)
cout << ",";
else
cout << ")";
}
cout << " on Patch (";
for (int d = 0; d < dim; d++)
{
cout << bbox[d] << "+" << lli[d] * DH[d];
if (d < dim - 1)
cout << ",";
else
cout << ")--";
}
cout << "(";
for (int d = 0; d < dim; d++)
{
cout << bbox[dim + d] << "-" << uui[d] * DH[d];
if (d < dim - 1)
cout << ",";
else
cout << ")" << endl;
}
MPI_Abort(MPI_COMM_WORLD, 1);
}
}
// --- Targeted point-to-point communication phase ---
// Compute consumer_rank[j] using the same deterministic formula as surface_integral
int *consumer_rank = new int[NN];
{
int mp = NN / nprocs;
int Lp = NN - nprocs * mp;
for (int j = 0; j < NN; j++)
{
if (j < Lp * (mp + 1))
consumer_rank[j] = j / (mp + 1);
else
consumer_rank[j] = Lp + (j - Lp * (mp + 1)) / mp;
}
}
// Count sends and recvs per rank
int *send_count = new int[nprocs];
int *recv_count = new int[nprocs];
memset(send_count, 0, sizeof(int) * nprocs);
memset(recv_count, 0, sizeof(int) * nprocs);
for (int j = 0; j < NN; j++)
{
int own = owner_rank[j];
int con = consumer_rank[j];
if (own == con)
continue; // local — no communication needed
if (own == myrank)
send_count[con]++;
if (con == myrank)
recv_count[own]++;
}
// Build send buffers: for each destination rank, pack (index, data) pairs
// Each entry: 1 int (point index j) + num_var doubles
int total_send = 0, total_recv = 0;
int *send_offset = new int[nprocs];
int *recv_offset = new int[nprocs];
for (int r = 0; r < nprocs; r++)
{
send_offset[r] = total_send;
total_send += send_count[r];
recv_offset[r] = total_recv;
total_recv += recv_count[r];
}
// Pack send buffers: each message contains (j, data[0..num_var-1]) per point
int stride = 1 + num_var; // 1 double for index + num_var doubles for data
double *sendbuf = new double[total_send * stride];
double *recvbuf = new double[total_recv * stride];
// Temporary counters for packing
int *pack_pos = new int[nprocs];
memset(pack_pos, 0, sizeof(int) * nprocs);
for (int j = 0; j < NN; j++)
{
int own = owner_rank[j];
int con = consumer_rank[j];
if (own != myrank || con == myrank)
continue;
int pos = (send_offset[con] + pack_pos[con]) * stride;
sendbuf[pos] = (double)j; // point index
for (int v = 0; v < num_var; v++)
sendbuf[pos + 1 + v] = Shellf[j * num_var + v];
pack_pos[con]++;
}
// Post non-blocking recvs and sends
int n_req = 0;
for (int r = 0; r < nprocs; r++)
{
if (recv_count[r] > 0) n_req++;
if (send_count[r] > 0) n_req++;
}
MPI_Request *reqs = new MPI_Request[n_req];
int req_idx = 0;
for (int r = 0; r < nprocs; r++)
{
if (recv_count[r] > 0)
{
MPI_Irecv(recvbuf + recv_offset[r] * stride,
recv_count[r] * stride, MPI_DOUBLE,
r, 0, MPI_COMM_WORLD, &reqs[req_idx++]);
}
}
for (int r = 0; r < nprocs; r++)
{
if (send_count[r] > 0)
{
MPI_Isend(sendbuf + send_offset[r] * stride,
send_count[r] * stride, MPI_DOUBLE,
r, 0, MPI_COMM_WORLD, &reqs[req_idx++]);
}
}
if (n_req > 0)
MPI_Waitall(n_req, reqs, MPI_STATUSES_IGNORE);
// Unpack recv buffers into Shellf
for (int i = 0; i < total_recv; i++)
{
int pos = i * stride;
int j = (int)recvbuf[pos];
for (int v = 0; v < num_var; v++)
Shellf[j * num_var + v] = recvbuf[pos + 1 + v];
}
delete[] reqs;
delete[] sendbuf;
delete[] recvbuf;
delete[] pack_pos;
delete[] send_offset;
delete[] recv_offset;
delete[] send_count;
delete[] recv_count;
delete[] consumer_rank;
delete[] owner_rank;
}
void Patch::Interp_Points(MyList<var> *VarList, void Patch::Interp_Points(MyList<var> *VarList,
int NN, double **XX, int NN, double **XX,
double *Shellf, int Symmetry, MPI_Comm Comm_here) double *Shellf, int Symmetry, MPI_Comm Comm_here)

View File

@@ -39,6 +39,10 @@ public:
bool Find_Point(double *XX); bool Find_Point(double *XX);
void Interp_Points(MyList<var> *VarList,
int NN, double **XX,
double *Shellf, int Symmetry,
int Nmin_consumer, int Nmax_consumer);
void Interp_Points(MyList<var> *VarList, void Interp_Points(MyList<var> *VarList,
int NN, double **XX, int NN, double **XX,
double *Shellf, int Symmetry, MPI_Comm Comm_here); double *Shellf, int Symmetry, MPI_Comm Comm_here);

View File

@@ -220,16 +220,9 @@ void surface_integral::surf_Wave(double rex, int lev, cgh *GH, var *Rpsi4, var *
pox[2][n] = rex * nz_g[n]; pox[2][n] = rex * nz_g[n];
} }
double *shellf;
shellf = new double[n_tot * InList];
GH->PatL[lev]->data->Interp_Points(DG_List, n_tot, pox, shellf, Symmetry);
int mp, Lp, Nmin, Nmax; int mp, Lp, Nmin, Nmax;
mp = n_tot / cpusize; mp = n_tot / cpusize;
Lp = n_tot - cpusize * mp; Lp = n_tot - cpusize * mp;
if (Lp > myrank) if (Lp > myrank)
{ {
Nmin = myrank * mp + myrank; Nmin = myrank * mp + myrank;
@@ -241,6 +234,11 @@ void surface_integral::surf_Wave(double rex, int lev, cgh *GH, var *Rpsi4, var *
Nmax = Nmin + mp - 1; Nmax = Nmin + mp - 1;
} }
double *shellf;
shellf = new double[n_tot * InList];
GH->PatL[lev]->data->Interp_Points(DG_List, n_tot, pox, shellf, Symmetry, Nmin, Nmax);
//|~~~~~> Integrate the dot product of Dphi with the surface normal. //|~~~~~> Integrate the dot product of Dphi with the surface normal.
double *RP_out, *IP_out; double *RP_out, *IP_out;
@@ -2386,25 +2384,9 @@ void surface_integral::surf_MassPAng(double rex, int lev, cgh *GH, var *chi, var
pox[2][n] = rex * nz_g[n]; pox[2][n] = rex * nz_g[n];
} }
double *shellf;
shellf = new double[n_tot * InList];
// we have assumed there is only one box on this level,
// so we do not need loop boxes
GH->PatL[lev]->data->Interp_Points(DG_List, n_tot, pox, shellf, Symmetry);
double Mass_out = 0;
double ang_outx, ang_outy, ang_outz;
double p_outx, p_outy, p_outz;
ang_outx = ang_outy = ang_outz = 0.0;
p_outx = p_outy = p_outz = 0.0;
const double f1o8 = 0.125;
int mp, Lp, Nmin, Nmax; int mp, Lp, Nmin, Nmax;
mp = n_tot / cpusize; mp = n_tot / cpusize;
Lp = n_tot - cpusize * mp; Lp = n_tot - cpusize * mp;
if (Lp > myrank) if (Lp > myrank)
{ {
Nmin = myrank * mp + myrank; Nmin = myrank * mp + myrank;
@@ -2416,6 +2398,20 @@ void surface_integral::surf_MassPAng(double rex, int lev, cgh *GH, var *chi, var
Nmax = Nmin + mp - 1; Nmax = Nmin + mp - 1;
} }
double *shellf;
shellf = new double[n_tot * InList];
// we have assumed there is only one box on this level,
// so we do not need loop boxes
GH->PatL[lev]->data->Interp_Points(DG_List, n_tot, pox, shellf, Symmetry, Nmin, Nmax);
double Mass_out = 0;
double ang_outx, ang_outy, ang_outz;
double p_outx, p_outy, p_outz;
ang_outx = ang_outy = ang_outz = 0.0;
p_outx = p_outy = p_outz = 0.0;
const double f1o8 = 0.125;
double Chi, Psi; double Chi, Psi;
double Gxx, Gxy, Gxz, Gyy, Gyz, Gzz; double Gxx, Gxy, Gxz, Gyy, Gyz, Gzz;
double gupxx, gupxy, gupxz, gupyy, gupyz, gupzz; double gupxx, gupxy, gupxz, gupyy, gupyz, gupzz;