Cache repeated interpolation plans

This commit is contained in:
2026-04-09 15:21:01 +08:00
parent 06fa643365
commit 42e851d19a
4 changed files with 428 additions and 370 deletions

View File

@@ -2,13 +2,14 @@
#include <iostream>
#include <iomanip>
#include <fstream>
#include <cstdlib>
#include <cstdio>
#include <string>
#include <cmath>
#include <new>
#include <vector>
using namespace std;
#include <cstdlib>
#include <cstdio>
#include <string>
#include <cmath>
#include <functional>
#include <new>
#include <map>
#include <vector>
using namespace std;
#include "misc.h"
#include "MPatch.h"
@@ -40,6 +41,44 @@ struct InterpVarDesc
double soa[dim];
};
// Lookup key for the interpolation-plan cache used by get_cached_interp_plan.
// Two requests map to the same cache slot only when every field compares equal.
// The coordinate arrays are identified by their ADDRESS, not by their contents.
struct InterpPlanKey
{
const Patch *patch; // patch the points are interpolated on
const double *x; // address of the first coordinate array (XX[0])
const double *y; // address of the second coordinate array (XX[1])
const double *z; // address of the third coordinate array (XX[2])
int NN; // number of points in each coordinate array
int Symmetry; // symmetry flag passed by the caller
int myrank; // rank of the calling process as passed by the caller
};
struct InterpPlanKeyLess
{
bool operator()(const InterpPlanKey &lhs, const InterpPlanKey &rhs) const
{
if (lhs.patch != rhs.patch) return lhs.patch < rhs.patch;
if (lhs.x != rhs.x) return lhs.x < rhs.x;
if (lhs.y != rhs.y) return lhs.y < rhs.y;
if (lhs.z != rhs.z) return lhs.z < rhs.z;
if (lhs.NN != rhs.NN) return lhs.NN < rhs.NN;
if (lhs.Symmetry != rhs.Symmetry) return lhs.Symmetry < rhs.Symmetry;
return lhs.myrank < rhs.myrank;
}
};
// Result of assigning a set of interpolation points to the blocks of a patch,
// kept so that the assignment can be reused by later calls.
struct CachedInterpPlan
{
  // A fresh plan describes zero blocks and holds no points.
  CachedInterpPlan()
      : nblocks(0),
        owner_rank(),
        owner_block(),
        block_points(),
        block_px(),
        block_py(),
        block_pz()
  {
  }

  // Number of block views the plan was built for.
  int nblocks;
  // Per point: rank of the block that contains it, or -1 when no block was found.
  vector<int> owner_rank;
  // Per point: index of the containing block view, or -1 when no block was found.
  vector<int> owner_block;
  // Per block: indices of the points in that block that belong to the calling rank.
  vector<vector<int> > block_points;
  // Per block: x/y/z coordinates of those points, in the same order as block_points.
  vector<vector<double> > block_px;
  vector<vector<double> > block_py;
  vector<vector<double> > block_pz;
};
struct InterpBlockView
{
Block *bp;
@@ -229,10 +268,124 @@ bool should_try_cuda_interp(int ordn, int num_points, int num_var)
return num_points * num_var >= 256;
}
CachedInterpPlan &get_cached_interp_plan(Patch *patch,
int NN, double **XX,
int Symmetry, int myrank,
const double *DH,
const BlockBinIndex &block_index,
bool report_bounds_here,
bool allow_missing_points)
{
static map<InterpPlanKey, CachedInterpPlan, InterpPlanKeyLess> cache;
InterpPlanKey key;
key.patch = patch;
key.x = XX[0];
key.y = XX[1];
key.z = XX[2];
key.NN = NN;
key.Symmetry = Symmetry;
key.myrank = myrank;
map<InterpPlanKey, CachedInterpPlan, InterpPlanKeyLess>::iterator it = cache.find(key);
if (it != cache.end() && it->second.nblocks == static_cast<int>(block_index.views.size()))
return it->second;
CachedInterpPlan &plan = cache[key];
plan = CachedInterpPlan();
plan.nblocks = static_cast<int>(block_index.views.size());
plan.owner_rank.assign(NN, -1);
plan.owner_block.assign(NN, -1);
plan.block_points.resize(plan.nblocks);
plan.block_px.resize(plan.nblocks);
plan.block_py.resize(plan.nblocks);
plan.block_pz.resize(plan.nblocks);
for (int j = 0; j < NN; ++j)
{
double pox[dim];
for (int i = 0; i < dim; ++i)
{
pox[i] = XX[i][j];
if (report_bounds_here &&
(XX[i][j] < patch->bbox[i] + patch->lli[i] * DH[i] ||
XX[i][j] > patch->bbox[dim + i] - patch->uui[i] * DH[i]))
{
cout << "Patch::Interp_Points: point (";
for (int k = 0; k < dim; ++k)
{
cout << XX[k][j];
if (k < dim - 1)
cout << ",";
else
cout << ") is out of current Patch." << endl;
}
MPI_Abort(MPI_COMM_WORLD, 1);
}
}
const int block_i = find_block_index_for_point(block_index, pox, DH);
if (block_i >= 0)
{
Block *BP = block_index.views[block_i].bp;
plan.owner_rank[j] = BP->rank;
plan.owner_block[j] = block_i;
if (BP->rank == myrank)
{
plan.block_points[block_i].push_back(j);
plan.block_px[block_i].push_back(XX[0][j]);
plan.block_py[block_i].push_back(XX[1][j]);
plan.block_pz[block_i].push_back(XX[2][j]);
}
}
}
if (!allow_missing_points && report_bounds_here)
{
for (int j = 0; j < NN; ++j)
{
if (plan.owner_rank[j] >= 0)
continue;
cout << "ERROR: Patch::Interp_Points fails to find point (";
for (int d = 0; d < dim; ++d)
{
cout << XX[d][j];
if (d < dim - 1)
cout << ",";
else
cout << ")";
}
cout << " on Patch (";
for (int d = 0; d < dim; ++d)
{
cout << patch->bbox[d] << "+" << patch->lli[d] * DH[d];
if (d < dim - 1)
cout << ",";
else
cout << ")--";
}
cout << "(";
for (int d = 0; d < dim; ++d)
{
cout << patch->bbox[dim + d] << "-" << patch->uui[d] * DH[d];
if (d < dim - 1)
cout << ",";
else
cout << ")" << endl;
}
MPI_Abort(MPI_COMM_WORLD, 1);
}
}
return plan;
}
bool run_cuda_interp_for_block(Block *BP,
const vector<InterpVarDesc> &vars,
const vector<int> &point_ids,
double **XX,
const vector<double> &px,
const vector<double> &py,
const vector<double> &pz,
double *Shellf,
int num_var,
int ordn,
@@ -251,15 +404,6 @@ bool run_cuda_interp_for_block(Block *BP,
}
const int npts = static_cast<int>(point_ids.size());
vector<double> px(npts), py(npts), pz(npts);
for (int p = 0; p < npts; ++p)
{
const int j = point_ids[p];
px[p] = XX[0][j];
py[p] = XX[1][j];
pz[p] = XX[2][j];
}
vector<double> out(static_cast<size_t>(npts) * static_cast<size_t>(num_var));
if (bssn_cuda_interp_points_batch(BP->shape,
BP->X[0], BP->X[1], BP->X[2],
@@ -286,7 +430,9 @@ bool run_cuda_interp_for_block(Block *BP,
void run_cpu_interp_for_block(Block *BP,
const vector<InterpVarDesc> &vars,
const vector<int> &point_ids,
double **XX,
const vector<double> &px,
const vector<double> &py,
const vector<double> &pz,
double *Shellf,
int num_var,
int ordn,
@@ -295,9 +441,9 @@ void run_cpu_interp_for_block(Block *BP,
for (size_t p = 0; p < point_ids.size(); ++p)
{
const int j = point_ids[p];
double x = XX[0][j];
double y = XX[1][j];
double z = XX[2][j];
double x = px[p];
double y = py[p];
double z = pz[p];
int ordn_local = ordn;
int symmetry_local = Symmetry;
for (int v = 0; v < num_var; ++v)
@@ -310,33 +456,34 @@ void run_cpu_interp_for_block(Block *BP,
}
// Interpolates the variables in VarList at the points grouped per block and writes
// the results into Shellf.
// Variable descriptors are gathered with collect_interp_vars. Each block with a
// non-empty point list is processed in turn: run_cuda_interp_for_block is tried
// first, and run_cpu_interp_for_block is used when it returns false.
void interpolate_owned_points(MyList<var> *VarList,
int NN, double **XX,
double *Shellf, int Symmetry,
int myrank, int ordn,
int ordn,
const BlockBinIndex &block_index,
const int *owner_rank,
const int *owner_block)
const CachedInterpPlan &plan)
{
vector<InterpVarDesc> vars;
collect_interp_vars(VarList, vars);
const int num_var = static_cast<int>(vars.size());
vector<vector<int>> block_points(block_index.views.size());
for (int j = 0; j < NN; ++j)
for (size_t bi = 0; bi < plan.block_points.size(); ++bi)
{
if (owner_rank[j] == myrank && owner_block[j] >= 0)
block_points[owner_block[j]].push_back(j);
}
for (size_t bi = 0; bi < block_points.size(); ++bi)
{
if (block_points[bi].empty())
if (plan.block_points[bi].empty())
continue;
Block *BP = block_index.views[bi].bp;
bool done = run_cuda_interp_for_block(BP, vars, block_points[bi], XX, Shellf, num_var, ordn, Symmetry);
bool done = run_cuda_interp_for_block(BP, vars,
plan.block_points[bi],
plan.block_px[bi],
plan.block_py[bi],
plan.block_pz[bi],
Shellf, num_var, ordn, Symmetry);
if (!done)
run_cpu_interp_for_block(BP, vars, block_points[bi], XX, Shellf, num_var, ordn, Symmetry);
run_cpu_interp_for_block(BP, vars,
plan.block_points[bi],
plan.block_px[bi],
plan.block_py[bi],
plan.block_pz[bi],
Shellf, num_var, ordn, Symmetry);
}
}
} // namespace
@@ -684,55 +831,15 @@ void Patch::Interp_Points(MyList<var> *VarList,
memset(Shellf, 0, sizeof(double) * NN * num_var);
// owner_rank[j] records which MPI rank owns point j
// All ranks traverse the same block list so they all agree on ownership
int *owner_rank;
owner_rank = new int[NN];
int *owner_block;
owner_block = new int[NN];
for (int j = 0; j < NN; j++)
{
owner_rank[j] = -1;
owner_block[j] = -1;
}
double DH[dim];
for (int i = 0; i < dim; i++)
DH[i] = getdX(i);
BlockBinIndex block_index;
build_block_bin_index(this, DH, block_index);
for (int j = 0; j < NN; j++) // run along points
{
double pox[dim];
for (int i = 0; i < dim; i++)
{
pox[i] = XX[i][j];
if (myrank == 0 && (XX[i][j] < bbox[i] + lli[i] * DH[i] || XX[i][j] > bbox[dim + i] - uui[i] * DH[i]))
{
cout << "Patch::Interp_Points: point (";
for (int k = 0; k < dim; k++)
{
cout << XX[k][j];
if (k < dim - 1)
cout << ",";
else
cout << ") is out of current Patch." << endl;
}
MPI_Abort(MPI_COMM_WORLD, 1);
}
}
const int block_i = find_block_index_for_point(block_index, pox, DH);
if (block_i >= 0)
{
Block *BP = block_index.views[block_i].bp;
owner_rank[j] = BP->rank;
owner_block[j] = block_i;
}
}
double DH[dim];
for (int i = 0; i < dim; i++)
DH[i] = getdX(i);
BlockBinIndex block_index;
build_block_bin_index(this, DH, block_index);
CachedInterpPlan &plan = get_cached_interp_plan(this, NN, XX, Symmetry, myrank, DH, block_index, myrank == 0, false);
const int *owner_rank = plan.owner_rank.data();
interpolate_owned_points(VarList, NN, XX, Shellf, Symmetry, myrank, ordn, block_index, owner_rank, owner_block);
interpolate_owned_points(VarList, Shellf, Symmetry, ordn, block_index, plan);
// Replace MPI_Allreduce with per-owner MPI_Bcast:
// Group consecutive points by owner rank and broadcast each group.
@@ -788,8 +895,6 @@ void Patch::Interp_Points(MyList<var> *VarList,
}
}
delete[] owner_rank;
delete[] owner_block;
}
void Patch::Interp_Points(MyList<var> *VarList,
int NN, double **XX,
@@ -818,98 +923,22 @@ void Patch::Interp_Points(MyList<var> *VarList,
memset(Shellf, 0, sizeof(double) * NN * num_var);
// owner_rank[j] records which MPI rank owns point j
int *owner_rank;
owner_rank = new int[NN];
int *owner_block;
owner_block = new int[NN];
for (int j = 0; j < NN; j++)
{
owner_rank[j] = -1;
owner_block[j] = -1;
}
double DH[dim];
for (int i = 0; i < dim; i++)
DH[i] = getdX(i);
BlockBinIndex block_index;
build_block_bin_index(this, DH, block_index);
// --- Interpolation phase (identical to original) ---
for (int j = 0; j < NN; j++)
{
double pox[dim];
for (int i = 0; i < dim; i++)
{
pox[i] = XX[i][j];
if (myrank == 0 && (XX[i][j] < bbox[i] + lli[i] * DH[i] || XX[i][j] > bbox[dim + i] - uui[i] * DH[i]))
{
cout << "Patch::Interp_Points: point (";
for (int k = 0; k < dim; k++)
{
cout << XX[k][j];
if (k < dim - 1)
cout << ",";
else
cout << ") is out of current Patch." << endl;
}
MPI_Abort(MPI_COMM_WORLD, 1);
}
}
const int block_i = find_block_index_for_point(block_index, pox, DH);
if (block_i >= 0)
{
Block *BP = block_index.views[block_i].bp;
owner_rank[j] = BP->rank;
owner_block[j] = block_i;
}
}
double DH[dim];
for (int i = 0; i < dim; i++)
DH[i] = getdX(i);
BlockBinIndex block_index;
build_block_bin_index(this, DH, block_index);
CachedInterpPlan &plan = get_cached_interp_plan(this, NN, XX, Symmetry, myrank, DH, block_index, myrank == 0, false);
const int *owner_rank = plan.owner_rank.data();
interpolate_owned_points(VarList, NN, XX, Shellf, Symmetry, myrank, ordn, block_index, owner_rank, owner_block);
interpolate_owned_points(VarList, Shellf, Symmetry, ordn, block_index, plan);
#ifdef INTERP_LB_PROFILE
double t_interp_end = MPI_Wtime();
double t_interp_local = t_interp_end - t_interp_start;
#endif
// --- Error check for unfound points ---
for (int j = 0; j < NN; j++)
{
if (owner_rank[j] < 0 && myrank == 0)
{
cout << "ERROR: Patch::Interp_Points fails to find point (";
for (int d = 0; d < dim; d++)
{
cout << XX[d][j];
if (d < dim - 1)
cout << ",";
else
cout << ")";
}
cout << " on Patch (";
for (int d = 0; d < dim; d++)
{
cout << bbox[d] << "+" << lli[d] * DH[d];
if (d < dim - 1)
cout << ",";
else
cout << ")--";
}
cout << "(";
for (int d = 0; d < dim; d++)
{
cout << bbox[dim + d] << "-" << uui[d] * DH[d];
if (d < dim - 1)
cout << ",";
else
cout << ")" << endl;
}
MPI_Abort(MPI_COMM_WORLD, 1);
}
}
// --- Targeted point-to-point communication phase ---
// --- Targeted point-to-point communication phase ---
// Compute consumer_rank[j] using the same deterministic formula as surface_integral
int *consumer_rank = new int[NN];
{
@@ -1028,8 +1057,6 @@ void Patch::Interp_Points(MyList<var> *VarList,
delete[] send_count;
delete[] recv_count;
delete[] consumer_rank;
delete[] owner_rank;
delete[] owner_block;
#ifdef INTERP_LB_PROFILE
{
@@ -1077,59 +1104,20 @@ void Patch::Interp_Points(MyList<var> *VarList,
memset(Shellf, 0, sizeof(double) * NN * num_var);
// owner_rank[j] stores the global rank that owns point j
int *owner_rank;
owner_rank = new int[NN];
int *owner_block;
owner_block = new int[NN];
for (int j = 0; j < NN; j++)
{
owner_rank[j] = -1;
owner_block[j] = -1;
}
// Build global-to-local rank translation for Comm_here
MPI_Group world_group, local_group;
MPI_Comm_group(MPI_COMM_WORLD, &world_group);
MPI_Comm_group(Comm_here, &local_group);
// Build global-to-local rank translation for Comm_here
MPI_Group world_group, local_group;
MPI_Comm_group(MPI_COMM_WORLD, &world_group);
MPI_Comm_group(Comm_here, &local_group);
double DH[dim];
for (int i = 0; i < dim; i++)
DH[i] = getdX(i);
BlockBinIndex block_index;
build_block_bin_index(this, DH, block_index);
for (int j = 0; j < NN; j++) // run along points
{
double pox[dim];
for (int i = 0; i < dim; i++)
{
pox[i] = XX[i][j];
if (lmyrank == 0 && (XX[i][j] < bbox[i] + lli[i] * DH[i] || XX[i][j] > bbox[dim + i] - uui[i] * DH[i]))
{
cout << "Patch::Interp_Points: point (";
for (int k = 0; k < dim; k++)
{
cout << XX[k][j];
if (k < dim - 1)
cout << ",";
else
cout << ") is out of current Patch." << endl;
}
MPI_Abort(MPI_COMM_WORLD, 1);
}
}
const int block_i = find_block_index_for_point(block_index, pox, DH);
if (block_i >= 0)
{
Block *BP = block_index.views[block_i].bp;
owner_rank[j] = BP->rank;
owner_block[j] = block_i;
}
}
double DH[dim];
for (int i = 0; i < dim; i++)
DH[i] = getdX(i);
BlockBinIndex block_index;
build_block_bin_index(this, DH, block_index);
CachedInterpPlan &plan = get_cached_interp_plan(this, NN, XX, Symmetry, myrank, DH, block_index, lmyrank == 0, true);
const int *owner_rank = plan.owner_rank.data();
interpolate_owned_points(VarList, NN, XX, Shellf, Symmetry, myrank, ordn, block_index, owner_rank, owner_block);
interpolate_owned_points(VarList, Shellf, Symmetry, ordn, block_index, plan);
// Collect unique global owner ranks and translate to local ranks in Comm_here
// Then broadcast each owner's points via MPI_Bcast on Comm_here
@@ -1159,8 +1147,6 @@ void Patch::Interp_Points(MyList<var> *VarList,
MPI_Group_free(&world_group);
MPI_Group_free(&local_group);
delete[] owner_rank;
delete[] owner_block;
}
void Patch::checkBlock()
{