Cache repeated interpolation plans
This commit is contained in:
@@ -2,13 +2,14 @@
|
||||
#include <iostream>
|
||||
#include <iomanip>
|
||||
#include <fstream>
|
||||
#include <cstdlib>
|
||||
#include <cstdio>
|
||||
#include <string>
|
||||
#include <cmath>
|
||||
#include <new>
|
||||
#include <vector>
|
||||
using namespace std;
|
||||
#include <cstdlib>
|
||||
#include <cstdio>
|
||||
#include <string>
|
||||
#include <cmath>
|
||||
#include <new>
|
||||
#include <map>
|
||||
#include <vector>
|
||||
using namespace std;
|
||||
|
||||
#include "misc.h"
|
||||
#include "MPatch.h"
|
||||
@@ -40,6 +41,44 @@ struct InterpVarDesc
|
||||
double soa[dim];
|
||||
};
|
||||
|
||||
struct InterpPlanKey
|
||||
{
|
||||
const Patch *patch;
|
||||
const double *x;
|
||||
const double *y;
|
||||
const double *z;
|
||||
int NN;
|
||||
int Symmetry;
|
||||
int myrank;
|
||||
};
|
||||
|
||||
struct InterpPlanKeyLess
|
||||
{
|
||||
bool operator()(const InterpPlanKey &lhs, const InterpPlanKey &rhs) const
|
||||
{
|
||||
if (lhs.patch != rhs.patch) return lhs.patch < rhs.patch;
|
||||
if (lhs.x != rhs.x) return lhs.x < rhs.x;
|
||||
if (lhs.y != rhs.y) return lhs.y < rhs.y;
|
||||
if (lhs.z != rhs.z) return lhs.z < rhs.z;
|
||||
if (lhs.NN != rhs.NN) return lhs.NN < rhs.NN;
|
||||
if (lhs.Symmetry != rhs.Symmetry) return lhs.Symmetry < rhs.Symmetry;
|
||||
return lhs.myrank < rhs.myrank;
|
||||
}
|
||||
};
|
||||
|
||||
struct CachedInterpPlan
|
||||
{
|
||||
int nblocks;
|
||||
vector<int> owner_rank;
|
||||
vector<int> owner_block;
|
||||
vector<vector<int> > block_points;
|
||||
vector<vector<double> > block_px;
|
||||
vector<vector<double> > block_py;
|
||||
vector<vector<double> > block_pz;
|
||||
|
||||
CachedInterpPlan() : nblocks(0) {}
|
||||
};
|
||||
|
||||
struct InterpBlockView
|
||||
{
|
||||
Block *bp;
|
||||
@@ -229,10 +268,124 @@ bool should_try_cuda_interp(int ordn, int num_points, int num_var)
|
||||
return num_points * num_var >= 256;
|
||||
}
|
||||
|
||||
CachedInterpPlan &get_cached_interp_plan(Patch *patch,
|
||||
int NN, double **XX,
|
||||
int Symmetry, int myrank,
|
||||
const double *DH,
|
||||
const BlockBinIndex &block_index,
|
||||
bool report_bounds_here,
|
||||
bool allow_missing_points)
|
||||
{
|
||||
static map<InterpPlanKey, CachedInterpPlan, InterpPlanKeyLess> cache;
|
||||
|
||||
InterpPlanKey key;
|
||||
key.patch = patch;
|
||||
key.x = XX[0];
|
||||
key.y = XX[1];
|
||||
key.z = XX[2];
|
||||
key.NN = NN;
|
||||
key.Symmetry = Symmetry;
|
||||
key.myrank = myrank;
|
||||
|
||||
map<InterpPlanKey, CachedInterpPlan, InterpPlanKeyLess>::iterator it = cache.find(key);
|
||||
if (it != cache.end() && it->second.nblocks == static_cast<int>(block_index.views.size()))
|
||||
return it->second;
|
||||
|
||||
CachedInterpPlan &plan = cache[key];
|
||||
plan = CachedInterpPlan();
|
||||
plan.nblocks = static_cast<int>(block_index.views.size());
|
||||
plan.owner_rank.assign(NN, -1);
|
||||
plan.owner_block.assign(NN, -1);
|
||||
plan.block_points.resize(plan.nblocks);
|
||||
plan.block_px.resize(plan.nblocks);
|
||||
plan.block_py.resize(plan.nblocks);
|
||||
plan.block_pz.resize(plan.nblocks);
|
||||
|
||||
for (int j = 0; j < NN; ++j)
|
||||
{
|
||||
double pox[dim];
|
||||
for (int i = 0; i < dim; ++i)
|
||||
{
|
||||
pox[i] = XX[i][j];
|
||||
if (report_bounds_here &&
|
||||
(XX[i][j] < patch->bbox[i] + patch->lli[i] * DH[i] ||
|
||||
XX[i][j] > patch->bbox[dim + i] - patch->uui[i] * DH[i]))
|
||||
{
|
||||
cout << "Patch::Interp_Points: point (";
|
||||
for (int k = 0; k < dim; ++k)
|
||||
{
|
||||
cout << XX[k][j];
|
||||
if (k < dim - 1)
|
||||
cout << ",";
|
||||
else
|
||||
cout << ") is out of current Patch." << endl;
|
||||
}
|
||||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||||
}
|
||||
}
|
||||
|
||||
const int block_i = find_block_index_for_point(block_index, pox, DH);
|
||||
if (block_i >= 0)
|
||||
{
|
||||
Block *BP = block_index.views[block_i].bp;
|
||||
plan.owner_rank[j] = BP->rank;
|
||||
plan.owner_block[j] = block_i;
|
||||
if (BP->rank == myrank)
|
||||
{
|
||||
plan.block_points[block_i].push_back(j);
|
||||
plan.block_px[block_i].push_back(XX[0][j]);
|
||||
plan.block_py[block_i].push_back(XX[1][j]);
|
||||
plan.block_pz[block_i].push_back(XX[2][j]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!allow_missing_points && report_bounds_here)
|
||||
{
|
||||
for (int j = 0; j < NN; ++j)
|
||||
{
|
||||
if (plan.owner_rank[j] >= 0)
|
||||
continue;
|
||||
cout << "ERROR: Patch::Interp_Points fails to find point (";
|
||||
for (int d = 0; d < dim; ++d)
|
||||
{
|
||||
cout << XX[d][j];
|
||||
if (d < dim - 1)
|
||||
cout << ",";
|
||||
else
|
||||
cout << ")";
|
||||
}
|
||||
cout << " on Patch (";
|
||||
for (int d = 0; d < dim; ++d)
|
||||
{
|
||||
cout << patch->bbox[d] << "+" << patch->lli[d] * DH[d];
|
||||
if (d < dim - 1)
|
||||
cout << ",";
|
||||
else
|
||||
cout << ")--";
|
||||
}
|
||||
cout << "(";
|
||||
for (int d = 0; d < dim; ++d)
|
||||
{
|
||||
cout << patch->bbox[dim + d] << "-" << patch->uui[d] * DH[d];
|
||||
if (d < dim - 1)
|
||||
cout << ",";
|
||||
else
|
||||
cout << ")" << endl;
|
||||
}
|
||||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||||
}
|
||||
}
|
||||
|
||||
return plan;
|
||||
}
|
||||
|
||||
bool run_cuda_interp_for_block(Block *BP,
|
||||
const vector<InterpVarDesc> &vars,
|
||||
const vector<int> &point_ids,
|
||||
double **XX,
|
||||
const vector<double> &px,
|
||||
const vector<double> &py,
|
||||
const vector<double> &pz,
|
||||
double *Shellf,
|
||||
int num_var,
|
||||
int ordn,
|
||||
@@ -251,15 +404,6 @@ bool run_cuda_interp_for_block(Block *BP,
|
||||
}
|
||||
|
||||
const int npts = static_cast<int>(point_ids.size());
|
||||
vector<double> px(npts), py(npts), pz(npts);
|
||||
for (int p = 0; p < npts; ++p)
|
||||
{
|
||||
const int j = point_ids[p];
|
||||
px[p] = XX[0][j];
|
||||
py[p] = XX[1][j];
|
||||
pz[p] = XX[2][j];
|
||||
}
|
||||
|
||||
vector<double> out(static_cast<size_t>(npts) * static_cast<size_t>(num_var));
|
||||
if (bssn_cuda_interp_points_batch(BP->shape,
|
||||
BP->X[0], BP->X[1], BP->X[2],
|
||||
@@ -286,7 +430,9 @@ bool run_cuda_interp_for_block(Block *BP,
|
||||
void run_cpu_interp_for_block(Block *BP,
|
||||
const vector<InterpVarDesc> &vars,
|
||||
const vector<int> &point_ids,
|
||||
double **XX,
|
||||
const vector<double> &px,
|
||||
const vector<double> &py,
|
||||
const vector<double> &pz,
|
||||
double *Shellf,
|
||||
int num_var,
|
||||
int ordn,
|
||||
@@ -295,9 +441,9 @@ void run_cpu_interp_for_block(Block *BP,
|
||||
for (size_t p = 0; p < point_ids.size(); ++p)
|
||||
{
|
||||
const int j = point_ids[p];
|
||||
double x = XX[0][j];
|
||||
double y = XX[1][j];
|
||||
double z = XX[2][j];
|
||||
double x = px[p];
|
||||
double y = py[p];
|
||||
double z = pz[p];
|
||||
int ordn_local = ordn;
|
||||
int symmetry_local = Symmetry;
|
||||
for (int v = 0; v < num_var; ++v)
|
||||
@@ -310,33 +456,34 @@ void run_cpu_interp_for_block(Block *BP,
|
||||
}
|
||||
|
||||
void interpolate_owned_points(MyList<var> *VarList,
|
||||
int NN, double **XX,
|
||||
double *Shellf, int Symmetry,
|
||||
int myrank, int ordn,
|
||||
int ordn,
|
||||
const BlockBinIndex &block_index,
|
||||
const int *owner_rank,
|
||||
const int *owner_block)
|
||||
const CachedInterpPlan &plan)
|
||||
{
|
||||
vector<InterpVarDesc> vars;
|
||||
collect_interp_vars(VarList, vars);
|
||||
const int num_var = static_cast<int>(vars.size());
|
||||
vector<vector<int>> block_points(block_index.views.size());
|
||||
|
||||
for (int j = 0; j < NN; ++j)
|
||||
for (size_t bi = 0; bi < plan.block_points.size(); ++bi)
|
||||
{
|
||||
if (owner_rank[j] == myrank && owner_block[j] >= 0)
|
||||
block_points[owner_block[j]].push_back(j);
|
||||
}
|
||||
|
||||
for (size_t bi = 0; bi < block_points.size(); ++bi)
|
||||
{
|
||||
if (block_points[bi].empty())
|
||||
if (plan.block_points[bi].empty())
|
||||
continue;
|
||||
|
||||
Block *BP = block_index.views[bi].bp;
|
||||
bool done = run_cuda_interp_for_block(BP, vars, block_points[bi], XX, Shellf, num_var, ordn, Symmetry);
|
||||
bool done = run_cuda_interp_for_block(BP, vars,
|
||||
plan.block_points[bi],
|
||||
plan.block_px[bi],
|
||||
plan.block_py[bi],
|
||||
plan.block_pz[bi],
|
||||
Shellf, num_var, ordn, Symmetry);
|
||||
if (!done)
|
||||
run_cpu_interp_for_block(BP, vars, block_points[bi], XX, Shellf, num_var, ordn, Symmetry);
|
||||
run_cpu_interp_for_block(BP, vars,
|
||||
plan.block_points[bi],
|
||||
plan.block_px[bi],
|
||||
plan.block_py[bi],
|
||||
plan.block_pz[bi],
|
||||
Shellf, num_var, ordn, Symmetry);
|
||||
}
|
||||
}
|
||||
} // namespace
|
||||
@@ -684,55 +831,15 @@ void Patch::Interp_Points(MyList<var> *VarList,
|
||||
|
||||
memset(Shellf, 0, sizeof(double) * NN * num_var);
|
||||
|
||||
// owner_rank[j] records which MPI rank owns point j
|
||||
// All ranks traverse the same block list so they all agree on ownership
|
||||
int *owner_rank;
|
||||
owner_rank = new int[NN];
|
||||
int *owner_block;
|
||||
owner_block = new int[NN];
|
||||
for (int j = 0; j < NN; j++)
|
||||
{
|
||||
owner_rank[j] = -1;
|
||||
owner_block[j] = -1;
|
||||
}
|
||||
|
||||
double DH[dim];
|
||||
for (int i = 0; i < dim; i++)
|
||||
DH[i] = getdX(i);
|
||||
BlockBinIndex block_index;
|
||||
build_block_bin_index(this, DH, block_index);
|
||||
|
||||
for (int j = 0; j < NN; j++) // run along points
|
||||
{
|
||||
double pox[dim];
|
||||
for (int i = 0; i < dim; i++)
|
||||
{
|
||||
pox[i] = XX[i][j];
|
||||
if (myrank == 0 && (XX[i][j] < bbox[i] + lli[i] * DH[i] || XX[i][j] > bbox[dim + i] - uui[i] * DH[i]))
|
||||
{
|
||||
cout << "Patch::Interp_Points: point (";
|
||||
for (int k = 0; k < dim; k++)
|
||||
{
|
||||
cout << XX[k][j];
|
||||
if (k < dim - 1)
|
||||
cout << ",";
|
||||
else
|
||||
cout << ") is out of current Patch." << endl;
|
||||
}
|
||||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||||
}
|
||||
}
|
||||
|
||||
const int block_i = find_block_index_for_point(block_index, pox, DH);
|
||||
if (block_i >= 0)
|
||||
{
|
||||
Block *BP = block_index.views[block_i].bp;
|
||||
owner_rank[j] = BP->rank;
|
||||
owner_block[j] = block_i;
|
||||
}
|
||||
}
|
||||
double DH[dim];
|
||||
for (int i = 0; i < dim; i++)
|
||||
DH[i] = getdX(i);
|
||||
BlockBinIndex block_index;
|
||||
build_block_bin_index(this, DH, block_index);
|
||||
CachedInterpPlan &plan = get_cached_interp_plan(this, NN, XX, Symmetry, myrank, DH, block_index, myrank == 0, false);
|
||||
const int *owner_rank = plan.owner_rank.data();
|
||||
|
||||
interpolate_owned_points(VarList, NN, XX, Shellf, Symmetry, myrank, ordn, block_index, owner_rank, owner_block);
|
||||
interpolate_owned_points(VarList, Shellf, Symmetry, ordn, block_index, plan);
|
||||
|
||||
// Replace MPI_Allreduce with per-owner MPI_Bcast:
|
||||
// Group consecutive points by owner rank and broadcast each group.
|
||||
@@ -788,8 +895,6 @@ void Patch::Interp_Points(MyList<var> *VarList,
|
||||
}
|
||||
}
|
||||
|
||||
delete[] owner_rank;
|
||||
delete[] owner_block;
|
||||
}
|
||||
void Patch::Interp_Points(MyList<var> *VarList,
|
||||
int NN, double **XX,
|
||||
@@ -818,98 +923,22 @@ void Patch::Interp_Points(MyList<var> *VarList,
|
||||
|
||||
memset(Shellf, 0, sizeof(double) * NN * num_var);
|
||||
|
||||
// owner_rank[j] records which MPI rank owns point j
|
||||
int *owner_rank;
|
||||
owner_rank = new int[NN];
|
||||
int *owner_block;
|
||||
owner_block = new int[NN];
|
||||
for (int j = 0; j < NN; j++)
|
||||
{
|
||||
owner_rank[j] = -1;
|
||||
owner_block[j] = -1;
|
||||
}
|
||||
|
||||
double DH[dim];
|
||||
for (int i = 0; i < dim; i++)
|
||||
DH[i] = getdX(i);
|
||||
BlockBinIndex block_index;
|
||||
build_block_bin_index(this, DH, block_index);
|
||||
|
||||
// --- Interpolation phase (identical to original) ---
|
||||
for (int j = 0; j < NN; j++)
|
||||
{
|
||||
double pox[dim];
|
||||
for (int i = 0; i < dim; i++)
|
||||
{
|
||||
pox[i] = XX[i][j];
|
||||
if (myrank == 0 && (XX[i][j] < bbox[i] + lli[i] * DH[i] || XX[i][j] > bbox[dim + i] - uui[i] * DH[i]))
|
||||
{
|
||||
cout << "Patch::Interp_Points: point (";
|
||||
for (int k = 0; k < dim; k++)
|
||||
{
|
||||
cout << XX[k][j];
|
||||
if (k < dim - 1)
|
||||
cout << ",";
|
||||
else
|
||||
cout << ") is out of current Patch." << endl;
|
||||
}
|
||||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||||
}
|
||||
}
|
||||
|
||||
const int block_i = find_block_index_for_point(block_index, pox, DH);
|
||||
if (block_i >= 0)
|
||||
{
|
||||
Block *BP = block_index.views[block_i].bp;
|
||||
owner_rank[j] = BP->rank;
|
||||
owner_block[j] = block_i;
|
||||
}
|
||||
}
|
||||
double DH[dim];
|
||||
for (int i = 0; i < dim; i++)
|
||||
DH[i] = getdX(i);
|
||||
BlockBinIndex block_index;
|
||||
build_block_bin_index(this, DH, block_index);
|
||||
CachedInterpPlan &plan = get_cached_interp_plan(this, NN, XX, Symmetry, myrank, DH, block_index, myrank == 0, false);
|
||||
const int *owner_rank = plan.owner_rank.data();
|
||||
|
||||
interpolate_owned_points(VarList, NN, XX, Shellf, Symmetry, myrank, ordn, block_index, owner_rank, owner_block);
|
||||
interpolate_owned_points(VarList, Shellf, Symmetry, ordn, block_index, plan);
|
||||
|
||||
#ifdef INTERP_LB_PROFILE
|
||||
double t_interp_end = MPI_Wtime();
|
||||
double t_interp_local = t_interp_end - t_interp_start;
|
||||
#endif
|
||||
|
||||
// --- Error check for unfound points ---
|
||||
for (int j = 0; j < NN; j++)
|
||||
{
|
||||
if (owner_rank[j] < 0 && myrank == 0)
|
||||
{
|
||||
cout << "ERROR: Patch::Interp_Points fails to find point (";
|
||||
for (int d = 0; d < dim; d++)
|
||||
{
|
||||
cout << XX[d][j];
|
||||
if (d < dim - 1)
|
||||
cout << ",";
|
||||
else
|
||||
cout << ")";
|
||||
}
|
||||
cout << " on Patch (";
|
||||
for (int d = 0; d < dim; d++)
|
||||
{
|
||||
cout << bbox[d] << "+" << lli[d] * DH[d];
|
||||
if (d < dim - 1)
|
||||
cout << ",";
|
||||
else
|
||||
cout << ")--";
|
||||
}
|
||||
cout << "(";
|
||||
for (int d = 0; d < dim; d++)
|
||||
{
|
||||
cout << bbox[dim + d] << "-" << uui[d] * DH[d];
|
||||
if (d < dim - 1)
|
||||
cout << ",";
|
||||
else
|
||||
cout << ")" << endl;
|
||||
}
|
||||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||||
}
|
||||
}
|
||||
|
||||
// --- Targeted point-to-point communication phase ---
|
||||
// --- Targeted point-to-point communication phase ---
|
||||
// Compute consumer_rank[j] using the same deterministic formula as surface_integral
|
||||
int *consumer_rank = new int[NN];
|
||||
{
|
||||
@@ -1028,8 +1057,6 @@ void Patch::Interp_Points(MyList<var> *VarList,
|
||||
delete[] send_count;
|
||||
delete[] recv_count;
|
||||
delete[] consumer_rank;
|
||||
delete[] owner_rank;
|
||||
delete[] owner_block;
|
||||
|
||||
#ifdef INTERP_LB_PROFILE
|
||||
{
|
||||
@@ -1077,59 +1104,20 @@ void Patch::Interp_Points(MyList<var> *VarList,
|
||||
|
||||
memset(Shellf, 0, sizeof(double) * NN * num_var);
|
||||
|
||||
// owner_rank[j] stores the global rank that owns point j
|
||||
int *owner_rank;
|
||||
owner_rank = new int[NN];
|
||||
int *owner_block;
|
||||
owner_block = new int[NN];
|
||||
for (int j = 0; j < NN; j++)
|
||||
{
|
||||
owner_rank[j] = -1;
|
||||
owner_block[j] = -1;
|
||||
}
|
||||
// Build global-to-local rank translation for Comm_here
|
||||
MPI_Group world_group, local_group;
|
||||
MPI_Comm_group(MPI_COMM_WORLD, &world_group);
|
||||
MPI_Comm_group(Comm_here, &local_group);
|
||||
|
||||
// Build global-to-local rank translation for Comm_here
|
||||
MPI_Group world_group, local_group;
|
||||
MPI_Comm_group(MPI_COMM_WORLD, &world_group);
|
||||
MPI_Comm_group(Comm_here, &local_group);
|
||||
|
||||
double DH[dim];
|
||||
for (int i = 0; i < dim; i++)
|
||||
DH[i] = getdX(i);
|
||||
BlockBinIndex block_index;
|
||||
build_block_bin_index(this, DH, block_index);
|
||||
|
||||
for (int j = 0; j < NN; j++) // run along points
|
||||
{
|
||||
double pox[dim];
|
||||
for (int i = 0; i < dim; i++)
|
||||
{
|
||||
pox[i] = XX[i][j];
|
||||
if (lmyrank == 0 && (XX[i][j] < bbox[i] + lli[i] * DH[i] || XX[i][j] > bbox[dim + i] - uui[i] * DH[i]))
|
||||
{
|
||||
cout << "Patch::Interp_Points: point (";
|
||||
for (int k = 0; k < dim; k++)
|
||||
{
|
||||
cout << XX[k][j];
|
||||
if (k < dim - 1)
|
||||
cout << ",";
|
||||
else
|
||||
cout << ") is out of current Patch." << endl;
|
||||
}
|
||||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||||
}
|
||||
}
|
||||
|
||||
const int block_i = find_block_index_for_point(block_index, pox, DH);
|
||||
if (block_i >= 0)
|
||||
{
|
||||
Block *BP = block_index.views[block_i].bp;
|
||||
owner_rank[j] = BP->rank;
|
||||
owner_block[j] = block_i;
|
||||
}
|
||||
}
|
||||
double DH[dim];
|
||||
for (int i = 0; i < dim; i++)
|
||||
DH[i] = getdX(i);
|
||||
BlockBinIndex block_index;
|
||||
build_block_bin_index(this, DH, block_index);
|
||||
CachedInterpPlan &plan = get_cached_interp_plan(this, NN, XX, Symmetry, myrank, DH, block_index, lmyrank == 0, true);
|
||||
const int *owner_rank = plan.owner_rank.data();
|
||||
|
||||
interpolate_owned_points(VarList, NN, XX, Shellf, Symmetry, myrank, ordn, block_index, owner_rank, owner_block);
|
||||
interpolate_owned_points(VarList, Shellf, Symmetry, ordn, block_index, plan);
|
||||
|
||||
// Collect unique global owner ranks and translate to local ranks in Comm_here
|
||||
// Then broadcast each owner's points via MPI_Bcast on Comm_here
|
||||
@@ -1159,8 +1147,6 @@ void Patch::Interp_Points(MyList<var> *VarList,
|
||||
|
||||
MPI_Group_free(&world_group);
|
||||
MPI_Group_free(&local_group);
|
||||
delete[] owner_rank;
|
||||
delete[] owner_block;
|
||||
}
|
||||
void Patch::checkBlock()
|
||||
{
|
||||
|
||||
Reference in New Issue
Block a user