Cache repeated interpolation plans
This commit is contained in:
@@ -7,6 +7,7 @@
|
|||||||
#include <string>
|
#include <string>
|
||||||
#include <cmath>
|
#include <cmath>
|
||||||
#include <new>
|
#include <new>
|
||||||
|
#include <map>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
using namespace std;
|
using namespace std;
|
||||||
|
|
||||||
@@ -40,6 +41,44 @@ struct InterpVarDesc
|
|||||||
double soa[dim];
|
double soa[dim];
|
||||||
};
|
};
|
||||||
|
|
||||||
|
struct InterpPlanKey
|
||||||
|
{
|
||||||
|
const Patch *patch;
|
||||||
|
const double *x;
|
||||||
|
const double *y;
|
||||||
|
const double *z;
|
||||||
|
int NN;
|
||||||
|
int Symmetry;
|
||||||
|
int myrank;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct InterpPlanKeyLess
|
||||||
|
{
|
||||||
|
bool operator()(const InterpPlanKey &lhs, const InterpPlanKey &rhs) const
|
||||||
|
{
|
||||||
|
if (lhs.patch != rhs.patch) return lhs.patch < rhs.patch;
|
||||||
|
if (lhs.x != rhs.x) return lhs.x < rhs.x;
|
||||||
|
if (lhs.y != rhs.y) return lhs.y < rhs.y;
|
||||||
|
if (lhs.z != rhs.z) return lhs.z < rhs.z;
|
||||||
|
if (lhs.NN != rhs.NN) return lhs.NN < rhs.NN;
|
||||||
|
if (lhs.Symmetry != rhs.Symmetry) return lhs.Symmetry < rhs.Symmetry;
|
||||||
|
return lhs.myrank < rhs.myrank;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
struct CachedInterpPlan
|
||||||
|
{
|
||||||
|
int nblocks;
|
||||||
|
vector<int> owner_rank;
|
||||||
|
vector<int> owner_block;
|
||||||
|
vector<vector<int> > block_points;
|
||||||
|
vector<vector<double> > block_px;
|
||||||
|
vector<vector<double> > block_py;
|
||||||
|
vector<vector<double> > block_pz;
|
||||||
|
|
||||||
|
CachedInterpPlan() : nblocks(0) {}
|
||||||
|
};
|
||||||
|
|
||||||
struct InterpBlockView
|
struct InterpBlockView
|
||||||
{
|
{
|
||||||
Block *bp;
|
Block *bp;
|
||||||
@@ -229,10 +268,124 @@ bool should_try_cuda_interp(int ordn, int num_points, int num_var)
|
|||||||
return num_points * num_var >= 256;
|
return num_points * num_var >= 256;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
CachedInterpPlan &get_cached_interp_plan(Patch *patch,
|
||||||
|
int NN, double **XX,
|
||||||
|
int Symmetry, int myrank,
|
||||||
|
const double *DH,
|
||||||
|
const BlockBinIndex &block_index,
|
||||||
|
bool report_bounds_here,
|
||||||
|
bool allow_missing_points)
|
||||||
|
{
|
||||||
|
static map<InterpPlanKey, CachedInterpPlan, InterpPlanKeyLess> cache;
|
||||||
|
|
||||||
|
InterpPlanKey key;
|
||||||
|
key.patch = patch;
|
||||||
|
key.x = XX[0];
|
||||||
|
key.y = XX[1];
|
||||||
|
key.z = XX[2];
|
||||||
|
key.NN = NN;
|
||||||
|
key.Symmetry = Symmetry;
|
||||||
|
key.myrank = myrank;
|
||||||
|
|
||||||
|
map<InterpPlanKey, CachedInterpPlan, InterpPlanKeyLess>::iterator it = cache.find(key);
|
||||||
|
if (it != cache.end() && it->second.nblocks == static_cast<int>(block_index.views.size()))
|
||||||
|
return it->second;
|
||||||
|
|
||||||
|
CachedInterpPlan &plan = cache[key];
|
||||||
|
plan = CachedInterpPlan();
|
||||||
|
plan.nblocks = static_cast<int>(block_index.views.size());
|
||||||
|
plan.owner_rank.assign(NN, -1);
|
||||||
|
plan.owner_block.assign(NN, -1);
|
||||||
|
plan.block_points.resize(plan.nblocks);
|
||||||
|
plan.block_px.resize(plan.nblocks);
|
||||||
|
plan.block_py.resize(plan.nblocks);
|
||||||
|
plan.block_pz.resize(plan.nblocks);
|
||||||
|
|
||||||
|
for (int j = 0; j < NN; ++j)
|
||||||
|
{
|
||||||
|
double pox[dim];
|
||||||
|
for (int i = 0; i < dim; ++i)
|
||||||
|
{
|
||||||
|
pox[i] = XX[i][j];
|
||||||
|
if (report_bounds_here &&
|
||||||
|
(XX[i][j] < patch->bbox[i] + patch->lli[i] * DH[i] ||
|
||||||
|
XX[i][j] > patch->bbox[dim + i] - patch->uui[i] * DH[i]))
|
||||||
|
{
|
||||||
|
cout << "Patch::Interp_Points: point (";
|
||||||
|
for (int k = 0; k < dim; ++k)
|
||||||
|
{
|
||||||
|
cout << XX[k][j];
|
||||||
|
if (k < dim - 1)
|
||||||
|
cout << ",";
|
||||||
|
else
|
||||||
|
cout << ") is out of current Patch." << endl;
|
||||||
|
}
|
||||||
|
MPI_Abort(MPI_COMM_WORLD, 1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
const int block_i = find_block_index_for_point(block_index, pox, DH);
|
||||||
|
if (block_i >= 0)
|
||||||
|
{
|
||||||
|
Block *BP = block_index.views[block_i].bp;
|
||||||
|
plan.owner_rank[j] = BP->rank;
|
||||||
|
plan.owner_block[j] = block_i;
|
||||||
|
if (BP->rank == myrank)
|
||||||
|
{
|
||||||
|
plan.block_points[block_i].push_back(j);
|
||||||
|
plan.block_px[block_i].push_back(XX[0][j]);
|
||||||
|
plan.block_py[block_i].push_back(XX[1][j]);
|
||||||
|
plan.block_pz[block_i].push_back(XX[2][j]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!allow_missing_points && report_bounds_here)
|
||||||
|
{
|
||||||
|
for (int j = 0; j < NN; ++j)
|
||||||
|
{
|
||||||
|
if (plan.owner_rank[j] >= 0)
|
||||||
|
continue;
|
||||||
|
cout << "ERROR: Patch::Interp_Points fails to find point (";
|
||||||
|
for (int d = 0; d < dim; ++d)
|
||||||
|
{
|
||||||
|
cout << XX[d][j];
|
||||||
|
if (d < dim - 1)
|
||||||
|
cout << ",";
|
||||||
|
else
|
||||||
|
cout << ")";
|
||||||
|
}
|
||||||
|
cout << " on Patch (";
|
||||||
|
for (int d = 0; d < dim; ++d)
|
||||||
|
{
|
||||||
|
cout << patch->bbox[d] << "+" << patch->lli[d] * DH[d];
|
||||||
|
if (d < dim - 1)
|
||||||
|
cout << ",";
|
||||||
|
else
|
||||||
|
cout << ")--";
|
||||||
|
}
|
||||||
|
cout << "(";
|
||||||
|
for (int d = 0; d < dim; ++d)
|
||||||
|
{
|
||||||
|
cout << patch->bbox[dim + d] << "-" << patch->uui[d] * DH[d];
|
||||||
|
if (d < dim - 1)
|
||||||
|
cout << ",";
|
||||||
|
else
|
||||||
|
cout << ")" << endl;
|
||||||
|
}
|
||||||
|
MPI_Abort(MPI_COMM_WORLD, 1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return plan;
|
||||||
|
}
|
||||||
|
|
||||||
bool run_cuda_interp_for_block(Block *BP,
|
bool run_cuda_interp_for_block(Block *BP,
|
||||||
const vector<InterpVarDesc> &vars,
|
const vector<InterpVarDesc> &vars,
|
||||||
const vector<int> &point_ids,
|
const vector<int> &point_ids,
|
||||||
double **XX,
|
const vector<double> &px,
|
||||||
|
const vector<double> &py,
|
||||||
|
const vector<double> &pz,
|
||||||
double *Shellf,
|
double *Shellf,
|
||||||
int num_var,
|
int num_var,
|
||||||
int ordn,
|
int ordn,
|
||||||
@@ -251,15 +404,6 @@ bool run_cuda_interp_for_block(Block *BP,
|
|||||||
}
|
}
|
||||||
|
|
||||||
const int npts = static_cast<int>(point_ids.size());
|
const int npts = static_cast<int>(point_ids.size());
|
||||||
vector<double> px(npts), py(npts), pz(npts);
|
|
||||||
for (int p = 0; p < npts; ++p)
|
|
||||||
{
|
|
||||||
const int j = point_ids[p];
|
|
||||||
px[p] = XX[0][j];
|
|
||||||
py[p] = XX[1][j];
|
|
||||||
pz[p] = XX[2][j];
|
|
||||||
}
|
|
||||||
|
|
||||||
vector<double> out(static_cast<size_t>(npts) * static_cast<size_t>(num_var));
|
vector<double> out(static_cast<size_t>(npts) * static_cast<size_t>(num_var));
|
||||||
if (bssn_cuda_interp_points_batch(BP->shape,
|
if (bssn_cuda_interp_points_batch(BP->shape,
|
||||||
BP->X[0], BP->X[1], BP->X[2],
|
BP->X[0], BP->X[1], BP->X[2],
|
||||||
@@ -286,7 +430,9 @@ bool run_cuda_interp_for_block(Block *BP,
|
|||||||
void run_cpu_interp_for_block(Block *BP,
|
void run_cpu_interp_for_block(Block *BP,
|
||||||
const vector<InterpVarDesc> &vars,
|
const vector<InterpVarDesc> &vars,
|
||||||
const vector<int> &point_ids,
|
const vector<int> &point_ids,
|
||||||
double **XX,
|
const vector<double> &px,
|
||||||
|
const vector<double> &py,
|
||||||
|
const vector<double> &pz,
|
||||||
double *Shellf,
|
double *Shellf,
|
||||||
int num_var,
|
int num_var,
|
||||||
int ordn,
|
int ordn,
|
||||||
@@ -295,9 +441,9 @@ void run_cpu_interp_for_block(Block *BP,
|
|||||||
for (size_t p = 0; p < point_ids.size(); ++p)
|
for (size_t p = 0; p < point_ids.size(); ++p)
|
||||||
{
|
{
|
||||||
const int j = point_ids[p];
|
const int j = point_ids[p];
|
||||||
double x = XX[0][j];
|
double x = px[p];
|
||||||
double y = XX[1][j];
|
double y = py[p];
|
||||||
double z = XX[2][j];
|
double z = pz[p];
|
||||||
int ordn_local = ordn;
|
int ordn_local = ordn;
|
||||||
int symmetry_local = Symmetry;
|
int symmetry_local = Symmetry;
|
||||||
for (int v = 0; v < num_var; ++v)
|
for (int v = 0; v < num_var; ++v)
|
||||||
@@ -310,33 +456,34 @@ void run_cpu_interp_for_block(Block *BP,
|
|||||||
}
|
}
|
||||||
|
|
||||||
void interpolate_owned_points(MyList<var> *VarList,
|
void interpolate_owned_points(MyList<var> *VarList,
|
||||||
int NN, double **XX,
|
|
||||||
double *Shellf, int Symmetry,
|
double *Shellf, int Symmetry,
|
||||||
int myrank, int ordn,
|
int ordn,
|
||||||
const BlockBinIndex &block_index,
|
const BlockBinIndex &block_index,
|
||||||
const int *owner_rank,
|
const CachedInterpPlan &plan)
|
||||||
const int *owner_block)
|
|
||||||
{
|
{
|
||||||
vector<InterpVarDesc> vars;
|
vector<InterpVarDesc> vars;
|
||||||
collect_interp_vars(VarList, vars);
|
collect_interp_vars(VarList, vars);
|
||||||
const int num_var = static_cast<int>(vars.size());
|
const int num_var = static_cast<int>(vars.size());
|
||||||
vector<vector<int>> block_points(block_index.views.size());
|
|
||||||
|
|
||||||
for (int j = 0; j < NN; ++j)
|
for (size_t bi = 0; bi < plan.block_points.size(); ++bi)
|
||||||
{
|
{
|
||||||
if (owner_rank[j] == myrank && owner_block[j] >= 0)
|
if (plan.block_points[bi].empty())
|
||||||
block_points[owner_block[j]].push_back(j);
|
|
||||||
}
|
|
||||||
|
|
||||||
for (size_t bi = 0; bi < block_points.size(); ++bi)
|
|
||||||
{
|
|
||||||
if (block_points[bi].empty())
|
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
Block *BP = block_index.views[bi].bp;
|
Block *BP = block_index.views[bi].bp;
|
||||||
bool done = run_cuda_interp_for_block(BP, vars, block_points[bi], XX, Shellf, num_var, ordn, Symmetry);
|
bool done = run_cuda_interp_for_block(BP, vars,
|
||||||
|
plan.block_points[bi],
|
||||||
|
plan.block_px[bi],
|
||||||
|
plan.block_py[bi],
|
||||||
|
plan.block_pz[bi],
|
||||||
|
Shellf, num_var, ordn, Symmetry);
|
||||||
if (!done)
|
if (!done)
|
||||||
run_cpu_interp_for_block(BP, vars, block_points[bi], XX, Shellf, num_var, ordn, Symmetry);
|
run_cpu_interp_for_block(BP, vars,
|
||||||
|
plan.block_points[bi],
|
||||||
|
plan.block_px[bi],
|
||||||
|
plan.block_py[bi],
|
||||||
|
plan.block_pz[bi],
|
||||||
|
Shellf, num_var, ordn, Symmetry);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} // namespace
|
} // namespace
|
||||||
@@ -684,55 +831,15 @@ void Patch::Interp_Points(MyList<var> *VarList,
|
|||||||
|
|
||||||
memset(Shellf, 0, sizeof(double) * NN * num_var);
|
memset(Shellf, 0, sizeof(double) * NN * num_var);
|
||||||
|
|
||||||
// owner_rank[j] records which MPI rank owns point j
|
|
||||||
// All ranks traverse the same block list so they all agree on ownership
|
|
||||||
int *owner_rank;
|
|
||||||
owner_rank = new int[NN];
|
|
||||||
int *owner_block;
|
|
||||||
owner_block = new int[NN];
|
|
||||||
for (int j = 0; j < NN; j++)
|
|
||||||
{
|
|
||||||
owner_rank[j] = -1;
|
|
||||||
owner_block[j] = -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
double DH[dim];
|
double DH[dim];
|
||||||
for (int i = 0; i < dim; i++)
|
for (int i = 0; i < dim; i++)
|
||||||
DH[i] = getdX(i);
|
DH[i] = getdX(i);
|
||||||
BlockBinIndex block_index;
|
BlockBinIndex block_index;
|
||||||
build_block_bin_index(this, DH, block_index);
|
build_block_bin_index(this, DH, block_index);
|
||||||
|
CachedInterpPlan &plan = get_cached_interp_plan(this, NN, XX, Symmetry, myrank, DH, block_index, myrank == 0, false);
|
||||||
|
const int *owner_rank = plan.owner_rank.data();
|
||||||
|
|
||||||
for (int j = 0; j < NN; j++) // run along points
|
interpolate_owned_points(VarList, Shellf, Symmetry, ordn, block_index, plan);
|
||||||
{
|
|
||||||
double pox[dim];
|
|
||||||
for (int i = 0; i < dim; i++)
|
|
||||||
{
|
|
||||||
pox[i] = XX[i][j];
|
|
||||||
if (myrank == 0 && (XX[i][j] < bbox[i] + lli[i] * DH[i] || XX[i][j] > bbox[dim + i] - uui[i] * DH[i]))
|
|
||||||
{
|
|
||||||
cout << "Patch::Interp_Points: point (";
|
|
||||||
for (int k = 0; k < dim; k++)
|
|
||||||
{
|
|
||||||
cout << XX[k][j];
|
|
||||||
if (k < dim - 1)
|
|
||||||
cout << ",";
|
|
||||||
else
|
|
||||||
cout << ") is out of current Patch." << endl;
|
|
||||||
}
|
|
||||||
MPI_Abort(MPI_COMM_WORLD, 1);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
const int block_i = find_block_index_for_point(block_index, pox, DH);
|
|
||||||
if (block_i >= 0)
|
|
||||||
{
|
|
||||||
Block *BP = block_index.views[block_i].bp;
|
|
||||||
owner_rank[j] = BP->rank;
|
|
||||||
owner_block[j] = block_i;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
interpolate_owned_points(VarList, NN, XX, Shellf, Symmetry, myrank, ordn, block_index, owner_rank, owner_block);
|
|
||||||
|
|
||||||
// Replace MPI_Allreduce with per-owner MPI_Bcast:
|
// Replace MPI_Allreduce with per-owner MPI_Bcast:
|
||||||
// Group consecutive points by owner rank and broadcast each group.
|
// Group consecutive points by owner rank and broadcast each group.
|
||||||
@@ -788,8 +895,6 @@ void Patch::Interp_Points(MyList<var> *VarList,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
delete[] owner_rank;
|
|
||||||
delete[] owner_block;
|
|
||||||
}
|
}
|
||||||
void Patch::Interp_Points(MyList<var> *VarList,
|
void Patch::Interp_Points(MyList<var> *VarList,
|
||||||
int NN, double **XX,
|
int NN, double **XX,
|
||||||
@@ -818,97 +923,21 @@ void Patch::Interp_Points(MyList<var> *VarList,
|
|||||||
|
|
||||||
memset(Shellf, 0, sizeof(double) * NN * num_var);
|
memset(Shellf, 0, sizeof(double) * NN * num_var);
|
||||||
|
|
||||||
// owner_rank[j] records which MPI rank owns point j
|
|
||||||
int *owner_rank;
|
|
||||||
owner_rank = new int[NN];
|
|
||||||
int *owner_block;
|
|
||||||
owner_block = new int[NN];
|
|
||||||
for (int j = 0; j < NN; j++)
|
|
||||||
{
|
|
||||||
owner_rank[j] = -1;
|
|
||||||
owner_block[j] = -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
double DH[dim];
|
double DH[dim];
|
||||||
for (int i = 0; i < dim; i++)
|
for (int i = 0; i < dim; i++)
|
||||||
DH[i] = getdX(i);
|
DH[i] = getdX(i);
|
||||||
BlockBinIndex block_index;
|
BlockBinIndex block_index;
|
||||||
build_block_bin_index(this, DH, block_index);
|
build_block_bin_index(this, DH, block_index);
|
||||||
|
CachedInterpPlan &plan = get_cached_interp_plan(this, NN, XX, Symmetry, myrank, DH, block_index, myrank == 0, false);
|
||||||
|
const int *owner_rank = plan.owner_rank.data();
|
||||||
|
|
||||||
// --- Interpolation phase (identical to original) ---
|
interpolate_owned_points(VarList, Shellf, Symmetry, ordn, block_index, plan);
|
||||||
for (int j = 0; j < NN; j++)
|
|
||||||
{
|
|
||||||
double pox[dim];
|
|
||||||
for (int i = 0; i < dim; i++)
|
|
||||||
{
|
|
||||||
pox[i] = XX[i][j];
|
|
||||||
if (myrank == 0 && (XX[i][j] < bbox[i] + lli[i] * DH[i] || XX[i][j] > bbox[dim + i] - uui[i] * DH[i]))
|
|
||||||
{
|
|
||||||
cout << "Patch::Interp_Points: point (";
|
|
||||||
for (int k = 0; k < dim; k++)
|
|
||||||
{
|
|
||||||
cout << XX[k][j];
|
|
||||||
if (k < dim - 1)
|
|
||||||
cout << ",";
|
|
||||||
else
|
|
||||||
cout << ") is out of current Patch." << endl;
|
|
||||||
}
|
|
||||||
MPI_Abort(MPI_COMM_WORLD, 1);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
const int block_i = find_block_index_for_point(block_index, pox, DH);
|
|
||||||
if (block_i >= 0)
|
|
||||||
{
|
|
||||||
Block *BP = block_index.views[block_i].bp;
|
|
||||||
owner_rank[j] = BP->rank;
|
|
||||||
owner_block[j] = block_i;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
interpolate_owned_points(VarList, NN, XX, Shellf, Symmetry, myrank, ordn, block_index, owner_rank, owner_block);
|
|
||||||
|
|
||||||
#ifdef INTERP_LB_PROFILE
|
#ifdef INTERP_LB_PROFILE
|
||||||
double t_interp_end = MPI_Wtime();
|
double t_interp_end = MPI_Wtime();
|
||||||
double t_interp_local = t_interp_end - t_interp_start;
|
double t_interp_local = t_interp_end - t_interp_start;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// --- Error check for unfound points ---
|
|
||||||
for (int j = 0; j < NN; j++)
|
|
||||||
{
|
|
||||||
if (owner_rank[j] < 0 && myrank == 0)
|
|
||||||
{
|
|
||||||
cout << "ERROR: Patch::Interp_Points fails to find point (";
|
|
||||||
for (int d = 0; d < dim; d++)
|
|
||||||
{
|
|
||||||
cout << XX[d][j];
|
|
||||||
if (d < dim - 1)
|
|
||||||
cout << ",";
|
|
||||||
else
|
|
||||||
cout << ")";
|
|
||||||
}
|
|
||||||
cout << " on Patch (";
|
|
||||||
for (int d = 0; d < dim; d++)
|
|
||||||
{
|
|
||||||
cout << bbox[d] << "+" << lli[d] * DH[d];
|
|
||||||
if (d < dim - 1)
|
|
||||||
cout << ",";
|
|
||||||
else
|
|
||||||
cout << ")--";
|
|
||||||
}
|
|
||||||
cout << "(";
|
|
||||||
for (int d = 0; d < dim; d++)
|
|
||||||
{
|
|
||||||
cout << bbox[dim + d] << "-" << uui[d] * DH[d];
|
|
||||||
if (d < dim - 1)
|
|
||||||
cout << ",";
|
|
||||||
else
|
|
||||||
cout << ")" << endl;
|
|
||||||
}
|
|
||||||
MPI_Abort(MPI_COMM_WORLD, 1);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// --- Targeted point-to-point communication phase ---
|
// --- Targeted point-to-point communication phase ---
|
||||||
// Compute consumer_rank[j] using the same deterministic formula as surface_integral
|
// Compute consumer_rank[j] using the same deterministic formula as surface_integral
|
||||||
int *consumer_rank = new int[NN];
|
int *consumer_rank = new int[NN];
|
||||||
@@ -1028,8 +1057,6 @@ void Patch::Interp_Points(MyList<var> *VarList,
|
|||||||
delete[] send_count;
|
delete[] send_count;
|
||||||
delete[] recv_count;
|
delete[] recv_count;
|
||||||
delete[] consumer_rank;
|
delete[] consumer_rank;
|
||||||
delete[] owner_rank;
|
|
||||||
delete[] owner_block;
|
|
||||||
|
|
||||||
#ifdef INTERP_LB_PROFILE
|
#ifdef INTERP_LB_PROFILE
|
||||||
{
|
{
|
||||||
@@ -1077,17 +1104,6 @@ void Patch::Interp_Points(MyList<var> *VarList,
|
|||||||
|
|
||||||
memset(Shellf, 0, sizeof(double) * NN * num_var);
|
memset(Shellf, 0, sizeof(double) * NN * num_var);
|
||||||
|
|
||||||
// owner_rank[j] stores the global rank that owns point j
|
|
||||||
int *owner_rank;
|
|
||||||
owner_rank = new int[NN];
|
|
||||||
int *owner_block;
|
|
||||||
owner_block = new int[NN];
|
|
||||||
for (int j = 0; j < NN; j++)
|
|
||||||
{
|
|
||||||
owner_rank[j] = -1;
|
|
||||||
owner_block[j] = -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Build global-to-local rank translation for Comm_here
|
// Build global-to-local rank translation for Comm_here
|
||||||
MPI_Group world_group, local_group;
|
MPI_Group world_group, local_group;
|
||||||
MPI_Comm_group(MPI_COMM_WORLD, &world_group);
|
MPI_Comm_group(MPI_COMM_WORLD, &world_group);
|
||||||
@@ -1098,38 +1114,10 @@ void Patch::Interp_Points(MyList<var> *VarList,
|
|||||||
DH[i] = getdX(i);
|
DH[i] = getdX(i);
|
||||||
BlockBinIndex block_index;
|
BlockBinIndex block_index;
|
||||||
build_block_bin_index(this, DH, block_index);
|
build_block_bin_index(this, DH, block_index);
|
||||||
|
CachedInterpPlan &plan = get_cached_interp_plan(this, NN, XX, Symmetry, myrank, DH, block_index, lmyrank == 0, true);
|
||||||
|
const int *owner_rank = plan.owner_rank.data();
|
||||||
|
|
||||||
for (int j = 0; j < NN; j++) // run along points
|
interpolate_owned_points(VarList, Shellf, Symmetry, ordn, block_index, plan);
|
||||||
{
|
|
||||||
double pox[dim];
|
|
||||||
for (int i = 0; i < dim; i++)
|
|
||||||
{
|
|
||||||
pox[i] = XX[i][j];
|
|
||||||
if (lmyrank == 0 && (XX[i][j] < bbox[i] + lli[i] * DH[i] || XX[i][j] > bbox[dim + i] - uui[i] * DH[i]))
|
|
||||||
{
|
|
||||||
cout << "Patch::Interp_Points: point (";
|
|
||||||
for (int k = 0; k < dim; k++)
|
|
||||||
{
|
|
||||||
cout << XX[k][j];
|
|
||||||
if (k < dim - 1)
|
|
||||||
cout << ",";
|
|
||||||
else
|
|
||||||
cout << ") is out of current Patch." << endl;
|
|
||||||
}
|
|
||||||
MPI_Abort(MPI_COMM_WORLD, 1);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
const int block_i = find_block_index_for_point(block_index, pox, DH);
|
|
||||||
if (block_i >= 0)
|
|
||||||
{
|
|
||||||
Block *BP = block_index.views[block_i].bp;
|
|
||||||
owner_rank[j] = BP->rank;
|
|
||||||
owner_block[j] = block_i;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
interpolate_owned_points(VarList, NN, XX, Shellf, Symmetry, myrank, ordn, block_index, owner_rank, owner_block);
|
|
||||||
|
|
||||||
// Collect unique global owner ranks and translate to local ranks in Comm_here
|
// Collect unique global owner ranks and translate to local ranks in Comm_here
|
||||||
// Then broadcast each owner's points via MPI_Bcast on Comm_here
|
// Then broadcast each owner's points via MPI_Bcast on Comm_here
|
||||||
@@ -1159,8 +1147,6 @@ void Patch::Interp_Points(MyList<var> *VarList,
|
|||||||
|
|
||||||
MPI_Group_free(&world_group);
|
MPI_Group_free(&world_group);
|
||||||
MPI_Group_free(&local_group);
|
MPI_Group_free(&local_group);
|
||||||
delete[] owner_rank;
|
|
||||||
delete[] owner_block;
|
|
||||||
}
|
}
|
||||||
void Patch::checkBlock()
|
void Patch::checkBlock()
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -953,14 +953,36 @@ int bssn_cuda_interp_points_batch(const int *ex,
|
|||||||
|
|
||||||
struct InterpBatchCache
|
struct InterpBatchCache
|
||||||
{
|
{
|
||||||
|
struct StencilCacheEntry
|
||||||
|
{
|
||||||
|
const double *X;
|
||||||
|
const double *Y;
|
||||||
|
const double *Z;
|
||||||
|
const double *px;
|
||||||
|
const double *py;
|
||||||
|
const double *pz;
|
||||||
|
int nx;
|
||||||
|
int ny;
|
||||||
|
int nz;
|
||||||
|
int num_points;
|
||||||
|
int ordn;
|
||||||
|
int symmetry;
|
||||||
|
CachedBuffer weights;
|
||||||
|
CachedIntBuffer indices;
|
||||||
|
CachedIntBuffer reflect;
|
||||||
|
|
||||||
|
StencilCacheEntry()
|
||||||
|
: X(nullptr), Y(nullptr), Z(nullptr),
|
||||||
|
px(nullptr), py(nullptr), pz(nullptr),
|
||||||
|
nx(0), ny(0), nz(0), num_points(0), ordn(0), symmetry(0) {}
|
||||||
|
};
|
||||||
|
|
||||||
CachedBuffer out;
|
CachedBuffer out;
|
||||||
CachedBuffer soa;
|
CachedBuffer soa;
|
||||||
CachedBuffer field_ptrs;
|
CachedBuffer field_ptrs;
|
||||||
CachedBuffer weights;
|
|
||||||
CachedIntBuffer indices;
|
|
||||||
CachedIntBuffer reflect;
|
|
||||||
CachedIntBuffer error_flag;
|
CachedIntBuffer error_flag;
|
||||||
std::vector<CachedBuffer> host_field_copies;
|
std::vector<CachedBuffer> host_field_copies;
|
||||||
|
std::vector<StencilCacheEntry> stencil_entries;
|
||||||
};
|
};
|
||||||
static thread_local InterpBatchCache cache;
|
static thread_local InterpBatchCache cache;
|
||||||
|
|
||||||
@@ -978,45 +1000,82 @@ int bssn_cuda_interp_points_batch(const int *ex,
|
|||||||
const size_t indices_bytes = point_stencil_ints * sizeof(int);
|
const size_t indices_bytes = point_stencil_ints * sizeof(int);
|
||||||
|
|
||||||
bool ok = true;
|
bool ok = true;
|
||||||
std::vector<double> host_weights(point_stencil_doubles);
|
InterpBatchCache::StencilCacheEntry *stencil_cache = nullptr;
|
||||||
std::vector<int> host_indices(point_stencil_ints);
|
for (size_t i = 0; i < cache.stencil_entries.size(); ++i)
|
||||||
std::vector<int> host_reflect(point_stencil_ints);
|
|
||||||
const double dx = X[1] - X[0];
|
|
||||||
const double dy = Y[1] - Y[0];
|
|
||||||
const double dz = Z[1] - Z[0];
|
|
||||||
const int allow_reflect_x = (symmetry == 2 && std::fabs(X[0]) < dx);
|
|
||||||
const int allow_reflect_y = (symmetry == 2 && std::fabs(Y[0]) < dy);
|
|
||||||
const int allow_reflect_z = (symmetry != 0 && std::fabs(Z[0]) < dz);
|
|
||||||
for (int p = 0; p < num_points; ++p)
|
|
||||||
{
|
{
|
||||||
int start_x = 0, start_y = 0, start_z = 0;
|
InterpBatchCache::StencilCacheEntry &entry = cache.stencil_entries[i];
|
||||||
double cx = 0.0, cy = 0.0, cz = 0.0;
|
if (entry.X == X && entry.Y == Y && entry.Z == Z &&
|
||||||
const bool ok_x = compute_interp_window_host(px[p], X, nx, ordn, allow_reflect_x, &start_x, &cx);
|
entry.px == px && entry.py == py && entry.pz == pz &&
|
||||||
const bool ok_y = compute_interp_window_host(py[p], Y, ny, ordn, allow_reflect_y, &start_y, &cy);
|
entry.nx == nx && entry.ny == ny && entry.nz == nz &&
|
||||||
const bool ok_z = compute_interp_window_host(pz[p], Z, nz, ordn, allow_reflect_z, &start_z, &cz);
|
entry.num_points == num_points && entry.ordn == ordn &&
|
||||||
if (!ok_x || !ok_y || !ok_z)
|
entry.symmetry == symmetry)
|
||||||
return 1;
|
|
||||||
|
|
||||||
lagrange_weights_ord6_host(cx, host_weights.data() + p * 18);
|
|
||||||
lagrange_weights_ord6_host(cy, host_weights.data() + p * 18 + 6);
|
|
||||||
lagrange_weights_ord6_host(cz, host_weights.data() + p * 18 + 12);
|
|
||||||
|
|
||||||
for (int i = 0; i < 6; ++i)
|
|
||||||
{
|
{
|
||||||
if (!map_interp_index_host(start_x + i, nx, &host_indices[p * 18 + i], &host_reflect[p * 18 + i]))
|
stencil_cache = &entry;
|
||||||
return 1;
|
break;
|
||||||
if (!map_interp_index_host(start_y + i, ny, &host_indices[p * 18 + 6 + i], &host_reflect[p * 18 + 6 + i]))
|
|
||||||
return 1;
|
|
||||||
if (!map_interp_index_host(start_z + i, nz, &host_indices[p * 18 + 12 + i], &host_reflect[p * 18 + 12 + i]))
|
|
||||||
return 1;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (!stencil_cache)
|
||||||
|
{
|
||||||
|
cache.stencil_entries.push_back(InterpBatchCache::StencilCacheEntry());
|
||||||
|
stencil_cache = &cache.stencil_entries.back();
|
||||||
|
stencil_cache->X = X;
|
||||||
|
stencil_cache->Y = Y;
|
||||||
|
stencil_cache->Z = Z;
|
||||||
|
stencil_cache->px = px;
|
||||||
|
stencil_cache->py = py;
|
||||||
|
stencil_cache->pz = pz;
|
||||||
|
stencil_cache->nx = nx;
|
||||||
|
stencil_cache->ny = ny;
|
||||||
|
stencil_cache->nz = nz;
|
||||||
|
stencil_cache->num_points = num_points;
|
||||||
|
stencil_cache->ordn = ordn;
|
||||||
|
stencil_cache->symmetry = symmetry;
|
||||||
|
|
||||||
|
std::vector<double> host_weights(point_stencil_doubles);
|
||||||
|
std::vector<int> host_indices(point_stencil_ints);
|
||||||
|
std::vector<int> host_reflect(point_stencil_ints);
|
||||||
|
const double dx = X[1] - X[0];
|
||||||
|
const double dy = Y[1] - Y[0];
|
||||||
|
const double dz = Z[1] - Z[0];
|
||||||
|
const int allow_reflect_x = (symmetry == 2 && std::fabs(X[0]) < dx);
|
||||||
|
const int allow_reflect_y = (symmetry == 2 && std::fabs(Y[0]) < dy);
|
||||||
|
const int allow_reflect_z = (symmetry != 0 && std::fabs(Z[0]) < dz);
|
||||||
|
for (int p = 0; p < num_points; ++p)
|
||||||
|
{
|
||||||
|
int start_x = 0, start_y = 0, start_z = 0;
|
||||||
|
double cx = 0.0, cy = 0.0, cz = 0.0;
|
||||||
|
const bool ok_x = compute_interp_window_host(px[p], X, nx, ordn, allow_reflect_x, &start_x, &cx);
|
||||||
|
const bool ok_y = compute_interp_window_host(py[p], Y, ny, ordn, allow_reflect_y, &start_y, &cy);
|
||||||
|
const bool ok_z = compute_interp_window_host(pz[p], Z, nz, ordn, allow_reflect_z, &start_z, &cz);
|
||||||
|
if (!ok_x || !ok_y || !ok_z)
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
lagrange_weights_ord6_host(cx, host_weights.data() + p * 18);
|
||||||
|
lagrange_weights_ord6_host(cy, host_weights.data() + p * 18 + 6);
|
||||||
|
lagrange_weights_ord6_host(cz, host_weights.data() + p * 18 + 12);
|
||||||
|
|
||||||
|
for (int i = 0; i < 6; ++i)
|
||||||
|
{
|
||||||
|
if (!map_interp_index_host(start_x + i, nx, &host_indices[p * 18 + i], &host_reflect[p * 18 + i]))
|
||||||
|
return 1;
|
||||||
|
if (!map_interp_index_host(start_y + i, ny, &host_indices[p * 18 + 6 + i], &host_reflect[p * 18 + 6 + i]))
|
||||||
|
return 1;
|
||||||
|
if (!map_interp_index_host(start_z + i, nz, &host_indices[p * 18 + 12 + i], &host_reflect[p * 18 + 12 + i]))
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
ok = ok &&
|
||||||
|
copy_to_device(stencil_cache->weights, host_weights.data(), weights_bytes) &&
|
||||||
|
copy_to_device(stencil_cache->indices, host_indices.data(), indices_bytes) &&
|
||||||
|
copy_to_device(stencil_cache->reflect, host_reflect.data(), indices_bytes);
|
||||||
|
if (!ok)
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
ok = ok &&
|
ok = ok &&
|
||||||
copy_to_device(cache.soa, soa_flat, soa_bytes) &&
|
copy_to_device(cache.soa, soa_flat, soa_bytes) &&
|
||||||
copy_to_device(cache.weights, host_weights.data(), weights_bytes) &&
|
|
||||||
copy_to_device(cache.indices, host_indices.data(), indices_bytes) &&
|
|
||||||
copy_to_device(cache.reflect, host_reflect.data(), indices_bytes) &&
|
|
||||||
ensure_capacity(cache.out, out_bytes) &&
|
ensure_capacity(cache.out, out_bytes) &&
|
||||||
ensure_capacity(cache.field_ptrs, ptr_bytes) &&
|
ensure_capacity(cache.field_ptrs, ptr_bytes) &&
|
||||||
ensure_capacity(cache.error_flag, sizeof(int));
|
ensure_capacity(cache.error_flag, sizeof(int));
|
||||||
@@ -1063,9 +1122,9 @@ int bssn_cuda_interp_points_batch(const int *ex,
|
|||||||
int ny_local = ny;
|
int ny_local = ny;
|
||||||
const double *dsoa = cache.soa.ptr;
|
const double *dsoa = cache.soa.ptr;
|
||||||
const double *const *dfields = reinterpret_cast<const double *const *>(cache.field_ptrs.ptr);
|
const double *const *dfields = reinterpret_cast<const double *const *>(cache.field_ptrs.ptr);
|
||||||
const double *dweights = cache.weights.ptr;
|
const double *dweights = stencil_cache->weights.ptr;
|
||||||
const int *dindices = cache.indices.ptr;
|
const int *dindices = stencil_cache->indices.ptr;
|
||||||
const int *dreflect = cache.reflect.ptr;
|
const int *dreflect = stencil_cache->reflect.ptr;
|
||||||
double *dout = cache.out.ptr;
|
double *dout = cache.out.ptr;
|
||||||
int *derror = cache.error_flag.ptr;
|
int *derror = cache.error_flag.ptr;
|
||||||
|
|
||||||
|
|||||||
@@ -182,6 +182,7 @@ surface_integral::surface_integral(int iSymmetry) : Symmetry(iSymmetry)
|
|||||||
//|============================================================================
|
//|============================================================================
|
||||||
surface_integral::~surface_integral()
|
surface_integral::~surface_integral()
|
||||||
{
|
{
|
||||||
|
release_cached_buffers();
|
||||||
delete[] nx_g;
|
delete[] nx_g;
|
||||||
delete[] ny_g;
|
delete[] ny_g;
|
||||||
delete[] nz_g;
|
delete[] nz_g;
|
||||||
@@ -190,6 +191,50 @@ surface_integral::~surface_integral()
|
|||||||
delete[] wtcostheta;
|
delete[] wtcostheta;
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void surface_integral::get_surface_points(double rex, double **pox)
|
||||||
|
{
|
||||||
|
SpherePointCache &cache = sphere_point_cache[rex];
|
||||||
|
if (!cache.pox[0])
|
||||||
|
{
|
||||||
|
for (int i = 0; i < 3; ++i)
|
||||||
|
cache.pox[i] = new double[n_tot];
|
||||||
|
for (int n = 0; n < n_tot; ++n)
|
||||||
|
{
|
||||||
|
cache.pox[0][n] = rex * nx_g[n];
|
||||||
|
cache.pox[1][n] = rex * ny_g[n];
|
||||||
|
cache.pox[2][n] = rex * nz_g[n];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pox[0] = cache.pox[0];
|
||||||
|
pox[1] = cache.pox[1];
|
||||||
|
pox[2] = cache.pox[2];
|
||||||
|
}
|
||||||
|
|
||||||
|
double *surface_integral::get_shellf_buffer(int num_var)
|
||||||
|
{
|
||||||
|
double *&buffer = shellf_cache[num_var];
|
||||||
|
if (!buffer)
|
||||||
|
buffer = new double[n_tot * num_var];
|
||||||
|
return buffer;
|
||||||
|
}
|
||||||
|
|
||||||
|
void surface_integral::release_cached_buffers()
|
||||||
|
{
|
||||||
|
for (map<double, SpherePointCache>::iterator it = sphere_point_cache.begin(); it != sphere_point_cache.end(); ++it)
|
||||||
|
{
|
||||||
|
delete[] it->second.pox[0];
|
||||||
|
delete[] it->second.pox[1];
|
||||||
|
delete[] it->second.pox[2];
|
||||||
|
it->second.pox[0] = it->second.pox[1] = it->second.pox[2] = 0;
|
||||||
|
}
|
||||||
|
sphere_point_cache.clear();
|
||||||
|
|
||||||
|
for (map<int, double *>::iterator it = shellf_cache.begin(); it != shellf_cache.end(); ++it)
|
||||||
|
delete[] it->second;
|
||||||
|
shellf_cache.clear();
|
||||||
|
}
|
||||||
//|----------------------------------------------------------------
|
//|----------------------------------------------------------------
|
||||||
// spin weighted spinw component of psi4, general routine
|
// spin weighted spinw component of psi4, general routine
|
||||||
// l takes from spinw to maxl; m takes from -l to l
|
// l takes from spinw to maxl; m takes from -l to l
|
||||||
@@ -211,14 +256,7 @@ void surface_integral::surf_Wave(double rex, int lev, cgh *GH, var *Rpsi4, var *
|
|||||||
|
|
||||||
int n;
|
int n;
|
||||||
double *pox[3];
|
double *pox[3];
|
||||||
for (int i = 0; i < 3; i++)
|
get_surface_points(rex, pox);
|
||||||
pox[i] = new double[n_tot];
|
|
||||||
for (n = 0; n < n_tot; n++)
|
|
||||||
{
|
|
||||||
pox[0][n] = rex * nx_g[n];
|
|
||||||
pox[1][n] = rex * ny_g[n];
|
|
||||||
pox[2][n] = rex * nz_g[n];
|
|
||||||
}
|
|
||||||
|
|
||||||
int mp, Lp, Nmin, Nmax;
|
int mp, Lp, Nmin, Nmax;
|
||||||
mp = n_tot / cpusize;
|
mp = n_tot / cpusize;
|
||||||
@@ -234,8 +272,7 @@ void surface_integral::surf_Wave(double rex, int lev, cgh *GH, var *Rpsi4, var *
|
|||||||
Nmax = Nmin + mp - 1;
|
Nmax = Nmin + mp - 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
double *shellf;
|
double *shellf = get_shellf_buffer(InList);
|
||||||
shellf = new double[n_tot * InList];
|
|
||||||
|
|
||||||
GH->PatL[lev]->data->Interp_Points(DG_List, n_tot, pox, shellf, Symmetry, Nmin, Nmax);
|
GH->PatL[lev]->data->Interp_Points(DG_List, n_tot, pox, shellf, Symmetry, Nmin, Nmax);
|
||||||
|
|
||||||
@@ -375,10 +412,6 @@ void surface_integral::surf_Wave(double rex, int lev, cgh *GH, var *Rpsi4, var *
|
|||||||
|
|
||||||
//|------= Free memory.
|
//|------= Free memory.
|
||||||
|
|
||||||
delete[] pox[0];
|
|
||||||
delete[] pox[1];
|
|
||||||
delete[] pox[2];
|
|
||||||
delete[] shellf;
|
|
||||||
delete[] RP_out;
|
delete[] RP_out;
|
||||||
delete[] IP_out;
|
delete[] IP_out;
|
||||||
DG_List->clearList();
|
DG_List->clearList();
|
||||||
@@ -404,17 +437,9 @@ void surface_integral::surf_Wave(double rex, int lev, cgh *GH, var *Rpsi4, var *
|
|||||||
|
|
||||||
int n;
|
int n;
|
||||||
double *pox[3];
|
double *pox[3];
|
||||||
for (int i = 0; i < 3; i++)
|
get_surface_points(rex, pox);
|
||||||
pox[i] = new double[n_tot];
|
|
||||||
for (n = 0; n < n_tot; n++)
|
|
||||||
{
|
|
||||||
pox[0][n] = rex * nx_g[n];
|
|
||||||
pox[1][n] = rex * ny_g[n];
|
|
||||||
pox[2][n] = rex * nz_g[n];
|
|
||||||
}
|
|
||||||
|
|
||||||
double *shellf;
|
double *shellf = get_shellf_buffer(InList);
|
||||||
shellf = new double[n_tot * InList];
|
|
||||||
|
|
||||||
// misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"before Interp_Points");
|
// misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"before Interp_Points");
|
||||||
|
|
||||||
@@ -577,10 +602,6 @@ void surface_integral::surf_Wave(double rex, int lev, cgh *GH, var *Rpsi4, var *
|
|||||||
|
|
||||||
//|------= Free memory.
|
//|------= Free memory.
|
||||||
|
|
||||||
delete[] pox[0];
|
|
||||||
delete[] pox[1];
|
|
||||||
delete[] pox[2];
|
|
||||||
delete[] shellf;
|
|
||||||
delete[] RP_out;
|
delete[] RP_out;
|
||||||
delete[] IP_out;
|
delete[] IP_out;
|
||||||
DG_List->clearList();
|
DG_List->clearList();
|
||||||
@@ -599,17 +620,9 @@ void surface_integral::surf_Wave(double rex, int lev, ShellPatch *GH, var *Rpsi4
|
|||||||
|
|
||||||
int n;
|
int n;
|
||||||
double *pox[3];
|
double *pox[3];
|
||||||
for (int i = 0; i < 3; i++)
|
get_surface_points(rex, pox);
|
||||||
pox[i] = new double[n_tot];
|
|
||||||
for (n = 0; n < n_tot; n++)
|
|
||||||
{
|
|
||||||
pox[0][n] = rex * nx_g[n];
|
|
||||||
pox[1][n] = rex * ny_g[n];
|
|
||||||
pox[2][n] = rex * nz_g[n];
|
|
||||||
}
|
|
||||||
|
|
||||||
double *shellf;
|
double *shellf = get_shellf_buffer(InList);
|
||||||
shellf = new double[n_tot * InList];
|
|
||||||
|
|
||||||
GH->Interp_Points(DG_List, n_tot, pox, shellf, Symmetry);
|
GH->Interp_Points(DG_List, n_tot, pox, shellf, Symmetry);
|
||||||
|
|
||||||
@@ -2570,10 +2583,6 @@ void surface_integral::surf_MassPAng(double rex, int lev, cgh *GH, var *chi, var
|
|||||||
Rout[5] = sy;
|
Rout[5] = sy;
|
||||||
Rout[6] = sz;
|
Rout[6] = sz;
|
||||||
|
|
||||||
delete[] pox[0];
|
|
||||||
delete[] pox[1];
|
|
||||||
delete[] pox[2];
|
|
||||||
delete[] shellf;
|
|
||||||
DG_List->clearList();
|
DG_List->clearList();
|
||||||
}
|
}
|
||||||
void surface_integral::surf_MassPAng(double rex, int lev, cgh *GH, var *chi, var *trK,
|
void surface_integral::surf_MassPAng(double rex, int lev, cgh *GH, var *chi, var *trK,
|
||||||
@@ -2639,17 +2648,9 @@ void surface_integral::surf_MassPAng(double rex, int lev, cgh *GH, var *chi, var
|
|||||||
|
|
||||||
int n;
|
int n;
|
||||||
double *pox[3];
|
double *pox[3];
|
||||||
for (int i = 0; i < 3; i++)
|
get_surface_points(rex, pox);
|
||||||
pox[i] = new double[n_tot];
|
|
||||||
for (n = 0; n < n_tot; n++)
|
|
||||||
{
|
|
||||||
pox[0][n] = rex * nx_g[n];
|
|
||||||
pox[1][n] = rex * ny_g[n];
|
|
||||||
pox[2][n] = rex * nz_g[n];
|
|
||||||
}
|
|
||||||
|
|
||||||
double *shellf;
|
double *shellf = get_shellf_buffer(InList);
|
||||||
shellf = new double[n_tot * InList];
|
|
||||||
|
|
||||||
// we have assumed there is only one box on this level,
|
// we have assumed there is only one box on this level,
|
||||||
// so we do not need loop boxes
|
// so we do not need loop boxes
|
||||||
@@ -2839,10 +2840,6 @@ void surface_integral::surf_MassPAng(double rex, int lev, cgh *GH, var *chi, var
|
|||||||
Rout[5] = sy;
|
Rout[5] = sy;
|
||||||
Rout[6] = sz;
|
Rout[6] = sz;
|
||||||
|
|
||||||
delete[] pox[0];
|
|
||||||
delete[] pox[1];
|
|
||||||
delete[] pox[2];
|
|
||||||
delete[] shellf;
|
|
||||||
DG_List->clearList();
|
DG_List->clearList();
|
||||||
}
|
}
|
||||||
//|----------------------------------------------------------------
|
//|----------------------------------------------------------------
|
||||||
|
|||||||
@@ -23,11 +23,21 @@ using namespace std;
|
|||||||
#include "NullShellPatch2.h"
|
#include "NullShellPatch2.h"
|
||||||
#include "var.h"
|
#include "var.h"
|
||||||
#include "monitor.h"
|
#include "monitor.h"
|
||||||
|
#include <map>
|
||||||
|
|
||||||
class surface_integral
|
class surface_integral
|
||||||
{
|
{
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
struct SpherePointCache
|
||||||
|
{
|
||||||
|
double *pox[3];
|
||||||
|
SpherePointCache()
|
||||||
|
{
|
||||||
|
pox[0] = pox[1] = pox[2] = 0;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
int Symmetry, factor;
|
int Symmetry, factor;
|
||||||
int N_theta, N_phi; // Number of points in Theta & Phi directions
|
int N_theta, N_phi; // Number of points in Theta & Phi directions
|
||||||
double dphi, dcostheta;
|
double dphi, dcostheta;
|
||||||
@@ -36,6 +46,12 @@ private:
|
|||||||
|
|
||||||
double *nx_g, *ny_g, *nz_g; // global list of unit normals
|
double *nx_g, *ny_g, *nz_g; // global list of unit normals
|
||||||
int myrank, cpusize;
|
int myrank, cpusize;
|
||||||
|
map<double, SpherePointCache> sphere_point_cache;
|
||||||
|
map<int, double *> shellf_cache;
|
||||||
|
|
||||||
|
void get_surface_points(double rex, double **pox);
|
||||||
|
double *get_shellf_buffer(int num_var);
|
||||||
|
void release_cached_buffers();
|
||||||
|
|
||||||
public:
|
public:
|
||||||
surface_integral(int iSymmetry);
|
surface_integral(int iSymmetry);
|
||||||
|
|||||||
Reference in New Issue
Block a user