perf(MPatch): 用空间 bin 索引加速 Interp_Points 的 block 归属查找
- 为 Patch::Interp_Points 三个重载引入 BlockBinIndex(候选筛选 + 全扫回退) - 保持原 point-in-block 判定与后续插值/通信流程不变 - 将逐点线性扫块从 O(N_points*N_blocks) 降为近似 O(N_points*k) - 测试:bin 上限如果太大,会引入不必要的索引构建开销。将 bins 上限设为 16。 Co-authored-by: gpt-5.3-codex
This commit is contained in:
@@ -7,6 +7,7 @@
|
|||||||
#include <string>
|
#include <string>
|
||||||
#include <cmath>
|
#include <cmath>
|
||||||
#include <new>
|
#include <new>
|
||||||
|
#include <vector>
|
||||||
using namespace std;
|
using namespace std;
|
||||||
|
|
||||||
#include "misc.h"
|
#include "misc.h"
|
||||||
@@ -17,6 +18,168 @@ using namespace std;
|
|||||||
#include "interp_lb_profile.h"
|
#include "interp_lb_profile.h"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
namespace
|
||||||
|
{
|
||||||
|
struct InterpBlockView
|
||||||
|
{
|
||||||
|
Block *bp;
|
||||||
|
double llb[dim];
|
||||||
|
double uub[dim];
|
||||||
|
};
|
||||||
|
|
||||||
|
struct BlockBinIndex
|
||||||
|
{
|
||||||
|
int bins[dim];
|
||||||
|
double lo[dim];
|
||||||
|
double inv[dim];
|
||||||
|
vector<InterpBlockView> views;
|
||||||
|
vector<vector<int>> bin_to_blocks;
|
||||||
|
bool valid;
|
||||||
|
|
||||||
|
BlockBinIndex() : valid(false)
|
||||||
|
{
|
||||||
|
for (int i = 0; i < dim; i++)
|
||||||
|
{
|
||||||
|
bins[i] = 1;
|
||||||
|
lo[i] = 0.0;
|
||||||
|
inv[i] = 0.0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
inline int clamp_int(int v, int lo, int hi)
|
||||||
|
{
|
||||||
|
return (v < lo) ? lo : ((v > hi) ? hi : v);
|
||||||
|
}
|
||||||
|
|
||||||
|
inline int coord_to_bin(double x, double lo, double inv, int nb)
|
||||||
|
{
|
||||||
|
if (nb <= 1 || inv <= 0.0)
|
||||||
|
return 0;
|
||||||
|
int b = int(floor((x - lo) * inv));
|
||||||
|
return clamp_int(b, 0, nb - 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
inline int bin_loc(const BlockBinIndex &index, int b0, int b1, int b2)
|
||||||
|
{
|
||||||
|
return b0 + index.bins[0] * (b1 + index.bins[1] * b2);
|
||||||
|
}
|
||||||
|
|
||||||
|
inline bool point_in_block_view(const InterpBlockView &view, const double *pox, const double *DH)
|
||||||
|
{
|
||||||
|
for (int i = 0; i < dim; i++)
|
||||||
|
{
|
||||||
|
if (pox[i] - view.llb[i] < -DH[i] / 2 || pox[i] - view.uub[i] > DH[i] / 2)
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
void build_block_bin_index(Patch *patch, const double *DH, BlockBinIndex &index)
|
||||||
|
{
|
||||||
|
index = BlockBinIndex();
|
||||||
|
|
||||||
|
MyList<Block> *Bp = patch->blb;
|
||||||
|
while (Bp)
|
||||||
|
{
|
||||||
|
Block *BP = Bp->data;
|
||||||
|
InterpBlockView view;
|
||||||
|
view.bp = BP;
|
||||||
|
for (int i = 0; i < dim; i++)
|
||||||
|
{
|
||||||
|
#ifdef Vertex
|
||||||
|
#ifdef Cell
|
||||||
|
#error Both Cell and Vertex are defined
|
||||||
|
#endif
|
||||||
|
view.llb[i] = (feq(BP->bbox[i], patch->bbox[i], DH[i] / 2)) ? BP->bbox[i] + patch->lli[i] * DH[i] : BP->bbox[i] + (ghost_width - 0.5) * DH[i];
|
||||||
|
view.uub[i] = (feq(BP->bbox[dim + i], patch->bbox[dim + i], DH[i] / 2)) ? BP->bbox[dim + i] - patch->uui[i] * DH[i] : BP->bbox[dim + i] - (ghost_width - 0.5) * DH[i];
|
||||||
|
#else
|
||||||
|
#ifdef Cell
|
||||||
|
view.llb[i] = (feq(BP->bbox[i], patch->bbox[i], DH[i] / 2)) ? BP->bbox[i] + patch->lli[i] * DH[i] : BP->bbox[i] + ghost_width * DH[i];
|
||||||
|
view.uub[i] = (feq(BP->bbox[dim + i], patch->bbox[dim + i], DH[i] / 2)) ? BP->bbox[dim + i] - patch->uui[i] * DH[i] : BP->bbox[dim + i] - ghost_width * DH[i];
|
||||||
|
#else
|
||||||
|
#error Not define Vertex nor Cell
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
index.views.push_back(view);
|
||||||
|
if (Bp == patch->ble)
|
||||||
|
break;
|
||||||
|
Bp = Bp->next;
|
||||||
|
}
|
||||||
|
|
||||||
|
const int nblocks = int(index.views.size());
|
||||||
|
if (nblocks <= 0)
|
||||||
|
return;
|
||||||
|
|
||||||
|
int bins_1d = int(ceil(pow(double(nblocks), 1.0 / 3.0)));
|
||||||
|
bins_1d = clamp_int(bins_1d, 1, 32);
|
||||||
|
for (int i = 0; i < dim; i++)
|
||||||
|
{
|
||||||
|
index.bins[i] = bins_1d;
|
||||||
|
index.lo[i] = patch->bbox[i] + patch->lli[i] * DH[i];
|
||||||
|
const double hi = patch->bbox[dim + i] - patch->uui[i] * DH[i];
|
||||||
|
if (hi > index.lo[i] && bins_1d > 1)
|
||||||
|
index.inv[i] = bins_1d / (hi - index.lo[i]);
|
||||||
|
else
|
||||||
|
index.inv[i] = 0.0;
|
||||||
|
}
|
||||||
|
|
||||||
|
index.bin_to_blocks.resize(index.bins[0] * index.bins[1] * index.bins[2]);
|
||||||
|
|
||||||
|
for (int bi = 0; bi < nblocks; bi++)
|
||||||
|
{
|
||||||
|
const InterpBlockView &view = index.views[bi];
|
||||||
|
int bmin[dim], bmax[dim];
|
||||||
|
for (int d = 0; d < dim; d++)
|
||||||
|
{
|
||||||
|
const double low = view.llb[d] - DH[d] / 2;
|
||||||
|
const double up = view.uub[d] + DH[d] / 2;
|
||||||
|
bmin[d] = coord_to_bin(low, index.lo[d], index.inv[d], index.bins[d]);
|
||||||
|
bmax[d] = coord_to_bin(up, index.lo[d], index.inv[d], index.bins[d]);
|
||||||
|
if (bmax[d] < bmin[d])
|
||||||
|
{
|
||||||
|
int t = bmin[d];
|
||||||
|
bmin[d] = bmax[d];
|
||||||
|
bmax[d] = t;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int bz = bmin[2]; bz <= bmax[2]; bz++)
|
||||||
|
for (int by = bmin[1]; by <= bmax[1]; by++)
|
||||||
|
for (int bx = bmin[0]; bx <= bmax[0]; bx++)
|
||||||
|
index.bin_to_blocks[bin_loc(index, bx, by, bz)].push_back(bi);
|
||||||
|
}
|
||||||
|
|
||||||
|
index.valid = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
int find_block_index_for_point(const BlockBinIndex &index, const double *pox, const double *DH)
|
||||||
|
{
|
||||||
|
if (!index.valid)
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
const int bx = coord_to_bin(pox[0], index.lo[0], index.inv[0], index.bins[0]);
|
||||||
|
const int by = coord_to_bin(pox[1], index.lo[1], index.inv[1], index.bins[1]);
|
||||||
|
const int bz = coord_to_bin(pox[2], index.lo[2], index.inv[2], index.bins[2]);
|
||||||
|
const vector<int> &cand = index.bin_to_blocks[bin_loc(index, bx, by, bz)];
|
||||||
|
|
||||||
|
for (size_t ci = 0; ci < cand.size(); ci++)
|
||||||
|
{
|
||||||
|
const int bi = cand[ci];
|
||||||
|
if (point_in_block_view(index.views[bi], pox, DH))
|
||||||
|
return bi;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Fallback to full scan for numerical edge cases around bin boundaries.
|
||||||
|
for (size_t bi = 0; bi < index.views.size(); bi++)
|
||||||
|
if (point_in_block_view(index.views[bi], pox, DH))
|
||||||
|
return int(bi);
|
||||||
|
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
} // namespace
|
||||||
|
|
||||||
Patch::Patch(int DIM, int *shapei, double *bboxi, int levi, bool buflog, int Symmetry) : lev(levi)
|
Patch::Patch(int DIM, int *shapei, double *bboxi, int levi, bool buflog, int Symmetry) : lev(levi)
|
||||||
{
|
{
|
||||||
|
|
||||||
@@ -367,9 +530,11 @@ void Patch::Interp_Points(MyList<var> *VarList,
|
|||||||
for (int j = 0; j < NN; j++)
|
for (int j = 0; j < NN; j++)
|
||||||
owner_rank[j] = -1;
|
owner_rank[j] = -1;
|
||||||
|
|
||||||
double DH[dim], llb[dim], uub[dim];
|
double DH[dim];
|
||||||
for (int i = 0; i < dim; i++)
|
for (int i = 0; i < dim; i++)
|
||||||
DH[i] = getdX(i);
|
DH[i] = getdX(i);
|
||||||
|
BlockBinIndex block_index;
|
||||||
|
build_block_bin_index(this, DH, block_index);
|
||||||
|
|
||||||
for (int j = 0; j < NN; j++) // run along points
|
for (int j = 0; j < NN; j++) // run along points
|
||||||
{
|
{
|
||||||
@@ -392,57 +557,24 @@ void Patch::Interp_Points(MyList<var> *VarList,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
MyList<Block> *Bp = blb;
|
const int block_i = find_block_index_for_point(block_index, pox, DH);
|
||||||
bool notfind = true;
|
if (block_i >= 0)
|
||||||
while (notfind && Bp) // run along Blocks
|
|
||||||
{
|
{
|
||||||
Block *BP = Bp->data;
|
Block *BP = block_index.views[block_i].bp;
|
||||||
|
owner_rank[j] = BP->rank;
|
||||||
bool flag = true;
|
if (myrank == BP->rank)
|
||||||
for (int i = 0; i < dim; i++)
|
|
||||||
{
|
{
|
||||||
#ifdef Vertex
|
//---> interpolation
|
||||||
#ifdef Cell
|
varl = VarList;
|
||||||
#error Both Cell and Vertex are defined
|
int k = 0;
|
||||||
#endif
|
while (varl) // run along variables
|
||||||
llb[i] = (feq(BP->bbox[i], bbox[i], DH[i] / 2)) ? BP->bbox[i] + lli[i] * DH[i] : BP->bbox[i] + (ghost_width - 0.5) * DH[i];
|
|
||||||
uub[i] = (feq(BP->bbox[dim + i], bbox[dim + i], DH[i] / 2)) ? BP->bbox[dim + i] - uui[i] * DH[i] : BP->bbox[dim + i] - (ghost_width - 0.5) * DH[i];
|
|
||||||
#else
|
|
||||||
#ifdef Cell
|
|
||||||
llb[i] = (feq(BP->bbox[i], bbox[i], DH[i] / 2)) ? BP->bbox[i] + lli[i] * DH[i] : BP->bbox[i] + ghost_width * DH[i];
|
|
||||||
uub[i] = (feq(BP->bbox[dim + i], bbox[dim + i], DH[i] / 2)) ? BP->bbox[dim + i] - uui[i] * DH[i] : BP->bbox[dim + i] - ghost_width * DH[i];
|
|
||||||
#else
|
|
||||||
#error Not define Vertex nor Cell
|
|
||||||
#endif
|
|
||||||
#endif
|
|
||||||
if (XX[i][j] - llb[i] < -DH[i] / 2 || XX[i][j] - uub[i] > DH[i] / 2)
|
|
||||||
{
|
{
|
||||||
flag = false;
|
f_global_interp(BP->shape, BP->X[0], BP->X[1], BP->X[2], BP->fgfs[varl->data->sgfn], Shellf[j * num_var + k],
|
||||||
break;
|
pox[0], pox[1], pox[2], ordn, varl->data->SoA, Symmetry);
|
||||||
|
varl = varl->next;
|
||||||
|
k++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (flag)
|
|
||||||
{
|
|
||||||
notfind = false;
|
|
||||||
owner_rank[j] = BP->rank;
|
|
||||||
if (myrank == BP->rank)
|
|
||||||
{
|
|
||||||
//---> interpolation
|
|
||||||
varl = VarList;
|
|
||||||
int k = 0;
|
|
||||||
while (varl) // run along variables
|
|
||||||
{
|
|
||||||
f_global_interp(BP->shape, BP->X[0], BP->X[1], BP->X[2], BP->fgfs[varl->data->sgfn], Shellf[j * num_var + k],
|
|
||||||
pox[0], pox[1], pox[2], ordn, varl->data->SoA, Symmetry);
|
|
||||||
varl = varl->next;
|
|
||||||
k++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (Bp == ble)
|
|
||||||
break;
|
|
||||||
Bp = Bp->next;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -535,9 +667,11 @@ void Patch::Interp_Points(MyList<var> *VarList,
|
|||||||
for (int j = 0; j < NN; j++)
|
for (int j = 0; j < NN; j++)
|
||||||
owner_rank[j] = -1;
|
owner_rank[j] = -1;
|
||||||
|
|
||||||
double DH[dim], llb[dim], uub[dim];
|
double DH[dim];
|
||||||
for (int i = 0; i < dim; i++)
|
for (int i = 0; i < dim; i++)
|
||||||
DH[i] = getdX(i);
|
DH[i] = getdX(i);
|
||||||
|
BlockBinIndex block_index;
|
||||||
|
build_block_bin_index(this, DH, block_index);
|
||||||
|
|
||||||
// --- Interpolation phase (identical to original) ---
|
// --- Interpolation phase (identical to original) ---
|
||||||
for (int j = 0; j < NN; j++)
|
for (int j = 0; j < NN; j++)
|
||||||
@@ -561,56 +695,23 @@ void Patch::Interp_Points(MyList<var> *VarList,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
MyList<Block> *Bp = blb;
|
const int block_i = find_block_index_for_point(block_index, pox, DH);
|
||||||
bool notfind = true;
|
if (block_i >= 0)
|
||||||
while (notfind && Bp)
|
|
||||||
{
|
{
|
||||||
Block *BP = Bp->data;
|
Block *BP = block_index.views[block_i].bp;
|
||||||
|
owner_rank[j] = BP->rank;
|
||||||
bool flag = true;
|
if (myrank == BP->rank)
|
||||||
for (int i = 0; i < dim; i++)
|
|
||||||
{
|
{
|
||||||
#ifdef Vertex
|
varl = VarList;
|
||||||
#ifdef Cell
|
int k = 0;
|
||||||
#error Both Cell and Vertex are defined
|
while (varl)
|
||||||
#endif
|
|
||||||
llb[i] = (feq(BP->bbox[i], bbox[i], DH[i] / 2)) ? BP->bbox[i] + lli[i] * DH[i] : BP->bbox[i] + (ghost_width - 0.5) * DH[i];
|
|
||||||
uub[i] = (feq(BP->bbox[dim + i], bbox[dim + i], DH[i] / 2)) ? BP->bbox[dim + i] - uui[i] * DH[i] : BP->bbox[dim + i] - (ghost_width - 0.5) * DH[i];
|
|
||||||
#else
|
|
||||||
#ifdef Cell
|
|
||||||
llb[i] = (feq(BP->bbox[i], bbox[i], DH[i] / 2)) ? BP->bbox[i] + lli[i] * DH[i] : BP->bbox[i] + ghost_width * DH[i];
|
|
||||||
uub[i] = (feq(BP->bbox[dim + i], bbox[dim + i], DH[i] / 2)) ? BP->bbox[dim + i] - uui[i] * DH[i] : BP->bbox[dim + i] - ghost_width * DH[i];
|
|
||||||
#else
|
|
||||||
#error Not define Vertex nor Cell
|
|
||||||
#endif
|
|
||||||
#endif
|
|
||||||
if (XX[i][j] - llb[i] < -DH[i] / 2 || XX[i][j] - uub[i] > DH[i] / 2)
|
|
||||||
{
|
{
|
||||||
flag = false;
|
f_global_interp(BP->shape, BP->X[0], BP->X[1], BP->X[2], BP->fgfs[varl->data->sgfn], Shellf[j * num_var + k],
|
||||||
break;
|
pox[0], pox[1], pox[2], ordn, varl->data->SoA, Symmetry);
|
||||||
|
varl = varl->next;
|
||||||
|
k++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (flag)
|
|
||||||
{
|
|
||||||
notfind = false;
|
|
||||||
owner_rank[j] = BP->rank;
|
|
||||||
if (myrank == BP->rank)
|
|
||||||
{
|
|
||||||
varl = VarList;
|
|
||||||
int k = 0;
|
|
||||||
while (varl)
|
|
||||||
{
|
|
||||||
f_global_interp(BP->shape, BP->X[0], BP->X[1], BP->X[2], BP->fgfs[varl->data->sgfn], Shellf[j * num_var + k],
|
|
||||||
pox[0], pox[1], pox[2], ordn, varl->data->SoA, Symmetry);
|
|
||||||
varl = varl->next;
|
|
||||||
k++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (Bp == ble)
|
|
||||||
break;
|
|
||||||
Bp = Bp->next;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -833,9 +934,11 @@ void Patch::Interp_Points(MyList<var> *VarList,
|
|||||||
MPI_Comm_group(MPI_COMM_WORLD, &world_group);
|
MPI_Comm_group(MPI_COMM_WORLD, &world_group);
|
||||||
MPI_Comm_group(Comm_here, &local_group);
|
MPI_Comm_group(Comm_here, &local_group);
|
||||||
|
|
||||||
double DH[dim], llb[dim], uub[dim];
|
double DH[dim];
|
||||||
for (int i = 0; i < dim; i++)
|
for (int i = 0; i < dim; i++)
|
||||||
DH[i] = getdX(i);
|
DH[i] = getdX(i);
|
||||||
|
BlockBinIndex block_index;
|
||||||
|
build_block_bin_index(this, DH, block_index);
|
||||||
|
|
||||||
for (int j = 0; j < NN; j++) // run along points
|
for (int j = 0; j < NN; j++) // run along points
|
||||||
{
|
{
|
||||||
@@ -858,57 +961,24 @@ void Patch::Interp_Points(MyList<var> *VarList,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
MyList<Block> *Bp = blb;
|
const int block_i = find_block_index_for_point(block_index, pox, DH);
|
||||||
bool notfind = true;
|
if (block_i >= 0)
|
||||||
while (notfind && Bp) // run along Blocks
|
|
||||||
{
|
{
|
||||||
Block *BP = Bp->data;
|
Block *BP = block_index.views[block_i].bp;
|
||||||
|
owner_rank[j] = BP->rank;
|
||||||
bool flag = true;
|
if (myrank == BP->rank)
|
||||||
for (int i = 0; i < dim; i++)
|
|
||||||
{
|
{
|
||||||
#ifdef Vertex
|
//---> interpolation
|
||||||
#ifdef Cell
|
varl = VarList;
|
||||||
#error Both Cell and Vertex are defined
|
int k = 0;
|
||||||
#endif
|
while (varl) // run along variables
|
||||||
llb[i] = (feq(BP->bbox[i], bbox[i], DH[i] / 2)) ? BP->bbox[i] + lli[i] * DH[i] : BP->bbox[i] + (ghost_width - 0.5) * DH[i];
|
|
||||||
uub[i] = (feq(BP->bbox[dim + i], bbox[dim + i], DH[i] / 2)) ? BP->bbox[dim + i] - uui[i] * DH[i] : BP->bbox[dim + i] - (ghost_width - 0.5) * DH[i];
|
|
||||||
#else
|
|
||||||
#ifdef Cell
|
|
||||||
llb[i] = (feq(BP->bbox[i], bbox[i], DH[i] / 2)) ? BP->bbox[i] + lli[i] * DH[i] : BP->bbox[i] + ghost_width * DH[i];
|
|
||||||
uub[i] = (feq(BP->bbox[dim + i], bbox[dim + i], DH[i] / 2)) ? BP->bbox[dim + i] - uui[i] * DH[i] : BP->bbox[dim + i] - ghost_width * DH[i];
|
|
||||||
#else
|
|
||||||
#error Not define Vertex nor Cell
|
|
||||||
#endif
|
|
||||||
#endif
|
|
||||||
if (XX[i][j] - llb[i] < -DH[i] / 2 || XX[i][j] - uub[i] > DH[i] / 2)
|
|
||||||
{
|
{
|
||||||
flag = false;
|
f_global_interp(BP->shape, BP->X[0], BP->X[1], BP->X[2], BP->fgfs[varl->data->sgfn], Shellf[j * num_var + k],
|
||||||
break;
|
pox[0], pox[1], pox[2], ordn, varl->data->SoA, Symmetry);
|
||||||
|
varl = varl->next;
|
||||||
|
k++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (flag)
|
|
||||||
{
|
|
||||||
notfind = false;
|
|
||||||
owner_rank[j] = BP->rank;
|
|
||||||
if (myrank == BP->rank)
|
|
||||||
{
|
|
||||||
//---> interpolation
|
|
||||||
varl = VarList;
|
|
||||||
int k = 0;
|
|
||||||
while (varl) // run along variables
|
|
||||||
{
|
|
||||||
f_global_interp(BP->shape, BP->X[0], BP->X[1], BP->X[2], BP->fgfs[varl->data->sgfn], Shellf[j * num_var + k],
|
|
||||||
pox[0], pox[1], pox[2], ordn, varl->data->SoA, Symmetry);
|
|
||||||
varl = varl->next;
|
|
||||||
k++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (Bp == ble)
|
|
||||||
break;
|
|
||||||
Bp = Bp->next;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user