Add resident BSSN GPU point interpolation

This commit is contained in:
2026-04-30 11:39:15 +08:00
parent 18e9c9cc50
commit 8486532920
3 changed files with 268 additions and 28 deletions

View File

@@ -439,38 +439,56 @@ bool bssn_cuda_interp_bh_point_resident(MyList<Patch> *PatL,
if (bssn_cuda_has_resident_state(block) &&
block->shape[0] >= ordn && block->shape[1] >= ordn && block->shape[2] >= ordn)
{
const int sx = ordn;
const int sy = ordn;
const int sz = ordn;
const int region_all = sx * sy * sz;
const int i0 = bssn_cuda_interp_tile_start(block->X[0], block->shape[0], x, DH[0], ordn);
const int j0 = bssn_cuda_interp_tile_start(block->X[1], block->shape[1], y, DH[1], ordn);
const int k0 = bssn_cuda_interp_tile_start(block->X[2], block->shape[2], z, DH[2], ordn);
double packed_fields[3 * region_all];
var *vars[3] = {forx, fory, forz};
double soa3[9];
for (int f = 0; f < 3; f++)
{
if (bssn_cuda_pack_state_region_to_host_buffer(block,
k_bssn_cuda_bh_state_indices[f],
packed_fields + f * region_all,
block->shape,
i0, j0, k0,
sx, sy, sz) != 0)
soa3[3 * f + 0] = vars[f]->SoA[0];
soa3[3 * f + 1] = vars[f]->SoA[1];
soa3[3 * f + 2] = vars[f]->SoA[2];
}
if (bssn_cuda_interp_state_point3(block, block->shape,
k_bssn_cuda_bh_state_indices[0],
k_bssn_cuda_bh_state_indices[1],
k_bssn_cuda_bh_state_indices[2],
block->X[0][0], block->X[1][0], block->X[2][0],
DH[0], DH[1], DH[2],
x, y, z,
interp_ordn, interp_sym,
soa3, shellf) != 0)
{
const int sx = ordn;
const int sy = ordn;
const int sz = ordn;
const int region_all = sx * sy * sz;
const int i0 = bssn_cuda_interp_tile_start(block->X[0], block->shape[0], x, DH[0], ordn);
const int j0 = bssn_cuda_interp_tile_start(block->X[1], block->shape[1], y, DH[1], ordn);
const int k0 = bssn_cuda_interp_tile_start(block->X[2], block->shape[2], z, DH[2], ordn);
double packed_fields[3 * region_all];
for (int f = 0; f < 3; f++)
{
cout << "CUDA BH tile download failed" << endl;
MPI_Abort(MPI_COMM_WORLD, 1);
if (bssn_cuda_pack_state_region_to_host_buffer(block,
k_bssn_cuda_bh_state_indices[f],
packed_fields + f * region_all,
block->shape,
i0, j0, k0,
sx, sy, sz) != 0)
{
cout << "CUDA BH tile download failed" << endl;
MPI_Abort(MPI_COMM_WORLD, 1);
}
int tile_shape[3] = {sx, sy, sz};
f_global_interp(tile_shape,
block->X[0] + i0,
block->X[1] + j0,
block->X[2] + k0,
packed_fields + f * region_all,
shellf[f],
x, y, z,
interp_ordn,
vars[f]->SoA,
interp_sym);
}
int tile_shape[3] = {sx, sy, sz};
f_global_interp(tile_shape,
block->X[0] + i0,
block->X[1] + j0,
block->X[2] + k0,
packed_fields + f * region_all,
shellf[f],
x, y, z,
interp_ordn,
vars[f]->SoA,
interp_sym);
}
}
else
@@ -3692,13 +3710,16 @@ void bssn_class::Step(int lev, int YN)
}
}
STEP_TIMER_ADD(TB_BH_PREDICTOR, timer_bh_predictor);
// data analysis part
// Warning NOTE: the variables1 are used as temporary storage
if (lev == a_lev)
{
STEP_TIMER_DECL(timer_analysis_surface);
AnalysisStuff(lev, dT_lev);
STEP_TIMER_ADD(TB_ANALYSIS_SURFACE, timer_analysis_surface);
}
STEP_TIMER_ADD(TB_BH_PREDICTOR, timer_bh_predictor);
#endif
#ifdef With_AHF