Add resident BSSN GPU point interpolation
This commit is contained in:
@@ -439,38 +439,56 @@ bool bssn_cuda_interp_bh_point_resident(MyList<Patch> *PatL,
|
||||
if (bssn_cuda_has_resident_state(block) &&
|
||||
block->shape[0] >= ordn && block->shape[1] >= ordn && block->shape[2] >= ordn)
|
||||
{
|
||||
const int sx = ordn;
|
||||
const int sy = ordn;
|
||||
const int sz = ordn;
|
||||
const int region_all = sx * sy * sz;
|
||||
const int i0 = bssn_cuda_interp_tile_start(block->X[0], block->shape[0], x, DH[0], ordn);
|
||||
const int j0 = bssn_cuda_interp_tile_start(block->X[1], block->shape[1], y, DH[1], ordn);
|
||||
const int k0 = bssn_cuda_interp_tile_start(block->X[2], block->shape[2], z, DH[2], ordn);
|
||||
double packed_fields[3 * region_all];
|
||||
var *vars[3] = {forx, fory, forz};
|
||||
double soa3[9];
|
||||
for (int f = 0; f < 3; f++)
|
||||
{
|
||||
if (bssn_cuda_pack_state_region_to_host_buffer(block,
|
||||
k_bssn_cuda_bh_state_indices[f],
|
||||
packed_fields + f * region_all,
|
||||
block->shape,
|
||||
i0, j0, k0,
|
||||
sx, sy, sz) != 0)
|
||||
soa3[3 * f + 0] = vars[f]->SoA[0];
|
||||
soa3[3 * f + 1] = vars[f]->SoA[1];
|
||||
soa3[3 * f + 2] = vars[f]->SoA[2];
|
||||
}
|
||||
if (bssn_cuda_interp_state_point3(block, block->shape,
|
||||
k_bssn_cuda_bh_state_indices[0],
|
||||
k_bssn_cuda_bh_state_indices[1],
|
||||
k_bssn_cuda_bh_state_indices[2],
|
||||
block->X[0][0], block->X[1][0], block->X[2][0],
|
||||
DH[0], DH[1], DH[2],
|
||||
x, y, z,
|
||||
interp_ordn, interp_sym,
|
||||
soa3, shellf) != 0)
|
||||
{
|
||||
const int sx = ordn;
|
||||
const int sy = ordn;
|
||||
const int sz = ordn;
|
||||
const int region_all = sx * sy * sz;
|
||||
const int i0 = bssn_cuda_interp_tile_start(block->X[0], block->shape[0], x, DH[0], ordn);
|
||||
const int j0 = bssn_cuda_interp_tile_start(block->X[1], block->shape[1], y, DH[1], ordn);
|
||||
const int k0 = bssn_cuda_interp_tile_start(block->X[2], block->shape[2], z, DH[2], ordn);
|
||||
double packed_fields[3 * region_all];
|
||||
for (int f = 0; f < 3; f++)
|
||||
{
|
||||
cout << "CUDA BH tile download failed" << endl;
|
||||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||||
if (bssn_cuda_pack_state_region_to_host_buffer(block,
|
||||
k_bssn_cuda_bh_state_indices[f],
|
||||
packed_fields + f * region_all,
|
||||
block->shape,
|
||||
i0, j0, k0,
|
||||
sx, sy, sz) != 0)
|
||||
{
|
||||
cout << "CUDA BH tile download failed" << endl;
|
||||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||||
}
|
||||
int tile_shape[3] = {sx, sy, sz};
|
||||
f_global_interp(tile_shape,
|
||||
block->X[0] + i0,
|
||||
block->X[1] + j0,
|
||||
block->X[2] + k0,
|
||||
packed_fields + f * region_all,
|
||||
shellf[f],
|
||||
x, y, z,
|
||||
interp_ordn,
|
||||
vars[f]->SoA,
|
||||
interp_sym);
|
||||
}
|
||||
int tile_shape[3] = {sx, sy, sz};
|
||||
f_global_interp(tile_shape,
|
||||
block->X[0] + i0,
|
||||
block->X[1] + j0,
|
||||
block->X[2] + k0,
|
||||
packed_fields + f * region_all,
|
||||
shellf[f],
|
||||
x, y, z,
|
||||
interp_ordn,
|
||||
vars[f]->SoA,
|
||||
interp_sym);
|
||||
}
|
||||
}
|
||||
else
|
||||
@@ -3692,13 +3710,16 @@ void bssn_class::Step(int lev, int YN)
|
||||
}
|
||||
}
|
||||
|
||||
STEP_TIMER_ADD(TB_BH_PREDICTOR, timer_bh_predictor);
|
||||
|
||||
// data analysis part
|
||||
// Warning NOTE: the variables1 are used as temp storage room
|
||||
if (lev == a_lev)
|
||||
{
|
||||
STEP_TIMER_DECL(timer_analysis_surface);
|
||||
AnalysisStuff(lev, dT_lev);
|
||||
STEP_TIMER_ADD(TB_ANALYSIS_SURFACE, timer_analysis_surface);
|
||||
}
|
||||
STEP_TIMER_ADD(TB_BH_PREDICTOR, timer_bh_predictor);
|
||||
#endif
|
||||
|
||||
#ifdef With_AHF
|
||||
|
||||
Reference in New Issue
Block a user