Checkpoint Z4C CUDA resident sync progress
This commit is contained in:
@@ -388,41 +388,57 @@ bool z4c_cuda_interp_bh_point_resident(MyList<Patch> *PatL,
|
||||
if (z4c_cuda_has_resident_state(block) &&
|
||||
block->shape[0] >= ordn && block->shape[1] >= ordn && block->shape[2] >= ordn)
|
||||
{
|
||||
const int sx = ordn;
|
||||
const int sy = ordn;
|
||||
const int sz = ordn;
|
||||
const int region_all = sx * sy * sz;
|
||||
const int i0 = z4c_cuda_interp_tile_start(block->X[0], block->shape[0], x, DH[0], ordn);
|
||||
const int j0 = z4c_cuda_interp_tile_start(block->X[1], block->shape[1], y, DH[1], ordn);
|
||||
const int k0 = z4c_cuda_interp_tile_start(block->X[2], block->shape[2], z, DH[2], ordn);
|
||||
double *packed_fields = new double[3 * region_all];
|
||||
var *vars[3] = {forx, fory, forz};
|
||||
for (int f = 0; f < 3; f++)
|
||||
static int use_device_bh_interp = -1;
|
||||
if (use_device_bh_interp < 0)
|
||||
{
|
||||
if (z4c_cuda_pack_state_region_to_host_buffer(block,
|
||||
k_z4c_cuda_bh_state_indices[f],
|
||||
packed_fields + f * region_all,
|
||||
block->shape,
|
||||
i0, j0, k0,
|
||||
sx, sy, sz) != 0)
|
||||
const char *env = getenv("AMSS_CUDA_Z4C_BH_INTERP_DEVICE");
|
||||
use_device_bh_interp = (env && atoi(env) != 0) ? 1 : 0;
|
||||
}
|
||||
bool used_device_interp = false;
|
||||
if (use_device_bh_interp)
|
||||
{
|
||||
double soa3[9];
|
||||
for (int f = 0; f < 3; f++)
|
||||
{
|
||||
delete[] packed_fields;
|
||||
cout << "CUDA Z4C BH tile download failed" << endl;
|
||||
soa3[3 * f + 0] = vars[f]->SoA[0];
|
||||
soa3[3 * f + 1] = vars[f]->SoA[1];
|
||||
soa3[3 * f + 2] = vars[f]->SoA[2];
|
||||
}
|
||||
used_device_interp =
|
||||
(z4c_cuda_interp_state_point3(block, block->shape,
|
||||
k_z4c_cuda_bh_state_indices[0],
|
||||
k_z4c_cuda_bh_state_indices[1],
|
||||
k_z4c_cuda_bh_state_indices[2],
|
||||
block->X[0][0], block->X[1][0], block->X[2][0],
|
||||
DH[0], DH[1], DH[2],
|
||||
x, y, z,
|
||||
interp_ordn, interp_sym,
|
||||
soa3, shellf) == 0);
|
||||
}
|
||||
if (!used_device_interp)
|
||||
{
|
||||
double *shift_views[3] = {
|
||||
block->fgfs[forx->sgfn],
|
||||
block->fgfs[fory->sgfn],
|
||||
block->fgfs[forz->sgfn]};
|
||||
if (z4c_cuda_download_state_subset(block, block->shape, 3,
|
||||
k_z4c_cuda_bh_state_indices,
|
||||
shift_views) != 0)
|
||||
{
|
||||
cout << "CUDA Z4C BH shift download failed" << endl;
|
||||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||||
}
|
||||
int tile_shape[3] = {sx, sy, sz};
|
||||
f_global_interp(tile_shape,
|
||||
block->X[0] + i0,
|
||||
block->X[1] + j0,
|
||||
block->X[2] + k0,
|
||||
packed_fields + f * region_all,
|
||||
shellf[f],
|
||||
x, y, z,
|
||||
interp_ordn,
|
||||
vars[f]->SoA,
|
||||
interp_sym);
|
||||
f_global_interp(block->shape, block->X[0], block->X[1], block->X[2],
|
||||
block->fgfs[forx->sgfn], shellf[0],
|
||||
x, y, z, interp_ordn, forx->SoA, interp_sym);
|
||||
f_global_interp(block->shape, block->X[0], block->X[1], block->X[2],
|
||||
block->fgfs[fory->sgfn], shellf[1],
|
||||
x, y, z, interp_ordn, fory->SoA, interp_sym);
|
||||
f_global_interp(block->shape, block->X[0], block->X[1], block->X[2],
|
||||
block->fgfs[forz->sgfn], shellf[2],
|
||||
x, y, z, interp_ordn, forz->SoA, interp_sym);
|
||||
}
|
||||
delete[] packed_fields;
|
||||
}
|
||||
else
|
||||
{
|
||||
|
||||
Reference in New Issue
Block a user