Stabilize device AMR restrict across regrid
This commit is contained in:
@@ -583,6 +583,148 @@ void bssn_cuda_download_level_state(MyList<Patch> *PatL, MyList<var> *vars, int
|
||||
}
|
||||
}
|
||||
|
||||
void bssn_cuda_download_level_state_if_present(MyList<Patch> *PatL, MyList<var> *vars, int myrank)
|
||||
{
|
||||
MyList<Patch> *Pp = PatL;
|
||||
while (Pp)
|
||||
{
|
||||
MyList<Block> *BP = Pp->data->blb;
|
||||
while (BP)
|
||||
{
|
||||
Block *cg = BP->data;
|
||||
if (myrank == cg->rank && bssn_cuda_has_resident_state(cg))
|
||||
{
|
||||
double *state_out[BSSN_CUDA_STATE_COUNT];
|
||||
if (!fill_bssn_cuda_views(cg, vars, state_out))
|
||||
{
|
||||
cout << "CUDA BSSN state list mismatch on resident state conditional download" << endl;
|
||||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||||
}
|
||||
if (bssn_cuda_download_resident_state_if_present(cg, cg->shape, state_out))
|
||||
{
|
||||
cout << "CUDA resident state conditional download failed" << endl;
|
||||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||||
}
|
||||
}
|
||||
if (BP == Pp->data->ble)
|
||||
break;
|
||||
BP = BP->next;
|
||||
}
|
||||
Pp = Pp->next;
|
||||
}
|
||||
}
|
||||
|
||||
void bssn_cuda_release_level_state(MyList<Patch> *PatL, int myrank)
|
||||
{
|
||||
MyList<Patch> *Pp = PatL;
|
||||
while (Pp)
|
||||
{
|
||||
MyList<Block> *BP = Pp->data->blb;
|
||||
while (BP)
|
||||
{
|
||||
Block *cg = BP->data;
|
||||
if (myrank == cg->rank && bssn_cuda_has_resident_state(cg))
|
||||
bssn_cuda_release_step_ctx(cg);
|
||||
if (BP == Pp->data->ble)
|
||||
break;
|
||||
BP = BP->next;
|
||||
}
|
||||
Pp = Pp->next;
|
||||
}
|
||||
}
|
||||
|
||||
void bssn_cuda_flush_level_before_regrid(MyList<Patch> *PatL,
|
||||
MyList<var> *corL,
|
||||
MyList<var> *oldL,
|
||||
MyList<var> *stateL,
|
||||
MyList<var> *preL,
|
||||
int myrank)
|
||||
{
|
||||
bssn_cuda_download_level_state_if_present(PatL, corL, myrank);
|
||||
bssn_cuda_download_level_state_if_present(PatL, oldL, myrank);
|
||||
bssn_cuda_download_level_state_if_present(PatL, stateL, myrank);
|
||||
bssn_cuda_download_level_state_if_present(PatL, preL, myrank);
|
||||
bssn_cuda_release_level_state(PatL, myrank);
|
||||
}
|
||||
|
||||
bool bssn_cuda_regrid_flush_enabled()
|
||||
{
|
||||
static int enabled = -1;
|
||||
if (enabled < 0)
|
||||
{
|
||||
const char *env = getenv("AMSS_CUDA_AMR_RESTRICT_DEVICE");
|
||||
enabled = (env && atoi(env) != 0) ? 1 : 0;
|
||||
}
|
||||
return enabled != 0;
|
||||
}
|
||||
|
||||
bool bssn_cuda_will_regrid_onelevel(cgh *GH, int lev, int Symmetry, int BH_num, double **Porg0)
|
||||
{
|
||||
if (!GH || lev < GH->movls || lev >= GH->levels || !GH->PatL[lev])
|
||||
return false;
|
||||
if (!GH->PatL[lev]->data || !GH->PatL[lev]->data->blb || !GH->PatL[lev]->data->blb->data)
|
||||
return true;
|
||||
|
||||
const int do_every = 2;
|
||||
const double dX = GH->PatL[lev]->data->blb->data->getdX(0);
|
||||
const double dY = GH->PatL[lev]->data->blb->data->getdX(1);
|
||||
const double dZ = GH->PatL[lev]->data->blb->data->getdX(2);
|
||||
|
||||
for (int grd = 0; grd < GH->grids[lev]; grd++)
|
||||
{
|
||||
int bhi = 0;
|
||||
for (bhi = 0; bhi < BH_num; bhi++)
|
||||
{
|
||||
if (feq(GH->Porgls[lev][bhi][0], GH->handle[lev][grd][0], 2 * do_every * dX) &&
|
||||
feq(GH->Porgls[lev][bhi][1], GH->handle[lev][grd][1], 2 * do_every * dY) &&
|
||||
feq(GH->Porgls[lev][bhi][2], GH->handle[lev][grd][2], 2 * do_every * dZ))
|
||||
break;
|
||||
}
|
||||
if (bhi == BH_num)
|
||||
{
|
||||
if (feq(0, GH->bbox[lev][grd][0], dX / 2) &&
|
||||
feq(0, GH->bbox[lev][grd][1], dY / 2) &&
|
||||
feq(0, GH->bbox[lev][grd][2], dZ / 2))
|
||||
continue;
|
||||
if (BH_num == 1)
|
||||
bhi = 0;
|
||||
else
|
||||
return true;
|
||||
}
|
||||
|
||||
double rr = (Porg0[bhi][0] - GH->handle[lev][grd][0]) / dX;
|
||||
int flag = (rr > 0) ? int(rr + 0.5) / do_every : int(rr - 0.5) / do_every;
|
||||
rr = flag * do_every * dX;
|
||||
if (Symmetry == 2 && GH->bbox[lev][grd][0] + rr < 0)
|
||||
rr = -GH->bbox[lev][grd][0];
|
||||
if (fabs(rr) > dX / 2)
|
||||
return true;
|
||||
|
||||
rr = (Porg0[bhi][1] - GH->handle[lev][grd][1]) / dY;
|
||||
flag = (rr > 0) ? int(rr + 0.5) / do_every : int(rr - 0.5) / do_every;
|
||||
rr = flag * do_every * dY;
|
||||
if (Symmetry == 2 && GH->bbox[lev][grd][1] + rr < 0)
|
||||
rr = -GH->bbox[lev][grd][1];
|
||||
if (fabs(rr) > dY / 2)
|
||||
return true;
|
||||
|
||||
rr = (Porg0[bhi][2] - GH->handle[lev][grd][2]) / dZ;
|
||||
flag = (rr > 0) ? int(rr + 0.5) / do_every : int(rr - 0.5) / do_every;
|
||||
rr = flag * do_every * dZ;
|
||||
if (Symmetry > 0 && GH->bbox[lev][grd][2] + rr < 0)
|
||||
rr = -GH->bbox[lev][grd][1];
|
||||
if (fabs(rr) > dZ / 2)
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool bssn_cuda_should_flush_before_regrid(cgh *GH, int lev, int Symmetry, int BH_num, double **Porg0)
|
||||
{
|
||||
return bssn_cuda_regrid_flush_enabled() &&
|
||||
bssn_cuda_will_regrid_onelevel(GH, lev, Symmetry, BH_num, Porg0);
|
||||
}
|
||||
|
||||
void bssn_cuda_sync_level_bh_fields(MyList<Patch> *PatL,
|
||||
int myrank,
|
||||
var *forx, var *fory, var *forz)
|
||||
@@ -2839,6 +2981,14 @@ void bssn_class::Evolve(int Steps)
|
||||
|
||||
#if (REGLEV == 1)
|
||||
STEP_TIMER_DECL(timer_regrid);
|
||||
#if USE_CUDA_BSSN
|
||||
for (int il = 0; il < GH->levels; il++)
|
||||
if (bssn_cuda_should_flush_before_regrid(GH, il, Symmetry, BH_num, Porg0))
|
||||
bssn_cuda_flush_level_before_regrid(GH->PatL[il],
|
||||
SynchList_cor, OldStateList,
|
||||
StateList, SynchList_pre,
|
||||
myrank);
|
||||
#endif
|
||||
GH->Regrid(Symmetry, BH_num, Porgbr, Porg0,
|
||||
SynchList_cor, OldStateList, StateList, SynchList_pre,
|
||||
fgt(PhysTime - dT_mon, StartTime, dT_mon / 2), ErrorMonitor);
|
||||
@@ -3075,6 +3225,13 @@ void bssn_class::RecursiveStep(int lev)
|
||||
|
||||
#if (REGLEV == 0)
|
||||
STEP_TIMER_DECL(timer_regrid_onelevel);
|
||||
#if USE_CUDA_BSSN
|
||||
if (bssn_cuda_should_flush_before_regrid(GH, lev, Symmetry, BH_num, Porg0))
|
||||
bssn_cuda_flush_level_before_regrid(GH->PatL[lev],
|
||||
SynchList_cor, OldStateList,
|
||||
StateList, SynchList_pre,
|
||||
myrank);
|
||||
#endif
|
||||
if (GH->Regrid_Onelevel(lev, Symmetry, BH_num, Porgbr, Porg0,
|
||||
SynchList_cor, OldStateList, StateList, SynchList_pre,
|
||||
fgt(PhysTime - dT_lev, StartTime, dT_lev / 2), ErrorMonitor))
|
||||
@@ -3261,6 +3418,13 @@ void bssn_class::ParallelStep()
|
||||
delete[] tporg;
|
||||
delete[] tporgo;
|
||||
#if (REGLEV == 0)
|
||||
#if USE_CUDA_BSSN
|
||||
if (bssn_cuda_should_flush_before_regrid(GH, GH->mylev, Symmetry, BH_num, Porg0))
|
||||
bssn_cuda_flush_level_before_regrid(GH->PatL[GH->mylev],
|
||||
SynchList_cor, OldStateList,
|
||||
StateList, SynchList_pre,
|
||||
myrank);
|
||||
#endif
|
||||
if (GH->Regrid_Onelevel(GH->mylev, Symmetry, BH_num, Porgbr, Porg0,
|
||||
SynchList_cor, OldStateList, StateList, SynchList_pre,
|
||||
fgt(PhysTime - dT_lev, StartTime, dT_lev / 2), ErrorMonitor))
|
||||
@@ -3432,6 +3596,13 @@ void bssn_class::ParallelStep()
|
||||
if (lev + 1 >= GH->movls)
|
||||
{
|
||||
// GH->Regrid_Onelevel_aux(lev,Symmetry,BH_num,Porgbr,Porg0,
|
||||
#if USE_CUDA_BSSN
|
||||
if (bssn_cuda_should_flush_before_regrid(GH, lev + 1, Symmetry, BH_num, Porg0))
|
||||
bssn_cuda_flush_level_before_regrid(GH->PatL[lev + 1],
|
||||
SynchList_cor, OldStateList,
|
||||
StateList, SynchList_pre,
|
||||
myrank);
|
||||
#endif
|
||||
if (GH->Regrid_Onelevel(lev + 1, Symmetry, BH_num, Porgbr, Porg0,
|
||||
SynchList_cor, OldStateList, StateList, SynchList_pre,
|
||||
fgt(PhysTime - dT_levp1, StartTime, dT_levp1 / 2), ErrorMonitor))
|
||||
@@ -3451,6 +3622,13 @@ void bssn_class::ParallelStep()
|
||||
// for this level
|
||||
if (YN == 1)
|
||||
{
|
||||
#if USE_CUDA_BSSN
|
||||
if (bssn_cuda_should_flush_before_regrid(GH, lev, Symmetry, BH_num, Porg0))
|
||||
bssn_cuda_flush_level_before_regrid(GH->PatL[lev],
|
||||
SynchList_cor, OldStateList,
|
||||
StateList, SynchList_pre,
|
||||
myrank);
|
||||
#endif
|
||||
if (GH->Regrid_Onelevel(lev, Symmetry, BH_num, Porgbr, Porg0,
|
||||
SynchList_cor, OldStateList, StateList, SynchList_pre,
|
||||
fgt(PhysTime - dT_lev, StartTime, dT_lev / 2), ErrorMonitor))
|
||||
@@ -3474,6 +3652,13 @@ void bssn_class::ParallelStep()
|
||||
if (YN == 1)
|
||||
{
|
||||
// GH->Regrid_Onelevel_aux(lev-2,Symmetry,BH_num,Porgbr,Porg0,
|
||||
#if USE_CUDA_BSSN
|
||||
if (bssn_cuda_should_flush_before_regrid(GH, lev - 1, Symmetry, BH_num, Porg0))
|
||||
bssn_cuda_flush_level_before_regrid(GH->PatL[lev - 1],
|
||||
SynchList_cor, OldStateList,
|
||||
StateList, SynchList_pre,
|
||||
myrank);
|
||||
#endif
|
||||
if (GH->Regrid_Onelevel(lev - 1, Symmetry, BH_num, Porgbr, Porg0,
|
||||
SynchList_cor, OldStateList, StateList, SynchList_pre,
|
||||
fgt(PhysTime - dT_lev, StartTime, dT_levm1 / 2), ErrorMonitor))
|
||||
@@ -3494,6 +3679,13 @@ void bssn_class::ParallelStep()
|
||||
if (i % 4 == 3)
|
||||
{
|
||||
// GH->Regrid_Onelevel_aux(lev-2,Symmetry,BH_num,Porgbr,Porg0,
|
||||
#if USE_CUDA_BSSN
|
||||
if (bssn_cuda_should_flush_before_regrid(GH, lev - 1, Symmetry, BH_num, Porg0))
|
||||
bssn_cuda_flush_level_before_regrid(GH->PatL[lev - 1],
|
||||
SynchList_cor, OldStateList,
|
||||
StateList, SynchList_pre,
|
||||
myrank);
|
||||
#endif
|
||||
if (GH->Regrid_Onelevel(lev - 1, Symmetry, BH_num, Porgbr, Porg0,
|
||||
SynchList_cor, OldStateList, StateList, SynchList_pre,
|
||||
fgt(PhysTime - dT_lev, StartTime, dT_levm1 / 2), ErrorMonitor))
|
||||
|
||||
@@ -6404,6 +6404,45 @@ static void download_resident_state(void *block_tag, int *ex, double **state_hos
|
||||
}
|
||||
}
|
||||
|
||||
static bool download_resident_state_if_present(void *block_tag, int *ex, double **state_host_out)
|
||||
{
|
||||
auto it = g_step_ctx.find(block_tag);
|
||||
if (it == g_step_ctx.end()) return false;
|
||||
|
||||
StepContext &ctx = it->second;
|
||||
const int bank = find_resident_bank(ctx, state_host_out);
|
||||
if (bank < 0 || !ctx.resident_valid[bank])
|
||||
return false;
|
||||
|
||||
const size_t all = (size_t)ex[0] * ex[1] * ex[2];
|
||||
const size_t bytes = all * sizeof(double);
|
||||
mark_resident_current_bank(ctx, bank);
|
||||
if (resident_host_subset_clean(ctx, bank, BSSN_STATE_COUNT, nullptr))
|
||||
return true;
|
||||
|
||||
static int direct_download = -1;
|
||||
if (direct_download < 0) {
|
||||
const char *env = getenv("AMSS_CUDA_DIRECT_STATE_DOWNLOAD");
|
||||
direct_download = env ? ((atoi(env) != 0) ? 1 : 0) : 1;
|
||||
}
|
||||
if (direct_download) {
|
||||
for (int i = 0; i < BSSN_STATE_COUNT; ++i) {
|
||||
CUDA_CHECK(cudaMemcpyAsync(state_host_out[i], ctx.d_resident[bank][i],
|
||||
bytes, cudaMemcpyDeviceToHost));
|
||||
}
|
||||
CUDA_CHECK(cudaDeviceSynchronize());
|
||||
} else {
|
||||
CUDA_CHECK(cudaMemcpy(g_buf.h_stage, ctx.d_resident_mem[bank],
|
||||
(size_t)BSSN_STATE_COUNT * bytes,
|
||||
cudaMemcpyDeviceToHost));
|
||||
for (int i = 0; i < BSSN_STATE_COUNT; ++i) {
|
||||
std::memcpy(state_host_out[i], g_buf.h_stage + (size_t)i * all, bytes);
|
||||
}
|
||||
}
|
||||
set_resident_host_clean(ctx, bank, true);
|
||||
return true;
|
||||
}
|
||||
|
||||
static void copy_state_subset(void *block_tag,
|
||||
int *ex,
|
||||
int subset_count,
|
||||
@@ -7056,6 +7095,18 @@ int bssn_cuda_download_resident_state(void *block_tag,
|
||||
return 0;
|
||||
}
|
||||
|
||||
extern "C"
|
||||
int bssn_cuda_download_resident_state_if_present(void *block_tag,
|
||||
int *ex,
|
||||
double **state_host_out)
|
||||
{
|
||||
init_gpu_dispatch();
|
||||
CUDA_CHECK(cudaSetDevice(g_dispatch.my_device));
|
||||
if (!block_tag || !ex || !state_host_out) return 1;
|
||||
download_resident_state_if_present(block_tag, ex, state_host_out);
|
||||
return 0;
|
||||
}
|
||||
|
||||
extern "C"
|
||||
int bssn_cuda_download_constraint_outputs(int *ex,
|
||||
double **constraint_host_out)
|
||||
|
||||
@@ -73,6 +73,10 @@ int bssn_cuda_download_resident_state(void *block_tag,
|
||||
int *ex,
|
||||
double **state_host_out);
|
||||
|
||||
int bssn_cuda_download_resident_state_if_present(void *block_tag,
|
||||
int *ex,
|
||||
double **state_host_out);
|
||||
|
||||
int bssn_cuda_download_constraint_outputs(int *ex,
|
||||
double **constraint_host_out);
|
||||
|
||||
|
||||
Reference in New Issue
Block a user