Stabilize device AMR restrict across regrid
This commit is contained in:
@@ -583,6 +583,148 @@ void bssn_cuda_download_level_state(MyList<Patch> *PatL, MyList<var> *vars, int
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void bssn_cuda_download_level_state_if_present(MyList<Patch> *PatL, MyList<var> *vars, int myrank)
|
||||||
|
{
|
||||||
|
MyList<Patch> *Pp = PatL;
|
||||||
|
while (Pp)
|
||||||
|
{
|
||||||
|
MyList<Block> *BP = Pp->data->blb;
|
||||||
|
while (BP)
|
||||||
|
{
|
||||||
|
Block *cg = BP->data;
|
||||||
|
if (myrank == cg->rank && bssn_cuda_has_resident_state(cg))
|
||||||
|
{
|
||||||
|
double *state_out[BSSN_CUDA_STATE_COUNT];
|
||||||
|
if (!fill_bssn_cuda_views(cg, vars, state_out))
|
||||||
|
{
|
||||||
|
cout << "CUDA BSSN state list mismatch on resident state conditional download" << endl;
|
||||||
|
MPI_Abort(MPI_COMM_WORLD, 1);
|
||||||
|
}
|
||||||
|
if (bssn_cuda_download_resident_state_if_present(cg, cg->shape, state_out))
|
||||||
|
{
|
||||||
|
cout << "CUDA resident state conditional download failed" << endl;
|
||||||
|
MPI_Abort(MPI_COMM_WORLD, 1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (BP == Pp->data->ble)
|
||||||
|
break;
|
||||||
|
BP = BP->next;
|
||||||
|
}
|
||||||
|
Pp = Pp->next;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void bssn_cuda_release_level_state(MyList<Patch> *PatL, int myrank)
|
||||||
|
{
|
||||||
|
MyList<Patch> *Pp = PatL;
|
||||||
|
while (Pp)
|
||||||
|
{
|
||||||
|
MyList<Block> *BP = Pp->data->blb;
|
||||||
|
while (BP)
|
||||||
|
{
|
||||||
|
Block *cg = BP->data;
|
||||||
|
if (myrank == cg->rank && bssn_cuda_has_resident_state(cg))
|
||||||
|
bssn_cuda_release_step_ctx(cg);
|
||||||
|
if (BP == Pp->data->ble)
|
||||||
|
break;
|
||||||
|
BP = BP->next;
|
||||||
|
}
|
||||||
|
Pp = Pp->next;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void bssn_cuda_flush_level_before_regrid(MyList<Patch> *PatL,
|
||||||
|
MyList<var> *corL,
|
||||||
|
MyList<var> *oldL,
|
||||||
|
MyList<var> *stateL,
|
||||||
|
MyList<var> *preL,
|
||||||
|
int myrank)
|
||||||
|
{
|
||||||
|
bssn_cuda_download_level_state_if_present(PatL, corL, myrank);
|
||||||
|
bssn_cuda_download_level_state_if_present(PatL, oldL, myrank);
|
||||||
|
bssn_cuda_download_level_state_if_present(PatL, stateL, myrank);
|
||||||
|
bssn_cuda_download_level_state_if_present(PatL, preL, myrank);
|
||||||
|
bssn_cuda_release_level_state(PatL, myrank);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool bssn_cuda_regrid_flush_enabled()
|
||||||
|
{
|
||||||
|
static int enabled = -1;
|
||||||
|
if (enabled < 0)
|
||||||
|
{
|
||||||
|
const char *env = getenv("AMSS_CUDA_AMR_RESTRICT_DEVICE");
|
||||||
|
enabled = (env && atoi(env) != 0) ? 1 : 0;
|
||||||
|
}
|
||||||
|
return enabled != 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool bssn_cuda_will_regrid_onelevel(cgh *GH, int lev, int Symmetry, int BH_num, double **Porg0)
|
||||||
|
{
|
||||||
|
if (!GH || lev < GH->movls || lev >= GH->levels || !GH->PatL[lev])
|
||||||
|
return false;
|
||||||
|
if (!GH->PatL[lev]->data || !GH->PatL[lev]->data->blb || !GH->PatL[lev]->data->blb->data)
|
||||||
|
return true;
|
||||||
|
|
||||||
|
const int do_every = 2;
|
||||||
|
const double dX = GH->PatL[lev]->data->blb->data->getdX(0);
|
||||||
|
const double dY = GH->PatL[lev]->data->blb->data->getdX(1);
|
||||||
|
const double dZ = GH->PatL[lev]->data->blb->data->getdX(2);
|
||||||
|
|
||||||
|
for (int grd = 0; grd < GH->grids[lev]; grd++)
|
||||||
|
{
|
||||||
|
int bhi = 0;
|
||||||
|
for (bhi = 0; bhi < BH_num; bhi++)
|
||||||
|
{
|
||||||
|
if (feq(GH->Porgls[lev][bhi][0], GH->handle[lev][grd][0], 2 * do_every * dX) &&
|
||||||
|
feq(GH->Porgls[lev][bhi][1], GH->handle[lev][grd][1], 2 * do_every * dY) &&
|
||||||
|
feq(GH->Porgls[lev][bhi][2], GH->handle[lev][grd][2], 2 * do_every * dZ))
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if (bhi == BH_num)
|
||||||
|
{
|
||||||
|
if (feq(0, GH->bbox[lev][grd][0], dX / 2) &&
|
||||||
|
feq(0, GH->bbox[lev][grd][1], dY / 2) &&
|
||||||
|
feq(0, GH->bbox[lev][grd][2], dZ / 2))
|
||||||
|
continue;
|
||||||
|
if (BH_num == 1)
|
||||||
|
bhi = 0;
|
||||||
|
else
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
double rr = (Porg0[bhi][0] - GH->handle[lev][grd][0]) / dX;
|
||||||
|
int flag = (rr > 0) ? int(rr + 0.5) / do_every : int(rr - 0.5) / do_every;
|
||||||
|
rr = flag * do_every * dX;
|
||||||
|
if (Symmetry == 2 && GH->bbox[lev][grd][0] + rr < 0)
|
||||||
|
rr = -GH->bbox[lev][grd][0];
|
||||||
|
if (fabs(rr) > dX / 2)
|
||||||
|
return true;
|
||||||
|
|
||||||
|
rr = (Porg0[bhi][1] - GH->handle[lev][grd][1]) / dY;
|
||||||
|
flag = (rr > 0) ? int(rr + 0.5) / do_every : int(rr - 0.5) / do_every;
|
||||||
|
rr = flag * do_every * dY;
|
||||||
|
if (Symmetry == 2 && GH->bbox[lev][grd][1] + rr < 0)
|
||||||
|
rr = -GH->bbox[lev][grd][1];
|
||||||
|
if (fabs(rr) > dY / 2)
|
||||||
|
return true;
|
||||||
|
|
||||||
|
rr = (Porg0[bhi][2] - GH->handle[lev][grd][2]) / dZ;
|
||||||
|
flag = (rr > 0) ? int(rr + 0.5) / do_every : int(rr - 0.5) / do_every;
|
||||||
|
rr = flag * do_every * dZ;
|
||||||
|
if (Symmetry > 0 && GH->bbox[lev][grd][2] + rr < 0)
|
||||||
|
rr = -GH->bbox[lev][grd][1];
|
||||||
|
if (fabs(rr) > dZ / 2)
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool bssn_cuda_should_flush_before_regrid(cgh *GH, int lev, int Symmetry, int BH_num, double **Porg0)
|
||||||
|
{
|
||||||
|
return bssn_cuda_regrid_flush_enabled() &&
|
||||||
|
bssn_cuda_will_regrid_onelevel(GH, lev, Symmetry, BH_num, Porg0);
|
||||||
|
}
|
||||||
|
|
||||||
void bssn_cuda_sync_level_bh_fields(MyList<Patch> *PatL,
|
void bssn_cuda_sync_level_bh_fields(MyList<Patch> *PatL,
|
||||||
int myrank,
|
int myrank,
|
||||||
var *forx, var *fory, var *forz)
|
var *forx, var *fory, var *forz)
|
||||||
@@ -2839,6 +2981,14 @@ void bssn_class::Evolve(int Steps)
|
|||||||
|
|
||||||
#if (REGLEV == 1)
|
#if (REGLEV == 1)
|
||||||
STEP_TIMER_DECL(timer_regrid);
|
STEP_TIMER_DECL(timer_regrid);
|
||||||
|
#if USE_CUDA_BSSN
|
||||||
|
for (int il = 0; il < GH->levels; il++)
|
||||||
|
if (bssn_cuda_should_flush_before_regrid(GH, il, Symmetry, BH_num, Porg0))
|
||||||
|
bssn_cuda_flush_level_before_regrid(GH->PatL[il],
|
||||||
|
SynchList_cor, OldStateList,
|
||||||
|
StateList, SynchList_pre,
|
||||||
|
myrank);
|
||||||
|
#endif
|
||||||
GH->Regrid(Symmetry, BH_num, Porgbr, Porg0,
|
GH->Regrid(Symmetry, BH_num, Porgbr, Porg0,
|
||||||
SynchList_cor, OldStateList, StateList, SynchList_pre,
|
SynchList_cor, OldStateList, StateList, SynchList_pre,
|
||||||
fgt(PhysTime - dT_mon, StartTime, dT_mon / 2), ErrorMonitor);
|
fgt(PhysTime - dT_mon, StartTime, dT_mon / 2), ErrorMonitor);
|
||||||
@@ -3075,6 +3225,13 @@ void bssn_class::RecursiveStep(int lev)
|
|||||||
|
|
||||||
#if (REGLEV == 0)
|
#if (REGLEV == 0)
|
||||||
STEP_TIMER_DECL(timer_regrid_onelevel);
|
STEP_TIMER_DECL(timer_regrid_onelevel);
|
||||||
|
#if USE_CUDA_BSSN
|
||||||
|
if (bssn_cuda_should_flush_before_regrid(GH, lev, Symmetry, BH_num, Porg0))
|
||||||
|
bssn_cuda_flush_level_before_regrid(GH->PatL[lev],
|
||||||
|
SynchList_cor, OldStateList,
|
||||||
|
StateList, SynchList_pre,
|
||||||
|
myrank);
|
||||||
|
#endif
|
||||||
if (GH->Regrid_Onelevel(lev, Symmetry, BH_num, Porgbr, Porg0,
|
if (GH->Regrid_Onelevel(lev, Symmetry, BH_num, Porgbr, Porg0,
|
||||||
SynchList_cor, OldStateList, StateList, SynchList_pre,
|
SynchList_cor, OldStateList, StateList, SynchList_pre,
|
||||||
fgt(PhysTime - dT_lev, StartTime, dT_lev / 2), ErrorMonitor))
|
fgt(PhysTime - dT_lev, StartTime, dT_lev / 2), ErrorMonitor))
|
||||||
@@ -3261,6 +3418,13 @@ void bssn_class::ParallelStep()
|
|||||||
delete[] tporg;
|
delete[] tporg;
|
||||||
delete[] tporgo;
|
delete[] tporgo;
|
||||||
#if (REGLEV == 0)
|
#if (REGLEV == 0)
|
||||||
|
#if USE_CUDA_BSSN
|
||||||
|
if (bssn_cuda_should_flush_before_regrid(GH, GH->mylev, Symmetry, BH_num, Porg0))
|
||||||
|
bssn_cuda_flush_level_before_regrid(GH->PatL[GH->mylev],
|
||||||
|
SynchList_cor, OldStateList,
|
||||||
|
StateList, SynchList_pre,
|
||||||
|
myrank);
|
||||||
|
#endif
|
||||||
if (GH->Regrid_Onelevel(GH->mylev, Symmetry, BH_num, Porgbr, Porg0,
|
if (GH->Regrid_Onelevel(GH->mylev, Symmetry, BH_num, Porgbr, Porg0,
|
||||||
SynchList_cor, OldStateList, StateList, SynchList_pre,
|
SynchList_cor, OldStateList, StateList, SynchList_pre,
|
||||||
fgt(PhysTime - dT_lev, StartTime, dT_lev / 2), ErrorMonitor))
|
fgt(PhysTime - dT_lev, StartTime, dT_lev / 2), ErrorMonitor))
|
||||||
@@ -3432,6 +3596,13 @@ void bssn_class::ParallelStep()
|
|||||||
if (lev + 1 >= GH->movls)
|
if (lev + 1 >= GH->movls)
|
||||||
{
|
{
|
||||||
// GH->Regrid_Onelevel_aux(lev,Symmetry,BH_num,Porgbr,Porg0,
|
// GH->Regrid_Onelevel_aux(lev,Symmetry,BH_num,Porgbr,Porg0,
|
||||||
|
#if USE_CUDA_BSSN
|
||||||
|
if (bssn_cuda_should_flush_before_regrid(GH, lev + 1, Symmetry, BH_num, Porg0))
|
||||||
|
bssn_cuda_flush_level_before_regrid(GH->PatL[lev + 1],
|
||||||
|
SynchList_cor, OldStateList,
|
||||||
|
StateList, SynchList_pre,
|
||||||
|
myrank);
|
||||||
|
#endif
|
||||||
if (GH->Regrid_Onelevel(lev + 1, Symmetry, BH_num, Porgbr, Porg0,
|
if (GH->Regrid_Onelevel(lev + 1, Symmetry, BH_num, Porgbr, Porg0,
|
||||||
SynchList_cor, OldStateList, StateList, SynchList_pre,
|
SynchList_cor, OldStateList, StateList, SynchList_pre,
|
||||||
fgt(PhysTime - dT_levp1, StartTime, dT_levp1 / 2), ErrorMonitor))
|
fgt(PhysTime - dT_levp1, StartTime, dT_levp1 / 2), ErrorMonitor))
|
||||||
@@ -3451,6 +3622,13 @@ void bssn_class::ParallelStep()
|
|||||||
// for this level
|
// for this level
|
||||||
if (YN == 1)
|
if (YN == 1)
|
||||||
{
|
{
|
||||||
|
#if USE_CUDA_BSSN
|
||||||
|
if (bssn_cuda_should_flush_before_regrid(GH, lev, Symmetry, BH_num, Porg0))
|
||||||
|
bssn_cuda_flush_level_before_regrid(GH->PatL[lev],
|
||||||
|
SynchList_cor, OldStateList,
|
||||||
|
StateList, SynchList_pre,
|
||||||
|
myrank);
|
||||||
|
#endif
|
||||||
if (GH->Regrid_Onelevel(lev, Symmetry, BH_num, Porgbr, Porg0,
|
if (GH->Regrid_Onelevel(lev, Symmetry, BH_num, Porgbr, Porg0,
|
||||||
SynchList_cor, OldStateList, StateList, SynchList_pre,
|
SynchList_cor, OldStateList, StateList, SynchList_pre,
|
||||||
fgt(PhysTime - dT_lev, StartTime, dT_lev / 2), ErrorMonitor))
|
fgt(PhysTime - dT_lev, StartTime, dT_lev / 2), ErrorMonitor))
|
||||||
@@ -3474,6 +3652,13 @@ void bssn_class::ParallelStep()
|
|||||||
if (YN == 1)
|
if (YN == 1)
|
||||||
{
|
{
|
||||||
// GH->Regrid_Onelevel_aux(lev-2,Symmetry,BH_num,Porgbr,Porg0,
|
// GH->Regrid_Onelevel_aux(lev-2,Symmetry,BH_num,Porgbr,Porg0,
|
||||||
|
#if USE_CUDA_BSSN
|
||||||
|
if (bssn_cuda_should_flush_before_regrid(GH, lev - 1, Symmetry, BH_num, Porg0))
|
||||||
|
bssn_cuda_flush_level_before_regrid(GH->PatL[lev - 1],
|
||||||
|
SynchList_cor, OldStateList,
|
||||||
|
StateList, SynchList_pre,
|
||||||
|
myrank);
|
||||||
|
#endif
|
||||||
if (GH->Regrid_Onelevel(lev - 1, Symmetry, BH_num, Porgbr, Porg0,
|
if (GH->Regrid_Onelevel(lev - 1, Symmetry, BH_num, Porgbr, Porg0,
|
||||||
SynchList_cor, OldStateList, StateList, SynchList_pre,
|
SynchList_cor, OldStateList, StateList, SynchList_pre,
|
||||||
fgt(PhysTime - dT_lev, StartTime, dT_levm1 / 2), ErrorMonitor))
|
fgt(PhysTime - dT_lev, StartTime, dT_levm1 / 2), ErrorMonitor))
|
||||||
@@ -3494,6 +3679,13 @@ void bssn_class::ParallelStep()
|
|||||||
if (i % 4 == 3)
|
if (i % 4 == 3)
|
||||||
{
|
{
|
||||||
// GH->Regrid_Onelevel_aux(lev-2,Symmetry,BH_num,Porgbr,Porg0,
|
// GH->Regrid_Onelevel_aux(lev-2,Symmetry,BH_num,Porgbr,Porg0,
|
||||||
|
#if USE_CUDA_BSSN
|
||||||
|
if (bssn_cuda_should_flush_before_regrid(GH, lev - 1, Symmetry, BH_num, Porg0))
|
||||||
|
bssn_cuda_flush_level_before_regrid(GH->PatL[lev - 1],
|
||||||
|
SynchList_cor, OldStateList,
|
||||||
|
StateList, SynchList_pre,
|
||||||
|
myrank);
|
||||||
|
#endif
|
||||||
if (GH->Regrid_Onelevel(lev - 1, Symmetry, BH_num, Porgbr, Porg0,
|
if (GH->Regrid_Onelevel(lev - 1, Symmetry, BH_num, Porgbr, Porg0,
|
||||||
SynchList_cor, OldStateList, StateList, SynchList_pre,
|
SynchList_cor, OldStateList, StateList, SynchList_pre,
|
||||||
fgt(PhysTime - dT_lev, StartTime, dT_levm1 / 2), ErrorMonitor))
|
fgt(PhysTime - dT_lev, StartTime, dT_levm1 / 2), ErrorMonitor))
|
||||||
|
|||||||
@@ -6404,6 +6404,45 @@ static void download_resident_state(void *block_tag, int *ex, double **state_hos
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static bool download_resident_state_if_present(void *block_tag, int *ex, double **state_host_out)
|
||||||
|
{
|
||||||
|
auto it = g_step_ctx.find(block_tag);
|
||||||
|
if (it == g_step_ctx.end()) return false;
|
||||||
|
|
||||||
|
StepContext &ctx = it->second;
|
||||||
|
const int bank = find_resident_bank(ctx, state_host_out);
|
||||||
|
if (bank < 0 || !ctx.resident_valid[bank])
|
||||||
|
return false;
|
||||||
|
|
||||||
|
const size_t all = (size_t)ex[0] * ex[1] * ex[2];
|
||||||
|
const size_t bytes = all * sizeof(double);
|
||||||
|
mark_resident_current_bank(ctx, bank);
|
||||||
|
if (resident_host_subset_clean(ctx, bank, BSSN_STATE_COUNT, nullptr))
|
||||||
|
return true;
|
||||||
|
|
||||||
|
static int direct_download = -1;
|
||||||
|
if (direct_download < 0) {
|
||||||
|
const char *env = getenv("AMSS_CUDA_DIRECT_STATE_DOWNLOAD");
|
||||||
|
direct_download = env ? ((atoi(env) != 0) ? 1 : 0) : 1;
|
||||||
|
}
|
||||||
|
if (direct_download) {
|
||||||
|
for (int i = 0; i < BSSN_STATE_COUNT; ++i) {
|
||||||
|
CUDA_CHECK(cudaMemcpyAsync(state_host_out[i], ctx.d_resident[bank][i],
|
||||||
|
bytes, cudaMemcpyDeviceToHost));
|
||||||
|
}
|
||||||
|
CUDA_CHECK(cudaDeviceSynchronize());
|
||||||
|
} else {
|
||||||
|
CUDA_CHECK(cudaMemcpy(g_buf.h_stage, ctx.d_resident_mem[bank],
|
||||||
|
(size_t)BSSN_STATE_COUNT * bytes,
|
||||||
|
cudaMemcpyDeviceToHost));
|
||||||
|
for (int i = 0; i < BSSN_STATE_COUNT; ++i) {
|
||||||
|
std::memcpy(state_host_out[i], g_buf.h_stage + (size_t)i * all, bytes);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
set_resident_host_clean(ctx, bank, true);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
static void copy_state_subset(void *block_tag,
|
static void copy_state_subset(void *block_tag,
|
||||||
int *ex,
|
int *ex,
|
||||||
int subset_count,
|
int subset_count,
|
||||||
@@ -7056,6 +7095,18 @@ int bssn_cuda_download_resident_state(void *block_tag,
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
extern "C"
|
||||||
|
int bssn_cuda_download_resident_state_if_present(void *block_tag,
|
||||||
|
int *ex,
|
||||||
|
double **state_host_out)
|
||||||
|
{
|
||||||
|
init_gpu_dispatch();
|
||||||
|
CUDA_CHECK(cudaSetDevice(g_dispatch.my_device));
|
||||||
|
if (!block_tag || !ex || !state_host_out) return 1;
|
||||||
|
download_resident_state_if_present(block_tag, ex, state_host_out);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
extern "C"
|
extern "C"
|
||||||
int bssn_cuda_download_constraint_outputs(int *ex,
|
int bssn_cuda_download_constraint_outputs(int *ex,
|
||||||
double **constraint_host_out)
|
double **constraint_host_out)
|
||||||
|
|||||||
@@ -73,6 +73,10 @@ int bssn_cuda_download_resident_state(void *block_tag,
|
|||||||
int *ex,
|
int *ex,
|
||||||
double **state_host_out);
|
double **state_host_out);
|
||||||
|
|
||||||
|
int bssn_cuda_download_resident_state_if_present(void *block_tag,
|
||||||
|
int *ex,
|
||||||
|
double **state_host_out);
|
||||||
|
|
||||||
int bssn_cuda_download_constraint_outputs(int *ex,
|
int bssn_cuda_download_constraint_outputs(int *ex,
|
||||||
double **constraint_host_out);
|
double **constraint_host_out);
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user