Stabilize device AMR restrict across regrid

This commit is contained in:
2026-04-30 20:01:18 +08:00
parent be9033f449
commit b1974ef146
3 changed files with 247 additions and 0 deletions

View File

@@ -583,6 +583,148 @@ void bssn_cuda_download_level_state(MyList<Patch> *PatL, MyList<var> *vars, int
}
}
// Walks every patch in PatL and, for each block owned by `myrank` that
// reports device-resident state, asks the CUDA backend to copy that state
// into the host arrays selected by `vars`.
//
// PatL   - head of the patch list for one level (may be null: no-op).
// vars   - variable list used to build the host-side state views.
// myrank - MPI rank of the caller; blocks of other ranks are skipped.
//
// Any failure (view construction or a non-zero return from the download
// call) prints a message and calls MPI_Abort on MPI_COMM_WORLD.
void bssn_cuda_download_level_state_if_present(MyList<Patch> *PatL, MyList<var> *vars, int myrank)
{
  for (MyList<Patch> *patch_it = PatL; patch_it; patch_it = patch_it->next)
  {
    for (MyList<Block> *block_it = patch_it->data->blb; block_it; block_it = block_it->next)
    {
      Block *blk = block_it->data;
      if (blk->rank == myrank && bssn_cuda_has_resident_state(blk))
      {
        double *host_views[BSSN_CUDA_STATE_COUNT];
        if (!fill_bssn_cuda_views(blk, vars, host_views))
        {
          cout << "CUDA BSSN state list mismatch on resident state conditional download" << endl;
          MPI_Abort(MPI_COMM_WORLD, 1);
        }
        const int rc = bssn_cuda_download_resident_state_if_present(blk, blk->shape, host_views);
        if (rc != 0)
        {
          cout << "CUDA resident state conditional download failed" << endl;
          MPI_Abort(MPI_COMM_WORLD, 1);
        }
      }
      // The patch's block range is closed by `ble`; stop once it was handled
      // instead of running on into whatever follows it in the list.
      if (block_it == patch_it->data->ble)
        break;
    }
  }
}
// Releases the CUDA step context of every block on this level that belongs
// to `myrank` and currently reports device-resident state.
//
// PatL   - head of the patch list for one level (may be null: no-op).
// myrank - MPI rank of the caller; blocks of other ranks are left alone.
void bssn_cuda_release_level_state(MyList<Patch> *PatL, int myrank)
{
  for (MyList<Patch> *patch_it = PatL; patch_it; patch_it = patch_it->next)
  {
    for (MyList<Block> *block_it = patch_it->data->blb; block_it; block_it = block_it->next)
    {
      Block *blk = block_it->data;
      if (blk->rank == myrank && bssn_cuda_has_resident_state(blk))
      {
        bssn_cuda_release_step_ctx(blk);
      }
      // `ble` marks the last block of this patch; do not walk past it.
      if (block_it == patch_it->data->ble)
        break;
    }
  }
}
// Brings one level's device-resident state back to the host and then drops
// the device-side step contexts, so that a following regrid operates on
// host data only.
//
// The conditional download is attempted once per variable list, in the order
// corL, oldL, stateL, preL; afterwards the level's step contexts are
// released.
void bssn_cuda_flush_level_before_regrid(MyList<Patch> *PatL,
                                         MyList<var> *corL,
                                         MyList<var> *oldL,
                                         MyList<var> *stateL,
                                         MyList<var> *preL,
                                         int myrank)
{
  MyList<var> *const lists_in_order[] = {corL, oldL, stateL, preL};
  const int list_count = (int)(sizeof(lists_in_order) / sizeof(lists_in_order[0]));

  for (int k = 0; k < list_count; k++)
  {
    bssn_cuda_download_level_state_if_present(PatL, lists_in_order[k], myrank);
  }

  bssn_cuda_release_level_state(PatL, myrank);
}
// Reports whether the pre-regrid flush is switched on.
//
// The switch is the environment variable AMSS_CUDA_AMR_RESTRICT_DEVICE: it
// counts as "on" when it is set and atoi() of its value is non-zero. The
// environment is read on the first call only; the answer is cached for all
// later calls.
bool bssn_cuda_regrid_flush_enabled()
{
  // 0: environment not read yet, 1: disabled, 2: enabled.
  static int cached_state = 0;
  if (cached_state == 0)
  {
    const char *raw = getenv("AMSS_CUDA_AMR_RESTRICT_DEVICE");
    bool turned_on = false;
    if (raw)
      turned_on = (atoi(raw) != 0);
    cached_state = turned_on ? 2 : 1;
  }
  return cached_state == 2;
}
// Predicts whether a one-level regrid of level `lev` would shift any of its
// grids, without touching the grid hierarchy.
//
// GH       - grid hierarchy; a null pointer yields false.
// lev      - level to examine; levels below GH->movls or at/above GH->levels
//            yield false, as does a level without a patch list.
// Symmetry - symmetry flag: 2 enables the non-negative clamp on axes 0 and 1,
//            any value > 0 enables it on axis 2.
// BH_num   - number of entries in GH->Porgls[lev] / Porg0.
// Porg0    - current positions, indexed [bh][axis].
//
// Returns true when at least one grid would be shifted by more than half a
// cell along some axis, or when the answer cannot be determined (missing
// block data, or a grid that matches no tracked position while BH_num != 1).
//
// NOTE(review): this is presumably meant to mirror the shift computation in
// cgh::Regrid_Onelevel - keep the two in sync; confirm against that routine.
bool bssn_cuda_will_regrid_onelevel(cgh *GH, int lev, int Symmetry, int BH_num, double **Porg0)
{
  if (!GH || lev < GH->movls || lev >= GH->levels || !GH->PatL[lev])
    return false;
  // Without a first block there is no grid spacing to work with; answer
  // conservatively so the caller flushes.
  if (!GH->PatL[lev]->data || !GH->PatL[lev]->data->blb || !GH->PatL[lev]->data->blb->data)
    return true;

  const int do_every = 2;
  const double dX = GH->PatL[lev]->data->blb->data->getdX(0);
  const double dY = GH->PatL[lev]->data->blb->data->getdX(1);
  const double dZ = GH->PatL[lev]->data->blb->data->getdX(2);
  const double spacing[3] = {dX, dY, dZ};

  for (int grd = 0; grd < GH->grids[lev]; grd++)
  {
    // Find the tracked position this grid is attached to.
    int bhi = 0;
    for (bhi = 0; bhi < BH_num; bhi++)
    {
      if (feq(GH->Porgls[lev][bhi][0], GH->handle[lev][grd][0], 2 * do_every * dX) &&
          feq(GH->Porgls[lev][bhi][1], GH->handle[lev][grd][1], 2 * do_every * dY) &&
          feq(GH->Porgls[lev][bhi][2], GH->handle[lev][grd][2], 2 * do_every * dZ))
        break;
    }
    if (bhi == BH_num)
    {
      // A grid whose bbox origin sits at the coordinate origin is not
      // attached to any tracked position and is skipped.
      if (feq(0, GH->bbox[lev][grd][0], dX / 2) &&
          feq(0, GH->bbox[lev][grd][1], dY / 2) &&
          feq(0, GH->bbox[lev][grd][2], dZ / 2))
        continue;
      if (BH_num == 1)
        bhi = 0;
      else
        return true;
    }

    // One shared stanza for all three axes, so each axis can only ever use
    // its own handle/bbox component.
    for (int axis = 0; axis < 3; axis++)
    {
      const double h = spacing[axis];
      const bool clamp_here = (axis == 2) ? (Symmetry > 0) : (Symmetry == 2);

      // Offset in cells, rounded to nearest and truncated to a multiple of
      // do_every, then converted back to a coordinate shift.
      double rr = (Porg0[bhi][axis] - GH->handle[lev][grd][axis]) / h;
      const int flag = (rr > 0) ? int(rr + 0.5) / do_every : int(rr - 0.5) / do_every;
      rr = flag * do_every * h;

      // Do not let the shift push the bbox below zero on a clamped axis.
      // Fix: the z-axis branch previously clamped with the y component of
      // the bbox (index 1) instead of index 2.
      if (clamp_here && GH->bbox[lev][grd][axis] + rr < 0)
        rr = -GH->bbox[lev][grd][axis];

      if (fabs(rr) > h / 2)
        return true;
    }
  }
  return false;
}
// Decides whether level `lev` must be flushed from the device before a
// regrid: the feature has to be enabled, and the regrid predictor has to
// report that the level would move. The predictor is not consulted when the
// feature is disabled.
bool bssn_cuda_should_flush_before_regrid(cgh *GH, int lev, int Symmetry, int BH_num, double **Porg0)
{
  if (!bssn_cuda_regrid_flush_enabled())
    return false;
  return bssn_cuda_will_regrid_onelevel(GH, lev, Symmetry, BH_num, Porg0);
}
void bssn_cuda_sync_level_bh_fields(MyList<Patch> *PatL,
int myrank,
var *forx, var *fory, var *forz)
@@ -2839,6 +2981,14 @@ void bssn_class::Evolve(int Steps)
#if (REGLEV == 1)
STEP_TIMER_DECL(timer_regrid);
#if USE_CUDA_BSSN
for (int il = 0; il < GH->levels; il++)
if (bssn_cuda_should_flush_before_regrid(GH, il, Symmetry, BH_num, Porg0))
bssn_cuda_flush_level_before_regrid(GH->PatL[il],
SynchList_cor, OldStateList,
StateList, SynchList_pre,
myrank);
#endif
GH->Regrid(Symmetry, BH_num, Porgbr, Porg0,
SynchList_cor, OldStateList, StateList, SynchList_pre,
fgt(PhysTime - dT_mon, StartTime, dT_mon / 2), ErrorMonitor);
@@ -3075,6 +3225,13 @@ void bssn_class::RecursiveStep(int lev)
#if (REGLEV == 0)
STEP_TIMER_DECL(timer_regrid_onelevel);
#if USE_CUDA_BSSN
if (bssn_cuda_should_flush_before_regrid(GH, lev, Symmetry, BH_num, Porg0))
bssn_cuda_flush_level_before_regrid(GH->PatL[lev],
SynchList_cor, OldStateList,
StateList, SynchList_pre,
myrank);
#endif
if (GH->Regrid_Onelevel(lev, Symmetry, BH_num, Porgbr, Porg0,
SynchList_cor, OldStateList, StateList, SynchList_pre,
fgt(PhysTime - dT_lev, StartTime, dT_lev / 2), ErrorMonitor))
@@ -3261,6 +3418,13 @@ void bssn_class::ParallelStep()
delete[] tporg;
delete[] tporgo;
#if (REGLEV == 0)
#if USE_CUDA_BSSN
if (bssn_cuda_should_flush_before_regrid(GH, GH->mylev, Symmetry, BH_num, Porg0))
bssn_cuda_flush_level_before_regrid(GH->PatL[GH->mylev],
SynchList_cor, OldStateList,
StateList, SynchList_pre,
myrank);
#endif
if (GH->Regrid_Onelevel(GH->mylev, Symmetry, BH_num, Porgbr, Porg0,
SynchList_cor, OldStateList, StateList, SynchList_pre,
fgt(PhysTime - dT_lev, StartTime, dT_lev / 2), ErrorMonitor))
@@ -3432,6 +3596,13 @@ void bssn_class::ParallelStep()
if (lev + 1 >= GH->movls)
{
// GH->Regrid_Onelevel_aux(lev,Symmetry,BH_num,Porgbr,Porg0,
#if USE_CUDA_BSSN
if (bssn_cuda_should_flush_before_regrid(GH, lev + 1, Symmetry, BH_num, Porg0))
bssn_cuda_flush_level_before_regrid(GH->PatL[lev + 1],
SynchList_cor, OldStateList,
StateList, SynchList_pre,
myrank);
#endif
if (GH->Regrid_Onelevel(lev + 1, Symmetry, BH_num, Porgbr, Porg0,
SynchList_cor, OldStateList, StateList, SynchList_pre,
fgt(PhysTime - dT_levp1, StartTime, dT_levp1 / 2), ErrorMonitor))
@@ -3451,6 +3622,13 @@ void bssn_class::ParallelStep()
// for this level
if (YN == 1)
{
#if USE_CUDA_BSSN
if (bssn_cuda_should_flush_before_regrid(GH, lev, Symmetry, BH_num, Porg0))
bssn_cuda_flush_level_before_regrid(GH->PatL[lev],
SynchList_cor, OldStateList,
StateList, SynchList_pre,
myrank);
#endif
if (GH->Regrid_Onelevel(lev, Symmetry, BH_num, Porgbr, Porg0,
SynchList_cor, OldStateList, StateList, SynchList_pre,
fgt(PhysTime - dT_lev, StartTime, dT_lev / 2), ErrorMonitor))
@@ -3474,6 +3652,13 @@ void bssn_class::ParallelStep()
if (YN == 1)
{
// GH->Regrid_Onelevel_aux(lev-2,Symmetry,BH_num,Porgbr,Porg0,
#if USE_CUDA_BSSN
if (bssn_cuda_should_flush_before_regrid(GH, lev - 1, Symmetry, BH_num, Porg0))
bssn_cuda_flush_level_before_regrid(GH->PatL[lev - 1],
SynchList_cor, OldStateList,
StateList, SynchList_pre,
myrank);
#endif
if (GH->Regrid_Onelevel(lev - 1, Symmetry, BH_num, Porgbr, Porg0,
SynchList_cor, OldStateList, StateList, SynchList_pre,
fgt(PhysTime - dT_lev, StartTime, dT_levm1 / 2), ErrorMonitor))
@@ -3494,6 +3679,13 @@ void bssn_class::ParallelStep()
if (i % 4 == 3)
{
// GH->Regrid_Onelevel_aux(lev-2,Symmetry,BH_num,Porgbr,Porg0,
#if USE_CUDA_BSSN
if (bssn_cuda_should_flush_before_regrid(GH, lev - 1, Symmetry, BH_num, Porg0))
bssn_cuda_flush_level_before_regrid(GH->PatL[lev - 1],
SynchList_cor, OldStateList,
StateList, SynchList_pre,
myrank);
#endif
if (GH->Regrid_Onelevel(lev - 1, Symmetry, BH_num, Porgbr, Porg0,
SynchList_cor, OldStateList, StateList, SynchList_pre,
fgt(PhysTime - dT_lev, StartTime, dT_levm1 / 2), ErrorMonitor))

View File

@@ -6404,6 +6404,45 @@ static void download_resident_state(void *block_tag, int *ex, double **state_hos
}
}
static bool download_resident_state_if_present(void *block_tag, int *ex, double **state_host_out)
{
auto it = g_step_ctx.find(block_tag);
if (it == g_step_ctx.end()) return false;
StepContext &ctx = it->second;
const int bank = find_resident_bank(ctx, state_host_out);
if (bank < 0 || !ctx.resident_valid[bank])
return false;
const size_t all = (size_t)ex[0] * ex[1] * ex[2];
const size_t bytes = all * sizeof(double);
mark_resident_current_bank(ctx, bank);
if (resident_host_subset_clean(ctx, bank, BSSN_STATE_COUNT, nullptr))
return true;
static int direct_download = -1;
if (direct_download < 0) {
const char *env = getenv("AMSS_CUDA_DIRECT_STATE_DOWNLOAD");
direct_download = env ? ((atoi(env) != 0) ? 1 : 0) : 1;
}
if (direct_download) {
for (int i = 0; i < BSSN_STATE_COUNT; ++i) {
CUDA_CHECK(cudaMemcpyAsync(state_host_out[i], ctx.d_resident[bank][i],
bytes, cudaMemcpyDeviceToHost));
}
CUDA_CHECK(cudaDeviceSynchronize());
} else {
CUDA_CHECK(cudaMemcpy(g_buf.h_stage, ctx.d_resident_mem[bank],
(size_t)BSSN_STATE_COUNT * bytes,
cudaMemcpyDeviceToHost));
for (int i = 0; i < BSSN_STATE_COUNT; ++i) {
std::memcpy(state_host_out[i], g_buf.h_stage + (size_t)i * all, bytes);
}
}
set_resident_host_clean(ctx, bank, true);
return true;
}
static void copy_state_subset(void *block_tag,
int *ex,
int subset_count,
@@ -7056,6 +7095,18 @@ int bssn_cuda_download_resident_state(void *block_tag,
return 0;
}
// C-linkage entry point for the conditional resident-state download.
//
// Initialises GPU dispatch and selects this process's device, then - if all
// three pointers are non-null - forwards to
// download_resident_state_if_present().
//
// Returns 1 when any argument is null, 0 otherwise. Whether a resident bank
// was actually found and copied is not reported: "nothing resident" counts
// as success.
extern "C"
int bssn_cuda_download_resident_state_if_present(void *block_tag,
                                                 int *ex,
                                                 double **state_host_out)
{
  init_gpu_dispatch();
  CUDA_CHECK(cudaSetDevice(g_dispatch.my_device));

  const bool args_ok = (block_tag != nullptr) && (ex != nullptr) && (state_host_out != nullptr);
  if (args_ok) {
    // Result deliberately dropped: an absent bank is not an error here.
    (void)download_resident_state_if_present(block_tag, ex, state_host_out);
    return 0;
  }
  return 1;
}
extern "C"
int bssn_cuda_download_constraint_outputs(int *ex,
double **constraint_host_out)

View File

@@ -73,6 +73,10 @@ int bssn_cuda_download_resident_state(void *block_tag,
int *ex,
double **state_host_out);
/* Conditional variant of the resident-state download: copies the device
 * bank matching state_host_out back to the host when one is present.
 * Returns 1 if any pointer argument is null, 0 otherwise; a return of 0
 * does not tell whether anything was actually copied. */
int bssn_cuda_download_resident_state_if_present(void *block_tag,
int *ex,
double **state_host_out);
int bssn_cuda_download_constraint_outputs(int *ex,
double **constraint_host_out);