Stabilize EScalar CUDA fallback path

This commit is contained in:
2026-05-03 16:05:47 +08:00
parent 4430d04ee7
commit e4c10eca0f
5 changed files with 1542 additions and 127 deletions

View File

@@ -740,6 +740,38 @@ void bssn_cuda_download_level_state_if_present(MyList<Patch> *PatL, MyList<var>
}
}
// For every block of `PatL` owned by `myrank`, hands the two fields named by
// the trailing entries of `vars` to
// bssn_cuda_escalar_download_fields_if_present.
//
// PatL   : patch list; each patch's blocks are walked from `blb` up to and
//          including `ble`.
// vars   : variable list. The first BSSN_CUDA_STATE_COUNT entries are skipped;
//          exactly two entries must remain after them, otherwise the function
//          returns without doing anything.
// myrank : only blocks whose `rank` equals this value and whose `fgfs` table
//          is non-null are processed.
void bssn_cuda_download_level_scalar_tail_if_present(MyList<Patch> *PatL,
                                                     MyList<var> *vars,
                                                     int myrank)
{
    // Step over the leading state entries; stop early if the list is short.
    MyList<var> *first_tail = vars;
    int skipped = 0;
    while (skipped < BSSN_CUDA_STATE_COUNT && first_tail)
    {
        first_tail = first_tail->next;
        ++skipped;
    }
    if (!first_tail)
        return;

    // The remainder must consist of exactly two entries.
    MyList<var> *second_tail = first_tail->next;
    if (!second_tail || second_tail->next)
        return;

    for (MyList<Patch> *patch_node = PatL; patch_node; patch_node = patch_node->next)
    {
        for (MyList<Block> *block_node = patch_node->data->blb;
             block_node;
             block_node = block_node->next)
        {
            Block *blk = block_node->data;
            const bool owned_with_fields = (myrank == blk->rank) && blk->fgfs;
            if (owned_with_fields)
            {
                bssn_cuda_escalar_download_fields_if_present(
                    blk, blk->shape,
                    blk->fgfs[first_tail->data->sgfn],
                    blk->fgfs[second_tail->data->sgfn]);
            }
            // `ble` marks the last block belonging to this patch.
            if (block_node == patch_node->data->ble)
                break;
        }
    }
}
void bssn_cuda_release_level_state(MyList<Patch> *PatL, int myrank)
{
MyList<Patch> *Pp = PatL;
@@ -770,9 +802,30 @@ void bssn_cuda_flush_level_before_regrid(MyList<Patch> *PatL,
bssn_cuda_download_level_state_if_present(PatL, oldL, myrank);
bssn_cuda_download_level_state_if_present(PatL, stateL, myrank);
bssn_cuda_download_level_state_if_present(PatL, preL, myrank);
bssn_cuda_download_level_scalar_tail_if_present(PatL, corL, myrank);
bssn_cuda_download_level_scalar_tail_if_present(PatL, oldL, myrank);
bssn_cuda_download_level_scalar_tail_if_present(PatL, stateL, myrank);
bssn_cuda_download_level_scalar_tail_if_present(PatL, preL, myrank);
bssn_cuda_release_level_state(PatL, myrank);
}
// Calls bssn_cuda_flush_level_before_regrid once for each level of `GH`,
// in increasing level order.
//
// GH                    : grid hierarchy; a null pointer makes this a no-op.
// corL/oldL/stateL/preL : variable lists forwarded unchanged to the per-level
//                         flush.
// myrank                : forwarded unchanged to the per-level flush.
void bssn_cuda_flush_all_levels_before_regrid(cgh *GH,
                                              MyList<var> *corL,
                                              MyList<var> *oldL,
                                              MyList<var> *stateL,
                                              MyList<var> *preL,
                                              int myrank)
{
    if (GH)
    {
        int level = 0;
        while (level < GH->levels)
        {
            bssn_cuda_flush_level_before_regrid(GH->PatL[level],
                                                corL, oldL, stateL, preL,
                                                myrank);
            ++level;
        }
    }
}
#if USE_CUDA_Z4C && (ABEtype == 2)
bool fill_z4c_cuda_views_for_regrid(Block *cg, MyList<var> *vars,
double **host_views)
@@ -3234,12 +3287,27 @@ void bssn_class::Evolve(int Steps)
#if (REGLEV == 1)
STEP_TIMER_DECL(timer_regrid);
#if USE_CUDA_BSSN && (ABEtype != 2)
for (int il = 0; il < GH->levels; il++)
if (bssn_cuda_should_flush_before_regrid(GH, il, Symmetry, BH_num, Porg0))
bssn_cuda_flush_level_before_regrid(GH->PatL[il],
SynchList_cor, OldStateList,
StateList, SynchList_pre,
myrank);
if (amss_escalar_mixed_gpu_rp_enabled())
{
bool any_cuda_regrid_flush = false;
for (int il = 0; il < GH->levels; il++)
if (bssn_cuda_should_flush_before_regrid(GH, il, Symmetry, BH_num, Porg0))
any_cuda_regrid_flush = true;
if (any_cuda_regrid_flush)
bssn_cuda_flush_all_levels_before_regrid(GH,
SynchList_cor, OldStateList,
StateList, SynchList_pre,
myrank);
}
else
{
for (int il = 0; il < GH->levels; il++)
if (bssn_cuda_should_flush_before_regrid(GH, il, Symmetry, BH_num, Porg0))
bssn_cuda_flush_level_before_regrid(GH->PatL[il],
SynchList_cor, OldStateList,
StateList, SynchList_pre,
myrank);
}
#endif
#if USE_CUDA_Z4C && USE_CUDA_BSSN && (ABEtype == 2)
for (int il = 0; il < GH->levels; il++)
@@ -3491,10 +3559,18 @@ void bssn_class::RecursiveStep(int lev)
STEP_TIMER_DECL(timer_regrid_onelevel);
#if USE_CUDA_BSSN
if (bssn_cuda_should_flush_before_regrid(GH, lev, Symmetry, BH_num, Porg0))
bssn_cuda_flush_level_before_regrid(GH->PatL[lev],
SynchList_cor, OldStateList,
StateList, SynchList_pre,
myrank);
{
if (amss_escalar_mixed_gpu_rp_enabled())
bssn_cuda_flush_all_levels_before_regrid(GH,
SynchList_cor, OldStateList,
StateList, SynchList_pre,
myrank);
else
bssn_cuda_flush_level_before_regrid(GH->PatL[lev],
SynchList_cor, OldStateList,
StateList, SynchList_pre,
myrank);
}
#endif
if (GH->Regrid_Onelevel(lev, Symmetry, BH_num, Porgbr, Porg0,
SynchList_cor, OldStateList, StateList, SynchList_pre,
@@ -3684,10 +3760,10 @@ void bssn_class::ParallelStep()
#if (REGLEV == 0)
#if USE_CUDA_BSSN
if (bssn_cuda_should_flush_before_regrid(GH, GH->mylev, Symmetry, BH_num, Porg0))
bssn_cuda_flush_level_before_regrid(GH->PatL[GH->mylev],
SynchList_cor, OldStateList,
StateList, SynchList_pre,
myrank);
bssn_cuda_flush_all_levels_before_regrid(GH,
SynchList_cor, OldStateList,
StateList, SynchList_pre,
myrank);
#endif
if (GH->Regrid_Onelevel(GH->mylev, Symmetry, BH_num, Porgbr, Porg0,
SynchList_cor, OldStateList, StateList, SynchList_pre,
@@ -3817,6 +3893,20 @@ void bssn_class::ParallelStep()
// Parallel::Dump_Data(GH->PatL[lev],StateList,0,PhysTime,dT_lev);
#if USE_CUDA_BSSN && (ABEtype != 2)
const bool cuda_recursive_regrid_needs_full_flush =
bssn_cuda_should_flush_before_regrid(GH, lev, Symmetry, BH_num, Porg0) ||
(lev < GH->levels - 1 &&
bssn_cuda_should_flush_before_regrid(GH, lev + 1, Symmetry, BH_num, Porg0)) ||
(lev - 1 >= GH->movls &&
bssn_cuda_should_flush_before_regrid(GH, lev - 1, Symmetry, BH_num, Porg0));
if (cuda_recursive_regrid_needs_full_flush)
bssn_cuda_flush_all_levels_before_regrid(GH,
SynchList_cor, OldStateList,
StateList, SynchList_pre,
myrank);
#endif
{
MPI_Status status;
// receive
@@ -3860,13 +3950,6 @@ void bssn_class::ParallelStep()
if (lev + 1 >= GH->movls)
{
// GH->Regrid_Onelevel_aux(lev,Symmetry,BH_num,Porgbr,Porg0,
#if USE_CUDA_BSSN
if (bssn_cuda_should_flush_before_regrid(GH, lev + 1, Symmetry, BH_num, Porg0))
bssn_cuda_flush_level_before_regrid(GH->PatL[lev + 1],
SynchList_cor, OldStateList,
StateList, SynchList_pre,
myrank);
#endif
if (GH->Regrid_Onelevel(lev + 1, Symmetry, BH_num, Porgbr, Porg0,
SynchList_cor, OldStateList, StateList, SynchList_pre,
fgt(PhysTime - dT_levp1, StartTime, dT_levp1 / 2), ErrorMonitor))
@@ -3886,13 +3969,6 @@ void bssn_class::ParallelStep()
// for this level
if (YN == 1)
{
#if USE_CUDA_BSSN
if (bssn_cuda_should_flush_before_regrid(GH, lev, Symmetry, BH_num, Porg0))
bssn_cuda_flush_level_before_regrid(GH->PatL[lev],
SynchList_cor, OldStateList,
StateList, SynchList_pre,
myrank);
#endif
if (GH->Regrid_Onelevel(lev, Symmetry, BH_num, Porgbr, Porg0,
SynchList_cor, OldStateList, StateList, SynchList_pre,
fgt(PhysTime - dT_lev, StartTime, dT_lev / 2), ErrorMonitor))
@@ -3916,13 +3992,6 @@ void bssn_class::ParallelStep()
if (YN == 1)
{
// GH->Regrid_Onelevel_aux(lev-2,Symmetry,BH_num,Porgbr,Porg0,
#if USE_CUDA_BSSN
if (bssn_cuda_should_flush_before_regrid(GH, lev - 1, Symmetry, BH_num, Porg0))
bssn_cuda_flush_level_before_regrid(GH->PatL[lev - 1],
SynchList_cor, OldStateList,
StateList, SynchList_pre,
myrank);
#endif
if (GH->Regrid_Onelevel(lev - 1, Symmetry, BH_num, Porgbr, Porg0,
SynchList_cor, OldStateList, StateList, SynchList_pre,
fgt(PhysTime - dT_lev, StartTime, dT_levm1 / 2), ErrorMonitor))
@@ -3943,13 +4012,6 @@ void bssn_class::ParallelStep()
if (i % 4 == 3)
{
// GH->Regrid_Onelevel_aux(lev-2,Symmetry,BH_num,Porgbr,Porg0,
#if USE_CUDA_BSSN
if (bssn_cuda_should_flush_before_regrid(GH, lev - 1, Symmetry, BH_num, Porg0))
bssn_cuda_flush_level_before_regrid(GH->PatL[lev - 1],
SynchList_cor, OldStateList,
StateList, SynchList_pre,
myrank);
#endif
if (GH->Regrid_Onelevel(lev - 1, Symmetry, BH_num, Porgbr, Porg0,
SynchList_cor, OldStateList, StateList, SynchList_pre,
fgt(PhysTime - dT_lev, StartTime, dT_levm1 / 2), ErrorMonitor))