Stabilize EScalar CUDA fallback path
This commit is contained in:
@@ -740,6 +740,38 @@ void bssn_cuda_download_level_state_if_present(MyList<Patch> *PatL, MyList<var>
|
||||
}
|
||||
}
|
||||
|
||||
// Walks every block of every patch in PatL and, for blocks owned by `myrank`
// that have a non-null `fgfs` table, calls
// bssn_cuda_escalar_download_fields_if_present() on the two grid functions
// named by the last two entries of `vars`.
//
// The call is a no-op unless `vars` holds exactly two entries after its first
// BSSN_CUDA_STATE_COUNT entries.
// NOTE(review): presumably those two trailing entries are the EScalar fields
// appended after the BSSN state variables - confirm against the callers.
//
//   PatL   - head of the patch list for one level (may be null).
//   vars   - variable list whose two-entry tail selects the grid functions.
//   myrank - rank compared against each block's `rank` member.
void bssn_cuda_download_level_scalar_tail_if_present(MyList<Patch> *PatL,
                                                     MyList<var> *vars,
                                                     int myrank)
{
  // Step past the leading BSSN_CUDA_STATE_COUNT entries (or stop early if the
  // list is shorter than that).
  MyList<var> *first_extra = vars;
  int skipped = 0;
  while (first_extra && skipped < BSSN_CUDA_STATE_COUNT)
  {
    first_extra = first_extra->next;
    ++skipped;
  }

  // Require exactly two remaining entries: no fewer, no more.
  if (!first_extra)
    return;
  MyList<var> *second_extra = first_extra->next;
  if (!second_extra || second_extra->next)
    return;

  for (MyList<Patch> *patch_it = PatL; patch_it; patch_it = patch_it->next)
  {
    for (MyList<Block> *block_it = patch_it->data->blb; block_it; block_it = block_it->next)
    {
      Block *blk = block_it->data;
      if (blk->rank == myrank && blk->fgfs)
      {
        bssn_cuda_escalar_download_fields_if_present(
            blk, blk->shape,
            blk->fgfs[first_extra->data->sgfn],
            blk->fgfs[second_extra->data->sgfn]);
      }

      // `ble` marks the last block belonging to this patch; do not walk past it.
      if (block_it == patch_it->data->ble)
        break;
    }
  }
}
|
||||
|
||||
void bssn_cuda_release_level_state(MyList<Patch> *PatL, int myrank)
|
||||
{
|
||||
MyList<Patch> *Pp = PatL;
|
||||
@@ -770,9 +802,30 @@ void bssn_cuda_flush_level_before_regrid(MyList<Patch> *PatL,
|
||||
bssn_cuda_download_level_state_if_present(PatL, oldL, myrank);
|
||||
bssn_cuda_download_level_state_if_present(PatL, stateL, myrank);
|
||||
bssn_cuda_download_level_state_if_present(PatL, preL, myrank);
|
||||
bssn_cuda_download_level_scalar_tail_if_present(PatL, corL, myrank);
|
||||
bssn_cuda_download_level_scalar_tail_if_present(PatL, oldL, myrank);
|
||||
bssn_cuda_download_level_scalar_tail_if_present(PatL, stateL, myrank);
|
||||
bssn_cuda_download_level_scalar_tail_if_present(PatL, preL, myrank);
|
||||
bssn_cuda_release_level_state(PatL, myrank);
|
||||
}
|
||||
|
||||
// Calls bssn_cuda_flush_level_before_regrid() once for each level index in
// [0, GH->levels), passing that level's patch list together with the four
// variable lists and the rank unchanged. Does nothing when GH is null.
//
//   GH      - grid hierarchy supplying `levels` and the per-level `PatL` array.
//   corL, oldL, stateL, preL - variable lists forwarded to the per-level flush.
//   myrank  - rank forwarded to the per-level flush.
void bssn_cuda_flush_all_levels_before_regrid(cgh *GH,
                                              MyList<var> *corL,
                                              MyList<var> *oldL,
                                              MyList<var> *stateL,
                                              MyList<var> *preL,
                                              int myrank)
{
  if (GH)
  {
    int level = 0;
    while (level < GH->levels)
    {
      bssn_cuda_flush_level_before_regrid(GH->PatL[level],
                                          corL, oldL, stateL, preL,
                                          myrank);
      ++level;
    }
  }
}
|
||||
|
||||
#if USE_CUDA_Z4C && (ABEtype == 2)
|
||||
bool fill_z4c_cuda_views_for_regrid(Block *cg, MyList<var> *vars,
|
||||
double **host_views)
|
||||
@@ -3234,12 +3287,27 @@ void bssn_class::Evolve(int Steps)
|
||||
#if (REGLEV == 1)
|
||||
STEP_TIMER_DECL(timer_regrid);
|
||||
#if USE_CUDA_BSSN && (ABEtype != 2)
|
||||
for (int il = 0; il < GH->levels; il++)
|
||||
if (bssn_cuda_should_flush_before_regrid(GH, il, Symmetry, BH_num, Porg0))
|
||||
bssn_cuda_flush_level_before_regrid(GH->PatL[il],
|
||||
SynchList_cor, OldStateList,
|
||||
StateList, SynchList_pre,
|
||||
myrank);
|
||||
if (amss_escalar_mixed_gpu_rp_enabled())
|
||||
{
|
||||
bool any_cuda_regrid_flush = false;
|
||||
for (int il = 0; il < GH->levels; il++)
|
||||
if (bssn_cuda_should_flush_before_regrid(GH, il, Symmetry, BH_num, Porg0))
|
||||
any_cuda_regrid_flush = true;
|
||||
if (any_cuda_regrid_flush)
|
||||
bssn_cuda_flush_all_levels_before_regrid(GH,
|
||||
SynchList_cor, OldStateList,
|
||||
StateList, SynchList_pre,
|
||||
myrank);
|
||||
}
|
||||
else
|
||||
{
|
||||
for (int il = 0; il < GH->levels; il++)
|
||||
if (bssn_cuda_should_flush_before_regrid(GH, il, Symmetry, BH_num, Porg0))
|
||||
bssn_cuda_flush_level_before_regrid(GH->PatL[il],
|
||||
SynchList_cor, OldStateList,
|
||||
StateList, SynchList_pre,
|
||||
myrank);
|
||||
}
|
||||
#endif
|
||||
#if USE_CUDA_Z4C && USE_CUDA_BSSN && (ABEtype == 2)
|
||||
for (int il = 0; il < GH->levels; il++)
|
||||
@@ -3491,10 +3559,18 @@ void bssn_class::RecursiveStep(int lev)
|
||||
STEP_TIMER_DECL(timer_regrid_onelevel);
|
||||
#if USE_CUDA_BSSN
|
||||
if (bssn_cuda_should_flush_before_regrid(GH, lev, Symmetry, BH_num, Porg0))
|
||||
bssn_cuda_flush_level_before_regrid(GH->PatL[lev],
|
||||
SynchList_cor, OldStateList,
|
||||
StateList, SynchList_pre,
|
||||
myrank);
|
||||
{
|
||||
if (amss_escalar_mixed_gpu_rp_enabled())
|
||||
bssn_cuda_flush_all_levels_before_regrid(GH,
|
||||
SynchList_cor, OldStateList,
|
||||
StateList, SynchList_pre,
|
||||
myrank);
|
||||
else
|
||||
bssn_cuda_flush_level_before_regrid(GH->PatL[lev],
|
||||
SynchList_cor, OldStateList,
|
||||
StateList, SynchList_pre,
|
||||
myrank);
|
||||
}
|
||||
#endif
|
||||
if (GH->Regrid_Onelevel(lev, Symmetry, BH_num, Porgbr, Porg0,
|
||||
SynchList_cor, OldStateList, StateList, SynchList_pre,
|
||||
@@ -3684,10 +3760,10 @@ void bssn_class::ParallelStep()
|
||||
#if (REGLEV == 0)
|
||||
#if USE_CUDA_BSSN
|
||||
if (bssn_cuda_should_flush_before_regrid(GH, GH->mylev, Symmetry, BH_num, Porg0))
|
||||
bssn_cuda_flush_level_before_regrid(GH->PatL[GH->mylev],
|
||||
SynchList_cor, OldStateList,
|
||||
StateList, SynchList_pre,
|
||||
myrank);
|
||||
bssn_cuda_flush_all_levels_before_regrid(GH,
|
||||
SynchList_cor, OldStateList,
|
||||
StateList, SynchList_pre,
|
||||
myrank);
|
||||
#endif
|
||||
if (GH->Regrid_Onelevel(GH->mylev, Symmetry, BH_num, Porgbr, Porg0,
|
||||
SynchList_cor, OldStateList, StateList, SynchList_pre,
|
||||
@@ -3817,6 +3893,20 @@ void bssn_class::ParallelStep()
|
||||
|
||||
// Parallel::Dump_Data(GH->PatL[lev],StateList,0,PhysTime,dT_lev);
|
||||
|
||||
#if USE_CUDA_BSSN && (ABEtype != 2)
|
||||
const bool cuda_recursive_regrid_needs_full_flush =
|
||||
bssn_cuda_should_flush_before_regrid(GH, lev, Symmetry, BH_num, Porg0) ||
|
||||
(lev < GH->levels - 1 &&
|
||||
bssn_cuda_should_flush_before_regrid(GH, lev + 1, Symmetry, BH_num, Porg0)) ||
|
||||
(lev - 1 >= GH->movls &&
|
||||
bssn_cuda_should_flush_before_regrid(GH, lev - 1, Symmetry, BH_num, Porg0));
|
||||
if (cuda_recursive_regrid_needs_full_flush)
|
||||
bssn_cuda_flush_all_levels_before_regrid(GH,
|
||||
SynchList_cor, OldStateList,
|
||||
StateList, SynchList_pre,
|
||||
myrank);
|
||||
#endif
|
||||
|
||||
{
|
||||
MPI_Status status;
|
||||
// receive
|
||||
@@ -3860,13 +3950,6 @@ void bssn_class::ParallelStep()
|
||||
if (lev + 1 >= GH->movls)
|
||||
{
|
||||
// GH->Regrid_Onelevel_aux(lev,Symmetry,BH_num,Porgbr,Porg0,
|
||||
#if USE_CUDA_BSSN
|
||||
if (bssn_cuda_should_flush_before_regrid(GH, lev + 1, Symmetry, BH_num, Porg0))
|
||||
bssn_cuda_flush_level_before_regrid(GH->PatL[lev + 1],
|
||||
SynchList_cor, OldStateList,
|
||||
StateList, SynchList_pre,
|
||||
myrank);
|
||||
#endif
|
||||
if (GH->Regrid_Onelevel(lev + 1, Symmetry, BH_num, Porgbr, Porg0,
|
||||
SynchList_cor, OldStateList, StateList, SynchList_pre,
|
||||
fgt(PhysTime - dT_levp1, StartTime, dT_levp1 / 2), ErrorMonitor))
|
||||
@@ -3886,13 +3969,6 @@ void bssn_class::ParallelStep()
|
||||
// for this level
|
||||
if (YN == 1)
|
||||
{
|
||||
#if USE_CUDA_BSSN
|
||||
if (bssn_cuda_should_flush_before_regrid(GH, lev, Symmetry, BH_num, Porg0))
|
||||
bssn_cuda_flush_level_before_regrid(GH->PatL[lev],
|
||||
SynchList_cor, OldStateList,
|
||||
StateList, SynchList_pre,
|
||||
myrank);
|
||||
#endif
|
||||
if (GH->Regrid_Onelevel(lev, Symmetry, BH_num, Porgbr, Porg0,
|
||||
SynchList_cor, OldStateList, StateList, SynchList_pre,
|
||||
fgt(PhysTime - dT_lev, StartTime, dT_lev / 2), ErrorMonitor))
|
||||
@@ -3916,13 +3992,6 @@ void bssn_class::ParallelStep()
|
||||
if (YN == 1)
|
||||
{
|
||||
// GH->Regrid_Onelevel_aux(lev-2,Symmetry,BH_num,Porgbr,Porg0,
|
||||
#if USE_CUDA_BSSN
|
||||
if (bssn_cuda_should_flush_before_regrid(GH, lev - 1, Symmetry, BH_num, Porg0))
|
||||
bssn_cuda_flush_level_before_regrid(GH->PatL[lev - 1],
|
||||
SynchList_cor, OldStateList,
|
||||
StateList, SynchList_pre,
|
||||
myrank);
|
||||
#endif
|
||||
if (GH->Regrid_Onelevel(lev - 1, Symmetry, BH_num, Porgbr, Porg0,
|
||||
SynchList_cor, OldStateList, StateList, SynchList_pre,
|
||||
fgt(PhysTime - dT_lev, StartTime, dT_levm1 / 2), ErrorMonitor))
|
||||
@@ -3943,13 +4012,6 @@ void bssn_class::ParallelStep()
|
||||
if (i % 4 == 3)
|
||||
{
|
||||
// GH->Regrid_Onelevel_aux(lev-2,Symmetry,BH_num,Porgbr,Porg0,
|
||||
#if USE_CUDA_BSSN
|
||||
if (bssn_cuda_should_flush_before_regrid(GH, lev - 1, Symmetry, BH_num, Porg0))
|
||||
bssn_cuda_flush_level_before_regrid(GH->PatL[lev - 1],
|
||||
SynchList_cor, OldStateList,
|
||||
StateList, SynchList_pre,
|
||||
myrank);
|
||||
#endif
|
||||
if (GH->Regrid_Onelevel(lev - 1, Symmetry, BH_num, Porgbr, Porg0,
|
||||
SynchList_cor, OldStateList, StateList, SynchList_pre,
|
||||
fgt(PhysTime - dT_lev, StartTime, dT_levm1 / 2), ErrorMonitor))
|
||||
|
||||
Reference in New Issue
Block a user