Optimize Z4C GPU runtime defaults

This commit is contained in:
2026-05-07 15:37:09 +08:00
parent 83afaf19ce
commit 96829d0441
4 changed files with 114 additions and 45 deletions

View File

@@ -70,6 +70,17 @@ int amss_analysis_map_every()
return every;
}
// Reports whether constraint output is enabled for the given step count.
//
// The output interval is read from the AMSS_CONSTRAINT_OUT_EVERY environment
// variable on the first call and cached in a function-local static, so later
// changes to the environment have no effect. An unset variable, or a value
// that atoi() parses as zero or negative, selects an interval of 1.
//
// Returns true for every step when the interval is 1; otherwise returns true
// only for positive steps that are exact multiples of the interval.
bool amss_constraint_out_enabled_for_step(int step)
{
  static int interval = -1; // -1 means "environment not consulted yet"
  if (interval < 0)
  {
    const char *raw = getenv("AMSS_CONSTRAINT_OUT_EVERY");
    const int parsed = raw ? atoi(raw) : 0;
    interval = (parsed > 0) ? parsed : 1;
  }

  if (interval <= 1)
    return true;
  if (step <= 0)
    return false;
  return step % interval == 0;
}
bool amss_rp_timing_enabled()
{
static int enabled = -1;
@@ -3143,12 +3154,15 @@ void bssn_class::Evolve(int Steps)
// misc::tillherecheck("before Constraint_Out");
const double constraint_t0 = evolve_timing ? MPI_Wtime() : 0.0;
STEP_TIMER_DECL(timer_constraint_out);
Constraint_Out(); // this will affect the Dump_List
STEP_TIMER_ADD(TB_CONSTRAINT_OUT, timer_constraint_out);
if (evolve_timing)
amss_evolve_timing_add_constraint(MPI_Wtime() - constraint_t0);
if (amss_constraint_out_enabled_for_step(ncount))
{
const double constraint_t0 = evolve_timing ? MPI_Wtime() : 0.0;
STEP_TIMER_DECL(timer_constraint_out);
Constraint_Out(); // this will affect the Dump_List
STEP_TIMER_ADD(TB_CONSTRAINT_OUT, timer_constraint_out);
if (evolve_timing)
amss_evolve_timing_add_constraint(MPI_Wtime() - constraint_t0);
}
LastDump += dT_mon;
Last2dDump += dT_mon;
@@ -3220,7 +3234,7 @@ void bssn_class::Evolve(int Steps)
GH->Regrid(Symmetry, BH_num, Porgbr, Porg0,
SynchList_cor, OldStateList, StateList, SynchList_pre,
fgt(PhysTime - dT_mon, StartTime, dT_mon / 2), ErrorMonitor);
#if (ABEtype != 1 && ABEtype != 2)
#if (ABEtype != 1)
for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); sync_cache_rp_coarse[il].invalidate(); sync_cache_rp_fine[il].invalidate(); sync_cache_restrict[il].invalidate(); sync_cache_outbd[il].invalidate(); }
#endif
STEP_TIMER_ADD(TB_REGRID, timer_regrid);
@@ -3491,7 +3505,7 @@ void bssn_class::RecursiveStep(int lev)
{
if (ConstraintRefreshLevels)
ConstraintRefreshLevels[lev] = 1;
#if (ABEtype != 1 && ABEtype != 2)
#if (ABEtype != 1)
for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); sync_cache_rp_coarse[il].invalidate(); sync_cache_rp_fine[il].invalidate(); sync_cache_restrict[il].invalidate(); sync_cache_outbd[il].invalidate(); }
#endif
}
@@ -3684,7 +3698,7 @@ void bssn_class::ParallelStep()
SynchList_cor, OldStateList, StateList, SynchList_pre,
fgt(PhysTime - dT_lev, StartTime, dT_lev / 2), ErrorMonitor))
{
#if (ABEtype != 1 && ABEtype != 2)
#if (ABEtype != 1)
for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); sync_cache_rp_coarse[il].invalidate(); sync_cache_rp_fine[il].invalidate(); sync_cache_restrict[il].invalidate(); sync_cache_outbd[il].invalidate(); }
#endif
}
@@ -3862,7 +3876,7 @@ void bssn_class::ParallelStep()
SynchList_cor, OldStateList, StateList, SynchList_pre,
fgt(PhysTime - dT_levp1, StartTime, dT_levp1 / 2), ErrorMonitor))
{
#if (ABEtype != 1 && ABEtype != 2)
#if (ABEtype != 1)
for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); sync_cache_rp_coarse[il].invalidate(); sync_cache_rp_fine[il].invalidate(); sync_cache_restrict[il].invalidate(); sync_cache_outbd[il].invalidate(); }
#endif
}
@@ -3888,7 +3902,7 @@ void bssn_class::ParallelStep()
SynchList_cor, OldStateList, StateList, SynchList_pre,
fgt(PhysTime - dT_lev, StartTime, dT_lev / 2), ErrorMonitor))
{
#if (ABEtype != 1 && ABEtype != 2)
#if (ABEtype != 1)
for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); sync_cache_rp_coarse[il].invalidate(); sync_cache_rp_fine[il].invalidate(); sync_cache_restrict[il].invalidate(); sync_cache_outbd[il].invalidate(); }
#endif
}
@@ -3918,7 +3932,7 @@ void bssn_class::ParallelStep()
SynchList_cor, OldStateList, StateList, SynchList_pre,
fgt(PhysTime - dT_lev, StartTime, dT_levm1 / 2), ErrorMonitor))
{
#if (ABEtype != 1 && ABEtype != 2)
#if (ABEtype != 1)
for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); sync_cache_rp_coarse[il].invalidate(); sync_cache_rp_fine[il].invalidate(); sync_cache_restrict[il].invalidate(); sync_cache_outbd[il].invalidate(); }
#endif
}
@@ -3945,7 +3959,7 @@ void bssn_class::ParallelStep()
SynchList_cor, OldStateList, StateList, SynchList_pre,
fgt(PhysTime - dT_lev, StartTime, dT_levm1 / 2), ErrorMonitor))
{
#if (ABEtype != 1 && ABEtype != 2)
#if (ABEtype != 1)
for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); sync_cache_rp_coarse[il].invalidate(); sync_cache_rp_fine[il].invalidate(); sync_cache_restrict[il].invalidate(); sync_cache_outbd[il].invalidate(); }
#endif
}
@@ -4588,7 +4602,7 @@ void bssn_class::Step(int lev, int YN)
}
}
#endif
#if (ABEtype != 1 && ABEtype != 2)
#if (ABEtype != 1)
Parallel::Sync_finish(sync_cache_pre[lev], async_pre, SynchList_pre, Symmetry);
#endif
STEP_TIMER_ADD(TB_PREDICTOR_SYNC, timer_predictor_sync);
@@ -5035,7 +5049,7 @@ void bssn_class::Step(int lev, int YN)
}
}
#endif
#if (ABEtype != 1 && ABEtype != 2)
#if (ABEtype != 1)
Parallel::Sync_finish(sync_cache_cor[lev], async_cor, SynchList_cor, Symmetry);
#endif
STEP_TIMER_ADD(TB_CORRECTOR_SYNC, timer_corrector_sync);
@@ -5554,7 +5568,7 @@ void bssn_class::Step(int lev, int YN)
}
}
#endif
#if (ABEtype != 1 && ABEtype != 2)
#if (ABEtype != 1)
Parallel::Sync_finish(sync_cache_pre[lev], async_pre, SynchList_pre, Symmetry);
#endif
@@ -5905,7 +5919,7 @@ void bssn_class::Step(int lev, int YN)
}
}
#endif
#if (ABEtype != 1 && ABEtype != 2)
#if (ABEtype != 1)
Parallel::Sync_finish(sync_cache_cor[lev], async_cor, SynchList_cor, Symmetry);
#endif