Optimize Z4C GPU runtime defaults

This commit is contained in:
2026-05-07 15:37:09 +08:00
parent 83afaf19ce
commit 96829d0441
4 changed files with 114 additions and 45 deletions

View File

@@ -135,12 +135,19 @@ void Z4c_class::Initialize()
{
CheckPoint->read_Black_Hole_position(BH_num_input, BH_num, Porg0, Pmom, Spin, Mass, Porgbr, Porg, Porg1, Porg_rhs);
}
else
{
PhysTime = StartTime;
Setup_Black_Hole_position();
}
}
else
{
PhysTime = StartTime;
Setup_Black_Hole_position();
}
sync_cache_pre = new Parallel::SyncCache[GH->levels];
sync_cache_cor = new Parallel::SyncCache[GH->levels];
sync_cache_rp_coarse = new Parallel::SyncCache[GH->levels];
sync_cache_rp_fine = new Parallel::SyncCache[GH->levels];
sync_cache_restrict = new Parallel::SyncCache[GH->levels];
sync_cache_outbd = new Parallel::SyncCache[GH->levels];
}
//================================================================================================
@@ -452,6 +459,17 @@ bool z4c_cuda_compute_porg_rhs_resident(cgh *GH,
return true;
}
// Reports whether the environment variable AMSS_Z4C_CUDA_RESIDENT is set to a
// value that atoi() parses as non-zero.
//
// The environment is consulted only on the first call; the answer is stored in
// a function-local static and every later call returns that stored answer, so
// changing the variable after the first call has no effect.
//
// Returns true when the variable was present and parsed as non-zero, false when
// it was unset or parsed as zero (atoi() also yields zero for non-numeric text).
//
// NOTE(review): the cache is a plain static int with no synchronization in this
// function — confirm callers only reach it from a single thread.
bool z4c_cuda_resident_step_enabled()
{
  // Tri-state cache: -1 = environment not queried yet, 0 = off, 1 = on.
  static int cached_flag = -1;

  // Fast path: a previous call already resolved the setting.
  if (cached_flag >= 0)
    return cached_flag == 1;

  const char *raw_value = getenv("AMSS_Z4C_CUDA_RESIDENT");
  if (raw_value == 0 || atoi(raw_value) == 0)
    cached_flag = 0; // unset, or parsed as zero
  else
    cached_flag = 1;

  return cached_flag == 1;
}
} // namespace
#endif
@@ -498,7 +516,7 @@ void Z4c_class::Step(int lev, int YN)
#elif (MRBD == 1)
apply_bam_bc = 1;
#endif
int keep_resident_state = 1;
int keep_resident_state = z4c_cuda_resident_step_enabled() ? 1 : 0;
int apply_enforce_ga = 0;
#if (AGM == 0)
apply_enforce_ga = 1;
@@ -593,7 +611,7 @@ void Z4c_class::Step(int lev, int YN)
#elif (MRBD == 1)
apply_bam_bc = 1;
#endif
int keep_resident_state = 1;
int keep_resident_state = z4c_cuda_resident_step_enabled() ? 1 : 0;
int apply_enforce_ga = 0;
#if (AGM == 0)
apply_enforce_ga = 1;
@@ -639,14 +657,21 @@ void Z4c_class::Step(int lev, int YN)
if (BH_num > 0 && lev == GH->levels - 1)
{
if (!z4c_cuda_compute_porg_rhs_resident(GH, lev, myrank, BH_num,
Porg, Porg1,
Sfx, Sfy, Sfz, Symmetry))
if (z4c_cuda_resident_step_enabled())
{
if (myrank == 0 && ErrorMonitor->outfile)
ErrorMonitor->outfile << "CUDA Z4C failed to interpolate black-hole shift at t = "
<< PhysTime << endl;
MPI_Abort(MPI_COMM_WORLD, 1);
if (!z4c_cuda_compute_porg_rhs_resident(GH, lev, myrank, BH_num,
Porg, Porg1,
Sfx, Sfy, Sfz, Symmetry))
{
if (myrank == 0 && ErrorMonitor->outfile)
ErrorMonitor->outfile << "CUDA Z4C failed to interpolate black-hole shift at t = "
<< PhysTime << endl;
MPI_Abort(MPI_COMM_WORLD, 1);
}
}
else
{
compute_Porg_rhs(Porg, Porg1, Sfx, Sfy, Sfz, lev);
}
for (int ithBH = 0; ithBH < BH_num; ithBH++)
{