Optimize Z4C GPU runtime defaults
This commit is contained in:
@@ -140,6 +140,13 @@ void Z4c_class::Initialize()
|
|||||||
PhysTime = StartTime;
|
PhysTime = StartTime;
|
||||||
Setup_Black_Hole_position();
|
Setup_Black_Hole_position();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
sync_cache_pre = new Parallel::SyncCache[GH->levels];
|
||||||
|
sync_cache_cor = new Parallel::SyncCache[GH->levels];
|
||||||
|
sync_cache_rp_coarse = new Parallel::SyncCache[GH->levels];
|
||||||
|
sync_cache_rp_fine = new Parallel::SyncCache[GH->levels];
|
||||||
|
sync_cache_restrict = new Parallel::SyncCache[GH->levels];
|
||||||
|
sync_cache_outbd = new Parallel::SyncCache[GH->levels];
|
||||||
}
|
}
|
||||||
|
|
||||||
//================================================================================================
|
//================================================================================================
|
||||||
@@ -452,6 +459,17 @@ bool z4c_cuda_compute_porg_rhs_resident(cgh *GH,
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool z4c_cuda_resident_step_enabled()
|
||||||
|
{
|
||||||
|
static int enabled = -1;
|
||||||
|
if (enabled < 0)
|
||||||
|
{
|
||||||
|
const char *env = getenv("AMSS_Z4C_CUDA_RESIDENT");
|
||||||
|
enabled = (env && atoi(env) != 0) ? 1 : 0;
|
||||||
|
}
|
||||||
|
return enabled != 0;
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace
|
} // namespace
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@@ -498,7 +516,7 @@ void Z4c_class::Step(int lev, int YN)
|
|||||||
#elif (MRBD == 1)
|
#elif (MRBD == 1)
|
||||||
apply_bam_bc = 1;
|
apply_bam_bc = 1;
|
||||||
#endif
|
#endif
|
||||||
int keep_resident_state = 1;
|
int keep_resident_state = z4c_cuda_resident_step_enabled() ? 1 : 0;
|
||||||
int apply_enforce_ga = 0;
|
int apply_enforce_ga = 0;
|
||||||
#if (AGM == 0)
|
#if (AGM == 0)
|
||||||
apply_enforce_ga = 1;
|
apply_enforce_ga = 1;
|
||||||
@@ -593,7 +611,7 @@ void Z4c_class::Step(int lev, int YN)
|
|||||||
#elif (MRBD == 1)
|
#elif (MRBD == 1)
|
||||||
apply_bam_bc = 1;
|
apply_bam_bc = 1;
|
||||||
#endif
|
#endif
|
||||||
int keep_resident_state = 1;
|
int keep_resident_state = z4c_cuda_resident_step_enabled() ? 1 : 0;
|
||||||
int apply_enforce_ga = 0;
|
int apply_enforce_ga = 0;
|
||||||
#if (AGM == 0)
|
#if (AGM == 0)
|
||||||
apply_enforce_ga = 1;
|
apply_enforce_ga = 1;
|
||||||
@@ -639,14 +657,21 @@ void Z4c_class::Step(int lev, int YN)
|
|||||||
|
|
||||||
if (BH_num > 0 && lev == GH->levels - 1)
|
if (BH_num > 0 && lev == GH->levels - 1)
|
||||||
{
|
{
|
||||||
if (!z4c_cuda_compute_porg_rhs_resident(GH, lev, myrank, BH_num,
|
if (z4c_cuda_resident_step_enabled())
|
||||||
Porg, Porg1,
|
|
||||||
Sfx, Sfy, Sfz, Symmetry))
|
|
||||||
{
|
{
|
||||||
if (myrank == 0 && ErrorMonitor->outfile)
|
if (!z4c_cuda_compute_porg_rhs_resident(GH, lev, myrank, BH_num,
|
||||||
ErrorMonitor->outfile << "CUDA Z4C failed to interpolate black-hole shift at t = "
|
Porg, Porg1,
|
||||||
<< PhysTime << endl;
|
Sfx, Sfy, Sfz, Symmetry))
|
||||||
MPI_Abort(MPI_COMM_WORLD, 1);
|
{
|
||||||
|
if (myrank == 0 && ErrorMonitor->outfile)
|
||||||
|
ErrorMonitor->outfile << "CUDA Z4C failed to interpolate black-hole shift at t = "
|
||||||
|
<< PhysTime << endl;
|
||||||
|
MPI_Abort(MPI_COMM_WORLD, 1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
compute_Porg_rhs(Porg, Porg1, Sfx, Sfy, Sfz, lev);
|
||||||
}
|
}
|
||||||
for (int ithBH = 0; ithBH < BH_num; ithBH++)
|
for (int ithBH = 0; ithBH < BH_num; ithBH++)
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -70,6 +70,17 @@ int amss_analysis_map_every()
|
|||||||
return every;
|
return every;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool amss_constraint_out_enabled_for_step(int step)
|
||||||
|
{
|
||||||
|
static int every = -1;
|
||||||
|
if (every < 0)
|
||||||
|
{
|
||||||
|
const char *env = getenv("AMSS_CONSTRAINT_OUT_EVERY");
|
||||||
|
every = (env && atoi(env) > 0) ? atoi(env) : 1;
|
||||||
|
}
|
||||||
|
return every <= 1 || (step > 0 && step % every == 0);
|
||||||
|
}
|
||||||
|
|
||||||
bool amss_rp_timing_enabled()
|
bool amss_rp_timing_enabled()
|
||||||
{
|
{
|
||||||
static int enabled = -1;
|
static int enabled = -1;
|
||||||
@@ -3143,12 +3154,15 @@ void bssn_class::Evolve(int Steps)
|
|||||||
|
|
||||||
// misc::tillherecheck("before Constraint_Out");
|
// misc::tillherecheck("before Constraint_Out");
|
||||||
|
|
||||||
const double constraint_t0 = evolve_timing ? MPI_Wtime() : 0.0;
|
if (amss_constraint_out_enabled_for_step(ncount))
|
||||||
STEP_TIMER_DECL(timer_constraint_out);
|
{
|
||||||
Constraint_Out(); // this will affect the Dump_List
|
const double constraint_t0 = evolve_timing ? MPI_Wtime() : 0.0;
|
||||||
STEP_TIMER_ADD(TB_CONSTRAINT_OUT, timer_constraint_out);
|
STEP_TIMER_DECL(timer_constraint_out);
|
||||||
if (evolve_timing)
|
Constraint_Out(); // this will affect the Dump_List
|
||||||
amss_evolve_timing_add_constraint(MPI_Wtime() - constraint_t0);
|
STEP_TIMER_ADD(TB_CONSTRAINT_OUT, timer_constraint_out);
|
||||||
|
if (evolve_timing)
|
||||||
|
amss_evolve_timing_add_constraint(MPI_Wtime() - constraint_t0);
|
||||||
|
}
|
||||||
|
|
||||||
LastDump += dT_mon;
|
LastDump += dT_mon;
|
||||||
Last2dDump += dT_mon;
|
Last2dDump += dT_mon;
|
||||||
@@ -3220,7 +3234,7 @@ void bssn_class::Evolve(int Steps)
|
|||||||
GH->Regrid(Symmetry, BH_num, Porgbr, Porg0,
|
GH->Regrid(Symmetry, BH_num, Porgbr, Porg0,
|
||||||
SynchList_cor, OldStateList, StateList, SynchList_pre,
|
SynchList_cor, OldStateList, StateList, SynchList_pre,
|
||||||
fgt(PhysTime - dT_mon, StartTime, dT_mon / 2), ErrorMonitor);
|
fgt(PhysTime - dT_mon, StartTime, dT_mon / 2), ErrorMonitor);
|
||||||
#if (ABEtype != 1 && ABEtype != 2)
|
#if (ABEtype != 1)
|
||||||
for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); sync_cache_rp_coarse[il].invalidate(); sync_cache_rp_fine[il].invalidate(); sync_cache_restrict[il].invalidate(); sync_cache_outbd[il].invalidate(); }
|
for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); sync_cache_rp_coarse[il].invalidate(); sync_cache_rp_fine[il].invalidate(); sync_cache_restrict[il].invalidate(); sync_cache_outbd[il].invalidate(); }
|
||||||
#endif
|
#endif
|
||||||
STEP_TIMER_ADD(TB_REGRID, timer_regrid);
|
STEP_TIMER_ADD(TB_REGRID, timer_regrid);
|
||||||
@@ -3491,7 +3505,7 @@ void bssn_class::RecursiveStep(int lev)
|
|||||||
{
|
{
|
||||||
if (ConstraintRefreshLevels)
|
if (ConstraintRefreshLevels)
|
||||||
ConstraintRefreshLevels[lev] = 1;
|
ConstraintRefreshLevels[lev] = 1;
|
||||||
#if (ABEtype != 1 && ABEtype != 2)
|
#if (ABEtype != 1)
|
||||||
for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); sync_cache_rp_coarse[il].invalidate(); sync_cache_rp_fine[il].invalidate(); sync_cache_restrict[il].invalidate(); sync_cache_outbd[il].invalidate(); }
|
for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); sync_cache_rp_coarse[il].invalidate(); sync_cache_rp_fine[il].invalidate(); sync_cache_restrict[il].invalidate(); sync_cache_outbd[il].invalidate(); }
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
@@ -3684,7 +3698,7 @@ void bssn_class::ParallelStep()
|
|||||||
SynchList_cor, OldStateList, StateList, SynchList_pre,
|
SynchList_cor, OldStateList, StateList, SynchList_pre,
|
||||||
fgt(PhysTime - dT_lev, StartTime, dT_lev / 2), ErrorMonitor))
|
fgt(PhysTime - dT_lev, StartTime, dT_lev / 2), ErrorMonitor))
|
||||||
{
|
{
|
||||||
#if (ABEtype != 1 && ABEtype != 2)
|
#if (ABEtype != 1)
|
||||||
for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); sync_cache_rp_coarse[il].invalidate(); sync_cache_rp_fine[il].invalidate(); sync_cache_restrict[il].invalidate(); sync_cache_outbd[il].invalidate(); }
|
for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); sync_cache_rp_coarse[il].invalidate(); sync_cache_rp_fine[il].invalidate(); sync_cache_restrict[il].invalidate(); sync_cache_outbd[il].invalidate(); }
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
@@ -3862,7 +3876,7 @@ void bssn_class::ParallelStep()
|
|||||||
SynchList_cor, OldStateList, StateList, SynchList_pre,
|
SynchList_cor, OldStateList, StateList, SynchList_pre,
|
||||||
fgt(PhysTime - dT_levp1, StartTime, dT_levp1 / 2), ErrorMonitor))
|
fgt(PhysTime - dT_levp1, StartTime, dT_levp1 / 2), ErrorMonitor))
|
||||||
{
|
{
|
||||||
#if (ABEtype != 1 && ABEtype != 2)
|
#if (ABEtype != 1)
|
||||||
for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); sync_cache_rp_coarse[il].invalidate(); sync_cache_rp_fine[il].invalidate(); sync_cache_restrict[il].invalidate(); sync_cache_outbd[il].invalidate(); }
|
for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); sync_cache_rp_coarse[il].invalidate(); sync_cache_rp_fine[il].invalidate(); sync_cache_restrict[il].invalidate(); sync_cache_outbd[il].invalidate(); }
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
@@ -3888,7 +3902,7 @@ void bssn_class::ParallelStep()
|
|||||||
SynchList_cor, OldStateList, StateList, SynchList_pre,
|
SynchList_cor, OldStateList, StateList, SynchList_pre,
|
||||||
fgt(PhysTime - dT_lev, StartTime, dT_lev / 2), ErrorMonitor))
|
fgt(PhysTime - dT_lev, StartTime, dT_lev / 2), ErrorMonitor))
|
||||||
{
|
{
|
||||||
#if (ABEtype != 1 && ABEtype != 2)
|
#if (ABEtype != 1)
|
||||||
for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); sync_cache_rp_coarse[il].invalidate(); sync_cache_rp_fine[il].invalidate(); sync_cache_restrict[il].invalidate(); sync_cache_outbd[il].invalidate(); }
|
for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); sync_cache_rp_coarse[il].invalidate(); sync_cache_rp_fine[il].invalidate(); sync_cache_restrict[il].invalidate(); sync_cache_outbd[il].invalidate(); }
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
@@ -3918,7 +3932,7 @@ void bssn_class::ParallelStep()
|
|||||||
SynchList_cor, OldStateList, StateList, SynchList_pre,
|
SynchList_cor, OldStateList, StateList, SynchList_pre,
|
||||||
fgt(PhysTime - dT_lev, StartTime, dT_levm1 / 2), ErrorMonitor))
|
fgt(PhysTime - dT_lev, StartTime, dT_levm1 / 2), ErrorMonitor))
|
||||||
{
|
{
|
||||||
#if (ABEtype != 1 && ABEtype != 2)
|
#if (ABEtype != 1)
|
||||||
for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); sync_cache_rp_coarse[il].invalidate(); sync_cache_rp_fine[il].invalidate(); sync_cache_restrict[il].invalidate(); sync_cache_outbd[il].invalidate(); }
|
for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); sync_cache_rp_coarse[il].invalidate(); sync_cache_rp_fine[il].invalidate(); sync_cache_restrict[il].invalidate(); sync_cache_outbd[il].invalidate(); }
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
@@ -3945,7 +3959,7 @@ void bssn_class::ParallelStep()
|
|||||||
SynchList_cor, OldStateList, StateList, SynchList_pre,
|
SynchList_cor, OldStateList, StateList, SynchList_pre,
|
||||||
fgt(PhysTime - dT_lev, StartTime, dT_levm1 / 2), ErrorMonitor))
|
fgt(PhysTime - dT_lev, StartTime, dT_levm1 / 2), ErrorMonitor))
|
||||||
{
|
{
|
||||||
#if (ABEtype != 1 && ABEtype != 2)
|
#if (ABEtype != 1)
|
||||||
for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); sync_cache_rp_coarse[il].invalidate(); sync_cache_rp_fine[il].invalidate(); sync_cache_restrict[il].invalidate(); sync_cache_outbd[il].invalidate(); }
|
for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); sync_cache_rp_coarse[il].invalidate(); sync_cache_rp_fine[il].invalidate(); sync_cache_restrict[il].invalidate(); sync_cache_outbd[il].invalidate(); }
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
@@ -4588,7 +4602,7 @@ void bssn_class::Step(int lev, int YN)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
#if (ABEtype != 1 && ABEtype != 2)
|
#if (ABEtype != 1)
|
||||||
Parallel::Sync_finish(sync_cache_pre[lev], async_pre, SynchList_pre, Symmetry);
|
Parallel::Sync_finish(sync_cache_pre[lev], async_pre, SynchList_pre, Symmetry);
|
||||||
#endif
|
#endif
|
||||||
STEP_TIMER_ADD(TB_PREDICTOR_SYNC, timer_predictor_sync);
|
STEP_TIMER_ADD(TB_PREDICTOR_SYNC, timer_predictor_sync);
|
||||||
@@ -5035,7 +5049,7 @@ void bssn_class::Step(int lev, int YN)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
#if (ABEtype != 1 && ABEtype != 2)
|
#if (ABEtype != 1)
|
||||||
Parallel::Sync_finish(sync_cache_cor[lev], async_cor, SynchList_cor, Symmetry);
|
Parallel::Sync_finish(sync_cache_cor[lev], async_cor, SynchList_cor, Symmetry);
|
||||||
#endif
|
#endif
|
||||||
STEP_TIMER_ADD(TB_CORRECTOR_SYNC, timer_corrector_sync);
|
STEP_TIMER_ADD(TB_CORRECTOR_SYNC, timer_corrector_sync);
|
||||||
@@ -5554,7 +5568,7 @@ void bssn_class::Step(int lev, int YN)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
#if (ABEtype != 1 && ABEtype != 2)
|
#if (ABEtype != 1)
|
||||||
Parallel::Sync_finish(sync_cache_pre[lev], async_pre, SynchList_pre, Symmetry);
|
Parallel::Sync_finish(sync_cache_pre[lev], async_pre, SynchList_pre, Symmetry);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@@ -5905,7 +5919,7 @@ void bssn_class::Step(int lev, int YN)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
#if (ABEtype != 1 && ABEtype != 2)
|
#if (ABEtype != 1)
|
||||||
Parallel::Sync_finish(sync_cache_cor[lev], async_cor, SynchList_cor, Symmetry);
|
Parallel::Sync_finish(sync_cache_cor[lev], async_cor, SynchList_cor, Symmetry);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|||||||
@@ -160,6 +160,11 @@ def _gpu_runtime_env():
|
|||||||
"AMSS_CUDA_DEVICE_SEGMENT_BATCH": "0",
|
"AMSS_CUDA_DEVICE_SEGMENT_BATCH": "0",
|
||||||
"AMSS_CUDA_UNCACHED_DEVICE_BUFFERS": "1",
|
"AMSS_CUDA_UNCACHED_DEVICE_BUFFERS": "1",
|
||||||
}
|
}
|
||||||
|
if getattr(input_data, "Equation_Class", "") == "Z4C":
|
||||||
|
defaults.update({
|
||||||
|
"AMSS_Z4C_CUDA_RESIDENT": "1",
|
||||||
|
"AMSS_CONSTRAINT_OUT_EVERY": "1000000",
|
||||||
|
})
|
||||||
for key, value in defaults.items():
|
for key, value in defaults.items():
|
||||||
runtime_env.setdefault(key, value)
|
runtime_env.setdefault(key, value)
|
||||||
|
|
||||||
@@ -271,16 +276,26 @@ def run_ABE():
|
|||||||
mpi_env = None
|
mpi_env = None
|
||||||
started_mps = False
|
started_mps = False
|
||||||
|
|
||||||
|
mpi_processes = int(input_data.MPI_processes)
|
||||||
|
if (input_data.GPU_Calculation == "yes" and
|
||||||
|
getattr(input_data, "Equation_Class", "") == "Z4C"):
|
||||||
|
z4c_env_np = os.environ.get("AMSS_Z4C_GPU_MPI_PROCESSES")
|
||||||
|
if z4c_env_np and int(z4c_env_np) > 0:
|
||||||
|
mpi_processes = int(z4c_env_np)
|
||||||
|
elif mpi_processes < 4:
|
||||||
|
mpi_processes = 4
|
||||||
|
|
||||||
if (input_data.GPU_Calculation == "no"):
|
if (input_data.GPU_Calculation == "no"):
|
||||||
mpi_command = NUMACTL_CPU_BIND + " mpirun -np " + str(input_data.MPI_processes) + " ./ABE"
|
mpi_command = NUMACTL_CPU_BIND + " mpirun -np " + str(mpi_processes) + " ./ABE"
|
||||||
#mpi_command = " mpirun -np " + str(input_data.MPI_processes) + " ./ABE"
|
#mpi_command = " mpirun -np " + str(input_data.MPI_processes) + " ./ABE"
|
||||||
mpi_command_outfile = "ABE_out.log"
|
mpi_command_outfile = "ABE_out.log"
|
||||||
elif (input_data.GPU_Calculation == "yes"):
|
elif (input_data.GPU_Calculation == "yes"):
|
||||||
mpi_command = NUMACTL_CPU_BIND + " I_MPI_OFFLOAD=1 I_MPI_OFFLOAD_IPC=0 mpirun -np " + str(input_data.MPI_processes) + " ./ABE_CUDA"
|
mpi_command = NUMACTL_CPU_BIND + " I_MPI_OFFLOAD=1 I_MPI_OFFLOAD_IPC=0 mpirun -np " + str(mpi_processes) + " ./ABE_CUDA"
|
||||||
mpi_command_outfile = "ABEGPU_out.log"
|
mpi_command_outfile = "ABEGPU_out.log"
|
||||||
mpi_env = _gpu_runtime_env()
|
mpi_env = _gpu_runtime_env()
|
||||||
started_mps = _start_cuda_mps_if_requested(mpi_env)
|
started_mps = _start_cuda_mps_if_requested(mpi_env)
|
||||||
print(" GPU optimized runtime switches:")
|
print(" GPU optimized runtime switches:")
|
||||||
|
print(f" MPI processes={mpi_processes}")
|
||||||
print(f" AMSS_INTERP_FAST={mpi_env.get('AMSS_INTERP_FAST', '')}")
|
print(f" AMSS_INTERP_FAST={mpi_env.get('AMSS_INTERP_FAST', '')}")
|
||||||
print(f" AMSS_INTERP_GPU={mpi_env.get('AMSS_INTERP_GPU', '')}")
|
print(f" AMSS_INTERP_GPU={mpi_env.get('AMSS_INTERP_GPU', '')}")
|
||||||
print(f" AMSS_ANALYSIS_MAP_EVERY={mpi_env.get('AMSS_ANALYSIS_MAP_EVERY', '')}")
|
print(f" AMSS_ANALYSIS_MAP_EVERY={mpi_env.get('AMSS_ANALYSIS_MAP_EVERY', '')}")
|
||||||
@@ -300,6 +315,8 @@ def run_ABE():
|
|||||||
print(f" AMSS_CUDA_AMR_RESTRICT_BATCH={mpi_env.get('AMSS_CUDA_AMR_RESTRICT_BATCH', '')}")
|
print(f" AMSS_CUDA_AMR_RESTRICT_BATCH={mpi_env.get('AMSS_CUDA_AMR_RESTRICT_BATCH', '')}")
|
||||||
print(f" AMSS_CUDA_DEVICE_SEGMENT_BATCH={mpi_env.get('AMSS_CUDA_DEVICE_SEGMENT_BATCH', '')}")
|
print(f" AMSS_CUDA_DEVICE_SEGMENT_BATCH={mpi_env.get('AMSS_CUDA_DEVICE_SEGMENT_BATCH', '')}")
|
||||||
print(f" AMSS_CUDA_UNCACHED_DEVICE_BUFFERS={mpi_env.get('AMSS_CUDA_UNCACHED_DEVICE_BUFFERS', '')}")
|
print(f" AMSS_CUDA_UNCACHED_DEVICE_BUFFERS={mpi_env.get('AMSS_CUDA_UNCACHED_DEVICE_BUFFERS', '')}")
|
||||||
|
print(f" AMSS_Z4C_CUDA_RESIDENT={mpi_env.get('AMSS_Z4C_CUDA_RESIDENT', '')}")
|
||||||
|
print(f" AMSS_CONSTRAINT_OUT_EVERY={mpi_env.get('AMSS_CONSTRAINT_OUT_EVERY', '')}")
|
||||||
if "CUDA_MPS_PIPE_DIRECTORY" in mpi_env:
|
if "CUDA_MPS_PIPE_DIRECTORY" in mpi_env:
|
||||||
print(f" CUDA_MPS_PIPE_DIRECTORY={mpi_env['CUDA_MPS_PIPE_DIRECTORY']}")
|
print(f" CUDA_MPS_PIPE_DIRECTORY={mpi_env['CUDA_MPS_PIPE_DIRECTORY']}")
|
||||||
|
|
||||||
|
|||||||
@@ -822,8 +822,21 @@ def generate_constraint_check_plot( outdir, figure_outdir, input_level_number ):
|
|||||||
|
|
||||||
print( " Begin the constraint violation plot for grid level number = ", input_level_number )
|
print( " Begin the constraint violation plot for grid level number = ", input_level_number )
|
||||||
|
|
||||||
|
if (not os.path.exists(file0)) or os.path.getsize(file0) == 0:
|
||||||
|
if ( input_level_number == 0 ):
|
||||||
|
print( " Constraint data file is empty; skip constraint violation plots" )
|
||||||
|
print( )
|
||||||
|
return
|
||||||
|
|
||||||
# load the full data file (assumed whitespace-separated floats)
|
# load the full data file (assumed whitespace-separated floats)
|
||||||
data = numpy.loadtxt(file0)
|
data = numpy.loadtxt(file0)
|
||||||
|
data = numpy.atleast_2d(data)
|
||||||
|
|
||||||
|
if data.shape[1] < 8:
|
||||||
|
if ( input_level_number == 0 ):
|
||||||
|
print( " Constraint data file has insufficient columns; skip constraint violation plots" )
|
||||||
|
print( )
|
||||||
|
return
|
||||||
|
|
||||||
# extract columns from the constraint data file
|
# extract columns from the constraint data file
|
||||||
time = data[:,0]
|
time = data[:,0]
|
||||||
|
|||||||
Reference in New Issue
Block a user