Optimize MPI communication in RestrictProlong and surface_integral
Cache Sync in RestrictProlong: replace 11 basic Parallel::Sync() calls with Parallel::Sync_cached() across RestrictProlong, RestrictProlong_aux, and ProlongRestrict to avoid rebuilding grid segment lists every call. Merge paired MPI_Allreduce in surface_integral: combine 9 pairs of consecutive RP/IP Allreduce calls into single calls with count=2*NN. Merge scalar MPI_Allreduce in surf_MassPAng: combine 3 groups of 7 scalar Allreduce calls (mass + angular/linear momentum) into single calls with count=7. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -734,6 +734,8 @@ void bssn_class::Initialize()
|
||||
// Initialize sync caches (per-level, for predictor and corrector)
|
||||
sync_cache_pre = new Parallel::SyncCache[GH->levels];
|
||||
sync_cache_cor = new Parallel::SyncCache[GH->levels];
|
||||
sync_cache_rp_coarse = new Parallel::SyncCache[GH->levels];
|
||||
sync_cache_rp_fine = new Parallel::SyncCache[GH->levels];
|
||||
}
|
||||
|
||||
//================================================================================================
|
||||
@@ -998,6 +1000,18 @@ bssn_class::~bssn_class()
|
||||
sync_cache_cor[i].destroy();
|
||||
delete[] sync_cache_cor;
|
||||
}
|
||||
if (sync_cache_rp_coarse)
|
||||
{
|
||||
for (int i = 0; i < GH->levels; i++)
|
||||
sync_cache_rp_coarse[i].destroy();
|
||||
delete[] sync_cache_rp_coarse;
|
||||
}
|
||||
if (sync_cache_rp_fine)
|
||||
{
|
||||
for (int i = 0; i < GH->levels; i++)
|
||||
sync_cache_rp_fine[i].destroy();
|
||||
delete[] sync_cache_rp_fine;
|
||||
}
|
||||
|
||||
delete GH;
|
||||
#ifdef WithShell
|
||||
@@ -2199,7 +2213,7 @@ void bssn_class::Evolve(int Steps)
|
||||
GH->Regrid(Symmetry, BH_num, Porgbr, Porg0,
|
||||
SynchList_cor, OldStateList, StateList, SynchList_pre,
|
||||
fgt(PhysTime - dT_mon, StartTime, dT_mon / 2), ErrorMonitor);
|
||||
for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); }
|
||||
for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); sync_cache_rp_coarse[il].invalidate(); sync_cache_rp_fine[il].invalidate(); }
|
||||
#endif
|
||||
|
||||
#if (REGLEV == 0 && (PSTR == 1 || PSTR == 2))
|
||||
@@ -2415,7 +2429,7 @@ void bssn_class::RecursiveStep(int lev)
|
||||
GH->Regrid_Onelevel(lev, Symmetry, BH_num, Porgbr, Porg0,
|
||||
SynchList_cor, OldStateList, StateList, SynchList_pre,
|
||||
fgt(PhysTime - dT_lev, StartTime, dT_lev / 2), ErrorMonitor);
|
||||
for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); }
|
||||
for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); sync_cache_rp_coarse[il].invalidate(); sync_cache_rp_fine[il].invalidate(); }
|
||||
#endif
|
||||
}
|
||||
|
||||
@@ -2594,7 +2608,7 @@ void bssn_class::ParallelStep()
|
||||
GH->Regrid_Onelevel(GH->mylev, Symmetry, BH_num, Porgbr, Porg0,
|
||||
SynchList_cor, OldStateList, StateList, SynchList_pre,
|
||||
fgt(PhysTime - dT_lev, StartTime, dT_lev / 2), ErrorMonitor);
|
||||
for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); }
|
||||
for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); sync_cache_rp_coarse[il].invalidate(); sync_cache_rp_fine[il].invalidate(); }
|
||||
#endif
|
||||
}
|
||||
|
||||
@@ -2761,7 +2775,7 @@ void bssn_class::ParallelStep()
|
||||
GH->Regrid_Onelevel(lev + 1, Symmetry, BH_num, Porgbr, Porg0,
|
||||
SynchList_cor, OldStateList, StateList, SynchList_pre,
|
||||
fgt(PhysTime - dT_levp1, StartTime, dT_levp1 / 2), ErrorMonitor);
|
||||
for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); }
|
||||
for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); sync_cache_rp_coarse[il].invalidate(); sync_cache_rp_fine[il].invalidate(); }
|
||||
|
||||
// a_stream.clear();
|
||||
// a_stream.str("");
|
||||
@@ -2776,7 +2790,7 @@ void bssn_class::ParallelStep()
|
||||
GH->Regrid_Onelevel(lev, Symmetry, BH_num, Porgbr, Porg0,
|
||||
SynchList_cor, OldStateList, StateList, SynchList_pre,
|
||||
fgt(PhysTime - dT_lev, StartTime, dT_lev / 2), ErrorMonitor);
|
||||
for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); }
|
||||
for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); sync_cache_rp_coarse[il].invalidate(); sync_cache_rp_fine[il].invalidate(); }
|
||||
|
||||
// a_stream.clear();
|
||||
// a_stream.str("");
|
||||
@@ -2795,7 +2809,7 @@ void bssn_class::ParallelStep()
|
||||
GH->Regrid_Onelevel(lev - 1, Symmetry, BH_num, Porgbr, Porg0,
|
||||
SynchList_cor, OldStateList, StateList, SynchList_pre,
|
||||
fgt(PhysTime - dT_lev, StartTime, dT_levm1 / 2), ErrorMonitor);
|
||||
for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); }
|
||||
for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); sync_cache_rp_coarse[il].invalidate(); sync_cache_rp_fine[il].invalidate(); }
|
||||
|
||||
// a_stream.clear();
|
||||
// a_stream.str("");
|
||||
@@ -2811,7 +2825,7 @@ void bssn_class::ParallelStep()
|
||||
GH->Regrid_Onelevel(lev - 1, Symmetry, BH_num, Porgbr, Porg0,
|
||||
SynchList_cor, OldStateList, StateList, SynchList_pre,
|
||||
fgt(PhysTime - dT_lev, StartTime, dT_levm1 / 2), ErrorMonitor);
|
||||
for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); }
|
||||
for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); sync_cache_rp_coarse[il].invalidate(); sync_cache_rp_fine[il].invalidate(); }
|
||||
|
||||
// a_stream.clear();
|
||||
// a_stream.str("");
|
||||
@@ -5795,7 +5809,7 @@ void bssn_class::RestrictProlong(int lev, int YN, bool BB,
|
||||
// misc::tillherecheck(GH->Commlev[GH->mylev],GH->start_rank[GH->mylev],a_stream.str());
|
||||
#endif
|
||||
|
||||
Parallel::Sync(GH->PatL[lev - 1], SynchList_pre, Symmetry);
|
||||
Parallel::Sync_cached(GH->PatL[lev - 1], SynchList_pre, Symmetry, sync_cache_rp_coarse[lev]);
|
||||
|
||||
#if (PSTR == 1 || PSTR == 2)
|
||||
// a_stream.clear();
|
||||
@@ -5856,7 +5870,7 @@ void bssn_class::RestrictProlong(int lev, int YN, bool BB,
|
||||
// misc::tillherecheck(GH->Commlev[GH->mylev],GH->start_rank[GH->mylev],a_stream.str());
|
||||
#endif
|
||||
|
||||
Parallel::Sync(GH->PatL[lev - 1], SL, Symmetry);
|
||||
Parallel::Sync_cached(GH->PatL[lev - 1], SL, Symmetry, sync_cache_rp_coarse[lev]);
|
||||
|
||||
#if (PSTR == 1 || PSTR == 2)
|
||||
// a_stream.clear();
|
||||
@@ -5894,7 +5908,7 @@ void bssn_class::RestrictProlong(int lev, int YN, bool BB,
|
||||
#endif
|
||||
}
|
||||
|
||||
Parallel::Sync(GH->PatL[lev], SL, Symmetry);
|
||||
Parallel::Sync_cached(GH->PatL[lev], SL, Symmetry, sync_cache_rp_fine[lev]);
|
||||
|
||||
#if (PSTR == 1 || PSTR == 2)
|
||||
// a_stream.clear();
|
||||
@@ -5952,7 +5966,7 @@ void bssn_class::RestrictProlong_aux(int lev, int YN, bool BB,
|
||||
Parallel::Restrict_bam(GH->PatL[lev - 1], GH->PatL[lev], SL, SynchList_pre, GH->rsul[lev], Symmetry);
|
||||
#endif
|
||||
|
||||
Parallel::Sync(GH->PatL[lev - 1], SynchList_pre, Symmetry);
|
||||
Parallel::Sync_cached(GH->PatL[lev - 1], SynchList_pre, Symmetry, sync_cache_rp_coarse[lev]);
|
||||
|
||||
#if (RPB == 0)
|
||||
Ppc = GH->PatL[lev - 1];
|
||||
@@ -5984,7 +5998,7 @@ void bssn_class::RestrictProlong_aux(int lev, int YN, bool BB,
|
||||
Parallel::Restrict_bam(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, GH->rsul[lev], Symmetry);
|
||||
#endif
|
||||
|
||||
Parallel::Sync(GH->PatL[lev - 1], SL, Symmetry);
|
||||
Parallel::Sync_cached(GH->PatL[lev - 1], SL, Symmetry, sync_cache_rp_coarse[lev]);
|
||||
|
||||
#if (RPB == 0)
|
||||
Ppc = GH->PatL[lev - 1];
|
||||
@@ -6008,7 +6022,7 @@ void bssn_class::RestrictProlong_aux(int lev, int YN, bool BB,
|
||||
#endif
|
||||
}
|
||||
|
||||
Parallel::Sync(GH->PatL[lev], SL, Symmetry);
|
||||
Parallel::Sync_cached(GH->PatL[lev], SL, Symmetry, sync_cache_rp_fine[lev]);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -6059,7 +6073,7 @@ void bssn_class::RestrictProlong(int lev, int YN, bool BB)
|
||||
Parallel::Restrict_bam(GH->PatL[lev - 1], GH->PatL[lev], SynchList_cor, SynchList_pre, GH->rsul[lev], Symmetry);
|
||||
#endif
|
||||
|
||||
Parallel::Sync(GH->PatL[lev - 1], SynchList_pre, Symmetry);
|
||||
Parallel::Sync_cached(GH->PatL[lev - 1], SynchList_pre, Symmetry, sync_cache_rp_coarse[lev]);
|
||||
|
||||
#if (RPB == 0)
|
||||
Ppc = GH->PatL[lev - 1];
|
||||
@@ -6093,7 +6107,7 @@ void bssn_class::RestrictProlong(int lev, int YN, bool BB)
|
||||
Parallel::Restrict_bam(GH->PatL[lev - 1], GH->PatL[lev], SynchList_cor, StateList, GH->rsul[lev], Symmetry);
|
||||
#endif
|
||||
|
||||
Parallel::Sync(GH->PatL[lev - 1], StateList, Symmetry);
|
||||
Parallel::Sync_cached(GH->PatL[lev - 1], StateList, Symmetry, sync_cache_rp_coarse[lev]);
|
||||
|
||||
#if (RPB == 0)
|
||||
Ppc = GH->PatL[lev - 1];
|
||||
@@ -6117,7 +6131,7 @@ void bssn_class::RestrictProlong(int lev, int YN, bool BB)
|
||||
#endif
|
||||
}
|
||||
|
||||
Parallel::Sync(GH->PatL[lev], SynchList_cor, Symmetry);
|
||||
Parallel::Sync_cached(GH->PatL[lev], SynchList_cor, Symmetry, sync_cache_rp_fine[lev]);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -6200,10 +6214,10 @@ void bssn_class::ProlongRestrict(int lev, int YN, bool BB)
|
||||
#else
|
||||
Parallel::Restrict_after(GH->PatL[lev - 1], GH->PatL[lev], SynchList_cor, StateList, Symmetry);
|
||||
#endif
|
||||
Parallel::Sync(GH->PatL[lev - 1], StateList, Symmetry);
|
||||
Parallel::Sync_cached(GH->PatL[lev - 1], StateList, Symmetry, sync_cache_rp_coarse[lev]);
|
||||
}
|
||||
|
||||
Parallel::Sync(GH->PatL[lev], SynchList_cor, Symmetry);
|
||||
Parallel::Sync_cached(GH->PatL[lev], SynchList_cor, Symmetry, sync_cache_rp_fine[lev]);
|
||||
}
|
||||
}
|
||||
#undef MIXOUTB
|
||||
|
||||
Reference in New Issue
Block a user