Cache GSL in SyncPlan and apply async Sync to Z4c_class

Major optimization: pre-build the grid segment lists (GSLs) once per Step() call via SyncPreparePlan(), then reuse them across all 4 RK4 substep SyncBegin calls via SyncBeginWithPlan(). This eliminates the O(cpusize * blocks^2) GSL rebuild cost that was previously incurred on every ghost zone exchange. Also applied the async SyncBegin/SyncEnd overlap pattern to Z4c_class.C (ABEtype==2, the default configuration), which was still using the blocking Parallel::Sync. Both the regular and CPBC variants of the Z4c Step() are now optimized. Co-authored-by: copilot-swe-agent[bot] <198982749+copilot@users.noreply.github.com>
2026-02-08 08:36:21 +00:00
parent a918dc103e
commit afd4006da2
4 changed files with 225 additions and 25 deletions
--- a/AMSS_NCKU_source/bssn_class.C
+++ b/AMSS_NCKU_source/bssn_class.C
@@ -3035,6 +3035,12 @@ void bssn_class::Step(int lev, int YN)
  int ERROR = 0;

  MyList<ss_patch> *sPp;
+
+  // Pre-build grid segment lists once for this level's patches.
+  // These are reused across predictor + 3 corrector SyncBegin calls,
+  // avoiding O(cpusize * blocks^2) rebuild each time.
+  Parallel::SyncPlan *sync_plan = Parallel::SyncPreparePlan(GH->PatL[lev], Symmetry);
+
  // Predictor
  MyList<Patch> *Pp = GH->PatL[lev];
  while (Pp)
@@ -3160,7 +3166,7 @@ void bssn_class::Step(int lev, int YN)
  }

  // Start async ghost zone exchange - overlaps with error check and Shell computation
-  Parallel::SyncHandle *sync_pre = Parallel::SyncBegin(GH->PatL[lev], SynchList_pre, Symmetry);
+  Parallel::SyncHandle *sync_pre = Parallel::SyncBeginWithPlan(sync_plan, SynchList_pre);

  // check error information (overlaps with MPI transfer)
  {
@@ -3536,7 +3542,7 @@ void bssn_class::Step(int lev, int YN)
    }

    // Start async ghost zone exchange - overlaps with error check and Shell computation
-    Parallel::SyncHandle *sync_cor = Parallel::SyncBegin(GH->PatL[lev], SynchList_cor, Symmetry);
+    Parallel::SyncHandle *sync_cor = Parallel::SyncBeginWithPlan(sync_plan, SynchList_cor);

    // check error information (overlaps with MPI transfer)
    {
@@ -3908,6 +3914,8 @@ void bssn_class::Step(int lev, int YN)
      Porg0[ithBH][2] = Porg1[ithBH][2];
    }
  }
+
+  Parallel::SyncFreePlan(sync_plan);
 }

 //================================================================================================
@@ -4830,6 +4838,12 @@ void bssn_class::Step(int lev, int YN)
  int ERROR = 0;

  MyList<ss_patch> *sPp;
+
+  // Pre-build grid segment lists once for this level's patches.
+  // These are reused across predictor + 3 corrector SyncBegin calls,
+  // avoiding O(cpusize * blocks^2) rebuild each time.
+  Parallel::SyncPlan *sync_plan = Parallel::SyncPreparePlan(GH->PatL[lev], Symmetry);
+
  // Predictor
  MyList<Patch> *Pp = GH->PatL[lev];
  while (Pp)
@@ -4957,7 +4971,7 @@ void bssn_class::Step(int lev, int YN)
  //   misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"after Predictor rhs calculation");

  // Start async ghost zone exchange - overlaps with error check and BH position
-  Parallel::SyncHandle *sync_pre = Parallel::SyncBegin(GH->PatL[lev], SynchList_pre, Symmetry);
+  Parallel::SyncHandle *sync_pre = Parallel::SyncBeginWithPlan(sync_plan, SynchList_pre);

  // check error information (overlaps with MPI transfer)
  {
@@ -5159,7 +5173,7 @@ void bssn_class::Step(int lev, int YN)
    //   misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"before Corrector error check");

    // Start async ghost zone exchange - overlaps with error check and BH position
-    Parallel::SyncHandle *sync_cor = Parallel::SyncBegin(GH->PatL[lev], SynchList_cor, Symmetry);
+    Parallel::SyncHandle *sync_cor = Parallel::SyncBeginWithPlan(sync_plan, SynchList_cor);

    // check error information (overlaps with MPI transfer)
    {
@@ -5299,6 +5313,8 @@ void bssn_class::Step(int lev, int YN)

  //     if(myrank==GH->start_rank[lev]) cout<<GH->mylev<<endl;
  //     misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"complet GH Step");
+
+  Parallel::SyncFreePlan(sync_plan);
 }

 //================================================================================================