diff --git a/AMSS_NCKU_source/Z4c_class.C b/AMSS_NCKU_source/Z4c_class.C index 6f4cd27..3a53dc0 100644 --- a/AMSS_NCKU_source/Z4c_class.C +++ b/AMSS_NCKU_source/Z4c_class.C @@ -485,25 +485,7 @@ void Z4c_class::Step(int lev, int YN) } #endif - Parallel::Sync(GH->PatL[lev], SynchList_pre, Symmetry); - -#ifdef WithShell - if (lev == 0) - { - clock_t prev_clock, curr_clock; - if (myrank == 0) - curr_clock = clock(); - SH->Synch(SynchList_pre, Symmetry); - if (myrank == 0) - { - prev_clock = curr_clock; - curr_clock = clock(); - cout << " Shell stuff synchronization used " - << (double)(curr_clock - prev_clock) / ((double)CLOCKS_PER_SEC) - << " seconds! " << endl; - } - } -#endif + // CA-RK4: skip post-prediction sync (redundant; ghost cells computable locally) // for black hole position if (BH_num > 0 && lev == GH->levels - 1) @@ -868,25 +850,28 @@ void Z4c_class::Step(int lev, int YN) } #endif - Parallel::Sync(GH->PatL[lev], SynchList_cor, Symmetry); + // CA-RK4: only sync after last corrector (iter_count == 3); stages 1 & 2 are redundant + if (iter_count == 3) { + Parallel::Sync(GH->PatL[lev], SynchList_cor, Symmetry); #ifdef WithShell - if (lev == 0) - { - clock_t prev_clock, curr_clock; - if (myrank == 0) - curr_clock = clock(); - SH->Synch(SynchList_cor, Symmetry); - if (myrank == 0) + if (lev == 0) { - prev_clock = curr_clock; - curr_clock = clock(); - cout << " Shell stuff synchronization used " - << (double)(curr_clock - prev_clock) / ((double)CLOCKS_PER_SEC) - << " seconds! " << endl; + clock_t prev_clock, curr_clock; + if (myrank == 0) + curr_clock = clock(); + SH->Synch(SynchList_cor, Symmetry); + if (myrank == 0) + { + prev_clock = curr_clock; + curr_clock = clock(); + cout << " Shell stuff synchronization used " + << (double)(curr_clock - prev_clock) / ((double)CLOCKS_PER_SEC) + << " seconds! " << endl; + } } - } #endif + } // end CA-RK4 guard // for black hole position if (BH_num > 0 && lev == GH->levels - 1) { @@ -1558,7 +1543,7 @@ void Z4c_class::Step(int lev, int YN) } } - Parallel::Sync(GH->PatL[lev], SynchList_pre, Symmetry); + // CA-RK4: skip post-prediction MPI ghost sync (redundant; ghost cells computable locally) if (lev == 0) { @@ -2120,7 +2105,9 @@ void Z4c_class::Step(int lev, int YN) } } - Parallel::Sync(GH->PatL[lev], SynchList_cor, Symmetry); + // CA-RK4: only MPI sync after last corrector (iter_count == 3); stages 1 & 2 are redundant + if (iter_count == 3) + Parallel::Sync(GH->PatL[lev], SynchList_cor, Symmetry); if (lev == 0) { diff --git a/AMSS_NCKU_source/bssnEM_class.C b/AMSS_NCKU_source/bssnEM_class.C index e06b701..4867600 100644 --- a/AMSS_NCKU_source/bssnEM_class.C +++ b/AMSS_NCKU_source/bssnEM_class.C @@ -1221,25 +1221,7 @@ void bssnEM_class::Step(int lev, int YN) } #endif - Parallel::Sync(GH->PatL[lev], SynchList_pre, Symmetry); - -#ifdef WithShell - if (lev == 0) - { - clock_t prev_clock, curr_clock; - if (myrank == 0) - curr_clock = clock(); - SH->Synch(SynchList_pre, Symmetry); - if (myrank == 0) - { - prev_clock = curr_clock; - curr_clock = clock(); - cout << " Shell stuff synchronization used " - << (double)(curr_clock - prev_clock) / ((double)CLOCKS_PER_SEC) - << " seconds! " << endl; - } - } -#endif + // CA-RK4: skip post-prediction sync (redundant; ghost cells computable locally) // for black hole position if (BH_num > 0 && lev == GH->levels - 1) @@ -1683,25 +1665,28 @@ void bssnEM_class::Step(int lev, int YN) } #endif - Parallel::Sync(GH->PatL[lev], SynchList_cor, Symmetry); + // CA-RK4: only sync after last corrector (iter_count == 3); stages 1 & 2 are redundant + if (iter_count == 3) { + Parallel::Sync(GH->PatL[lev], SynchList_cor, Symmetry); #ifdef WithShell - if (lev == 0) - { - clock_t prev_clock, curr_clock; - if (myrank == 0) - curr_clock = clock(); - SH->Synch(SynchList_cor, Symmetry); - if (myrank == 0) + if (lev == 0) { - prev_clock = curr_clock; - curr_clock = clock(); - cout << " Shell stuff synchronization used " - << (double)(curr_clock - prev_clock) / ((double)CLOCKS_PER_SEC) - << " seconds! " << endl; + clock_t prev_clock, curr_clock; + if (myrank == 0) + curr_clock = clock(); + SH->Synch(SynchList_cor, Symmetry); + if (myrank == 0) + { + prev_clock = curr_clock; + curr_clock = clock(); + cout << " Shell stuff synchronization used " + << (double)(curr_clock - prev_clock) / ((double)CLOCKS_PER_SEC) + << " seconds! " << endl; + } } - } #endif + } // end CA-RK4 guard // for black hole position if (BH_num > 0 && lev == GH->levels - 1) { diff --git a/AMSS_NCKU_source/bssn_class.C b/AMSS_NCKU_source/bssn_class.C index eb84f8e..09881c6 100644 --- a/AMSS_NCKU_source/bssn_class.C +++ b/AMSS_NCKU_source/bssn_class.C @@ -3349,27 +3349,7 @@ void bssn_class::Step(int lev, int YN) } #endif - Parallel::AsyncSyncState async_pre; - Parallel::Sync_start(GH->PatL[lev], SynchList_pre, Symmetry, sync_cache_pre[lev], async_pre); - -#ifdef WithShell - if (lev == 0) - { - clock_t prev_clock, curr_clock; - if (myrank == 0) - curr_clock = clock(); - SH->Synch(SynchList_pre, Symmetry); - if (myrank == 0) - { - prev_clock = curr_clock; - curr_clock = clock(); - cout << " Shell stuff synchronization used " - << (double)(curr_clock - prev_clock) / ((double)CLOCKS_PER_SEC) - << " seconds! " << endl; - } - } -#endif - Parallel::Sync_finish(sync_cache_pre[lev], async_pre, SynchList_pre, Symmetry); + // CA-RK4: skip post-prediction sync (redundant; ghost cells computable locally) #ifdef WithShell // Complete non-blocking error reduction and check @@ -3709,27 +3689,30 @@ void bssn_class::Step(int lev, int YN) } #endif - Parallel::AsyncSyncState async_cor; - Parallel::Sync_start(GH->PatL[lev], SynchList_cor, Symmetry, sync_cache_cor[lev], async_cor); + // CA-RK4: only sync after last corrector (iter_count == 3); stages 1 & 2 are redundant + if (iter_count == 3) { + Parallel::AsyncSyncState async_cor; + Parallel::Sync_start(GH->PatL[lev], SynchList_cor, Symmetry, sync_cache_cor[lev], async_cor); #ifdef WithShell - if (lev == 0) - { - clock_t prev_clock, curr_clock; - if (myrank == 0) - curr_clock = clock(); - SH->Synch(SynchList_cor, Symmetry); - if (myrank == 0) + if (lev == 0) { - prev_clock = curr_clock; - curr_clock = clock(); - cout << " Shell stuff synchronization used " - << (double)(curr_clock - prev_clock) / ((double)CLOCKS_PER_SEC) - << " seconds! " << endl; + clock_t prev_clock, curr_clock; + if (myrank == 0) + curr_clock = clock(); + SH->Synch(SynchList_cor, Symmetry); + if (myrank == 0) + { + prev_clock = curr_clock; + curr_clock = clock(); + cout << " Shell stuff synchronization used " + << (double)(curr_clock - prev_clock) / ((double)CLOCKS_PER_SEC) + << " seconds! " << endl; + } } - } #endif - Parallel::Sync_finish(sync_cache_cor[lev], async_cor, SynchList_cor, Symmetry); + Parallel::Sync_finish(sync_cache_cor[lev], async_cor, SynchList_cor, Symmetry); + } // end CA-RK4 guard #ifdef WithShell // Complete non-blocking error reduction and check