diff --git a/AMSS_NCKU_source/Z4c_class.C b/AMSS_NCKU_source/Z4c_class.C index 3a53dc0..6f4cd27 100644 --- a/AMSS_NCKU_source/Z4c_class.C +++ b/AMSS_NCKU_source/Z4c_class.C @@ -485,7 +485,25 @@ void Z4c_class::Step(int lev, int YN) } #endif - // CA-RK4: skip post-prediction sync (redundant; ghost cells computable locally) + Parallel::Sync(GH->PatL[lev], SynchList_pre, Symmetry); + +#ifdef WithShell + if (lev == 0) + { + clock_t prev_clock, curr_clock; + if (myrank == 0) + curr_clock = clock(); + SH->Synch(SynchList_pre, Symmetry); + if (myrank == 0) + { + prev_clock = curr_clock; + curr_clock = clock(); + cout << " Shell stuff synchronization used " + << (double)(curr_clock - prev_clock) / ((double)CLOCKS_PER_SEC) + << " seconds! " << endl; + } + } +#endif // for black hole position if (BH_num > 0 && lev == GH->levels - 1) @@ -850,28 +868,25 @@ void Z4c_class::Step(int lev, int YN) } #endif - // CA-RK4: only sync after last corrector (iter_count == 3); stages 1 & 2 are redundant - if (iter_count == 3) { - Parallel::Sync(GH->PatL[lev], SynchList_cor, Symmetry); + Parallel::Sync(GH->PatL[lev], SynchList_cor, Symmetry); #ifdef WithShell - if (lev == 0) + if (lev == 0) + { + clock_t prev_clock, curr_clock; + if (myrank == 0) + curr_clock = clock(); + SH->Synch(SynchList_cor, Symmetry); + if (myrank == 0) { - clock_t prev_clock, curr_clock; - if (myrank == 0) - curr_clock = clock(); - SH->Synch(SynchList_cor, Symmetry); - if (myrank == 0) - { - prev_clock = curr_clock; - curr_clock = clock(); - cout << " Shell stuff synchronization used " - << (double)(curr_clock - prev_clock) / ((double)CLOCKS_PER_SEC) - << " seconds! " << endl; - } + prev_clock = curr_clock; + curr_clock = clock(); + cout << " Shell stuff synchronization used " + << (double)(curr_clock - prev_clock) / ((double)CLOCKS_PER_SEC) + << " seconds! " << endl; } + } #endif - } // end CA-RK4 guard // for black hole position if (BH_num > 0 && lev == GH->levels - 1) { @@ -1543,7 +1558,7 @@ void Z4c_class::Step(int lev, int YN) } } - // CA-RK4: skip post-prediction MPI ghost sync (redundant; ghost cells computable locally) + Parallel::Sync(GH->PatL[lev], SynchList_pre, Symmetry); if (lev == 0) { @@ -2105,9 +2120,7 @@ void Z4c_class::Step(int lev, int YN) } } - // CA-RK4: only MPI sync after last corrector (iter_count == 3); stages 1 & 2 are redundant - if (iter_count == 3) - Parallel::Sync(GH->PatL[lev], SynchList_cor, Symmetry); + Parallel::Sync(GH->PatL[lev], SynchList_cor, Symmetry); if (lev == 0) { diff --git a/AMSS_NCKU_source/bssnEM_class.C b/AMSS_NCKU_source/bssnEM_class.C index 4867600..e06b701 100644 --- a/AMSS_NCKU_source/bssnEM_class.C +++ b/AMSS_NCKU_source/bssnEM_class.C @@ -1221,7 +1221,25 @@ void bssnEM_class::Step(int lev, int YN) } #endif - // CA-RK4: skip post-prediction sync (redundant; ghost cells computable locally) + Parallel::Sync(GH->PatL[lev], SynchList_pre, Symmetry); + +#ifdef WithShell + if (lev == 0) + { + clock_t prev_clock, curr_clock; + if (myrank == 0) + curr_clock = clock(); + SH->Synch(SynchList_pre, Symmetry); + if (myrank == 0) + { + prev_clock = curr_clock; + curr_clock = clock(); + cout << " Shell stuff synchronization used " + << (double)(curr_clock - prev_clock) / ((double)CLOCKS_PER_SEC) + << " seconds! " << endl; + } + } +#endif // for black hole position if (BH_num > 0 && lev == GH->levels - 1) @@ -1665,28 +1683,25 @@ void bssnEM_class::Step(int lev, int YN) } #endif - // CA-RK4: only sync after last corrector (iter_count == 3); stages 1 & 2 are redundant - if (iter_count == 3) { - Parallel::Sync(GH->PatL[lev], SynchList_cor, Symmetry); + Parallel::Sync(GH->PatL[lev], SynchList_cor, Symmetry); #ifdef WithShell - if (lev == 0) + if (lev == 0) + { + clock_t prev_clock, curr_clock; + if (myrank == 0) + curr_clock = clock(); + SH->Synch(SynchList_cor, Symmetry); + if (myrank == 0) { - clock_t prev_clock, curr_clock; - if (myrank == 0) - curr_clock = clock(); - SH->Synch(SynchList_cor, Symmetry); - if (myrank == 0) - { - prev_clock = curr_clock; - curr_clock = clock(); - cout << " Shell stuff synchronization used " - << (double)(curr_clock - prev_clock) / ((double)CLOCKS_PER_SEC) - << " seconds! " << endl; - } + prev_clock = curr_clock; + curr_clock = clock(); + cout << " Shell stuff synchronization used " + << (double)(curr_clock - prev_clock) / ((double)CLOCKS_PER_SEC) + << " seconds! " << endl; } + } #endif - } // end CA-RK4 guard // for black hole position if (BH_num > 0 && lev == GH->levels - 1) { diff --git a/AMSS_NCKU_source/bssn_class.C b/AMSS_NCKU_source/bssn_class.C index 09881c6..eb84f8e 100644 --- a/AMSS_NCKU_source/bssn_class.C +++ b/AMSS_NCKU_source/bssn_class.C @@ -3349,7 +3349,27 @@ void bssn_class::Step(int lev, int YN) } #endif - // CA-RK4: skip post-prediction sync (redundant; ghost cells computable locally) + Parallel::AsyncSyncState async_pre; + Parallel::Sync_start(GH->PatL[lev], SynchList_pre, Symmetry, sync_cache_pre[lev], async_pre); + +#ifdef WithShell + if (lev == 0) + { + clock_t prev_clock, curr_clock; + if (myrank == 0) + curr_clock = clock(); + SH->Synch(SynchList_pre, Symmetry); + if (myrank == 0) + { + prev_clock = curr_clock; + curr_clock = clock(); + cout << " Shell stuff synchronization used " + << (double)(curr_clock - prev_clock) / ((double)CLOCKS_PER_SEC) + << " seconds! " << endl; + } + } +#endif + Parallel::Sync_finish(sync_cache_pre[lev], async_pre, SynchList_pre, Symmetry); #ifdef WithShell // Complete non-blocking error reduction and check @@ -3689,30 +3709,27 @@ void bssn_class::Step(int lev, int YN) } #endif - // CA-RK4: only sync after last corrector (iter_count == 3); stages 1 & 2 are redundant - if (iter_count == 3) { - Parallel::AsyncSyncState async_cor; - Parallel::Sync_start(GH->PatL[lev], SynchList_cor, Symmetry, sync_cache_cor[lev], async_cor); + Parallel::AsyncSyncState async_cor; + Parallel::Sync_start(GH->PatL[lev], SynchList_cor, Symmetry, sync_cache_cor[lev], async_cor); #ifdef WithShell - if (lev == 0) + if (lev == 0) + { + clock_t prev_clock, curr_clock; + if (myrank == 0) + curr_clock = clock(); + SH->Synch(SynchList_cor, Symmetry); + if (myrank == 0) { - clock_t prev_clock, curr_clock; - if (myrank == 0) - curr_clock = clock(); - SH->Synch(SynchList_cor, Symmetry); - if (myrank == 0) - { - prev_clock = curr_clock; - curr_clock = clock(); - cout << " Shell stuff synchronization used " - << (double)(curr_clock - prev_clock) / ((double)CLOCKS_PER_SEC) - << " seconds! " << endl; - } + prev_clock = curr_clock; + curr_clock = clock(); + cout << " Shell stuff synchronization used " + << (double)(curr_clock - prev_clock) / ((double)CLOCKS_PER_SEC) + << " seconds! " << endl; } + } #endif - Parallel::Sync_finish(sync_cache_cor[lev], async_cor, SynchList_cor, Symmetry); - } // end CA-RK4 guard + Parallel::Sync_finish(sync_cache_cor[lev], async_cor, SynchList_cor, Symmetry); #ifdef WithShell // Complete non-blocking error reduction and check