Review note (not part of the applied diff; `git apply`/`git am` ignore text
above the first `diff --git` header):

1. In every hunk below, the non-blocking reduction's send buffer `erh` is a
   block-scoped local:

       MPI_Request err_req;
       {
           int erh = ERROR;
           MPI_Iallreduce(&erh, &ERROR, ..., &err_req);
       }   // <-- erh destroyed here, reduction still in flight

   MPI-3 requires both buffers of a nonblocking collective to stay valid
   until the matching MPI_Wait completes. Hoist `int erh = ERROR;` out of
   the braces, next to the `MPI_Request` declaration, before merging.
2. `ERROR` is the receive buffer of the in-flight Iallreduce; confirm that
   Sync()/the shell synchronization code never reads or writes `ERROR`
   between MPI_Iallreduce and MPI_Wait, or the overlap is undefined.
3. MPI_Iallreduce requires an MPI-3 implementation; verify the minimum MPI
   version supported by the build.

This patch text was also whitespace-mangled (raw newlines inside string
literals and mid-statement); the reconstruction below repairs those breaks.

diff --git a/AMSS_NCKU_source/bssn_class.C b/AMSS_NCKU_source/bssn_class.C
index e14092b..553cc72 100644
--- a/AMSS_NCKU_source/bssn_class.C
+++ b/AMSS_NCKU_source/bssn_class.C
@@ -3302,22 +3302,11 @@ void bssn_class::Step(int lev, int YN)
 #endif
     }
 
-    // check error information (combined Patch + Shell Patch check)
+    // Non-blocking error reduction overlapped with Sync to hide Allreduce latency
+    MPI_Request err_req;
     {
         int erh = ERROR;
-        MPI_Allreduce(&erh, &ERROR, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
-    }
-
-    if (ERROR)
-    {
-        Parallel::Dump_Data(GH->PatL[lev], StateList, 0, PhysTime, dT_lev);
-        SH->Dump_Data(StateList, 0, PhysTime, dT_lev);
-        if (myrank == 0)
-        {
-            if (ErrorMonitor->outfile)
-                ErrorMonitor->outfile << "find NaN in state variables at t = " << PhysTime << ", lev = " << lev << endl;
-            MPI_Abort(MPI_COMM_WORLD, 1);
-        }
+        MPI_Iallreduce(&erh, &ERROR, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD, &err_req);
     }
 
 #endif
@@ -3334,11 +3323,25 @@ void bssn_class::Step(int lev, int YN)
         {
             prev_clock = curr_clock;
             curr_clock = clock();
-            cout << " Shell stuff synchronization used "
-                 << (double)(curr_clock - prev_clock) / ((double)CLOCKS_PER_SEC)
+            cout << " Shell stuff synchronization used "
+                 << (double)(curr_clock - prev_clock) / ((double)CLOCKS_PER_SEC)
                  << " seconds! " << endl;
         }
     }
+
+    // Complete non-blocking error reduction and check
+    MPI_Wait(&err_req, MPI_STATUS_IGNORE);
+    if (ERROR)
+    {
+        Parallel::Dump_Data(GH->PatL[lev], StateList, 0, PhysTime, dT_lev);
+        SH->Dump_Data(StateList, 0, PhysTime, dT_lev);
+        if (myrank == 0)
+        {
+            if (ErrorMonitor->outfile)
+                ErrorMonitor->outfile << "find NaN in state variables at t = " << PhysTime << ", lev = " << lev << endl;
+            MPI_Abort(MPI_COMM_WORLD, 1);
+        }
+    }
 #endif
 
 #if (MAPBH == 0)
@@ -3655,23 +3658,11 @@ void bssn_class::Step(int lev, int YN)
             sPp = sPp->next;
         }
     }
-    // check error information (combined Patch + Shell Patch check)
+    // Non-blocking error reduction overlapped with Sync to hide Allreduce latency
+    MPI_Request err_req_cor;
     {
         int erh = ERROR;
-        MPI_Allreduce(&erh, &ERROR, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
-    }
-    if (ERROR)
-    {
-        Parallel::Dump_Data(GH->PatL[lev], SynchList_pre, 0, PhysTime, dT_lev);
-        SH->Dump_Data(SynchList_pre, 0, PhysTime, dT_lev);
-        if (myrank == 0)
-        {
-            if (ErrorMonitor->outfile)
-                ErrorMonitor->outfile << "find NaN in RK4 substep#" << iter_count
-                                      << " variables at t = " << PhysTime
-                                      << ", lev = " << lev << endl;
-            MPI_Abort(MPI_COMM_WORLD, 1);
-        }
+        MPI_Iallreduce(&erh, &ERROR, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD, &err_req_cor);
     }
 
 #endif
@@ -3688,11 +3679,27 @@ void bssn_class::Step(int lev, int YN)
         {
             prev_clock = curr_clock;
             curr_clock = clock();
-            cout << " Shell stuff synchronization used "
-                 << (double)(curr_clock - prev_clock) / ((double)CLOCKS_PER_SEC)
+            cout << " Shell stuff synchronization used "
+                 << (double)(curr_clock - prev_clock) / ((double)CLOCKS_PER_SEC)
                  << " seconds! " << endl;
         }
     }
+
+    // Complete non-blocking error reduction and check
+    MPI_Wait(&err_req_cor, MPI_STATUS_IGNORE);
+    if (ERROR)
+    {
+        Parallel::Dump_Data(GH->PatL[lev], SynchList_pre, 0, PhysTime, dT_lev);
+        SH->Dump_Data(SynchList_pre, 0, PhysTime, dT_lev);
+        if (myrank == 0)
+        {
+            if (ErrorMonitor->outfile)
+                ErrorMonitor->outfile << "find NaN in RK4 substep#" << iter_count
+                                      << " variables at t = " << PhysTime
+                                      << ", lev = " << lev << endl;
+            MPI_Abort(MPI_COMM_WORLD, 1);
+        }
+    }
 #endif
 
 #if (MAPBH == 0)
@@ -4146,22 +4153,11 @@ void bssn_class::Step(int lev, int YN)
         }
 #endif
     }
-    // check error information (combined Patch + Shell Patch check)
+    // Non-blocking error reduction overlapped with Sync to hide Allreduce latency
+    MPI_Request err_req;
     {
         int erh = ERROR;
-        MPI_Allreduce(&erh, &ERROR, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
-    }
-    if (ERROR)
-    {
-        Parallel::Dump_Data(GH->PatL[lev], StateList, 0, PhysTime, dT_lev);
-        SH->Dump_Data(StateList, 0, PhysTime, dT_lev);
-        if (myrank == 0)
-        {
-            if (ErrorMonitor->outfile)
-                ErrorMonitor->outfile << "find NaN in state variables at t = " << PhysTime
-                                      << ", lev = " << lev << endl;
-            MPI_Abort(MPI_COMM_WORLD, 1);
-        }
+        MPI_Iallreduce(&erh, &ERROR, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD, &err_req);
     }
 
 #endif
@@ -4178,9 +4174,24 @@ void bssn_class::Step(int lev, int YN)
         {
             prev_clock = curr_clock;
             curr_clock = clock();
-            cout << " Shell stuff synchronization used "
-                 << (double)(curr_clock - prev_clock) / ((double)CLOCKS_PER_SEC)
-                 << " seconds! " << endl;
+            cout << " Shell stuff synchronization used "
+                 << (double)(curr_clock - prev_clock) / ((double)CLOCKS_PER_SEC)
+                 << " seconds! " << endl;
+        }
+    }
+
+    // Complete non-blocking error reduction and check
+    MPI_Wait(&err_req, MPI_STATUS_IGNORE);
+    if (ERROR)
+    {
+        Parallel::Dump_Data(GH->PatL[lev], StateList, 0, PhysTime, dT_lev);
+        SH->Dump_Data(StateList, 0, PhysTime, dT_lev);
+        if (myrank == 0)
+        {
+            if (ErrorMonitor->outfile)
+                ErrorMonitor->outfile << "find NaN in state variables at t = " << PhysTime
+                                      << ", lev = " << lev << endl;
+            MPI_Abort(MPI_COMM_WORLD, 1);
         }
     }
 #endif
@@ -4483,23 +4494,11 @@ void bssn_class::Step(int lev, int YN)
             sPp = sPp->next;
         }
     }
-    // check error information (combined Patch + Shell Patch check)
+    // Non-blocking error reduction overlapped with Sync to hide Allreduce latency
+    MPI_Request err_req_cor;
     {
         int erh = ERROR;
-        MPI_Allreduce(&erh, &ERROR, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
-    }
-    if (ERROR)
-    {
-        Parallel::Dump_Data(GH->PatL[lev], SynchList_pre, 0, PhysTime, dT_lev);
-        SH->Dump_Data(SynchList_pre, 0, PhysTime, dT_lev);
-        if (myrank == 0)
-        {
-            if (ErrorMonitor->outfile)
-                ErrorMonitor->outfile << "find NaN in RK4 substep#" << iter_count
-                                      << " variables at t = " << PhysTime
-                                      << ", lev = " << lev << endl;
-            MPI_Abort(MPI_COMM_WORLD, 1);
-        }
+        MPI_Iallreduce(&erh, &ERROR, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD, &err_req_cor);
     }
 
 #endif
@@ -4516,11 +4515,27 @@ void bssn_class::Step(int lev, int YN)
         {
             prev_clock = curr_clock;
             curr_clock = clock();
-            cout << " Shell stuff synchronization used "
-                 << (double)(curr_clock - prev_clock) / ((double)CLOCKS_PER_SEC)
+            cout << " Shell stuff synchronization used "
+                 << (double)(curr_clock - prev_clock) / ((double)CLOCKS_PER_SEC)
                  << " seconds! " << endl;
         }
     }
+
+    // Complete non-blocking error reduction and check
+    MPI_Wait(&err_req_cor, MPI_STATUS_IGNORE);
+    if (ERROR)
+    {
+        Parallel::Dump_Data(GH->PatL[lev], SynchList_pre, 0, PhysTime, dT_lev);
+        SH->Dump_Data(SynchList_pre, 0, PhysTime, dT_lev);
+        if (myrank == 0)
+        {
+            if (ErrorMonitor->outfile)
+                ErrorMonitor->outfile << "find NaN in RK4 substep#" << iter_count
+                                      << " variables at t = " << PhysTime
+                                      << ", lev = " << lev << endl;
+            MPI_Abort(MPI_COMM_WORLD, 1);
+        }
+    }
 #endif
     // for black hole position
     if (BH_num > 0 && lev == GH->levels - 1)
@@ -4886,11 +4901,19 @@ void bssn_class::Step(int lev, int YN)
 
     // misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"after Predictor rhs calculation");
 
-    // check error information
+    // Non-blocking error reduction overlapped with Sync to hide Allreduce latency
+    MPI_Request err_req;
     {
         int erh = ERROR;
-        MPI_Allreduce(&erh, &ERROR, 1, MPI_INT, MPI_SUM, GH->Commlev[lev]);
+        MPI_Iallreduce(&erh, &ERROR, 1, MPI_INT, MPI_SUM, GH->Commlev[lev], &err_req);
     }
+
+    // misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"before Predictor sync");
+
+    Parallel::Sync(GH->PatL[lev], SynchList_pre, Symmetry);
+
+    // Complete non-blocking error reduction and check
+    MPI_Wait(&err_req, MPI_STATUS_IGNORE);
     if (ERROR)
     {
         Parallel::Dump_Data(GH->PatL[lev], StateList, 0, PhysTime, dT_lev);
@@ -4902,10 +4925,6 @@ void bssn_class::Step(int lev, int YN)
         }
     }
 
-    // misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"before Predictor sync");
-
-    Parallel::Sync(GH->PatL[lev], SynchList_pre, Symmetry);
-
 #if (MAPBH == 0)
     // for black hole position
     if (BH_num > 0 && lev == GH->levels - 1)
@@ -5083,22 +5102,11 @@ void bssn_class::Step(int lev, int YN)
 
     // misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"before Corrector error check");
 
-    // check error information
+    // Non-blocking error reduction overlapped with Sync to hide Allreduce latency
+    MPI_Request err_req_cor;
     {
         int erh = ERROR;
-        MPI_Allreduce(&erh, &ERROR, 1, MPI_INT, MPI_SUM, GH->Commlev[lev]);
-    }
-    if (ERROR)
-    {
-        Parallel::Dump_Data(GH->PatL[lev], SynchList_pre, 0, PhysTime, dT_lev);
-        if (myrank == 0)
-        {
-            if (ErrorMonitor->outfile)
-                ErrorMonitor->outfile << "find NaN in RK4 substep#" << iter_count
-                                      << " variables at t = " << PhysTime
-                                      << ", lev = " << lev << endl;
-            MPI_Abort(MPI_COMM_WORLD, 1);
-        }
+        MPI_Iallreduce(&erh, &ERROR, 1, MPI_INT, MPI_SUM, GH->Commlev[lev], &err_req_cor);
     }
 
     // misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"before Corrector sync");
@@ -5107,6 +5115,21 @@ void bssn_class::Step(int lev, int YN)
 
     // misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"after Corrector sync");
 
+    // Complete non-blocking error reduction and check
+    MPI_Wait(&err_req_cor, MPI_STATUS_IGNORE);
+    if (ERROR)
+    {
+        Parallel::Dump_Data(GH->PatL[lev], SynchList_pre, 0, PhysTime, dT_lev);
+        if (myrank == 0)
+        {
+            if (ErrorMonitor->outfile)
+                ErrorMonitor->outfile << "find NaN in RK4 substep#" << iter_count
+                                      << " variables at t = " << PhysTime
+                                      << ", lev = " << lev << endl;
+            MPI_Abort(MPI_COMM_WORLD, 1);
+        }
+    }
+
 #if (MAPBH == 0)
     // for black hole position
     if (BH_num > 0 && lev == GH->levels - 1)
@@ -5390,21 +5413,11 @@ void bssn_class::SHStep()
 #if (PSTR == 1 || PSTR == 2)
     // misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"before Predictor's error check");
 #endif
-    // check error information
+    // Non-blocking error reduction overlapped with Synch to hide Allreduce latency
+    MPI_Request err_req;
     {
         int erh = ERROR;
-        MPI_Allreduce(&erh, &ERROR, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
-    }
-
-    if (ERROR)
-    {
-        SH->Dump_Data(StateList, 0, PhysTime, dT_lev);
-        if (myrank == 0)
-        {
-            if (ErrorMonitor->outfile)
-                ErrorMonitor->outfile << "find NaN in state variables on Shell Patches at t = " << PhysTime << endl;
-            MPI_Abort(MPI_COMM_WORLD, 1);
-        }
+        MPI_Iallreduce(&erh, &ERROR, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD, &err_req);
     }
 
     {
@@ -5416,12 +5429,25 @@ void bssn_class::SHStep()
         {
             prev_clock = curr_clock;
             curr_clock = clock();
-            cout << " Shell stuff synchronization used "
-                 << (double)(curr_clock - prev_clock) / ((double)CLOCKS_PER_SEC)
+            cout << " Shell stuff synchronization used "
+                 << (double)(curr_clock - prev_clock) / ((double)CLOCKS_PER_SEC)
                  << " seconds! " << endl;
         }
    }

+    // Complete non-blocking error reduction and check
+    MPI_Wait(&err_req, MPI_STATUS_IGNORE);
+    if (ERROR)
+    {
+        SH->Dump_Data(StateList, 0, PhysTime, dT_lev);
+        if (myrank == 0)
+        {
+            if (ErrorMonitor->outfile)
+                ErrorMonitor->outfile << "find NaN in state variables on Shell Patches at t = " << PhysTime << endl;
+            MPI_Abort(MPI_COMM_WORLD, 1);
+        }
+    }
+
     // corrector
     for (iter_count = 1; iter_count < 4; iter_count++)
     {
@@ -5564,21 +5590,11 @@ void bssn_class::SHStep()
             sPp = sPp->next;
         }
     }
-    // check error information
+    // Non-blocking error reduction overlapped with Synch to hide Allreduce latency
+    MPI_Request err_req_cor;
     {
         int erh = ERROR;
-        MPI_Allreduce(&erh, &ERROR, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
-    }
-    if (ERROR)
-    {
-        SH->Dump_Data(SynchList_pre, 0, PhysTime, dT_lev);
-        if (myrank == 0)
-        {
-            if (ErrorMonitor->outfile)
-                ErrorMonitor->outfile << "find NaN on Shell Patches in RK4 substep#" << iter_count
-                                      << " variables at t = " << PhysTime << endl;
-            MPI_Abort(MPI_COMM_WORLD, 1);
-        }
+        MPI_Iallreduce(&erh, &ERROR, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD, &err_req_cor);
    }

     {
@@ -5590,12 +5606,26 @@ void bssn_class::SHStep()
         {
             prev_clock = curr_clock;
             curr_clock = clock();
-            cout << " Shell stuff synchronization used "
-                 << (double)(curr_clock - prev_clock) / ((double)CLOCKS_PER_SEC)
+            cout << " Shell stuff synchronization used "
+                 << (double)(curr_clock - prev_clock) / ((double)CLOCKS_PER_SEC)
                  << " seconds! " << endl;
         }
     }
 
+    // Complete non-blocking error reduction and check
+    MPI_Wait(&err_req_cor, MPI_STATUS_IGNORE);
+    if (ERROR)
+    {
+        SH->Dump_Data(SynchList_pre, 0, PhysTime, dT_lev);
+        if (myrank == 0)
+        {
+            if (ErrorMonitor->outfile)
+                ErrorMonitor->outfile << "find NaN on Shell Patches in RK4 substep#" << iter_count
+                                      << " variables at t = " << PhysTime << endl;
+            MPI_Abort(MPI_COMM_WORLD, 1);
+        }
+    }
+
     sPp = SH->PatL;
     while (sPp)
     {