From 738498cb28674a445b5103f28fb894af87193179 Mon Sep 17 00:00:00 2001 From: CGH0S7 <776459475@qq.com> Date: Mon, 9 Feb 2026 22:07:12 +0800 Subject: [PATCH] Optimize MPI communication in RestrictProlong and surface_integral Cache Sync in RestrictProlong: replace 11 basic Parallel::Sync() calls with Parallel::Sync_cached() across RestrictProlong, RestrictProlong_aux, and ProlongRestrict to avoid rebuilding grid segment lists every call. Merge paired MPI_Allreduce in surface_integral: combine 9 pairs of consecutive RP/IP Allreduce calls into single calls with count=2*NN. Merge scalar MPI_Allreduce in surf_MassPAng: combine 3 groups of 7 scalar Allreduce calls (mass + angular/linear momentum) into single calls with count=7. Co-Authored-By: Claude Opus 4.6 --- AMSS_NCKU_source/bssn_class.C | 50 ++++++--- AMSS_NCKU_source/bssn_class.h | 2 + AMSS_NCKU_source/surface_integral.C | 165 ++++++++++++++++++++-------- 3 files changed, 154 insertions(+), 63 deletions(-) diff --git a/AMSS_NCKU_source/bssn_class.C b/AMSS_NCKU_source/bssn_class.C index 7a1400e..927bff5 100644 --- a/AMSS_NCKU_source/bssn_class.C +++ b/AMSS_NCKU_source/bssn_class.C @@ -734,6 +734,8 @@ void bssn_class::Initialize() // Initialize sync caches (per-level, for predictor and corrector) sync_cache_pre = new Parallel::SyncCache[GH->levels]; sync_cache_cor = new Parallel::SyncCache[GH->levels]; + sync_cache_rp_coarse = new Parallel::SyncCache[GH->levels]; + sync_cache_rp_fine = new Parallel::SyncCache[GH->levels]; } //================================================================================================ @@ -998,6 +1000,18 @@ bssn_class::~bssn_class() sync_cache_cor[i].destroy(); delete[] sync_cache_cor; } + if (sync_cache_rp_coarse) + { + for (int i = 0; i < GH->levels; i++) + sync_cache_rp_coarse[i].destroy(); + delete[] sync_cache_rp_coarse; + } + if (sync_cache_rp_fine) + { + for (int i = 0; i < GH->levels; i++) + sync_cache_rp_fine[i].destroy(); + delete[] sync_cache_rp_fine; + } delete GH; #ifdef WithShell @@ -2199,7 +2213,7 @@ void bssn_class::Evolve(int Steps) GH->Regrid(Symmetry, BH_num, Porgbr, Porg0, SynchList_cor, OldStateList, StateList, SynchList_pre, fgt(PhysTime - dT_mon, StartTime, dT_mon / 2), ErrorMonitor); - for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); } + for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); sync_cache_rp_coarse[il].invalidate(); sync_cache_rp_fine[il].invalidate(); } #endif #if (REGLEV == 0 && (PSTR == 1 || PSTR == 2)) @@ -2415,7 +2429,7 @@ void bssn_class::RecursiveStep(int lev) GH->Regrid_Onelevel(lev, Symmetry, BH_num, Porgbr, Porg0, SynchList_cor, OldStateList, StateList, SynchList_pre, fgt(PhysTime - dT_lev, StartTime, dT_lev / 2), ErrorMonitor); - for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); } + for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); sync_cache_rp_coarse[il].invalidate(); sync_cache_rp_fine[il].invalidate(); } #endif } @@ -2594,7 +2608,7 @@ void bssn_class::ParallelStep() GH->Regrid_Onelevel(GH->mylev, Symmetry, BH_num, Porgbr, Porg0, SynchList_cor, OldStateList, StateList, SynchList_pre, fgt(PhysTime - dT_lev, StartTime, dT_lev / 2), ErrorMonitor); - for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); } + for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); sync_cache_rp_coarse[il].invalidate(); sync_cache_rp_fine[il].invalidate(); } #endif } @@ -2761,7 +2775,7 @@ void bssn_class::ParallelStep() GH->Regrid_Onelevel(lev + 1, Symmetry, BH_num, Porgbr, Porg0, SynchList_cor, OldStateList, StateList, SynchList_pre, fgt(PhysTime - dT_levp1, StartTime, dT_levp1 / 2), ErrorMonitor); - for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); } + for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); sync_cache_rp_coarse[il].invalidate(); sync_cache_rp_fine[il].invalidate(); } // a_stream.clear(); // a_stream.str(""); @@ -2776,7 +2790,7 @@ void bssn_class::ParallelStep() GH->Regrid_Onelevel(lev, Symmetry, BH_num, Porgbr, Porg0, SynchList_cor, OldStateList, StateList, SynchList_pre, fgt(PhysTime - dT_lev, StartTime, dT_lev / 2), ErrorMonitor); - for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); } + for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); sync_cache_rp_coarse[il].invalidate(); sync_cache_rp_fine[il].invalidate(); } // a_stream.clear(); // a_stream.str(""); @@ -2795,7 +2809,7 @@ void bssn_class::ParallelStep() GH->Regrid_Onelevel(lev - 1, Symmetry, BH_num, Porgbr, Porg0, SynchList_cor, OldStateList, StateList, SynchList_pre, fgt(PhysTime - dT_lev, StartTime, dT_levm1 / 2), ErrorMonitor); - for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); } + for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); sync_cache_rp_coarse[il].invalidate(); sync_cache_rp_fine[il].invalidate(); } // a_stream.clear(); // a_stream.str(""); @@ -2811,7 +2825,7 @@ void bssn_class::ParallelStep() GH->Regrid_Onelevel(lev - 1, Symmetry, BH_num, Porgbr, Porg0, SynchList_cor, OldStateList, StateList, SynchList_pre, fgt(PhysTime - dT_lev, StartTime, dT_levm1 / 2), ErrorMonitor); - for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); } + for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); sync_cache_rp_coarse[il].invalidate(); sync_cache_rp_fine[il].invalidate(); } // a_stream.clear(); // a_stream.str(""); @@ -5795,7 +5809,7 @@ void bssn_class::RestrictProlong(int lev, int YN, bool BB, // misc::tillherecheck(GH->Commlev[GH->mylev],GH->start_rank[GH->mylev],a_stream.str()); #endif - Parallel::Sync(GH->PatL[lev - 1], SynchList_pre, Symmetry); + Parallel::Sync_cached(GH->PatL[lev - 1], SynchList_pre, Symmetry, sync_cache_rp_coarse[lev]); #if (PSTR == 1 || PSTR == 2) // a_stream.clear(); @@ -5856,7 +5870,7 @@ void bssn_class::RestrictProlong(int lev, int YN, bool BB, // misc::tillherecheck(GH->Commlev[GH->mylev],GH->start_rank[GH->mylev],a_stream.str()); #endif - Parallel::Sync(GH->PatL[lev - 1], SL, Symmetry); + Parallel::Sync_cached(GH->PatL[lev - 1], SL, Symmetry, sync_cache_rp_coarse[lev]); #if (PSTR == 1 || PSTR == 2) // a_stream.clear(); @@ -5894,7 +5908,7 @@ void bssn_class::RestrictProlong(int lev, int YN, bool BB, #endif } - Parallel::Sync(GH->PatL[lev], SL, Symmetry); + Parallel::Sync_cached(GH->PatL[lev], SL, Symmetry, sync_cache_rp_fine[lev]); #if (PSTR == 1 || PSTR == 2) // a_stream.clear(); @@ -5952,7 +5966,7 @@ void bssn_class::RestrictProlong_aux(int lev, int YN, bool BB, Parallel::Restrict_bam(GH->PatL[lev - 1], GH->PatL[lev], SL, SynchList_pre, GH->rsul[lev], Symmetry); #endif - Parallel::Sync(GH->PatL[lev - 1], SynchList_pre, Symmetry); + Parallel::Sync_cached(GH->PatL[lev - 1], SynchList_pre, Symmetry, sync_cache_rp_coarse[lev]); #if (RPB == 0) Ppc = GH->PatL[lev - 1]; @@ -5984,7 +5998,7 @@ void bssn_class::RestrictProlong_aux(int lev, int YN, bool BB, Parallel::Restrict_bam(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, GH->rsul[lev], Symmetry); #endif - Parallel::Sync(GH->PatL[lev - 1], SL, Symmetry); + Parallel::Sync_cached(GH->PatL[lev - 1], SL, Symmetry, sync_cache_rp_coarse[lev]); #if (RPB == 0) Ppc = GH->PatL[lev - 1]; @@ -6008,7 +6022,7 @@ void bssn_class::RestrictProlong_aux(int lev, int YN, bool BB, #endif } - Parallel::Sync(GH->PatL[lev], SL, Symmetry); + Parallel::Sync_cached(GH->PatL[lev], SL, Symmetry, sync_cache_rp_fine[lev]); } } @@ -6059,7 +6073,7 @@ void bssn_class::RestrictProlong(int lev, int YN, bool BB) Parallel::Restrict_bam(GH->PatL[lev - 1], GH->PatL[lev], SynchList_cor, SynchList_pre, GH->rsul[lev], Symmetry); #endif - Parallel::Sync(GH->PatL[lev - 1], SynchList_pre, Symmetry); + Parallel::Sync_cached(GH->PatL[lev - 1], SynchList_pre, Symmetry, sync_cache_rp_coarse[lev]); #if (RPB == 0) Ppc = GH->PatL[lev - 1]; @@ -6093,7 +6107,7 @@ void bssn_class::RestrictProlong(int lev, int YN, bool BB) Parallel::Restrict_bam(GH->PatL[lev - 1], GH->PatL[lev], SynchList_cor, StateList, GH->rsul[lev], Symmetry); #endif - Parallel::Sync(GH->PatL[lev - 1], StateList, Symmetry); + Parallel::Sync_cached(GH->PatL[lev - 1], StateList, Symmetry, sync_cache_rp_coarse[lev]); #if (RPB == 0) Ppc = GH->PatL[lev - 1]; @@ -6117,7 +6131,7 @@ void bssn_class::RestrictProlong(int lev, int YN, bool BB) #endif } - Parallel::Sync(GH->PatL[lev], SynchList_cor, Symmetry); + Parallel::Sync_cached(GH->PatL[lev], SynchList_cor, Symmetry, sync_cache_rp_fine[lev]); } } @@ -6200,10 +6214,10 @@ void bssn_class::ProlongRestrict(int lev, int YN, bool BB) #else Parallel::Restrict_after(GH->PatL[lev - 1], GH->PatL[lev], SynchList_cor, StateList, Symmetry); #endif - Parallel::Sync(GH->PatL[lev - 1], StateList, Symmetry); + Parallel::Sync_cached(GH->PatL[lev - 1], StateList, Symmetry, sync_cache_rp_coarse[lev]); } - Parallel::Sync(GH->PatL[lev], SynchList_cor, Symmetry); + Parallel::Sync_cached(GH->PatL[lev], SynchList_cor, Symmetry, sync_cache_rp_fine[lev]); } } #undef MIXOUTB diff --git a/AMSS_NCKU_source/bssn_class.h b/AMSS_NCKU_source/bssn_class.h index fe3618b..db434e2 100644 --- a/AMSS_NCKU_source/bssn_class.h +++ b/AMSS_NCKU_source/bssn_class.h @@ -128,6 +128,8 @@ public: Parallel::SyncCache *sync_cache_pre; // per-level cache for predictor sync Parallel::SyncCache *sync_cache_cor; // per-level cache for corrector sync + Parallel::SyncCache *sync_cache_rp_coarse; // RestrictProlong sync on PatL[lev-1] + Parallel::SyncCache *sync_cache_rp_fine; // RestrictProlong sync on PatL[lev] monitor *ErrorMonitor, *Psi4Monitor, *BHMonitor, *MAPMonitor; monitor *ConVMonitor; diff --git a/AMSS_NCKU_source/surface_integral.C b/AMSS_NCKU_source/surface_integral.C index 410aee2..e725ae0 100644 --- a/AMSS_NCKU_source/surface_integral.C +++ b/AMSS_NCKU_source/surface_integral.C @@ -363,8 +363,17 @@ void surface_integral::surf_Wave(double rex, int lev, cgh *GH, var *Rpsi4, var * } //|------+ Communicate and sum the results from each processor. - MPI_Allreduce(RP_out, RP, NN, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); - MPI_Allreduce(IP_out, IP, NN, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + { + double *RPIP_out = new double[2 * NN]; + double *RPIP = new double[2 * NN]; + memcpy(RPIP_out, RP_out, NN * sizeof(double)); + memcpy(RPIP_out + NN, IP_out, NN * sizeof(double)); + MPI_Allreduce(RPIP_out, RPIP, 2 * NN, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + memcpy(RP, RPIP, NN * sizeof(double)); + memcpy(IP, RPIP + NN, NN * sizeof(double)); + delete[] RPIP_out; + delete[] RPIP; + } //|------= Free memory. @@ -556,8 +565,17 @@ void surface_integral::surf_Wave(double rex, int lev, cgh *GH, var *Rpsi4, var * } //|------+ Communicate and sum the results from each processor. - MPI_Allreduce(RP_out, RP, NN, MPI_DOUBLE, MPI_SUM, Comm_here); - MPI_Allreduce(IP_out, IP, NN, MPI_DOUBLE, MPI_SUM, Comm_here); + { + double *RPIP_out = new double[2 * NN]; + double *RPIP = new double[2 * NN]; + memcpy(RPIP_out, RP_out, NN * sizeof(double)); + memcpy(RPIP_out + NN, IP_out, NN * sizeof(double)); + MPI_Allreduce(RPIP_out, RPIP, 2 * NN, MPI_DOUBLE, MPI_SUM, Comm_here); + memcpy(RP, RPIP, NN * sizeof(double)); + memcpy(IP, RPIP + NN, NN * sizeof(double)); + delete[] RPIP_out; + delete[] RPIP; + } //|------= Free memory. @@ -735,8 +753,17 @@ void surface_integral::surf_Wave(double rex, int lev, ShellPatch *GH, var *Rpsi4 } //|------+ Communicate and sum the results from each processor. - MPI_Allreduce(RP_out, RP, NN, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); - MPI_Allreduce(IP_out, IP, NN, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + { + double *RPIP_out = new double[2 * NN]; + double *RPIP = new double[2 * NN]; + memcpy(RPIP_out, RP_out, NN * sizeof(double)); + memcpy(RPIP_out + NN, IP_out, NN * sizeof(double)); + MPI_Allreduce(RPIP_out, RPIP, 2 * NN, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + memcpy(RP, RPIP, NN * sizeof(double)); + memcpy(IP, RPIP + NN, NN * sizeof(double)); + delete[] RPIP_out; + delete[] RPIP; + } //|------= Free memory. @@ -984,8 +1011,17 @@ void surface_integral::surf_Wave(double rex, int lev, ShellPatch *GH, } //|------+ Communicate and sum the results from each processor. - MPI_Allreduce(RP_out, RP, NN, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); - MPI_Allreduce(IP_out, IP, NN, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + { + double *RPIP_out = new double[2 * NN]; + double *RPIP = new double[2 * NN]; + memcpy(RPIP_out, RP_out, NN * sizeof(double)); + memcpy(RPIP_out + NN, IP_out, NN * sizeof(double)); + MPI_Allreduce(RPIP_out, RPIP, 2 * NN, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + memcpy(RP, RPIP, NN * sizeof(double)); + memcpy(IP, RPIP + NN, NN * sizeof(double)); + delete[] RPIP_out; + delete[] RPIP; + } //|------= Free memory. @@ -1419,8 +1455,17 @@ void surface_integral::surf_Wave(double rex, int lev, ShellPatch *GH, } //|------+ Communicate and sum the results from each processor. - MPI_Allreduce(RP_out, RP, NN, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); - MPI_Allreduce(IP_out, IP, NN, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + { + double *RPIP_out = new double[2 * NN]; + double *RPIP = new double[2 * NN]; + memcpy(RPIP_out, RP_out, NN * sizeof(double)); + memcpy(RPIP_out + NN, IP_out, NN * sizeof(double)); + MPI_Allreduce(RPIP_out, RPIP, 2 * NN, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + memcpy(RP, RPIP, NN * sizeof(double)); + memcpy(IP, RPIP + NN, NN * sizeof(double)); + delete[] RPIP_out; + delete[] RPIP; + } //|------= Free memory. @@ -1854,8 +1899,17 @@ void surface_integral::surf_Wave(double rex, int lev, cgh *GH, } //|------+ Communicate and sum the results from each processor. - MPI_Allreduce(RP_out, RP, NN, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); - MPI_Allreduce(IP_out, IP, NN, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + { + double *RPIP_out = new double[2 * NN]; + double *RPIP = new double[2 * NN]; + memcpy(RPIP_out, RP_out, NN * sizeof(double)); + memcpy(RPIP_out + NN, IP_out, NN * sizeof(double)); + MPI_Allreduce(RPIP_out, RPIP, 2 * NN, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + memcpy(RP, RPIP, NN * sizeof(double)); + memcpy(IP, RPIP + NN, NN * sizeof(double)); + delete[] RPIP_out; + delete[] RPIP; + } //|------= Free memory. @@ -2040,8 +2094,17 @@ void surface_integral::surf_Wave(double rex, int lev, NullShellPatch2 *GH, var * } //|------+ Communicate and sum the results from each processor. - MPI_Allreduce(RP_out, RP, NN, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); - MPI_Allreduce(IP_out, IP, NN, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + { + double *RPIP_out = new double[2 * NN]; + double *RPIP = new double[2 * NN]; + memcpy(RPIP_out, RP_out, NN * sizeof(double)); + memcpy(RPIP_out + NN, IP_out, NN * sizeof(double)); + MPI_Allreduce(RPIP_out, RPIP, 2 * NN, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + memcpy(RP, RPIP, NN * sizeof(double)); + memcpy(IP, RPIP + NN, NN * sizeof(double)); + delete[] RPIP_out; + delete[] RPIP; + } //|------= Free memory. @@ -2226,8 +2289,17 @@ void surface_integral::surf_Wave(double rex, int lev, NullShellPatch *GH, var *R } //|------+ Communicate and sum the results from each processor. - MPI_Allreduce(RP_out, RP, NN, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); - MPI_Allreduce(IP_out, IP, NN, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + { + double *RPIP_out = new double[2 * NN]; + double *RPIP = new double[2 * NN]; + memcpy(RPIP_out, RP_out, NN * sizeof(double)); + memcpy(RPIP_out + NN, IP_out, NN * sizeof(double)); + MPI_Allreduce(RPIP_out, RPIP, 2 * NN, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + memcpy(RP, RPIP, NN * sizeof(double)); + memcpy(IP, RPIP + NN, NN * sizeof(double)); + delete[] RPIP_out; + delete[] RPIP; + } //|------= Free memory. @@ -2464,15 +2536,13 @@ void surface_integral::surf_MassPAng(double rex, int lev, cgh *GH, var *chi, var } } - MPI_Allreduce(&Mass_out, &mass, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); - - MPI_Allreduce(&ang_outx, &sx, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); - MPI_Allreduce(&ang_outy, &sy, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); - MPI_Allreduce(&ang_outz, &sz, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); - - MPI_Allreduce(&p_outx, &px, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); - MPI_Allreduce(&p_outy, &py, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); - MPI_Allreduce(&p_outz, &pz, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + { + double scalar_out[7] = {Mass_out, ang_outx, ang_outy, ang_outz, p_outx, p_outy, p_outz}; + double scalar_in[7]; + MPI_Allreduce(scalar_out, scalar_in, 7, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + mass = scalar_in[0]; sx = scalar_in[1]; sy = scalar_in[2]; sz = scalar_in[3]; + px = scalar_in[4]; py = scalar_in[5]; pz = scalar_in[6]; + } #ifdef GaussInt mass = mass * rex * rex * dphi * factor; @@ -2735,15 +2805,13 @@ void surface_integral::surf_MassPAng(double rex, int lev, cgh *GH, var *chi, var } } - MPI_Allreduce(&Mass_out, &mass, 1, MPI_DOUBLE, MPI_SUM, Comm_here); - - MPI_Allreduce(&ang_outx, &sx, 1, MPI_DOUBLE, MPI_SUM, Comm_here); - MPI_Allreduce(&ang_outy, &sy, 1, MPI_DOUBLE, MPI_SUM, Comm_here); - MPI_Allreduce(&ang_outz, &sz, 1, MPI_DOUBLE, MPI_SUM, Comm_here); - - MPI_Allreduce(&p_outx, &px, 1, MPI_DOUBLE, MPI_SUM, Comm_here); - MPI_Allreduce(&p_outy, &py, 1, MPI_DOUBLE, MPI_SUM, Comm_here); - MPI_Allreduce(&p_outz, &pz, 1, MPI_DOUBLE, MPI_SUM, Comm_here); + { + double scalar_out[7] = {Mass_out, ang_outx, ang_outy, ang_outz, p_outx, p_outy, p_outz}; + double scalar_in[7]; + MPI_Allreduce(scalar_out, scalar_in, 7, MPI_DOUBLE, MPI_SUM, Comm_here); + mass = scalar_in[0]; sx = scalar_in[1]; sy = scalar_in[2]; sz = scalar_in[3]; + px = scalar_in[4]; py = scalar_in[5]; pz = scalar_in[6]; + } #ifdef GaussInt mass = mass * rex * rex * dphi * factor; @@ -3020,15 +3088,13 @@ void surface_integral::surf_MassPAng(double rex, int lev, ShellPatch *GH, var *c } } - MPI_Allreduce(&Mass_out, &mass, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); - - MPI_Allreduce(&ang_outx, &sx, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); - MPI_Allreduce(&ang_outy, &sy, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); - MPI_Allreduce(&ang_outz, &sz, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); - - MPI_Allreduce(&p_outx, &px, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); - MPI_Allreduce(&p_outy, &py, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); - MPI_Allreduce(&p_outz, &pz, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + { + double scalar_out[7] = {Mass_out, ang_outx, ang_outy, ang_outz, p_outx, p_outy, p_outz}; + double scalar_in[7]; + MPI_Allreduce(scalar_out, scalar_in, 7, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + mass = scalar_in[0]; sx = scalar_in[1]; sy = scalar_in[2]; sz = scalar_in[3]; + px = scalar_in[4]; py = scalar_in[5]; pz = scalar_in[6]; + } #ifdef GaussInt mass = mass * rex * rex * dphi * factor; @@ -3607,8 +3673,17 @@ void surface_integral::surf_Wave(double rex, cgh *GH, ShellPatch *SH, } //|------+ Communicate and sum the results from each processor. - MPI_Allreduce(RP_out, RP, NN, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); - MPI_Allreduce(IP_out, IP, NN, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + { + double *RPIP_out = new double[2 * NN]; + double *RPIP = new double[2 * NN]; + memcpy(RPIP_out, RP_out, NN * sizeof(double)); + memcpy(RPIP_out + NN, IP_out, NN * sizeof(double)); + MPI_Allreduce(RPIP_out, RPIP, 2 * NN, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + memcpy(RP, RPIP, NN * sizeof(double)); + memcpy(IP, RPIP + NN, NN * sizeof(double)); + delete[] RPIP_out; + delete[] RPIP; + } //|------= Free memory.