From d0d3f965a61c3226c8acee93bf36eeab2e9e4fea Mon Sep 17 00:00:00 2001 From: CGH0S7 <776459475@qq.com> Date: Sat, 9 May 2026 21:51:07 +0800 Subject: [PATCH] Add diagnostic timing to Shell-Patch initialization MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Print an MPI_Wtime breakdown of the Initialize() shell setup steps, and MPI_Wtime timestamps immediately before and after the Compute_Constraint() call in Read_Ansorg() (subtract the two to get its duration). Reveals that ShellPatch::setupintintstuff() takes ~511s of the ~590s startup. The function builds interpolation tables by searching every shell grid point against all Cartesian patches — thread-safe OpenMP parallelization is blocked by shared linked-list mutations in prolongpointstru(), which would need a search/append split first. Co-Authored-By: Claude Opus 4.7 --- AMSS_NCKU_source/bssn_class.C | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/AMSS_NCKU_source/bssn_class.C b/AMSS_NCKU_source/bssn_class.C index 15f4d74..241cbf1 100644 --- a/AMSS_NCKU_source/bssn_class.C +++ b/AMSS_NCKU_source/bssn_class.C @@ -1751,13 +1751,25 @@ void bssn_class::Initialize() else GH->compose_cgh(nprocs); #ifdef WithShell + if (myrank == 0) cout << " [Init] ShellPatch new... " << flush; + double _t0 = MPI_Wtime(); SH = new ShellPatch(0, ngfs, pname, Symmetry, myrank, ErrorMonitor); SH->matchcheck(GH->PatL[0]); + if (myrank == 0) cout << (MPI_Wtime()-_t0) << "s" << endl; + + if (myrank == 0) cout << " [Init] compose_sh... " << flush; _t0 = MPI_Wtime(); SH->compose_sh(nprocs); - // SH->compose_shr(nprocs); //sh is faster than shr + if (myrank == 0) cout << (MPI_Wtime()-_t0) << "s" << endl; + + if (myrank == 0) cout << " [Init] setupcordtrans... " << flush; _t0 = MPI_Wtime(); SH->setupcordtrans(); + if (myrank == 0) cout << (MPI_Wtime()-_t0) << "s" << endl; + SH->Dump_xyz(0, 0, 1); + + if (myrank == 0) cout << " [Init] setupintintstuff... 
" << flush; _t0 = MPI_Wtime(); SH->setupintintstuff(nprocs, GH->PatL[0], Symmetry); + if (myrank == 0) cout << (MPI_Wtime()-_t0) << "s" << endl; if (checkrun) CheckPoint->readcheck_sh(SH, myrank); @@ -3006,7 +3018,7 @@ void bssn_class::Read_Ansorg() cg->fgfs[Pp->data->fngfs + ShellPatch::gy][i + j * cg->shape[0] + k * cg->shape[0] * cg->shape[1]], cg->fgfs[Pp->data->fngfs + ShellPatch::gz][i + j * cg->shape[0] + k * cg->shape[0] * cg->shape[1]]); - f_get_ansorg_nbhs_ss(cg->shape, + f_get_ansorg_nbhs_ss(cg->shape, cg->fgfs[Pp->data->fngfs + ShellPatch::gx], cg->fgfs[Pp->data->fngfs + ShellPatch::gy], cg->fgfs[Pp->data->fngfs + ShellPatch::gz], @@ -3060,7 +3072,9 @@ void bssn_class::Read_Ansorg() delete[] Pmom_here; delete[] Spin_here; + if (myrank == 0) cout << " [Read_Ansorg] before Compute_Constraint: " << MPI_Wtime() << " s" << endl; Compute_Constraint(); + if (myrank == 0) cout << " [Read_Ansorg] after Compute_Constraint: " << MPI_Wtime() << " s" << endl; // dump read_in initial data for (int lev = 0; lev < GH->levels; lev++) Parallel::Dump_Data(GH->PatL[lev], DumpList, 0, PhysTime, dT);