Optimize BSSN surface interpolation fast path

This commit is contained in:
2026-04-30 18:25:21 +08:00
parent a6483d013d
commit da4d56ccf7
3 changed files with 621 additions and 81 deletions

View File

@@ -45,6 +45,20 @@ using namespace std;
#include "derivatives.h"
#include "ricci_gamma.h"
namespace
{
  // Reports whether analysis timing output has been requested through the
  // AMSS_ANALYSIS_TIMING environment variable.
  //
  // The variable is read on the first call only; the outcome is cached in a
  // function-local static, so later changes to the environment are ignored.
  // Timing counts as enabled when the variable is set and atoi() of its
  // value is non-zero.
  bool amss_analysis_timing_enabled()
  {
    // -1: environment not inspected yet, 0: disabled, 1: enabled.
    static int cached_state = -1;

    if (cached_state >= 0)
      return cached_state == 1;

    const char *raw_value = getenv("AMSS_ANALYSIS_TIMING");
    if (!raw_value)
      cached_state = 0; // variable not set
    else if (atoi(raw_value) == 0)
      cached_state = 0; // set, but atoi() yields zero
    else
      cached_state = 1;

    return cached_state == 1;
  }
}
// Compile-time switch for per-timestep memory usage collection/printing.
// Default is OFF to reduce overhead in production runs.
#ifndef BSSN_ENABLE_MEM_USAGE_LOG
@@ -7942,6 +7956,10 @@ void bssn_class::AnalysisStuff(int lev, double dT_lev)
if (LastAnas >= AnasTime)
{
const bool analysis_timing = amss_analysis_timing_enabled();
const double t_analysis_start = analysis_timing ? MPI_Wtime() : 0.0;
double t_psi4_sec = 0.0;
double t_surface_sec = 0.0;
#ifdef Point_Psi4
#error "not support parallel levels yet"
// Gam_ijk and R_ij have been calculated in Interp_Constraint()
@@ -8134,7 +8152,12 @@ void bssn_class::AnalysisStuff(int lev, double dT_lev)
#endif
}
#else
{
const double t0 = analysis_timing ? MPI_Wtime() : 0.0;
Compute_Psi4(lev);
if (analysis_timing)
t_psi4_sec += MPI_Wtime() - t0;
}
#endif
double *RP, *IP, *RoutMAP;
int NN = 0;
@@ -8150,7 +8173,8 @@ void bssn_class::AnalysisStuff(int lev, double dT_lev)
bool shell_mass_prepared = false;
#endif
for (int i = 0; i < decn; i++)
{
{
const double t_surface0 = analysis_timing ? MPI_Wtime() : 0.0;
#ifdef Point_Psi4
Waveshell->surf_Wave(Rex, GH, SH,
phi, trK,
@@ -8243,6 +8267,8 @@ void bssn_class::AnalysisStuff(int lev, double dT_lev)
#endif
#endif
// misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"end surface integral");
if (analysis_timing)
t_surface_sec += MPI_Wtime() - t_surface0;
#endif
if (i == 0)
{
@@ -8275,6 +8301,13 @@ void bssn_class::AnalysisStuff(int lev, double dT_lev)
delete[] RP;
delete[] IP;
delete[] RoutMAP;
if (analysis_timing)
{
fprintf(stderr,
"[AMSS-ANALYSIS][rank %d] lev=%d psi4=%.6f surface=%.6f total_before_bh=%.6f detectors=%d modes=%d\n",
myrank, lev, t_psi4_sec, t_surface_sec,
MPI_Wtime() - t_analysis_start, decn, NN);
}
// black hole's position
{