From 09ffdb553d69f426584500969e63c868a19f638a Mon Sep 17 00:00:00 2001 From: CGH0S7 <776459475@qq.com> Date: Fri, 6 Feb 2026 21:20:35 +0800 Subject: [PATCH] Eliminate hot-path heap allocations in TwoPunctures spectral solver Pre-allocate workspace buffers as class members to remove ~8M malloc/free pairs per Newton iteration from LineRelax, ThomasAlgorithm, JFD_times_dv, J_times_dv, chebft_Zeros, fourft, Derivatives_AB3, and F_of_v. Rewrite ThomasAlgorithm to operate in-place on input arrays. Results are bit-identical; no algorithmic changes. Co-Authored-By: Claude Opus 4.6 --- AMSS_NCKU_source/TwoPunctures.C | 276 ++++++++++++++++---------------- AMSS_NCKU_source/TwoPunctures.h | 27 ++++ 2 files changed, 167 insertions(+), 136 deletions(-) diff --git a/AMSS_NCKU_source/TwoPunctures.C b/AMSS_NCKU_source/TwoPunctures.C index a5b0c85..dbb424e 100644 --- a/AMSS_NCKU_source/TwoPunctures.C +++ b/AMSS_NCKU_source/TwoPunctures.C @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -60,13 +61,110 @@ TwoPunctures::TwoPunctures(double mp, double mm, double b, F = dvector(0, ntotal - 1); allocate_derivs(&u, ntotal); allocate_derivs(&v, ntotal); + + // Allocate workspace buffers for hot-path allocation elimination + int N = maximum3(n1, n2, n3); + int maxn = maximum2(n1, n2); + + // LineRelax_be workspace (sized for n2) + ws_diag_be = new double[n2]; + ws_e_be = new double[n2 - 1]; + ws_f_be = new double[n2 - 1]; + ws_b_be = new double[n2]; + ws_x_be = new double[n2]; + + // LineRelax_al workspace (sized for n1) + ws_diag_al = new double[n1]; + ws_e_al = new double[n1 - 1]; + ws_f_al = new double[n1 - 1]; + ws_b_al = new double[n1]; + ws_x_al = new double[n1]; + + // ThomasAlgorithm workspace (sized for max(n1,n2)) + ws_thomas_y = new double[maxn]; + + // JFD_times_dv workspace (sized for nvar) + ws_jfd_values = dvector(0, nvar - 1); + allocate_derivs(&ws_jfd_dU, nvar); + allocate_derivs(&ws_jfd_U, nvar); + + // chebft_Zeros workspace (sized for N+1) + ws_cheb_c = dvector(0, N); + + // fourft workspace (sized for N/2+1 each) + ws_four_a = dvector(0, N / 2); + ws_four_b = dvector(0, N / 2); + + // Derivatives_AB3 workspace + ws_deriv_p = dvector(0, N); + ws_deriv_dp = dvector(0, N); + ws_deriv_d2p = dvector(0, N); + ws_deriv_q = dvector(0, N); + ws_deriv_dq = dvector(0, N); + ws_deriv_r = dvector(0, N); + ws_deriv_dr = dvector(0, N); + ws_deriv_indx = ivector(0, N); + + // F_of_v workspace + ws_fov_sources = new double[n1 * n2 * n3]; + ws_fov_values = dvector(0, nvar - 1); + allocate_derivs(&ws_fov_U, nvar); + + // J_times_dv workspace + ws_jtdv_values = dvector(0, nvar - 1); + allocate_derivs(&ws_jtdv_dU, nvar); + allocate_derivs(&ws_jtdv_U, nvar); } TwoPunctures::~TwoPunctures() { + int const nvar = 1, n1 = npoints_A, n2 = npoints_B, n3 = npoints_phi; + int N = maximum3(n1, n2, n3); + free_dvector(F, 0, ntotal - 1); free_derivs(&u, ntotal); free_derivs(&v, ntotal); + + // Free workspace buffers + delete[] ws_diag_be; + delete[] ws_e_be; + delete[] ws_f_be; + delete[] ws_b_be; + delete[] ws_x_be; + + delete[] ws_diag_al; + delete[] ws_e_al; + delete[] ws_f_al; + delete[] ws_b_al; + delete[] ws_x_al; + + delete[] ws_thomas_y; + + free_dvector(ws_jfd_values, 0, nvar - 1); + free_derivs(&ws_jfd_dU, nvar); + free_derivs(&ws_jfd_U, nvar); + + free_dvector(ws_cheb_c, 0, N); + + free_dvector(ws_four_a, 0, N / 2); + free_dvector(ws_four_b, 0, N / 2); + + free_dvector(ws_deriv_p, 0, N); + free_dvector(ws_deriv_dp, 0, N); + free_dvector(ws_deriv_d2p, 0, N); + free_dvector(ws_deriv_q, 0, N); + free_dvector(ws_deriv_dq, 0, N); + free_dvector(ws_deriv_r, 0, N); + free_dvector(ws_deriv_dr, 0, N); + free_ivector(ws_deriv_indx, 0, N); + + delete[] ws_fov_sources; + free_dvector(ws_fov_values, 0, nvar - 1); + free_derivs(&ws_fov_U, nvar); + + free_dvector(ws_jtdv_values, 0, nvar - 1); + free_derivs(&ws_jtdv_dU, nvar); + free_derivs(&ws_jtdv_U, nvar); } void TwoPunctures::Solve() @@ -655,7 +753,7 @@ void TwoPunctures::chebft_Zeros(double u[], int n, int inv) int k, j, isignum; double fac, sum, Pion, *c; - c = dvector(0, n); + c = ws_cheb_c; Pion = Pi / n; if (inv == 0) { @@ -686,7 +784,6 @@ void TwoPunctures::chebft_Zeros(double u[], int n, int inv) } for (j = 0; j < n; j++) u[j] = c[j]; - free_dvector(c, 0, n); } /* --------------------------------------------------------------------------*/ @@ -774,8 +871,8 @@ void TwoPunctures::fourft(double *u, int N, int inv) double x, x1, fac, Pi_fac, *a, *b; M = N / 2; - a = dvector(0, M); - b = dvector(1, M); /* Actually: b=vector(1,M-1) but this is problematic if M=1*/ + a = ws_four_a; + b = ws_four_b - 1; /* offset to match dvector(1,M) indexing */ fac = 1. / M; Pi_fac = Pi * fac; if (inv == 0) @@ -824,8 +921,6 @@ void TwoPunctures::fourft(double *u, int N, int inv) iy = -iy; } } - free_dvector(a, 0, M); - free_dvector(b, 1, M); } /* -----------------------------------------*/ @@ -1118,14 +1213,14 @@ void TwoPunctures::Derivatives_AB3(int nvar, int n1, int n2, int n3, derivs v) double *p, *dp, *d2p, *q, *dq, *r, *dr; N = maximum3(n1, n2, n3); - p = dvector(0, N); - dp = dvector(0, N); - d2p = dvector(0, N); - q = dvector(0, N); - dq = dvector(0, N); - r = dvector(0, N); - dr = dvector(0, N); - indx = ivector(0, N); + p = ws_deriv_p; + dp = ws_deriv_dp; + d2p = ws_deriv_d2p; + q = ws_deriv_q; + dq = ws_deriv_dq; + r = ws_deriv_r; + dr = ws_deriv_dr; + indx = ws_deriv_indx; for (ivar = 0; ivar < nvar; ivar++) { @@ -1208,14 +1303,6 @@ void TwoPunctures::Derivatives_AB3(int nvar, int n1, int n2, int n3, derivs v) } } } - free_dvector(p, 0, N); - free_dvector(dp, 0, N); - free_dvector(d2p, 0, N); - free_dvector(q, 0, N); - free_dvector(dq, 0, N); - free_dvector(r, 0, N); - free_dvector(dr, 0, N); - free_ivector(indx, 0, N); } /* --------------------------------------------------------------------------*/ void TwoPunctures::Newton(int const nvar, int const n1, int const n2, int const n3, @@ -1284,10 +1371,11 @@ void TwoPunctures::F_of_v(int nvar, int n1, int n2, int n3, derivs v, double *F, derivs U; double *sources; - values = dvector(0, nvar - 1); - allocate_derivs(&U, nvar); + values = ws_fov_values; + U = ws_fov_U; - sources = (double *)calloc(n1 * n2 * n3, sizeof(double)); + sources = ws_fov_sources; + memset(sources, 0, n1 * n2 * n3 * sizeof(double)); if (0) { double *s_x, *s_y, *s_z; @@ -1442,9 +1530,6 @@ void TwoPunctures::F_of_v(int nvar, int n1, int n2, int n3, derivs v, double *F, { fclose(debugfile); } - free(sources); - free_dvector(values, 0, nvar - 1); - free_derivs(&U, nvar); } /* --------------------------------------------------------------------------*/ double TwoPunctures::norm_inf(double const *F, int const ntotal) @@ -1850,11 +1935,12 @@ void TwoPunctures::J_times_dv(int nvar, int n1, int n2, int n3, derivs dv, doubl Derivatives_AB3(nvar, n1, n2, n3, dv); + values = ws_jtdv_values; + dU = ws_jtdv_dU; + U = ws_jtdv_U; + for (i = 0; i < n1; i++) { - values = dvector(0, nvar - 1); - allocate_derivs(&dU, nvar); - allocate_derivs(&U, nvar); for (j = 0; j < n2; j++) { for (k = 0; k < n3; k++) @@ -1908,9 +1994,6 @@ void TwoPunctures::J_times_dv(int nvar, int n1, int n2, int n3, derivs dv, doubl } } } - free_dvector(values, 0, nvar - 1); - free_derivs(&dU, nvar); - free_derivs(&U, nvar); } } /* --------------------------------------------------------------------------*/ @@ -1957,17 +2040,11 @@ void TwoPunctures::LineRelax_be(double *dv, { int j, m, Ic, Ip, Im, col, ivar; - double *diag = new double[n2]; - double *e = new double[n2 - 1]; /* above diagonal */ - double *f = new double[n2 - 1]; /* below diagonal */ - double *b = new double[n2]; /* rhs */ - double *x = new double[n2]; /* solution vector */ - - // gsl_vector *diag = gsl_vector_alloc(n2); - // gsl_vector *e = gsl_vector_alloc(n2-1); /* above diagonal */ - // gsl_vector *f = gsl_vector_alloc(n2-1); /* below diagonal */ - // gsl_vector *b = gsl_vector_alloc(n2); /* rhs */ - // gsl_vector *x = gsl_vector_alloc(n2); /* solution vector */ + double *diag = ws_diag_be; + double *e = ws_e_be; /* above diagonal */ + double *f = ws_f_be; /* below diagonal */ + double *b = ws_b_be; /* rhs */ + double *x = ws_x_be; /* solution vector */ for (ivar = 0; ivar < nvar; ivar++) { @@ -1977,62 +2054,35 @@ void TwoPunctures::LineRelax_be(double *dv, } diag[n2 - 1] = 0; - // gsl_vector_set_zero(diag); - // gsl_vector_set_zero(e); - // gsl_vector_set_zero(f); for (j = 0; j < n2; j++) { Ip = Index(ivar, i, j + 1, k, nvar, n1, n2, n3); Ic = Index(ivar, i, j, k, nvar, n1, n2, n3); Im = Index(ivar, i, j - 1, k, nvar, n1, n2, n3); b[j] = rhs[Ic]; - // gsl_vector_set(b,j,rhs[Ic]); for (m = 0; m < ncols[Ic]; m++) { col = cols[Ic][m]; if (col != Ip && col != Ic && col != Im) b[j] -= JFD[Ic][m] * dv[col]; - // *gsl_vector_ptr(b, j) -= JFD[Ic][m] * dv[col]; else { if (col == Im && j > 0) f[j - 1] = JFD[Ic][m]; - // gsl_vector_set(f,j-1,JFD[Ic][m]); if (col == Ic) diag[j] = JFD[Ic][m]; - // gsl_vector_set(diag,j,JFD[Ic][m]); if (col == Ip && j < n2 - 1) e[j] = JFD[Ic][m]; - // gsl_vector_set(e,j,JFD[Ic][m]); } } } - // A x = b - // A = ( d_0 e_0 0 0 ) - // ( f_0 d_1 e_1 0 ) - // ( 0 f_1 d_2 e_2 ) - // ( 0 0 f_2 d_3 ) - // ThomasAlgorithm(n2, f, diag, e, x, b); - // gsl_linalg_solve_tridiag(diag, e, f, b, x); for (j = 0; j < n2; j++) { Ic = Index(ivar, i, j, k, nvar, n1, n2, n3); dv[Ic] = x[j]; - // dv[Ic] = gsl_vector_get(x, j); } } - - delete[] diag; - delete[] e; - delete[] f; - delete[] b; - delete[] x; - // gsl_vector_free(diag); - // gsl_vector_free(e); - // gsl_vector_free(f); - // gsl_vector_free(b); - // gsl_vector_free(x); } /* --------------------------------------------------------------------------*/ void TwoPunctures::JFD_times_dv(int i, int j, int k, int nvar, int n1, int n2, @@ -2049,8 +2099,8 @@ void TwoPunctures::JFD_times_dv(int i, int j, int k, int nvar, int n1, int n2, ha, ga, ga2, hb, gb, gb2, hp, gp, gp2, gagb, gagp, gbgp; derivs dU, U; - allocate_derivs(&dU, nvar); - allocate_derivs(&U, nvar); + dU = ws_jfd_dU; + U = ws_jfd_U; if (k < 0) k = k + n3; @@ -2168,9 +2218,6 @@ void TwoPunctures::JFD_times_dv(int i, int j, int k, int nvar, int n1, int n2, LinEquations(A, B, X, R, x, r, phi, y, z, dU, U, values); for (ivar = 0; ivar < nvar; ivar++) values[ivar] *= FAC; - - free_derivs(&dU, nvar); - free_derivs(&U, nvar); } #undef FAC /*-----------------------------------------------------------*/ @@ -2202,17 +2249,11 @@ void TwoPunctures::LineRelax_al(double *dv, { int i, m, Ic, Ip, Im, col, ivar; - double *diag = new double[n1]; - double *e = new double[n1 - 1]; /* above diagonal */ - double *f = new double[n1 - 1]; /* below diagonal */ - double *b = new double[n1]; /* rhs */ - double *x = new double[n1]; /* solution vector */ - - // gsl_vector *diag = gsl_vector_alloc(n1); - // gsl_vector *e = gsl_vector_alloc(n1-1); /* above diagonal */ - // gsl_vector *f = gsl_vector_alloc(n1-1); /* below diagonal */ - // gsl_vector *b = gsl_vector_alloc(n1); /* rhs */ - // gsl_vector *x = gsl_vector_alloc(n1); /* solution vector */ + double *diag = ws_diag_al; + double *e = ws_e_al; /* above diagonal */ + double *f = ws_f_al; /* below diagonal */ + double *b = ws_b_al; /* rhs */ + double *x = ws_x_al; /* solution vector */ for (ivar = 0; ivar < nvar; ivar++) { @@ -2222,57 +2263,35 @@ void TwoPunctures::LineRelax_al(double *dv, } diag[n1 - 1] = 0; - // gsl_vector_set_zero(diag); - // gsl_vector_set_zero(e); - // gsl_vector_set_zero(f); for (i = 0; i < n1; i++) { Ip = Index(ivar, i + 1, j, k, nvar, n1, n2, n3); Ic = Index(ivar, i, j, k, nvar, n1, n2, n3); Im = Index(ivar, i - 1, j, k, nvar, n1, n2, n3); b[i] = rhs[Ic]; - // gsl_vector_set(b,i,rhs[Ic]); for (m = 0; m < ncols[Ic]; m++) { col = cols[Ic][m]; if (col != Ip && col != Ic && col != Im) b[i] -= JFD[Ic][m] * dv[col]; - // *gsl_vector_ptr(b, i) -= JFD[Ic][m] * dv[col]; else { if (col == Im && i > 0) f[i - 1] = JFD[Ic][m]; - // gsl_vector_set(f,i-1,JFD[Ic][m]); if (col == Ic) diag[i] = JFD[Ic][m]; - // gsl_vector_set(diag,i,JFD[Ic][m]); if (col == Ip && i < n1 - 1) e[i] = JFD[Ic][m]; - // gsl_vector_set(e,i,JFD[Ic][m]); } } } ThomasAlgorithm(n1, f, diag, e, x, b); - // gsl_linalg_solve_tridiag(diag, e, f, b, x); for (i = 0; i < n1; i++) { Ic = Index(ivar, i, j, k, nvar, n1, n2, n3); dv[Ic] = x[i]; - // dv[Ic] = gsl_vector_get(x, i); } } - - delete[] diag; - delete[] e; - delete[] f; - delete[] b; - delete[] x; - - // gsl_vector_free(diag); - // gsl_vector_free(e); - // gsl_vector_free(f); - // gsl_vector_free(b); - // gsl_vector_free(x); } /* -------------------------------------------------------------------------*/ // a[N], b[N-1], c[N-1], x[N], q[N] @@ -2284,44 +2303,29 @@ void TwoPunctures::LineRelax_al(double *dv, //"Parallel Scientific Computing in C++ and MPI" P361 void TwoPunctures::ThomasAlgorithm(int N, double *b, double *a, double *c, double *x, double *q) { + // In-place Thomas algorithm: uses a[] as d workspace, b[] as l workspace. + // c[] is already u (above-diagonal). ws_thomas_y is pre-allocated workspace. int i; - double *l, *u, *d, *y; - l = new double[N - 1]; - u = new double[N - 1]; - d = new double[N]; - y = new double[N]; - - /* LU Decomposition */ - d[0] = a[0]; - u[0] = c[0]; + double *y = ws_thomas_y; + /* LU Decomposition (in-place: a becomes d, b becomes l) */ for (i = 0; i < N - 2; i++) { - l[i] = b[i] / d[i]; - d[i + 1] = a[i + 1] - l[i] * u[i]; - u[i + 1] = c[i + 1]; + b[i] = b[i] / a[i]; + a[i + 1] = a[i + 1] - b[i] * c[i]; } - - l[N - 2] = b[N - 2] / d[N - 2]; - d[N - 1] = a[N - 1] - l[N - 2] * u[N - 2]; + b[N - 2] = b[N - 2] / a[N - 2]; + a[N - 1] = a[N - 1] - b[N - 2] * c[N - 2]; /* Forward Substitution [L][y] = [q] */ y[0] = q[0]; for (i = 1; i < N; i++) - y[i] = q[i] - l[i - 1] * y[i - 1]; + y[i] = q[i] - b[i - 1] * y[i - 1]; /* Backward Substitution [U][x] = [y] */ - x[N - 1] = y[N - 1] / d[N - 1]; - + x[N - 1] = y[N - 1] / a[N - 1]; for (i = N - 2; i >= 0; i--) - x[i] = (y[i] - u[i] * x[i + 1]) / d[i]; - - delete[] l; - delete[] u; - delete[] d; - delete[] y; - - return; + x[i] = (y[i] - c[i] * x[i + 1]) / a[i]; } // --------------------------------------------------------------------------*/ // Calculates the value of v at an arbitrary position (x,y,z) if the spectral coefficients are know*/*/ diff --git a/AMSS_NCKU_source/TwoPunctures.h b/AMSS_NCKU_source/TwoPunctures.h index 22fb359..6233d59 100644 --- a/AMSS_NCKU_source/TwoPunctures.h +++ b/AMSS_NCKU_source/TwoPunctures.h @@ -42,6 +42,33 @@ private: int ntotal; + // Pre-allocated workspace buffers for hot-path allocation elimination + // LineRelax_be workspace (sized for n2) + double *ws_diag_be, *ws_e_be, *ws_f_be, *ws_b_be, *ws_x_be; + // LineRelax_al workspace (sized for n1) + double *ws_diag_al, *ws_e_al, *ws_f_al, *ws_b_al, *ws_x_al; + // ThomasAlgorithm workspace (sized for max(n1,n2)) + double *ws_thomas_y; + // JFD_times_dv workspace (sized for nvar) + double *ws_jfd_values; + derivs ws_jfd_dU, ws_jfd_U; + // chebft_Zeros workspace (sized for max(n1,n2,n3)+1) + double *ws_cheb_c; + // fourft workspace (sized for max(n1,n2,n3)/2+1 each) + double *ws_four_a, *ws_four_b; + // Derivatives_AB3 workspace + double *ws_deriv_p, *ws_deriv_dp, *ws_deriv_d2p; + double *ws_deriv_q, *ws_deriv_dq; + double *ws_deriv_r, *ws_deriv_dr; + int *ws_deriv_indx; + // F_of_v workspace + double *ws_fov_sources; + double *ws_fov_values; + derivs ws_fov_U; + // J_times_dv workspace + double *ws_jtdv_values; + derivs ws_jtdv_dU, ws_jtdv_U; + struct parameters { int nvar, n1, n2, n3;