From f5ed23d6872d5a6aa96016666928d2dd7bde349c Mon Sep 17 00:00:00 2001 From: ianchb Date: Sat, 7 Feb 2026 10:35:05 +0800 Subject: [PATCH] Revert "Eliminate hot-path heap allocations in TwoPunctures spectral solver" This reverts commit 09ffdb553d69f426584500969e63c868a19f638a. --- AMSS_NCKU_source/TwoPunctures.C | 276 ++++++++++++++++---------------- AMSS_NCKU_source/TwoPunctures.h | 27 ---- 2 files changed, 136 insertions(+), 167 deletions(-) diff --git a/AMSS_NCKU_source/TwoPunctures.C b/AMSS_NCKU_source/TwoPunctures.C index dbb424e..a5b0c85 100644 --- a/AMSS_NCKU_source/TwoPunctures.C +++ b/AMSS_NCKU_source/TwoPunctures.C @@ -5,7 +5,6 @@ #include #include #include -#include #include #include #include @@ -61,110 +60,13 @@ TwoPunctures::TwoPunctures(double mp, double mm, double b, F = dvector(0, ntotal - 1); allocate_derivs(&u, ntotal); allocate_derivs(&v, ntotal); - - // Allocate workspace buffers for hot-path allocation elimination - int N = maximum3(n1, n2, n3); - int maxn = maximum2(n1, n2); - - // LineRelax_be workspace (sized for n2) - ws_diag_be = new double[n2]; - ws_e_be = new double[n2 - 1]; - ws_f_be = new double[n2 - 1]; - ws_b_be = new double[n2]; - ws_x_be = new double[n2]; - - // LineRelax_al workspace (sized for n1) - ws_diag_al = new double[n1]; - ws_e_al = new double[n1 - 1]; - ws_f_al = new double[n1 - 1]; - ws_b_al = new double[n1]; - ws_x_al = new double[n1]; - - // ThomasAlgorithm workspace (sized for max(n1,n2)) - ws_thomas_y = new double[maxn]; - - // JFD_times_dv workspace (sized for nvar) - ws_jfd_values = dvector(0, nvar - 1); - allocate_derivs(&ws_jfd_dU, nvar); - allocate_derivs(&ws_jfd_U, nvar); - - // chebft_Zeros workspace (sized for N+1) - ws_cheb_c = dvector(0, N); - - // fourft workspace (sized for N/2+1 each) - ws_four_a = dvector(0, N / 2); - ws_four_b = dvector(0, N / 2); - - // Derivatives_AB3 workspace - ws_deriv_p = dvector(0, N); - ws_deriv_dp = dvector(0, N); - ws_deriv_d2p = dvector(0, N); - ws_deriv_q = dvector(0, N); - ws_deriv_dq = dvector(0, N); - ws_deriv_r = dvector(0, N); - ws_deriv_dr = dvector(0, N); - ws_deriv_indx = ivector(0, N); - - // F_of_v workspace - ws_fov_sources = new double[n1 * n2 * n3]; - ws_fov_values = dvector(0, nvar - 1); - allocate_derivs(&ws_fov_U, nvar); - - // J_times_dv workspace - ws_jtdv_values = dvector(0, nvar - 1); - allocate_derivs(&ws_jtdv_dU, nvar); - allocate_derivs(&ws_jtdv_U, nvar); } TwoPunctures::~TwoPunctures() { - int const nvar = 1, n1 = npoints_A, n2 = npoints_B, n3 = npoints_phi; - int N = maximum3(n1, n2, n3); - free_dvector(F, 0, ntotal - 1); free_derivs(&u, ntotal); free_derivs(&v, ntotal); - - // Free workspace buffers - delete[] ws_diag_be; - delete[] ws_e_be; - delete[] ws_f_be; - delete[] ws_b_be; - delete[] ws_x_be; - - delete[] ws_diag_al; - delete[] ws_e_al; - delete[] ws_f_al; - delete[] ws_b_al; - delete[] ws_x_al; - - delete[] ws_thomas_y; - - free_dvector(ws_jfd_values, 0, nvar - 1); - free_derivs(&ws_jfd_dU, nvar); - free_derivs(&ws_jfd_U, nvar); - - free_dvector(ws_cheb_c, 0, N); - - free_dvector(ws_four_a, 0, N / 2); - free_dvector(ws_four_b, 0, N / 2); - - free_dvector(ws_deriv_p, 0, N); - free_dvector(ws_deriv_dp, 0, N); - free_dvector(ws_deriv_d2p, 0, N); - free_dvector(ws_deriv_q, 0, N); - free_dvector(ws_deriv_dq, 0, N); - free_dvector(ws_deriv_r, 0, N); - free_dvector(ws_deriv_dr, 0, N); - free_ivector(ws_deriv_indx, 0, N); - - delete[] ws_fov_sources; - free_dvector(ws_fov_values, 0, nvar - 1); - free_derivs(&ws_fov_U, nvar); - - free_dvector(ws_jtdv_values, 0, nvar - 1); - free_derivs(&ws_jtdv_dU, nvar); - free_derivs(&ws_jtdv_U, nvar); } void TwoPunctures::Solve() @@ -753,7 +655,7 @@ void TwoPunctures::chebft_Zeros(double u[], int n, int inv) int k, j, isignum; double fac, sum, Pion, *c; - c = ws_cheb_c; + c = dvector(0, n); Pion = Pi / n; if (inv == 0) { @@ -784,6 +686,7 @@ void TwoPunctures::chebft_Zeros(double u[], int n, int inv) } for (j = 0; j < n; j++) u[j] = c[j]; + free_dvector(c, 0, n); } /* --------------------------------------------------------------------------*/ @@ -871,8 +774,8 @@ void TwoPunctures::fourft(double *u, int N, int inv) double x, x1, fac, Pi_fac, *a, *b; M = N / 2; - a = ws_four_a; - b = ws_four_b - 1; /* offset to match dvector(1,M) indexing */ + a = dvector(0, M); + b = dvector(1, M); /* Actually: b=vector(1,M-1) but this is problematic if M=1*/ fac = 1. / M; Pi_fac = Pi * fac; if (inv == 0) @@ -921,6 +824,8 @@ void TwoPunctures::fourft(double *u, int N, int inv) iy = -iy; } } + free_dvector(a, 0, M); + free_dvector(b, 1, M); } /* -----------------------------------------*/ @@ -1213,14 +1118,14 @@ void TwoPunctures::Derivatives_AB3(int nvar, int n1, int n2, int n3, derivs v) double *p, *dp, *d2p, *q, *dq, *r, *dr; N = maximum3(n1, n2, n3); - p = ws_deriv_p; - dp = ws_deriv_dp; - d2p = ws_deriv_d2p; - q = ws_deriv_q; - dq = ws_deriv_dq; - r = ws_deriv_r; - dr = ws_deriv_dr; - indx = ws_deriv_indx; + p = dvector(0, N); + dp = dvector(0, N); + d2p = dvector(0, N); + q = dvector(0, N); + dq = dvector(0, N); + r = dvector(0, N); + dr = dvector(0, N); + indx = ivector(0, N); for (ivar = 0; ivar < nvar; ivar++) { @@ -1303,6 +1208,14 @@ void TwoPunctures::Derivatives_AB3(int nvar, int n1, int n2, int n3, derivs v) } } } + free_dvector(p, 0, N); + free_dvector(dp, 0, N); + free_dvector(d2p, 0, N); + free_dvector(q, 0, N); + free_dvector(dq, 0, N); + free_dvector(r, 0, N); + free_dvector(dr, 0, N); + free_ivector(indx, 0, N); } /* --------------------------------------------------------------------------*/ void TwoPunctures::Newton(int const nvar, int const n1, int const n2, int const n3, @@ -1371,11 +1284,10 @@ void TwoPunctures::F_of_v(int nvar, int n1, int n2, int n3, derivs v, double *F, derivs U; double *sources; - values = ws_fov_values; - U = ws_fov_U; + values = dvector(0, nvar - 1); + allocate_derivs(&U, nvar); - sources = ws_fov_sources; - memset(sources, 0, n1 * n2 * n3 * sizeof(double)); + sources = (double *)calloc(n1 * n2 * n3, sizeof(double)); if (0) { double *s_x, *s_y, *s_z; @@ -1530,6 +1442,9 @@ void TwoPunctures::F_of_v(int nvar, int n1, int n2, int n3, derivs v, double *F, { fclose(debugfile); } + free(sources); + free_dvector(values, 0, nvar - 1); + free_derivs(&U, nvar); } /* --------------------------------------------------------------------------*/ double TwoPunctures::norm_inf(double const *F, int const ntotal) @@ -1935,12 +1850,11 @@ void TwoPunctures::J_times_dv(int nvar, int n1, int n2, int n3, derivs dv, doubl Derivatives_AB3(nvar, n1, n2, n3, dv); - values = ws_jtdv_values; - dU = ws_jtdv_dU; - U = ws_jtdv_U; - for (i = 0; i < n1; i++) { + values = dvector(0, nvar - 1); + allocate_derivs(&dU, nvar); + allocate_derivs(&U, nvar); for (j = 0; j < n2; j++) { for (k = 0; k < n3; k++) @@ -1994,6 +1908,9 @@ void TwoPunctures::J_times_dv(int nvar, int n1, int n2, int n3, derivs dv, doubl } } } + free_dvector(values, 0, nvar - 1); + free_derivs(&dU, nvar); + free_derivs(&U, nvar); } } /* --------------------------------------------------------------------------*/ @@ -2040,11 +1957,17 @@ void TwoPunctures::LineRelax_be(double *dv, { int j, m, Ic, Ip, Im, col, ivar; - double *diag = ws_diag_be; - double *e = ws_e_be; /* above diagonal */ - double *f = ws_f_be; /* below diagonal */ - double *b = ws_b_be; /* rhs */ - double *x = ws_x_be; /* solution vector */ + double *diag = new double[n2]; + double *e = new double[n2 - 1]; /* above diagonal */ + double *f = new double[n2 - 1]; /* below diagonal */ + double *b = new double[n2]; /* rhs */ + double *x = new double[n2]; /* solution vector */ + + // gsl_vector *diag = gsl_vector_alloc(n2); + // gsl_vector *e = gsl_vector_alloc(n2-1); /* above diagonal */ + // gsl_vector *f = gsl_vector_alloc(n2-1); /* below diagonal */ + // gsl_vector *b = gsl_vector_alloc(n2); /* rhs */ + // gsl_vector *x = gsl_vector_alloc(n2); /* solution vector */ for (ivar = 0; ivar < nvar; ivar++) { @@ -2054,35 +1977,62 @@ void TwoPunctures::LineRelax_be(double *dv, } diag[n2 - 1] = 0; + // gsl_vector_set_zero(diag); + // gsl_vector_set_zero(e); + // gsl_vector_set_zero(f); for (j = 0; j < n2; j++) { Ip = Index(ivar, i, j + 1, k, nvar, n1, n2, n3); Ic = Index(ivar, i, j, k, nvar, n1, n2, n3); Im = Index(ivar, i, j - 1, k, nvar, n1, n2, n3); b[j] = rhs[Ic]; + // gsl_vector_set(b,j,rhs[Ic]); for (m = 0; m < ncols[Ic]; m++) { col = cols[Ic][m]; if (col != Ip && col != Ic && col != Im) b[j] -= JFD[Ic][m] * dv[col]; + // *gsl_vector_ptr(b, j) -= JFD[Ic][m] * dv[col]; else { if (col == Im && j > 0) f[j - 1] = JFD[Ic][m]; + // gsl_vector_set(f,j-1,JFD[Ic][m]); if (col == Ic) diag[j] = JFD[Ic][m]; + // gsl_vector_set(diag,j,JFD[Ic][m]); if (col == Ip && j < n2 - 1) e[j] = JFD[Ic][m]; + // gsl_vector_set(e,j,JFD[Ic][m]); } } } + // A x = b + // A = ( d_0 e_0 0 0 ) + // ( f_0 d_1 e_1 0 ) + // ( 0 f_1 d_2 e_2 ) + // ( 0 0 f_2 d_3 ) + // ThomasAlgorithm(n2, f, diag, e, x, b); + // gsl_linalg_solve_tridiag(diag, e, f, b, x); for (j = 0; j < n2; j++) { Ic = Index(ivar, i, j, k, nvar, n1, n2, n3); dv[Ic] = x[j]; + // dv[Ic] = gsl_vector_get(x, j); } } + + delete[] diag; + delete[] e; + delete[] f; + delete[] b; + delete[] x; + // gsl_vector_free(diag); + // gsl_vector_free(e); + // gsl_vector_free(f); + // gsl_vector_free(b); + // gsl_vector_free(x); } /* --------------------------------------------------------------------------*/ void TwoPunctures::JFD_times_dv(int i, int j, int k, int nvar, int n1, int n2, @@ -2099,8 +2049,8 @@ void TwoPunctures::JFD_times_dv(int i, int j, int k, int nvar, int n1, int n2, ha, ga, ga2, hb, gb, gb2, hp, gp, gp2, gagb, gagp, gbgp; derivs dU, U; - dU = ws_jfd_dU; - U = ws_jfd_U; + allocate_derivs(&dU, nvar); + allocate_derivs(&U, nvar); if (k < 0) k = k + n3; @@ -2218,6 +2168,9 @@ void TwoPunctures::JFD_times_dv(int i, int j, int k, int nvar, int n1, int n2, LinEquations(A, B, X, R, x, r, phi, y, z, dU, U, values); for (ivar = 0; ivar < nvar; ivar++) values[ivar] *= FAC; + + free_derivs(&dU, nvar); + free_derivs(&U, nvar); } #undef FAC /*-----------------------------------------------------------*/ @@ -2249,11 +2202,17 @@ void TwoPunctures::LineRelax_al(double *dv, { int i, m, Ic, Ip, Im, col, ivar; - double *diag = ws_diag_al; - double *e = ws_e_al; /* above diagonal */ - double *f = ws_f_al; /* below diagonal */ - double *b = ws_b_al; /* rhs */ - double *x = ws_x_al; /* solution vector */ + double *diag = new double[n1]; + double *e = new double[n1 - 1]; /* above diagonal */ + double *f = new double[n1 - 1]; /* below diagonal */ + double *b = new double[n1]; /* rhs */ + double *x = new double[n1]; /* solution vector */ + + // gsl_vector *diag = gsl_vector_alloc(n1); + // gsl_vector *e = gsl_vector_alloc(n1-1); /* above diagonal */ + // gsl_vector *f = gsl_vector_alloc(n1-1); /* below diagonal */ + // gsl_vector *b = gsl_vector_alloc(n1); /* rhs */ + // gsl_vector *x = gsl_vector_alloc(n1); /* solution vector */ for (ivar = 0; ivar < nvar; ivar++) { @@ -2263,35 +2222,57 @@ void TwoPunctures::LineRelax_al(double *dv, } diag[n1 - 1] = 0; + // gsl_vector_set_zero(diag); + // gsl_vector_set_zero(e); + // gsl_vector_set_zero(f); for (i = 0; i < n1; i++) { Ip = Index(ivar, i + 1, j, k, nvar, n1, n2, n3); Ic = Index(ivar, i, j, k, nvar, n1, n2, n3); Im = Index(ivar, i - 1, j, k, nvar, n1, n2, n3); b[i] = rhs[Ic]; + // gsl_vector_set(b,i,rhs[Ic]); for (m = 0; m < ncols[Ic]; m++) { col = cols[Ic][m]; if (col != Ip && col != Ic && col != Im) b[i] -= JFD[Ic][m] * dv[col]; + // *gsl_vector_ptr(b, i) -= JFD[Ic][m] * dv[col]; else { if (col == Im && i > 0) f[i - 1] = JFD[Ic][m]; + // gsl_vector_set(f,i-1,JFD[Ic][m]); if (col == Ic) diag[i] = JFD[Ic][m]; + // gsl_vector_set(diag,i,JFD[Ic][m]); if (col == Ip && i < n1 - 1) e[i] = JFD[Ic][m]; + // gsl_vector_set(e,i,JFD[Ic][m]); } } } ThomasAlgorithm(n1, f, diag, e, x, b); + // gsl_linalg_solve_tridiag(diag, e, f, b, x); for (i = 0; i < n1; i++) { Ic = Index(ivar, i, j, k, nvar, n1, n2, n3); dv[Ic] = x[i]; + // dv[Ic] = gsl_vector_get(x, i); } } + + delete[] diag; + delete[] e; + delete[] f; + delete[] b; + delete[] x; + + // gsl_vector_free(diag); + // gsl_vector_free(e); + // gsl_vector_free(f); + // gsl_vector_free(b); + // gsl_vector_free(x); } /* -------------------------------------------------------------------------*/ // a[N], b[N-1], c[N-1], x[N], q[N] @@ -2303,29 +2284,44 @@ void TwoPunctures::LineRelax_al(double *dv, //"Parallel Scientific Computing in C++ and MPI" P361 void TwoPunctures::ThomasAlgorithm(int N, double *b, double *a, double *c, double *x, double *q) { - // In-place Thomas algorithm: uses a[] as d workspace, b[] as l workspace. - // c[] is already u (above-diagonal). ws_thomas_y is pre-allocated workspace. int i; - double *y = ws_thomas_y; + double *l, *u, *d, *y; + l = new double[N - 1]; + u = new double[N - 1]; + d = new double[N]; + y = new double[N]; + + /* LU Decomposition */ + d[0] = a[0]; + u[0] = c[0]; - /* LU Decomposition (in-place: a becomes d, b becomes l) */ for (i = 0; i < N - 2; i++) { - b[i] = b[i] / a[i]; - a[i + 1] = a[i + 1] - b[i] * c[i]; + l[i] = b[i] / d[i]; + d[i + 1] = a[i + 1] - l[i] * u[i]; + u[i + 1] = c[i + 1]; } - b[N - 2] = b[N - 2] / a[N - 2]; - a[N - 1] = a[N - 1] - b[N - 2] * c[N - 2]; + + l[N - 2] = b[N - 2] / d[N - 2]; + d[N - 1] = a[N - 1] - l[N - 2] * u[N - 2]; /* Forward Substitution [L][y] = [q] */ y[0] = q[0]; for (i = 1; i < N; i++) - y[i] = q[i] - b[i - 1] * y[i - 1]; + y[i] = q[i] - l[i - 1] * y[i - 1]; /* Backward Substitution [U][x] = [y] */ - x[N - 1] = y[N - 1] / a[N - 1]; + x[N - 1] = y[N - 1] / d[N - 1]; + for (i = N - 2; i >= 0; i--) - x[i] = (y[i] - c[i] * x[i + 1]) / a[i]; + x[i] = (y[i] - u[i] * x[i + 1]) / d[i]; + + delete[] l; + delete[] u; + delete[] d; + delete[] y; + + return; } // --------------------------------------------------------------------------*/ // Calculates the value of v at an arbitrary position (x,y,z) if the spectral coefficients are know*/*/ diff --git a/AMSS_NCKU_source/TwoPunctures.h b/AMSS_NCKU_source/TwoPunctures.h index 6233d59..22fb359 100644 --- a/AMSS_NCKU_source/TwoPunctures.h +++ b/AMSS_NCKU_source/TwoPunctures.h @@ -42,33 +42,6 @@ private: int ntotal; - // Pre-allocated workspace buffers for hot-path allocation elimination - // LineRelax_be workspace (sized for n2) - double *ws_diag_be, *ws_e_be, *ws_f_be, *ws_b_be, *ws_x_be; - // LineRelax_al workspace (sized for n1) - double *ws_diag_al, *ws_e_al, *ws_f_al, *ws_b_al, *ws_x_al; - // ThomasAlgorithm workspace (sized for max(n1,n2)) - double *ws_thomas_y; - // JFD_times_dv workspace (sized for nvar) - double *ws_jfd_values; - derivs ws_jfd_dU, ws_jfd_U; - // chebft_Zeros workspace (sized for max(n1,n2,n3)+1) - double *ws_cheb_c; - // fourft workspace (sized for max(n1,n2,n3)/2+1 each) - double *ws_four_a, *ws_four_b; - // Derivatives_AB3 workspace - double *ws_deriv_p, *ws_deriv_dp, *ws_deriv_d2p; - double *ws_deriv_q, *ws_deriv_dq; - double *ws_deriv_r, *ws_deriv_dr; - int *ws_deriv_indx; - // F_of_v workspace - double *ws_fov_sources; - double *ws_fov_values; - derivs ws_fov_U; - // J_times_dv workspace - double *ws_jtdv_values; - derivs ws_jtdv_dU, ws_jtdv_U; - struct parameters { int nvar, n1, n2, n3;