compute div_beta on-the-fly to remove temp array

This commit is contained in:
2026-03-02 12:12:58 +08:00
parent a893b4007c
commit 5839755c2f

View File

@@ -201,7 +201,7 @@ enum {
S_Gamyx, S_Gamyy_t, S_Gamyz_t, S_Gamyx, S_Gamyy_t, S_Gamyz_t,
S_Gamzx, S_Gamzy, S_Gamzz_t, S_Gamzx, S_Gamzy, S_Gamzz_t,
S_Kx, S_Ky, S_Kz, S_Kx, S_Ky, S_Kz,
S_div_beta, S_S_arr, S_f_arr, S_S_arr, S_f_arr,
S_fxx, S_fxy, S_fxz, S_fyy, S_fyz, S_fzz, S_fxx, S_fxy, S_fxz, S_fyy, S_fyz, S_fzz,
S_Gamxa, S_Gamya, S_Gamza, S_Gamxa, S_Gamya, S_Gamza,
S_gupxx, S_gupxy, S_gupxz, S_gupxx, S_gupxy, S_gupxz,
@@ -959,7 +959,7 @@ __global__ void kern_phase1_prep(
} }
} }
/* Phase 2a: chi_rhs, gij_rhs, div_beta */ /* Phase 2a: chi_rhs, gij_rhs */
__global__ void kern_phase2_metric_rhs( __global__ void kern_phase2_metric_rhs(
const double* __restrict__ alpn1, const double* __restrict__ chin1, const double* __restrict__ alpn1, const double* __restrict__ chin1,
const double* __restrict__ gxx, const double* __restrict__ gxy, const double* __restrict__ gxx, const double* __restrict__ gxy,
@@ -974,7 +974,6 @@ __global__ void kern_phase2_metric_rhs(
const double* __restrict__ betayy, const double* __restrict__ betayz, const double* __restrict__ betayy, const double* __restrict__ betayz,
const double* __restrict__ betazx, const double* __restrict__ betazy, const double* __restrict__ betazx, const double* __restrict__ betazy,
const double* __restrict__ betazz, const double* __restrict__ betazz,
double* __restrict__ div_beta,
double* __restrict__ chi_rhs, double* __restrict__ gxx_rhs, double* __restrict__ chi_rhs, double* __restrict__ gxx_rhs,
double* __restrict__ gyy_rhs, double* __restrict__ gzz_rhs, double* __restrict__ gyy_rhs, double* __restrict__ gzz_rhs,
double* __restrict__ gxy_rhs, double* __restrict__ gyz_rhs, double* __restrict__ gxy_rhs, double* __restrict__ gyz_rhs,
@@ -983,7 +982,6 @@ __global__ void kern_phase2_metric_rhs(
const double F2o3 = 2.0/3.0, F1o3 = 1.0/3.0, TWO = 2.0; const double F2o3 = 2.0/3.0, F1o3 = 1.0/3.0, TWO = 2.0;
for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < d_gp.all; i += blockDim.x*gridDim.x) { for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < d_gp.all; i += blockDim.x*gridDim.x) {
double db = betaxx[i] + betayy[i] + betazz[i]; double db = betaxx[i] + betayy[i] + betazz[i];
div_beta[i] = db;
chi_rhs[i] = F2o3 * chin1[i] * (alpn1[i] * trK[i] - db); chi_rhs[i] = F2o3 * chin1[i] * (alpn1[i] * trK[i] - db);
gxx_rhs[i] = -TWO*alpn1[i]*Axx[i] - F2o3*gxx[i]*db gxx_rhs[i] = -TWO*alpn1[i]*Axx[i] - F2o3*gxx[i]*db
+ TWO*(gxx[i]*betaxx[i] + gxy[i]*betayx[i] + gxz[i]*betazx[i]); + TWO*(gxx[i]*betaxx[i] + gxy[i]*betayx[i] + gxz[i]*betazx[i]);
@@ -1244,7 +1242,6 @@ void kern_phase8_gamma_rhs_part2(
const double* __restrict__ gupxx, const double* __restrict__ gupxy, const double* __restrict__ gupxx, const double* __restrict__ gupxy,
const double* __restrict__ gupxz, const double* __restrict__ gupyy, const double* __restrict__ gupxz, const double* __restrict__ gupyy,
const double* __restrict__ gupyz, const double* __restrict__ gupzz, const double* __restrict__ gupyz, const double* __restrict__ gupzz,
const double* __restrict__ div_beta,
/* fdderivs(betax) -> gxxx,gxyx,gxzx,gyyx,gyzx,gzzx */ /* fdderivs(betax) -> gxxx,gxyx,gxzx,gyyx,gyzx,gzzx */
const double* __restrict__ bxx_xx, const double* __restrict__ bxx_xy, const double* __restrict__ bxx_xx, const double* __restrict__ bxx_xy,
const double* __restrict__ bxx_xz, const double* __restrict__ bxx_yy, const double* __restrict__ bxx_xz, const double* __restrict__ bxx_yy,
@@ -1303,7 +1300,7 @@ void kern_phase8_gamma_rhs_part2(
double Ga_z = uxx*Gzxx[i]+uyy*Gzyy[i]+uzz*Gzzz[i] double Ga_z = uxx*Gzxx[i]+uyy*Gzyy[i]+uzz*Gzzz[i]
+TWO*(uxy*Gzxy[i]+uxz*Gzxz[i]+uyz*Gzyz[i]); +TWO*(uxy*Gzxy[i]+uxz*Gzxz[i]+uyz*Gzyz[i]);
Gamxa_out[i]=Ga_x; Gamya_out[i]=Ga_y; Gamza_out[i]=Ga_z; Gamxa_out[i]=Ga_x; Gamya_out[i]=Ga_y; Gamza_out[i]=Ga_z;
double db = div_beta[i]; double db = betaxx[i] + betayy[i] + betazz[i];
Gamx_rhs[i] += F2o3*Ga_x*db Gamx_rhs[i] += F2o3*Ga_x*db
- Ga_x*betaxx[i] - Ga_y*betaxy[i] - Ga_z*betaxz[i] - Ga_x*betaxx[i] - Ga_y*betaxy[i] - Ga_z*betaxz[i]
+ F1o3*(uxx*fxx_v+uxy*fxy_v+uxz*fxz_v) + F1o3*(uxx*fxx_v+uxy*fxy_v+uxz*fxz_v)
@@ -1738,7 +1735,6 @@ void kern_phase15_trK_Aij_gauge(
const double* __restrict__ Ayz, const double* __restrict__ Azz, const double* __restrict__ Ayz, const double* __restrict__ Azz,
const double* __restrict__ Lapx, const double* __restrict__ Lapy, const double* __restrict__ Lapx, const double* __restrict__ Lapy,
const double* __restrict__ Lapz, const double* __restrict__ Lapz,
const double* __restrict__ div_beta,
const double* __restrict__ betaxx, const double* __restrict__ betaxy, const double* __restrict__ betaxx, const double* __restrict__ betaxy,
const double* __restrict__ betaxz, const double* __restrict__ betayx, const double* __restrict__ betaxz, const double* __restrict__ betayx,
const double* __restrict__ betayy, const double* __restrict__ betayz, const double* __restrict__ betayy, const double* __restrict__ betayz,
@@ -1857,7 +1853,7 @@ void kern_phase15_trK_Aij_gauge(
+uyz*(Ayy[i]*Azz[i]+Ayz[i]*Ayz[i]))); +uyz*(Ayy[i]*Azz[i]+Ayz[i]*Ayz[i])));
double trK_v = trK[i]; double trK_v = trK[i];
double db = div_beta[i]; double db = betaxx[i] + betayy[i] + betazz[i];
/* trK_rhs step 1: store D^iD_i alpha * chin1 */ /* trK_rhs step 1: store D^iD_i alpha * chin1 */
trK_rhs[i] = c1 * DDA; trK_rhs[i] = c1 * DDA;
@@ -2207,7 +2203,6 @@ int f_compute_rhs_bssn(int *ex, double &T,
D(S_betaxx), D(S_betaxy), D(S_betaxz), D(S_betaxx), D(S_betaxy), D(S_betaxz),
D(S_betayx), D(S_betayy), D(S_betayz), D(S_betayx), D(S_betayy), D(S_betayz),
D(S_betazx), D(S_betazy), D(S_betazz), D(S_betazx), D(S_betazy), D(S_betazz),
D(S_div_beta),
D(S_chi_rhs), D(S_gxx_rhs), D(S_gyy_rhs), D(S_gzz_rhs), D(S_chi_rhs), D(S_gxx_rhs), D(S_gyy_rhs), D(S_gzz_rhs),
D(S_gxy_rhs), D(S_gyz_rhs), D(S_gxz_rhs)); D(S_gxy_rhs), D(S_gyz_rhs), D(S_gxz_rhs));
@@ -2283,7 +2278,6 @@ int f_compute_rhs_bssn(int *ex, double &T,
kern_phase8_gamma_rhs_part2<<<grid(all),BLK>>>( kern_phase8_gamma_rhs_part2<<<grid(all),BLK>>>(
D(S_gupxx), D(S_gupxy), D(S_gupxz), D(S_gupxx), D(S_gupxy), D(S_gupxz),
D(S_gupyy), D(S_gupyz), D(S_gupzz), D(S_gupyy), D(S_gupyz), D(S_gupzz),
D(S_div_beta),
D(S_gxxx),D(S_gxyx),D(S_gxzx),D(S_gyyx),D(S_gyzx),D(S_gzzx), D(S_gxxx),D(S_gxyx),D(S_gxzx),D(S_gyyx),D(S_gyzx),D(S_gzzx),
D(S_gxxy),D(S_gxyy),D(S_gxzy),D(S_gyyy),D(S_gyzy),D(S_gzzy), D(S_gxxy),D(S_gxyy),D(S_gxzy),D(S_gyyy),D(S_gyzy),D(S_gzzy),
D(S_gxxz),D(S_gxyz),D(S_gxzz),D(S_gyyz),D(S_gyzz),D(S_gzzz), D(S_gxxz),D(S_gxyz),D(S_gxzz),D(S_gyyz),D(S_gyzz),D(S_gzzz),
@@ -2435,7 +2429,6 @@ int f_compute_rhs_bssn(int *ex, double &T,
D(S_trK), D(S_trK),
D(S_Axx), D(S_Axy), D(S_Axz), D(S_Ayy), D(S_Ayz), D(S_Azz), D(S_Axx), D(S_Axy), D(S_Axz), D(S_Ayy), D(S_Ayz), D(S_Azz),
D(S_Lapx), D(S_Lapy), D(S_Lapz), D(S_Lapx), D(S_Lapy), D(S_Lapz),
D(S_div_beta),
D(S_betaxx), D(S_betaxy), D(S_betaxz), D(S_betaxx), D(S_betaxy), D(S_betaxz),
D(S_betayx), D(S_betayy), D(S_betayz), D(S_betayx), D(S_betayy), D(S_betayz),
D(S_betazx), D(S_betazy), D(S_betazz), D(S_betazx), D(S_betazy), D(S_betazz),