Fuse Ricci assembly and optimize trK/Aij gauge kernels

This commit is contained in:
2026-04-14 19:20:12 +08:00
parent af344bf1e5
commit 726d743376

View File

@@ -3368,6 +3368,219 @@ void kern_phase11_ricci_offdiag(
}
}
/* Phase 11: fused Ricci assembly (diag + off-diag) */
__global__ __launch_bounds__(128, 2)
void kern_phase11_ricci_fused(
const double* __restrict__ gxx, const double* __restrict__ gxy,
const double* __restrict__ gxz, const double* __restrict__ gyy,
const double* __restrict__ gyz, const double* __restrict__ gzz,
const double* __restrict__ gupxx, const double* __restrict__ gupxy,
const double* __restrict__ gupxz, const double* __restrict__ gupyy,
const double* __restrict__ gupyz, const double* __restrict__ gupzz,
const double* __restrict__ Gamxa, const double* __restrict__ Gamya,
const double* __restrict__ Gamza,
const double* __restrict__ Gamxx, const double* __restrict__ Gamxy,
const double* __restrict__ Gamxz,
const double* __restrict__ Gamyx, const double* __restrict__ Gamyy_d,
const double* __restrict__ Gamyz_d,
const double* __restrict__ Gamzx, const double* __restrict__ Gamzy,
const double* __restrict__ Gamzz_d,
const double* __restrict__ Gxxx, const double* __restrict__ Gxxy,
const double* __restrict__ Gxxz, const double* __restrict__ Gxyy,
const double* __restrict__ Gxyz, const double* __restrict__ Gxzz,
const double* __restrict__ Gyxx, const double* __restrict__ Gyxy,
const double* __restrict__ Gyxz, const double* __restrict__ Gyyy,
const double* __restrict__ Gyyz, const double* __restrict__ Gyzz,
const double* __restrict__ Gzxx, const double* __restrict__ Gzxy,
const double* __restrict__ Gzxz, const double* __restrict__ Gzyy,
const double* __restrict__ Gzyz, const double* __restrict__ Gzzz,
const double* __restrict__ lxxx, const double* __restrict__ lxyx,
const double* __restrict__ lxzx, const double* __restrict__ lyyx,
const double* __restrict__ lyzx, const double* __restrict__ lzzx,
const double* __restrict__ lxxy, const double* __restrict__ lxyy,
const double* __restrict__ lxzy, const double* __restrict__ lyyy,
const double* __restrict__ lyzy, const double* __restrict__ lzzy,
const double* __restrict__ lxxz, const double* __restrict__ lxyz,
const double* __restrict__ lxzz, const double* __restrict__ lyyz,
const double* __restrict__ lyzz, const double* __restrict__ lzzz,
double* __restrict__ Rxx, double* __restrict__ Rxy, double* __restrict__ Rxz,
double* __restrict__ Ryy, double* __restrict__ Ryz, double* __restrict__ Rzz)
{
const double H = 0.5, TWO = 2.0;
for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < d_gp.all; i += blockDim.x * gridDim.x) {
double uxx = gupxx[i], uxy = gupxy[i], uxz = gupxz[i];
double uyy = gupyy[i], uyz = gupyz[i], uzz = gupzz[i];
Rxx[i] = -H*Rxx[i]
+ gxx[i]*Gamxx[i]+gxy[i]*Gamyx[i]+gxz[i]*Gamzx[i]
+ Gamxa[i]*lxxx[i]+Gamya[i]*lxyx[i]+Gamza[i]*lxzx[i]
+ uxx*(TWO*(Gxxx[i]*lxxx[i]+Gyxx[i]*lxyx[i]+Gzxx[i]*lxzx[i])
+(Gxxx[i]*lxxx[i]+Gyxx[i]*lxxy[i]+Gzxx[i]*lxxz[i]))
+ uxy*(TWO*(Gxxx[i]*lxyx[i]+Gyxx[i]*lyyx[i]+Gzxx[i]*lyzx[i]
+Gxxy[i]*lxxx[i]+Gyxy[i]*lxyx[i]+Gzxy[i]*lxzx[i])
+(Gxxy[i]*lxxx[i]+Gyxy[i]*lxxy[i]+Gzxy[i]*lxxz[i])
+(Gxxx[i]*lxyx[i]+Gyxx[i]*lxyy[i]+Gzxx[i]*lxyz[i]))
+ uxz*(TWO*(Gxxx[i]*lxzx[i]+Gyxx[i]*lyzx[i]+Gzxx[i]*lzzx[i]
+Gxxz[i]*lxxx[i]+Gyxz[i]*lxyx[i]+Gzxz[i]*lxzx[i])
+(Gxxz[i]*lxxx[i]+Gyxz[i]*lxxy[i]+Gzxz[i]*lxxz[i])
+(Gxxx[i]*lxzx[i]+Gyxx[i]*lxzy[i]+Gzxx[i]*lxzz[i]))
+ uyy*(TWO*(Gxxy[i]*lxyx[i]+Gyxy[i]*lyyx[i]+Gzxy[i]*lyzx[i])
+(Gxxy[i]*lxyx[i]+Gyxy[i]*lxyy[i]+Gzxy[i]*lxyz[i]))
+ uyz*(TWO*(Gxxy[i]*lxzx[i]+Gyxy[i]*lyzx[i]+Gzxy[i]*lzzx[i]
+Gxxz[i]*lxyx[i]+Gyxz[i]*lyyx[i]+Gzxz[i]*lyzx[i])
+(Gxxz[i]*lxyx[i]+Gyxz[i]*lxyy[i]+Gzxz[i]*lxyz[i])
+(Gxxy[i]*lxzx[i]+Gyxy[i]*lxzy[i]+Gzxy[i]*lxzz[i]))
+ uzz*(TWO*(Gxxz[i]*lxzx[i]+Gyxz[i]*lyzx[i]+Gzxz[i]*lzzx[i])
+(Gxxz[i]*lxzx[i]+Gyxz[i]*lxzy[i]+Gzxz[i]*lxzz[i]));
Ryy[i] = -H*Ryy[i]
+ gxy[i]*Gamxy[i]+gyy[i]*Gamyy_d[i]+gyz[i]*Gamzy[i]
+ Gamxa[i]*lxyy[i]+Gamya[i]*lyyy[i]+Gamza[i]*lyzy[i]
+ uxx*(TWO*(Gxxy[i]*lxxy[i]+Gyxy[i]*lxyy[i]+Gzxy[i]*lxzy[i])
+(Gxxy[i]*lxyx[i]+Gyxy[i]*lxyy[i]+Gzxy[i]*lxyz[i]))
+ uxy*(TWO*(Gxxy[i]*lxyy[i]+Gyxy[i]*lyyy[i]+Gzxy[i]*lyzy[i]
+Gxyy[i]*lxxy[i]+Gyyy[i]*lxyy[i]+Gzyy[i]*lxzy[i])
+(Gxyy[i]*lxyx[i]+Gyyy[i]*lxyy[i]+Gzyy[i]*lxyz[i])
+(Gxxy[i]*lyyx[i]+Gyxy[i]*lyyy[i]+Gzxy[i]*lyyz[i]))
+ uxz*(TWO*(Gxxy[i]*lxzy[i]+Gyxy[i]*lyzy[i]+Gzxy[i]*lzzy[i]
+Gxyz[i]*lxxy[i]+Gyyz[i]*lxyy[i]+Gzyz[i]*lxzy[i])
+(Gxyz[i]*lxyx[i]+Gyyz[i]*lxyy[i]+Gzyz[i]*lxyz[i])
+(Gxxy[i]*lyzx[i]+Gyxy[i]*lyzy[i]+Gzxy[i]*lyzz[i]))
+ uyy*(TWO*(Gxyy[i]*lxyy[i]+Gyyy[i]*lyyy[i]+Gzyy[i]*lyzy[i])
+(Gxyy[i]*lyyx[i]+Gyyy[i]*lyyy[i]+Gzyy[i]*lyyz[i]))
+ uyz*(TWO*(Gxyy[i]*lxzy[i]+Gyyy[i]*lyzy[i]+Gzyy[i]*lzzy[i]
+Gxyz[i]*lxyy[i]+Gyyz[i]*lyyy[i]+Gzyz[i]*lyzy[i])
+(Gxyz[i]*lyyx[i]+Gyyz[i]*lyyy[i]+Gzyz[i]*lyyz[i])
+(Gxyy[i]*lyzx[i]+Gyyy[i]*lyzy[i]+Gzyy[i]*lyzz[i]))
+ uzz*(TWO*(Gxyz[i]*lxzy[i]+Gyyz[i]*lyzy[i]+Gzyz[i]*lzzy[i])
+(Gxyz[i]*lyzx[i]+Gyyz[i]*lyzy[i]+Gzyz[i]*lyzz[i]));
Rzz[i] = -H*Rzz[i]
+ gxz[i]*Gamxz[i]+gyz[i]*Gamyz_d[i]+gzz[i]*Gamzz_d[i]
+ Gamxa[i]*lxzz[i]+Gamya[i]*lyzz[i]+Gamza[i]*lzzz[i]
+ uxx*(TWO*(Gxxz[i]*lxxz[i]+Gyxz[i]*lxyz[i]+Gzxz[i]*lxzz[i])
+(Gxxz[i]*lxzx[i]+Gyxz[i]*lxzy[i]+Gzxz[i]*lxzz[i]))
+ uxy*(TWO*(Gxxz[i]*lxyz[i]+Gyxz[i]*lyyz[i]+Gzxz[i]*lyzz[i]
+Gxyz[i]*lxxz[i]+Gyyz[i]*lxyz[i]+Gzyz[i]*lxzz[i])
+(Gxyz[i]*lxzx[i]+Gyyz[i]*lxzy[i]+Gzyz[i]*lxzz[i])
+(Gxxz[i]*lyzx[i]+Gyxz[i]*lyzy[i]+Gzxz[i]*lyzz[i]))
+ uxz*(TWO*(Gxxz[i]*lxzz[i]+Gyxz[i]*lyzz[i]+Gzxz[i]*lzzz[i]
+Gxzz[i]*lxxz[i]+Gyzz[i]*lxyz[i]+Gzzz[i]*lxzz[i])
+(Gxzz[i]*lxzx[i]+Gyzz[i]*lxzy[i]+Gzzz[i]*lxzz[i])
+(Gxxz[i]*lzzx[i]+Gyxz[i]*lzzy[i]+Gzxz[i]*lzzz[i]))
+ uyy*(TWO*(Gxyz[i]*lxyz[i]+Gyyz[i]*lyyz[i]+Gzyz[i]*lyzz[i])
+(Gxyz[i]*lyzx[i]+Gyyz[i]*lyzy[i]+Gzyz[i]*lyzz[i]))
+ uyz*(TWO*(Gxyz[i]*lxzz[i]+Gyyz[i]*lyzz[i]+Gzyz[i]*lzzz[i]
+Gxzz[i]*lxyz[i]+Gyzz[i]*lyyz[i]+Gzzz[i]*lyzz[i])
+(Gxzz[i]*lyzx[i]+Gyzz[i]*lyzy[i]+Gzzz[i]*lyzz[i])
+(Gxyz[i]*lzzx[i]+Gyyz[i]*lzzy[i]+Gzyz[i]*lzzz[i]))
+ uzz*(TWO*(Gxzz[i]*lxzz[i]+Gyzz[i]*lyzz[i]+Gzzz[i]*lzzz[i])
+(Gxzz[i]*lzzx[i]+Gyzz[i]*lzzy[i]+Gzzz[i]*lzzz[i]));
Rxy[i] = H*(
-Rxy[i]
+gxx[i]*Gamxy[i]+gxy[i]*Gamyy_d[i]+gxz[i]*Gamzy[i]
+gxy[i]*Gamxx[i]+gyy[i]*Gamyx[i]+gyz[i]*Gamzx[i]
+Gamxa[i]*lxyx[i]+Gamya[i]*lyyx[i]+Gamza[i]*lyzx[i]
+Gamxa[i]*lxxy[i]+Gamya[i]*lxyy[i]+Gamza[i]*lxzy[i])
+uxx*(Gxxx[i]*lxxy[i]+Gyxx[i]*lxyy[i]+Gzxx[i]*lxzy[i]
+Gxxy[i]*lxxx[i]+Gyxy[i]*lxyx[i]+Gzxy[i]*lxzx[i]
+Gxxx[i]*lxyx[i]+Gyxx[i]*lxyy[i]+Gzxx[i]*lxyz[i])
+uxy*(Gxxx[i]*lxyy[i]+Gyxx[i]*lyyy[i]+Gzxx[i]*lyzy[i]
+Gxxy[i]*lxyx[i]+Gyxy[i]*lyyx[i]+Gzxy[i]*lyzx[i]
+Gxxy[i]*lxyx[i]+Gyxy[i]*lxyy[i]+Gzxy[i]*lxyz[i]
+Gxxy[i]*lxxy[i]+Gyxy[i]*lxyy[i]+Gzxy[i]*lxzy[i]
+Gxyy[i]*lxxx[i]+Gyyy[i]*lxyx[i]+Gzyy[i]*lxzx[i]
+Gxxx[i]*lyyx[i]+Gyxx[i]*lyyy[i]+Gzxx[i]*lyyz[i])
+uxz*(Gxxx[i]*lxzy[i]+Gyxx[i]*lyzy[i]+Gzxx[i]*lzzy[i]
+Gxxy[i]*lxzx[i]+Gyxy[i]*lyzx[i]+Gzxy[i]*lzzx[i]
+Gxxz[i]*lxyx[i]+Gyxz[i]*lxyy[i]+Gzxz[i]*lxyz[i]
+Gxxz[i]*lxxy[i]+Gyxz[i]*lxyy[i]+Gzxz[i]*lxzy[i]
+Gxyz[i]*lxxx[i]+Gyyz[i]*lxyx[i]+Gzyz[i]*lxzx[i]
+Gxxx[i]*lyzx[i]+Gyxx[i]*lyzy[i]+Gzxx[i]*lyzz[i])
+uyy*(Gxxy[i]*lxyy[i]+Gyxy[i]*lyyy[i]+Gzxy[i]*lyzy[i]
+Gxyy[i]*lxyx[i]+Gyyy[i]*lyyx[i]+Gzyy[i]*lyzx[i]
+Gxxy[i]*lyyx[i]+Gyxy[i]*lyyy[i]+Gzxy[i]*lyyz[i])
+uyz*(Gxxy[i]*lxzy[i]+Gyxy[i]*lyzy[i]+Gzxy[i]*lzzy[i]
+Gxyy[i]*lxzx[i]+Gyyy[i]*lyzx[i]+Gzyy[i]*lzzx[i]
+Gxxz[i]*lyyx[i]+Gyxz[i]*lyyy[i]+Gzxz[i]*lyyz[i]
+Gxxz[i]*lxyy[i]+Gyxz[i]*lyyy[i]+Gzxz[i]*lyzy[i]
+Gxyz[i]*lxyx[i]+Gyyz[i]*lyyx[i]+Gzyz[i]*lyzx[i]
+Gxxy[i]*lyzx[i]+Gyxy[i]*lyzy[i]+Gzxy[i]*lyzz[i])
+uzz*(Gxxz[i]*lxzy[i]+Gyxz[i]*lyzy[i]+Gzxz[i]*lzzy[i]
+Gxyz[i]*lxzx[i]+Gyyz[i]*lyzx[i]+Gzyz[i]*lzzx[i]
+Gxxz[i]*lyzx[i]+Gyxz[i]*lyzy[i]+Gzxz[i]*lyzz[i]);
Rxz[i] = H*(
-Rxz[i]
+gxx[i]*Gamxz[i]+gxy[i]*Gamyz_d[i]+gxz[i]*Gamzz_d[i]
+gxz[i]*Gamxx[i]+gyz[i]*Gamyx[i]+gzz[i]*Gamzx[i]
+Gamxa[i]*lxzx[i]+Gamya[i]*lyzx[i]+Gamza[i]*lzzx[i]
+Gamxa[i]*lxxz[i]+Gamya[i]*lxyz[i]+Gamza[i]*lxzz[i])
+uxx*(Gxxx[i]*lxxz[i]+Gyxx[i]*lxyz[i]+Gzxx[i]*lxzz[i]
+Gxxz[i]*lxxx[i]+Gyxz[i]*lxyx[i]+Gzxz[i]*lxzx[i]
+Gxxx[i]*lxzx[i]+Gyxx[i]*lxzy[i]+Gzxx[i]*lxzz[i])
+uxy*(Gxxx[i]*lxyz[i]+Gyxx[i]*lyyz[i]+Gzxx[i]*lyzz[i]
+Gxxz[i]*lxyx[i]+Gyxz[i]*lyyx[i]+Gzxz[i]*lyzx[i]
+Gxxy[i]*lxzx[i]+Gyxy[i]*lxzy[i]+Gzxy[i]*lxzz[i]
+Gxxy[i]*lxxz[i]+Gyxy[i]*lxyz[i]+Gzxy[i]*lxzz[i]
+Gxyz[i]*lxxx[i]+Gyyz[i]*lxyx[i]+Gzyz[i]*lxzx[i]
+Gxxx[i]*lyzx[i]+Gyxx[i]*lyzy[i]+Gzxx[i]*lyzz[i])
+uxz*(Gxxx[i]*lxzz[i]+Gyxx[i]*lyzz[i]+Gzxx[i]*lzzz[i]
+Gxxz[i]*lxzx[i]+Gyxz[i]*lyzx[i]+Gzxz[i]*lzzx[i]
+Gxxz[i]*lxzx[i]+Gyxz[i]*lxzy[i]+Gzxz[i]*lxzz[i]
+Gxxz[i]*lxxz[i]+Gyxz[i]*lxyz[i]+Gzxz[i]*lxzz[i]
+Gxzz[i]*lxxx[i]+Gyzz[i]*lxyx[i]+Gzzz[i]*lxzx[i]
+Gxxx[i]*lzzx[i]+Gyxx[i]*lzzy[i]+Gzxx[i]*lzzz[i])
+uyy*(Gxxy[i]*lxyz[i]+Gyxy[i]*lyyz[i]+Gzxy[i]*lyzz[i]
+Gxyz[i]*lxyx[i]+Gyyz[i]*lyyx[i]+Gzyz[i]*lyzx[i]
+Gxxy[i]*lyzx[i]+Gyxy[i]*lyzy[i]+Gzxy[i]*lyzz[i])
+uyz*(Gxxy[i]*lxzz[i]+Gyxy[i]*lyzz[i]+Gzxy[i]*lzzz[i]
+Gxyz[i]*lxzx[i]+Gyyz[i]*lyzx[i]+Gzyz[i]*lzzx[i]
+Gxxz[i]*lyzx[i]+Gyxz[i]*lyzy[i]+Gzxz[i]*lyzz[i]
+Gxxz[i]*lxyz[i]+Gyxz[i]*lyyz[i]+Gzxz[i]*lyzz[i]
+Gxzz[i]*lxyx[i]+Gyzz[i]*lyyx[i]+Gzzz[i]*lyzx[i]
+Gxxy[i]*lzzx[i]+Gyxy[i]*lzzy[i]+Gzxy[i]*lzzz[i])
+uzz*(Gxxz[i]*lxzz[i]+Gyxz[i]*lyzz[i]+Gzxz[i]*lzzz[i]
+Gxzz[i]*lxzx[i]+Gyzz[i]*lyzx[i]+Gzzz[i]*lzzx[i]
+Gxxz[i]*lzzx[i]+Gyxz[i]*lzzy[i]+Gzxz[i]*lzzz[i]);
Ryz[i] = H*(
-Ryz[i]
+gxy[i]*Gamxz[i]+gyy[i]*Gamyz_d[i]+gyz[i]*Gamzz_d[i]
+gxz[i]*Gamxy[i]+gyz[i]*Gamyy_d[i]+gzz[i]*Gamzy[i]
+Gamxa[i]*lxzy[i]+Gamya[i]*lyzy[i]+Gamza[i]*lzzy[i]
+Gamxa[i]*lxyz[i]+Gamya[i]*lyyz[i]+Gamza[i]*lyzz[i])
+uxx*(Gxxy[i]*lxxz[i]+Gyxy[i]*lxyz[i]+Gzxy[i]*lxzz[i]
+Gxxz[i]*lxxy[i]+Gyxz[i]*lxyy[i]+Gzxz[i]*lxzy[i]
+Gxxy[i]*lxzx[i]+Gyxy[i]*lxzy[i]+Gzxy[i]*lxzz[i])
+uxy*(Gxxy[i]*lxyz[i]+Gyxy[i]*lyyz[i]+Gzxy[i]*lyzz[i]
+Gxxz[i]*lxyy[i]+Gyxz[i]*lyyy[i]+Gzxz[i]*lyzy[i]
+Gxyy[i]*lxzx[i]+Gyyy[i]*lxzy[i]+Gzyy[i]*lxzz[i]
+Gxyy[i]*lxxz[i]+Gyyy[i]*lxyz[i]+Gzyy[i]*lxzz[i]
+Gxyz[i]*lxxy[i]+Gyyz[i]*lxyy[i]+Gzyz[i]*lxzy[i]
+Gxxy[i]*lyzx[i]+Gyxy[i]*lyzy[i]+Gzxy[i]*lyzz[i])
+uxz*(Gxxy[i]*lxzz[i]+Gyxy[i]*lyzz[i]+Gzxy[i]*lzzz[i]
+Gxxz[i]*lxzy[i]+Gyxz[i]*lyzy[i]+Gzxz[i]*lzzy[i]
+Gxyz[i]*lxzx[i]+Gyyz[i]*lxzy[i]+Gzyz[i]*lxzz[i]
+Gxyz[i]*lxxz[i]+Gyyz[i]*lxyz[i]+Gzyz[i]*lxzz[i]
+Gxzz[i]*lxxy[i]+Gyzz[i]*lxyy[i]+Gzzz[i]*lxzy[i]
+Gxxy[i]*lzzx[i]+Gyxy[i]*lzzy[i]+Gzxy[i]*lzzz[i])
+uyy*(Gxyy[i]*lxyz[i]+Gyyy[i]*lyyz[i]+Gzyy[i]*lyzz[i]
+Gxyz[i]*lxyy[i]+Gyyz[i]*lyyy[i]+Gzyz[i]*lyzy[i]
+Gxyy[i]*lyzx[i]+Gyyy[i]*lyzy[i]+Gzyy[i]*lyzz[i])
+uyz*(Gxyy[i]*lxzz[i]+Gyyy[i]*lyzz[i]+Gzyy[i]*lzzz[i]
+Gxyz[i]*lxzy[i]+Gyyz[i]*lyzy[i]+Gzyz[i]*lzzy[i]
+Gxyz[i]*lyzx[i]+Gyyz[i]*lyzy[i]+Gzyz[i]*lyzz[i]
+Gxyz[i]*lxyz[i]+Gyyz[i]*lyyz[i]+Gzyz[i]*lyzz[i]
+Gxzz[i]*lxyy[i]+Gyzz[i]*lyyy[i]+Gzzz[i]*lyzy[i]
+Gxyy[i]*lzzx[i]+Gyyy[i]*lzzy[i]+Gzyy[i]*lzzz[i])
+uzz*(Gxyz[i]*lxzz[i]+Gyyz[i]*lyzz[i]+Gzyz[i]*lzzz[i]
+Gxzz[i]*lxzy[i]+Gyzz[i]*lyzy[i]+Gzzz[i]*lzzy[i]
+Gxyz[i]*lzzx[i]+Gyyz[i]*lzzy[i]+Gzyz[i]*lzzz[i]);
}
}
/* Phase 13: chi correction to Ricci tensor
* After fdderivs(chi), subtract Christoffel*chi_deriv, compute conformal factor f,
* then add chi contribution to Rxx..Rzz.
@@ -3629,8 +3842,8 @@ void kern_phase13_chi_correction(
}
}
/* Phase 15: trK_rhs, Aij_rhs, gauge (after fdderivs(Lap) and fderivs(chi))
* Also updates Christoffel with physical chi correction, computes Lap_rhs, beta_rhs, dtSf_rhs.
/* Phase 15: trK_rhs, Aij_rhs, gauge.
* Also updates Christoffel with physical chi correction and computes Lap second derivatives on the fly.
*/
__global__ __launch_bounds__(128, 4)
void kern_phase15_trK_Aij_gauge(
@@ -3674,11 +3887,6 @@ void kern_phase15_trK_Aij_gauge(
double* __restrict__ Gzxx, double* __restrict__ Gzxy,
double* __restrict__ Gzxz, double* __restrict__ Gzyy,
double* __restrict__ Gzyz, double* __restrict__ Gzzz,
/* fxx..fzz = fdderivs(Lap) output */
double* __restrict__ fxx, double* __restrict__ fxy,
double* __restrict__ fxz, double* __restrict__ fyy,
double* __restrict__ fyz, double* __restrict__ fzz,
/* dtSfx_rhs..dtSfz_rhs = fderivs(chi) output, then overwritten */
double* __restrict__ dtSfx_rhs, double* __restrict__ dtSfy_rhs,
double* __restrict__ dtSfz_rhs,
double* __restrict__ trK_rhs,
@@ -3697,11 +3905,148 @@ void kern_phase15_trK_Aij_gauge(
const double PI_V=3.14159265358979323846;
const double F16=16.0, F8=8.0;
for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < d_gp.all; i += blockDim.x*gridDim.x) {
const int nx = d_gp.ex[0], ny = d_gp.ex[1], nz = d_gp.ex[2];
const int i0 = i % nx;
const int j0 = (i / nx) % ny;
const int k0 = i / (nx * ny);
const int iF = i0 + 1;
const int jF = j0 + 1;
const int kF = k0 + 1;
const int imaxF = d_gp.imaxF, jmaxF = d_gp.jmaxF, kmaxF = d_gp.kmaxF;
const int iminF = d_gp.iminF, jminF = d_gp.jminF, kminF = d_gp.kminF;
double uxx=gupxx[i],uxy=gupxy[i],uxz=gupxz[i];
double uyy=gupyy[i],uyz=gupyz[i],uzz=gupzz[i];
double a=alpn1[i], c1=chin1[i];
double cx=chix[i],cy=chiy[i],cz=chiz[i];
double lx=Lapx[i],ly=Lapy[i],lz=Lapz[i];
double fxx_v = 0.0, fxy_v = 0.0, fxz_v = 0.0;
double fyy_v = 0.0, fyz_v = 0.0, fzz_v = 0.0;
if (!(i0 > nx - 2 || j0 > ny - 2 || k0 > nz - 2)) {
if ((iF + 2) <= imaxF && (iF - 2) >= iminF &&
(jF + 2) <= jmaxF && (jF - 2) >= jminF &&
(kF + 2) <= kmaxF && (kF - 2) >= kminF)
{
const double c = fetch_sym_ord2_direct(alpn1, iF, jF, kF, 1, 1, 1);
fxx_v = d_gp.Fdxdx * (
-fetch_sym_ord2_direct(alpn1, iF - 2, jF, kF, 1, 1, 1)
+16.0 * fetch_sym_ord2_direct(alpn1, iF - 1, jF, kF, 1, 1, 1)
-30.0 * c
+16.0 * fetch_sym_ord2_direct(alpn1, iF + 1, jF, kF, 1, 1, 1)
- fetch_sym_ord2_direct(alpn1, iF + 2, jF, kF, 1, 1, 1));
fyy_v = d_gp.Fdydy * (
-fetch_sym_ord2_direct(alpn1, iF, jF - 2, kF, 1, 1, 1)
+16.0 * fetch_sym_ord2_direct(alpn1, iF, jF - 1, kF, 1, 1, 1)
-30.0 * c
+16.0 * fetch_sym_ord2_direct(alpn1, iF, jF + 1, kF, 1, 1, 1)
- fetch_sym_ord2_direct(alpn1, iF, jF + 2, kF, 1, 1, 1));
fzz_v = d_gp.Fdzdz * (
-fetch_sym_ord2_direct(alpn1, iF, jF, kF - 2, 1, 1, 1)
+16.0 * fetch_sym_ord2_direct(alpn1, iF, jF, kF - 1, 1, 1, 1)
-30.0 * c
+16.0 * fetch_sym_ord2_direct(alpn1, iF, jF, kF + 1, 1, 1, 1)
- fetch_sym_ord2_direct(alpn1, iF, jF, kF + 2, 1, 1, 1));
const double t_jm2 =
fetch_sym_ord2_direct(alpn1, iF - 2, jF - 2, kF, 1, 1, 1)
- 8.0 * fetch_sym_ord2_direct(alpn1, iF - 1, jF - 2, kF, 1, 1, 1)
+ 8.0 * fetch_sym_ord2_direct(alpn1, iF + 1, jF - 2, kF, 1, 1, 1)
- fetch_sym_ord2_direct(alpn1, iF + 2, jF - 2, kF, 1, 1, 1);
const double t_jm1 =
fetch_sym_ord2_direct(alpn1, iF - 2, jF - 1, kF, 1, 1, 1)
- 8.0 * fetch_sym_ord2_direct(alpn1, iF - 1, jF - 1, kF, 1, 1, 1)
+ 8.0 * fetch_sym_ord2_direct(alpn1, iF + 1, jF - 1, kF, 1, 1, 1)
- fetch_sym_ord2_direct(alpn1, iF + 2, jF - 1, kF, 1, 1, 1);
const double t_jp1 =
fetch_sym_ord2_direct(alpn1, iF - 2, jF + 1, kF, 1, 1, 1)
- 8.0 * fetch_sym_ord2_direct(alpn1, iF - 1, jF + 1, kF, 1, 1, 1)
+ 8.0 * fetch_sym_ord2_direct(alpn1, iF + 1, jF + 1, kF, 1, 1, 1)
- fetch_sym_ord2_direct(alpn1, iF + 2, jF + 1, kF, 1, 1, 1);
const double t_jp2 =
fetch_sym_ord2_direct(alpn1, iF - 2, jF + 2, kF, 1, 1, 1)
- 8.0 * fetch_sym_ord2_direct(alpn1, iF - 1, jF + 2, kF, 1, 1, 1)
+ 8.0 * fetch_sym_ord2_direct(alpn1, iF + 1, jF + 2, kF, 1, 1, 1)
- fetch_sym_ord2_direct(alpn1, iF + 2, jF + 2, kF, 1, 1, 1);
fxy_v = d_gp.Fdxdy * (t_jm2 - 8.0 * t_jm1 + 8.0 * t_jp1 - t_jp2);
const double t_km2_x =
fetch_sym_ord2_direct(alpn1, iF - 2, jF, kF - 2, 1, 1, 1)
- 8.0 * fetch_sym_ord2_direct(alpn1, iF - 1, jF, kF - 2, 1, 1, 1)
+ 8.0 * fetch_sym_ord2_direct(alpn1, iF + 1, jF, kF - 2, 1, 1, 1)
- fetch_sym_ord2_direct(alpn1, iF + 2, jF, kF - 2, 1, 1, 1);
const double t_km1_x =
fetch_sym_ord2_direct(alpn1, iF - 2, jF, kF - 1, 1, 1, 1)
- 8.0 * fetch_sym_ord2_direct(alpn1, iF - 1, jF, kF - 1, 1, 1, 1)
+ 8.0 * fetch_sym_ord2_direct(alpn1, iF + 1, jF, kF - 1, 1, 1, 1)
- fetch_sym_ord2_direct(alpn1, iF + 2, jF, kF - 1, 1, 1, 1);
const double t_kp1_x =
fetch_sym_ord2_direct(alpn1, iF - 2, jF, kF + 1, 1, 1, 1)
- 8.0 * fetch_sym_ord2_direct(alpn1, iF - 1, jF, kF + 1, 1, 1, 1)
+ 8.0 * fetch_sym_ord2_direct(alpn1, iF + 1, jF, kF + 1, 1, 1, 1)
- fetch_sym_ord2_direct(alpn1, iF + 2, jF, kF + 1, 1, 1, 1);
const double t_kp2_x =
fetch_sym_ord2_direct(alpn1, iF - 2, jF, kF + 2, 1, 1, 1)
- 8.0 * fetch_sym_ord2_direct(alpn1, iF - 1, jF, kF + 2, 1, 1, 1)
+ 8.0 * fetch_sym_ord2_direct(alpn1, iF + 1, jF, kF + 2, 1, 1, 1)
- fetch_sym_ord2_direct(alpn1, iF + 2, jF, kF + 2, 1, 1, 1);
fxz_v = d_gp.Fdxdz * (t_km2_x - 8.0 * t_km1_x + 8.0 * t_kp1_x - t_kp2_x);
const double t_km2_y =
fetch_sym_ord2_direct(alpn1, iF, jF - 2, kF - 2, 1, 1, 1)
- 8.0 * fetch_sym_ord2_direct(alpn1, iF, jF - 1, kF - 2, 1, 1, 1)
+ 8.0 * fetch_sym_ord2_direct(alpn1, iF, jF + 1, kF - 2, 1, 1, 1)
- fetch_sym_ord2_direct(alpn1, iF, jF + 2, kF - 2, 1, 1, 1);
const double t_km1_y =
fetch_sym_ord2_direct(alpn1, iF, jF - 2, kF - 1, 1, 1, 1)
- 8.0 * fetch_sym_ord2_direct(alpn1, iF, jF - 1, kF - 1, 1, 1, 1)
+ 8.0 * fetch_sym_ord2_direct(alpn1, iF, jF + 1, kF - 1, 1, 1, 1)
- fetch_sym_ord2_direct(alpn1, iF, jF + 2, kF - 1, 1, 1, 1);
const double t_kp1_y =
fetch_sym_ord2_direct(alpn1, iF, jF - 2, kF + 1, 1, 1, 1)
- 8.0 * fetch_sym_ord2_direct(alpn1, iF, jF - 1, kF + 1, 1, 1, 1)
+ 8.0 * fetch_sym_ord2_direct(alpn1, iF, jF + 1, kF + 1, 1, 1, 1)
- fetch_sym_ord2_direct(alpn1, iF, jF + 2, kF + 1, 1, 1, 1);
const double t_kp2_y =
fetch_sym_ord2_direct(alpn1, iF, jF - 2, kF + 2, 1, 1, 1)
- 8.0 * fetch_sym_ord2_direct(alpn1, iF, jF - 1, kF + 2, 1, 1, 1)
+ 8.0 * fetch_sym_ord2_direct(alpn1, iF, jF + 1, kF + 2, 1, 1, 1)
- fetch_sym_ord2_direct(alpn1, iF, jF + 2, kF + 2, 1, 1, 1);
fyz_v = d_gp.Fdydz * (t_km2_y - 8.0 * t_km1_y + 8.0 * t_kp1_y - t_kp2_y);
}
else if ((iF + 1) <= imaxF && (iF - 1) >= iminF &&
(jF + 1) <= jmaxF && (jF - 1) >= jminF &&
(kF + 1) <= kmaxF && (kF - 1) >= kminF)
{
const double c = fetch_sym_ord2_direct(alpn1, iF, jF, kF, 1, 1, 1);
fxx_v = d_gp.Sdxdx * (
fetch_sym_ord2_direct(alpn1, iF - 1, jF, kF, 1, 1, 1)
- 2.0 * c
+ fetch_sym_ord2_direct(alpn1, iF + 1, jF, kF, 1, 1, 1));
fyy_v = d_gp.Sdydy * (
fetch_sym_ord2_direct(alpn1, iF, jF - 1, kF, 1, 1, 1)
- 2.0 * c
+ fetch_sym_ord2_direct(alpn1, iF, jF + 1, kF, 1, 1, 1));
fzz_v = d_gp.Sdzdz * (
fetch_sym_ord2_direct(alpn1, iF, jF, kF - 1, 1, 1, 1)
- 2.0 * c
+ fetch_sym_ord2_direct(alpn1, iF, jF, kF + 1, 1, 1, 1));
fxy_v = d_gp.Sdxdy * (
fetch_sym_ord2_direct(alpn1, iF - 1, jF - 1, kF, 1, 1, 1)
- fetch_sym_ord2_direct(alpn1, iF + 1, jF - 1, kF, 1, 1, 1)
- fetch_sym_ord2_direct(alpn1, iF - 1, jF + 1, kF, 1, 1, 1)
+ fetch_sym_ord2_direct(alpn1, iF + 1, jF + 1, kF, 1, 1, 1));
fxz_v = d_gp.Sdxdz * (
fetch_sym_ord2_direct(alpn1, iF - 1, jF, kF - 1, 1, 1, 1)
- fetch_sym_ord2_direct(alpn1, iF + 1, jF, kF - 1, 1, 1, 1)
- fetch_sym_ord2_direct(alpn1, iF - 1, jF, kF + 1, 1, 1, 1)
+ fetch_sym_ord2_direct(alpn1, iF + 1, jF, kF + 1, 1, 1, 1));
fyz_v = d_gp.Sdydz * (
fetch_sym_ord2_direct(alpn1, iF, jF - 1, kF - 1, 1, 1, 1)
- fetch_sym_ord2_direct(alpn1, iF, jF + 1, kF - 1, 1, 1, 1)
- fetch_sym_ord2_direct(alpn1, iF, jF - 1, kF + 1, 1, 1, 1)
+ fetch_sym_ord2_direct(alpn1, iF, jF + 1, kF + 1, 1, 1, 1));
}
}
/* raised chi/chi */
double gx=(uxx*cx+uxy*cy+uxz*cz)/c1;
@@ -3728,17 +4073,17 @@ void kern_phase15_trK_Aij_gauge(
Gyyz[i]-=(cz/c1-gyz[i]*gy)*H;
Gzyz[i]-=(cy/c1-gyz[i]*gz)*H;
/* fxx..fzz correction: subtract Gamma*Lap_deriv */
fxx[i]-=Gxxx[i]*lx+Gyxx[i]*ly+Gzxx[i]*lz;
fyy[i]-=Gxyy[i]*lx+Gyyy[i]*ly+Gzyy[i]*lz;
fzz[i]-=Gxzz[i]*lx+Gyzz[i]*ly+Gzzz[i]*lz;
fxy[i]-=Gxxy[i]*lx+Gyxy[i]*ly+Gzxy[i]*lz;
fxz[i]-=Gxxz[i]*lx+Gyxz[i]*ly+Gzxz[i]*lz;
fyz[i]-=Gxyz_o[i]*lx+Gyyz[i]*ly+Gzyz[i]*lz;
/* Lap second-derivative correction: subtract Gamma*Lap_deriv */
fxx_v -= Gxxx[i]*lx+Gyxx[i]*ly+Gzxx[i]*lz;
fyy_v -= Gxyy[i]*lx+Gyyy[i]*ly+Gzyy[i]*lz;
fzz_v -= Gxzz[i]*lx+Gyzz[i]*ly+Gzzz[i]*lz;
fxy_v -= Gxxy[i]*lx+Gyxy[i]*ly+Gzxy[i]*lz;
fxz_v -= Gxxz[i]*lx+Gyxz[i]*ly+Gzxz[i]*lz;
fyz_v -= Gxyz_o[i]*lx+Gyyz[i]*ly+Gzyz[i]*lz;
/* D^i D_i alpha */
double DDA = uxx*fxx[i]+uyy*fyy[i]+uzz*fzz[i]
+TWO*(uxy*fxy[i]+uxz*fxz[i]+uyz*fyz[i]);
double DDA = uxx*fxx_v+uyy*fyy_v+uzz*fzz_v
+TWO*(uxy*fxy_v+uxz*fxz_v+uyz*fyz_v);
/* trace of S_ij (physical) */
double S_v = c1*(uxx*Sxx_m[i]+uyy*Syy_m[i]+uzz*Szz_m[i]
@@ -3774,25 +4119,25 @@ void kern_phase15_trK_Aij_gauge(
/* f_arr = -(1/3) * (DDA + alpha/chi * (2/3*K^2 - AijAij - 16pi*rho + 8pi*S)) */
double f_v = F2o3*trK_v*trK_v - AijAij - F16*PI_V*rho[i] + EIGHT*PI_V*S_v;
f_arr[i] = -F1o3*(uxx*fxx[i]+uyy*fyy[i]+uzz*fzz[i]
+TWO*(uxy*fxy[i]+uxz*fxz[i]+uyz*fyz[i])
f_arr[i] = -F1o3*(uxx*fxx_v+uyy*fyy_v+uzz*fzz_v
+TWO*(uxy*fxy_v+uxz*fxz_v+uyz*fyz_v)
+(a/c1)*f_v);
/* fij = alpha*(Rij - 8pi*Sij) - D_iD_j alpha */
double fxx_v=a*(Rxx[i]-EIGHT*PI_V*Sxx_m[i])-fxx[i];
double fxy_v=a*(Rxy[i]-EIGHT*PI_V*Sxy_m[i])-fxy[i];
double fxz_v=a*(Rxz[i]-EIGHT*PI_V*Sxz_m[i])-fxz[i];
double fyy_v=a*(Ryy[i]-EIGHT*PI_V*Syy_m[i])-fyy[i];
double fyz_v=a*(Ryz[i]-EIGHT*PI_V*Syz_m[i])-fyz[i];
double fzz_v=a*(Rzz[i]-EIGHT*PI_V*Szz_m[i])-fzz[i];
double fij_xx=a*(Rxx[i]-EIGHT*PI_V*Sxx_m[i])-fxx_v;
double fij_xy=a*(Rxy[i]-EIGHT*PI_V*Sxy_m[i])-fxy_v;
double fij_xz=a*(Rxz[i]-EIGHT*PI_V*Sxz_m[i])-fxz_v;
double fij_yy=a*(Ryy[i]-EIGHT*PI_V*Syy_m[i])-fyy_v;
double fij_yz=a*(Ryz[i]-EIGHT*PI_V*Syz_m[i])-fyz_v;
double fij_zz=a*(Rzz[i]-EIGHT*PI_V*Szz_m[i])-fzz_v;
/* Aij_rhs = chi*(fij - gij*f) */
Axx_rhs[i]=fxx_v-gxx[i]*f_arr[i];
Ayy_rhs[i]=fyy_v-gyy[i]*f_arr[i];
Azz_rhs[i]=fzz_v-gzz[i]*f_arr[i];
Axy_rhs[i]=fxy_v-gxy[i]*f_arr[i];
Axz_rhs[i]=fxz_v-gxz[i]*f_arr[i];
Ayz_rhs[i]=fyz_v-gyz[i]*f_arr[i];
Axx_rhs[i]=fij_xx-gxx[i]*f_arr[i];
Ayy_rhs[i]=fij_yy-gyy[i]*f_arr[i];
Azz_rhs[i]=fij_zz-gzz[i]*f_arr[i];
Axy_rhs[i]=fij_xy-gxy[i]*f_arr[i];
Axz_rhs[i]=fij_xz-gxz[i]*f_arr[i];
Ayz_rhs[i]=fij_yz-gyz[i]*f_arr[i];
/* A_il A^l_j */
double AA_xx=uxx*Axx[i]*Axx[i]+uyy*Axy[i]*Axy[i]+uzz*Axz[i]*Axz[i]
@@ -4327,7 +4672,7 @@ static void launch_rhs_pipeline(int all, double eps, int co)
src_fields, dst_fields, soa_signs, all);
}
kern_phase11_ricci_diag<<<grid(all),BLK>>>(
kern_phase11_ricci_fused<<<grid(all),BLK>>>(
D(S_gxx),D(S_gxy),D(S_gxz),D(S_gyy),D(S_gyz),D(S_gzz),
D(S_gupxx),D(S_gupxy),D(S_gupxz),D(S_gupyy),D(S_gupyz),D(S_gupzz),
D(S_Gamxa),D(S_Gamya),D(S_Gamza),
@@ -4343,25 +4688,8 @@ static void launch_rhs_pipeline(int all, double eps, int co)
D(S_gxxx),D(S_gxyx),D(S_gxzx),D(S_gyyx),D(S_gyzx),D(S_gzzx),
D(S_gxxy),D(S_gxyy),D(S_gxzy),D(S_gyyy),D(S_gyzy),D(S_gzzy),
D(S_gxxz),D(S_gxyz),D(S_gxzz),D(S_gyyz),D(S_gyzz),D(S_gzzz),
D(S_Rxx),D(S_Ryy),D(S_Rzz));
kern_phase11_ricci_offdiag<<<grid(all),BLK>>>(
D(S_gxx),D(S_gxy),D(S_gxz),D(S_gyy),D(S_gyz),D(S_gzz),
D(S_gupxx),D(S_gupxy),D(S_gupxz),D(S_gupyy),D(S_gupyz),D(S_gupzz),
D(S_Gamxa),D(S_Gamya),D(S_Gamza),
D(S_Gamxx),D(S_Gamxy),D(S_Gamxz),
D(S_Gamyx),D(S_Gamyy_t),D(S_Gamyz_t),
D(S_Gamzx),D(S_Gamzy),D(S_Gamzz_t),
D(S_Gamxxx),D(S_Gamxxy),D(S_Gamxxz),
D(S_Gamxyy),D(S_Gamxyz),D(S_Gamxzz),
D(S_Gamyxx),D(S_Gamyxy),D(S_Gamyxz),
D(S_Gamyyy),D(S_Gamyyz),D(S_Gamyzz),
D(S_Gamzxx),D(S_Gamzxy),D(S_Gamzxz),
D(S_Gamzyy),D(S_Gamzyz),D(S_Gamzzz),
D(S_gxxx),D(S_gxyx),D(S_gxzx),D(S_gyyx),D(S_gyzx),D(S_gzzx),
D(S_gxxy),D(S_gxyy),D(S_gxzy),D(S_gyyy),D(S_gyzy),D(S_gzzy),
D(S_gxxz),D(S_gxyz),D(S_gxzz),D(S_gyyz),D(S_gyzz),D(S_gzzz),
D(S_Rxy),D(S_Rxz),D(S_Ryz));
D(S_Rxx),D(S_Rxy),D(S_Rxz),
D(S_Ryy),D(S_Ryz),D(S_Rzz));
kern_phase12_13_chi_correction_fused<<<grid((size_t)all),BLK>>>(
D(S_chi), D(S_chin1),
@@ -4378,12 +4706,6 @@ static void launch_rhs_pipeline(int all, double eps, int co)
D(S_Rxx), D(S_Rxy), D(S_Rxz),
D(S_Ryy), D(S_Ryz), D(S_Rzz));
kern_phase14_lap_chi_derivs<<<grid((size_t)all),BLK>>>(
D(S_Lap), D(S_chi),
D(S_fxx), D(S_fxy), D(S_fxz),
D(S_fyy), D(S_fyz), D(S_fzz),
D(S_dtSfx_rhs), D(S_dtSfy_rhs), D(S_dtSfz_rhs));
kern_phase15_trK_Aij_gauge<<<grid(all),BLK>>>(
D(S_alpn1), D(S_chin1),
D(S_chix), D(S_chiy), D(S_chiz),
@@ -4407,8 +4729,6 @@ static void launch_rhs_pipeline(int all, double eps, int co)
D(S_Gamyyy), D(S_Gamyyz), D(S_Gamyzz),
D(S_Gamzxx), D(S_Gamzxy), D(S_Gamzxz),
D(S_Gamzyy), D(S_Gamzyz), D(S_Gamzzz),
D(S_fxx), D(S_fxy), D(S_fxz),
D(S_fyy), D(S_fyz), D(S_fzz),
D(S_dtSfx_rhs), D(S_dtSfy_rhs), D(S_dtSfz_rhs),
D(S_trK_rhs),
D(S_Axx_rhs), D(S_Axy_rhs), D(S_Axz_rhs),
@@ -4961,8 +5281,8 @@ int f_compute_rhs_bssn(int *ex, double &T,
src_fields, dst_fields, soa_signs, all);
}
/* Phase 11: Ricci assembly (diagonal + off-diagonal) */
kern_phase11_ricci_diag<<<grid(all),BLK>>>(
/* Phase 11: fused Ricci assembly */
kern_phase11_ricci_fused<<<grid(all),BLK>>>(
D(S_gxx),D(S_gxy),D(S_gxz),D(S_gyy),D(S_gyz),D(S_gzz),
D(S_gupxx),D(S_gupxy),D(S_gupxz),D(S_gupyy),D(S_gupyz),D(S_gupzz),
D(S_Gamxa),D(S_Gamya),D(S_Gamza),
@@ -4978,25 +5298,8 @@ int f_compute_rhs_bssn(int *ex, double &T,
D(S_gxxx),D(S_gxyx),D(S_gxzx),D(S_gyyx),D(S_gyzx),D(S_gzzx),
D(S_gxxy),D(S_gxyy),D(S_gxzy),D(S_gyyy),D(S_gyzy),D(S_gzzy),
D(S_gxxz),D(S_gxyz),D(S_gxzz),D(S_gyyz),D(S_gyzz),D(S_gzzz),
D(S_Rxx),D(S_Ryy),D(S_Rzz));
kern_phase11_ricci_offdiag<<<grid(all),BLK>>>(
D(S_gxx),D(S_gxy),D(S_gxz),D(S_gyy),D(S_gyz),D(S_gzz),
D(S_gupxx),D(S_gupxy),D(S_gupxz),D(S_gupyy),D(S_gupyz),D(S_gupzz),
D(S_Gamxa),D(S_Gamya),D(S_Gamza),
D(S_Gamxx),D(S_Gamxy),D(S_Gamxz),
D(S_Gamyx),D(S_Gamyy_t),D(S_Gamyz_t),
D(S_Gamzx),D(S_Gamzy),D(S_Gamzz_t),
D(S_Gamxxx),D(S_Gamxxy),D(S_Gamxxz),
D(S_Gamxyy),D(S_Gamxyz),D(S_Gamxzz),
D(S_Gamyxx),D(S_Gamyxy),D(S_Gamyxz),
D(S_Gamyyy),D(S_Gamyyz),D(S_Gamyzz),
D(S_Gamzxx),D(S_Gamzxy),D(S_Gamzxz),
D(S_Gamzyy),D(S_Gamzyz),D(S_Gamzzz),
D(S_gxxx),D(S_gxyx),D(S_gxzx),D(S_gyyx),D(S_gyzx),D(S_gzzx),
D(S_gxxy),D(S_gxyy),D(S_gxzy),D(S_gyyy),D(S_gyzy),D(S_gzzy),
D(S_gxxz),D(S_gxyz),D(S_gxzz),D(S_gyyz),D(S_gyzz),D(S_gzzz),
D(S_Rxy),D(S_Rxz),D(S_Ryz));
D(S_Rxx),D(S_Rxy),D(S_Rxz),
D(S_Ryy),D(S_Ryz),D(S_Rzz));
/* ============================================================ */
/* Phase 12/13: chi fdderivs + chi correction */
@@ -5017,16 +5320,7 @@ int f_compute_rhs_bssn(int *ex, double &T,
D(S_Ryy), D(S_Ryz), D(S_Rzz));
/* ============================================================ */
/* Phase 14: fdderivs(Lap) + fderivs(chi) */
/* ============================================================ */
kern_phase14_lap_chi_derivs<<<grid((size_t)all),BLK>>>(
D(S_Lap), D(S_chi),
D(S_fxx), D(S_fxy), D(S_fxz),
D(S_fyy), D(S_fyz), D(S_fzz),
D(S_dtSfx_rhs), D(S_dtSfy_rhs), D(S_dtSfz_rhs));
/* ============================================================ */
/* Phase 15: trK_rhs, Aij_rhs, gauge */
/* Phase 14/15: fused trK_rhs, Aij_rhs, gauge */
/* ============================================================ */
kern_phase15_trK_Aij_gauge<<<grid(all),BLK>>>(
D(S_alpn1), D(S_chin1),
@@ -5051,8 +5345,6 @@ int f_compute_rhs_bssn(int *ex, double &T,
D(S_Gamyyy), D(S_Gamyyz), D(S_Gamyzz),
D(S_Gamzxx), D(S_Gamzxy), D(S_Gamzxz),
D(S_Gamzyy), D(S_Gamzyz), D(S_Gamzzz),
D(S_fxx), D(S_fxy), D(S_fxz),
D(S_fyy), D(S_fyz), D(S_fzz),
D(S_dtSfx_rhs), D(S_dtSfy_rhs), D(S_dtSfz_rhs),
D(S_trK_rhs),
D(S_Axx_rhs), D(S_Axy_rhs), D(S_Axz_rhs),