Fuse Ricci assembly and optimize trK/Aij gauge kernels
This commit is contained in:
@@ -3368,6 +3368,219 @@ void kern_phase11_ricci_offdiag(
|
||||
}
|
||||
}
|
||||
|
||||
/* Phase 11: fused Ricci assembly (diag + off-diag) */
|
||||
__global__ __launch_bounds__(128, 2)
|
||||
void kern_phase11_ricci_fused(
|
||||
const double* __restrict__ gxx, const double* __restrict__ gxy,
|
||||
const double* __restrict__ gxz, const double* __restrict__ gyy,
|
||||
const double* __restrict__ gyz, const double* __restrict__ gzz,
|
||||
const double* __restrict__ gupxx, const double* __restrict__ gupxy,
|
||||
const double* __restrict__ gupxz, const double* __restrict__ gupyy,
|
||||
const double* __restrict__ gupyz, const double* __restrict__ gupzz,
|
||||
const double* __restrict__ Gamxa, const double* __restrict__ Gamya,
|
||||
const double* __restrict__ Gamza,
|
||||
const double* __restrict__ Gamxx, const double* __restrict__ Gamxy,
|
||||
const double* __restrict__ Gamxz,
|
||||
const double* __restrict__ Gamyx, const double* __restrict__ Gamyy_d,
|
||||
const double* __restrict__ Gamyz_d,
|
||||
const double* __restrict__ Gamzx, const double* __restrict__ Gamzy,
|
||||
const double* __restrict__ Gamzz_d,
|
||||
const double* __restrict__ Gxxx, const double* __restrict__ Gxxy,
|
||||
const double* __restrict__ Gxxz, const double* __restrict__ Gxyy,
|
||||
const double* __restrict__ Gxyz, const double* __restrict__ Gxzz,
|
||||
const double* __restrict__ Gyxx, const double* __restrict__ Gyxy,
|
||||
const double* __restrict__ Gyxz, const double* __restrict__ Gyyy,
|
||||
const double* __restrict__ Gyyz, const double* __restrict__ Gyzz,
|
||||
const double* __restrict__ Gzxx, const double* __restrict__ Gzxy,
|
||||
const double* __restrict__ Gzxz, const double* __restrict__ Gzyy,
|
||||
const double* __restrict__ Gzyz, const double* __restrict__ Gzzz,
|
||||
const double* __restrict__ lxxx, const double* __restrict__ lxyx,
|
||||
const double* __restrict__ lxzx, const double* __restrict__ lyyx,
|
||||
const double* __restrict__ lyzx, const double* __restrict__ lzzx,
|
||||
const double* __restrict__ lxxy, const double* __restrict__ lxyy,
|
||||
const double* __restrict__ lxzy, const double* __restrict__ lyyy,
|
||||
const double* __restrict__ lyzy, const double* __restrict__ lzzy,
|
||||
const double* __restrict__ lxxz, const double* __restrict__ lxyz,
|
||||
const double* __restrict__ lxzz, const double* __restrict__ lyyz,
|
||||
const double* __restrict__ lyzz, const double* __restrict__ lzzz,
|
||||
double* __restrict__ Rxx, double* __restrict__ Rxy, double* __restrict__ Rxz,
|
||||
double* __restrict__ Ryy, double* __restrict__ Ryz, double* __restrict__ Rzz)
|
||||
{
|
||||
const double H = 0.5, TWO = 2.0;
|
||||
for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < d_gp.all; i += blockDim.x * gridDim.x) {
|
||||
double uxx = gupxx[i], uxy = gupxy[i], uxz = gupxz[i];
|
||||
double uyy = gupyy[i], uyz = gupyz[i], uzz = gupzz[i];
|
||||
|
||||
Rxx[i] = -H*Rxx[i]
|
||||
+ gxx[i]*Gamxx[i]+gxy[i]*Gamyx[i]+gxz[i]*Gamzx[i]
|
||||
+ Gamxa[i]*lxxx[i]+Gamya[i]*lxyx[i]+Gamza[i]*lxzx[i]
|
||||
+ uxx*(TWO*(Gxxx[i]*lxxx[i]+Gyxx[i]*lxyx[i]+Gzxx[i]*lxzx[i])
|
||||
+(Gxxx[i]*lxxx[i]+Gyxx[i]*lxxy[i]+Gzxx[i]*lxxz[i]))
|
||||
+ uxy*(TWO*(Gxxx[i]*lxyx[i]+Gyxx[i]*lyyx[i]+Gzxx[i]*lyzx[i]
|
||||
+Gxxy[i]*lxxx[i]+Gyxy[i]*lxyx[i]+Gzxy[i]*lxzx[i])
|
||||
+(Gxxy[i]*lxxx[i]+Gyxy[i]*lxxy[i]+Gzxy[i]*lxxz[i])
|
||||
+(Gxxx[i]*lxyx[i]+Gyxx[i]*lxyy[i]+Gzxx[i]*lxyz[i]))
|
||||
+ uxz*(TWO*(Gxxx[i]*lxzx[i]+Gyxx[i]*lyzx[i]+Gzxx[i]*lzzx[i]
|
||||
+Gxxz[i]*lxxx[i]+Gyxz[i]*lxyx[i]+Gzxz[i]*lxzx[i])
|
||||
+(Gxxz[i]*lxxx[i]+Gyxz[i]*lxxy[i]+Gzxz[i]*lxxz[i])
|
||||
+(Gxxx[i]*lxzx[i]+Gyxx[i]*lxzy[i]+Gzxx[i]*lxzz[i]))
|
||||
+ uyy*(TWO*(Gxxy[i]*lxyx[i]+Gyxy[i]*lyyx[i]+Gzxy[i]*lyzx[i])
|
||||
+(Gxxy[i]*lxyx[i]+Gyxy[i]*lxyy[i]+Gzxy[i]*lxyz[i]))
|
||||
+ uyz*(TWO*(Gxxy[i]*lxzx[i]+Gyxy[i]*lyzx[i]+Gzxy[i]*lzzx[i]
|
||||
+Gxxz[i]*lxyx[i]+Gyxz[i]*lyyx[i]+Gzxz[i]*lyzx[i])
|
||||
+(Gxxz[i]*lxyx[i]+Gyxz[i]*lxyy[i]+Gzxz[i]*lxyz[i])
|
||||
+(Gxxy[i]*lxzx[i]+Gyxy[i]*lxzy[i]+Gzxy[i]*lxzz[i]))
|
||||
+ uzz*(TWO*(Gxxz[i]*lxzx[i]+Gyxz[i]*lyzx[i]+Gzxz[i]*lzzx[i])
|
||||
+(Gxxz[i]*lxzx[i]+Gyxz[i]*lxzy[i]+Gzxz[i]*lxzz[i]));
|
||||
|
||||
Ryy[i] = -H*Ryy[i]
|
||||
+ gxy[i]*Gamxy[i]+gyy[i]*Gamyy_d[i]+gyz[i]*Gamzy[i]
|
||||
+ Gamxa[i]*lxyy[i]+Gamya[i]*lyyy[i]+Gamza[i]*lyzy[i]
|
||||
+ uxx*(TWO*(Gxxy[i]*lxxy[i]+Gyxy[i]*lxyy[i]+Gzxy[i]*lxzy[i])
|
||||
+(Gxxy[i]*lxyx[i]+Gyxy[i]*lxyy[i]+Gzxy[i]*lxyz[i]))
|
||||
+ uxy*(TWO*(Gxxy[i]*lxyy[i]+Gyxy[i]*lyyy[i]+Gzxy[i]*lyzy[i]
|
||||
+Gxyy[i]*lxxy[i]+Gyyy[i]*lxyy[i]+Gzyy[i]*lxzy[i])
|
||||
+(Gxyy[i]*lxyx[i]+Gyyy[i]*lxyy[i]+Gzyy[i]*lxyz[i])
|
||||
+(Gxxy[i]*lyyx[i]+Gyxy[i]*lyyy[i]+Gzxy[i]*lyyz[i]))
|
||||
+ uxz*(TWO*(Gxxy[i]*lxzy[i]+Gyxy[i]*lyzy[i]+Gzxy[i]*lzzy[i]
|
||||
+Gxyz[i]*lxxy[i]+Gyyz[i]*lxyy[i]+Gzyz[i]*lxzy[i])
|
||||
+(Gxyz[i]*lxyx[i]+Gyyz[i]*lxyy[i]+Gzyz[i]*lxyz[i])
|
||||
+(Gxxy[i]*lyzx[i]+Gyxy[i]*lyzy[i]+Gzxy[i]*lyzz[i]))
|
||||
+ uyy*(TWO*(Gxyy[i]*lxyy[i]+Gyyy[i]*lyyy[i]+Gzyy[i]*lyzy[i])
|
||||
+(Gxyy[i]*lyyx[i]+Gyyy[i]*lyyy[i]+Gzyy[i]*lyyz[i]))
|
||||
+ uyz*(TWO*(Gxyy[i]*lxzy[i]+Gyyy[i]*lyzy[i]+Gzyy[i]*lzzy[i]
|
||||
+Gxyz[i]*lxyy[i]+Gyyz[i]*lyyy[i]+Gzyz[i]*lyzy[i])
|
||||
+(Gxyz[i]*lyyx[i]+Gyyz[i]*lyyy[i]+Gzyz[i]*lyyz[i])
|
||||
+(Gxyy[i]*lyzx[i]+Gyyy[i]*lyzy[i]+Gzyy[i]*lyzz[i]))
|
||||
+ uzz*(TWO*(Gxyz[i]*lxzy[i]+Gyyz[i]*lyzy[i]+Gzyz[i]*lzzy[i])
|
||||
+(Gxyz[i]*lyzx[i]+Gyyz[i]*lyzy[i]+Gzyz[i]*lyzz[i]));
|
||||
|
||||
Rzz[i] = -H*Rzz[i]
|
||||
+ gxz[i]*Gamxz[i]+gyz[i]*Gamyz_d[i]+gzz[i]*Gamzz_d[i]
|
||||
+ Gamxa[i]*lxzz[i]+Gamya[i]*lyzz[i]+Gamza[i]*lzzz[i]
|
||||
+ uxx*(TWO*(Gxxz[i]*lxxz[i]+Gyxz[i]*lxyz[i]+Gzxz[i]*lxzz[i])
|
||||
+(Gxxz[i]*lxzx[i]+Gyxz[i]*lxzy[i]+Gzxz[i]*lxzz[i]))
|
||||
+ uxy*(TWO*(Gxxz[i]*lxyz[i]+Gyxz[i]*lyyz[i]+Gzxz[i]*lyzz[i]
|
||||
+Gxyz[i]*lxxz[i]+Gyyz[i]*lxyz[i]+Gzyz[i]*lxzz[i])
|
||||
+(Gxyz[i]*lxzx[i]+Gyyz[i]*lxzy[i]+Gzyz[i]*lxzz[i])
|
||||
+(Gxxz[i]*lyzx[i]+Gyxz[i]*lyzy[i]+Gzxz[i]*lyzz[i]))
|
||||
+ uxz*(TWO*(Gxxz[i]*lxzz[i]+Gyxz[i]*lyzz[i]+Gzxz[i]*lzzz[i]
|
||||
+Gxzz[i]*lxxz[i]+Gyzz[i]*lxyz[i]+Gzzz[i]*lxzz[i])
|
||||
+(Gxzz[i]*lxzx[i]+Gyzz[i]*lxzy[i]+Gzzz[i]*lxzz[i])
|
||||
+(Gxxz[i]*lzzx[i]+Gyxz[i]*lzzy[i]+Gzxz[i]*lzzz[i]))
|
||||
+ uyy*(TWO*(Gxyz[i]*lxyz[i]+Gyyz[i]*lyyz[i]+Gzyz[i]*lyzz[i])
|
||||
+(Gxyz[i]*lyzx[i]+Gyyz[i]*lyzy[i]+Gzyz[i]*lyzz[i]))
|
||||
+ uyz*(TWO*(Gxyz[i]*lxzz[i]+Gyyz[i]*lyzz[i]+Gzyz[i]*lzzz[i]
|
||||
+Gxzz[i]*lxyz[i]+Gyzz[i]*lyyz[i]+Gzzz[i]*lyzz[i])
|
||||
+(Gxzz[i]*lyzx[i]+Gyzz[i]*lyzy[i]+Gzzz[i]*lyzz[i])
|
||||
+(Gxyz[i]*lzzx[i]+Gyyz[i]*lzzy[i]+Gzyz[i]*lzzz[i]))
|
||||
+ uzz*(TWO*(Gxzz[i]*lxzz[i]+Gyzz[i]*lyzz[i]+Gzzz[i]*lzzz[i])
|
||||
+(Gxzz[i]*lzzx[i]+Gyzz[i]*lzzy[i]+Gzzz[i]*lzzz[i]));
|
||||
|
||||
Rxy[i] = H*(
|
||||
-Rxy[i]
|
||||
+gxx[i]*Gamxy[i]+gxy[i]*Gamyy_d[i]+gxz[i]*Gamzy[i]
|
||||
+gxy[i]*Gamxx[i]+gyy[i]*Gamyx[i]+gyz[i]*Gamzx[i]
|
||||
+Gamxa[i]*lxyx[i]+Gamya[i]*lyyx[i]+Gamza[i]*lyzx[i]
|
||||
+Gamxa[i]*lxxy[i]+Gamya[i]*lxyy[i]+Gamza[i]*lxzy[i])
|
||||
+uxx*(Gxxx[i]*lxxy[i]+Gyxx[i]*lxyy[i]+Gzxx[i]*lxzy[i]
|
||||
+Gxxy[i]*lxxx[i]+Gyxy[i]*lxyx[i]+Gzxy[i]*lxzx[i]
|
||||
+Gxxx[i]*lxyx[i]+Gyxx[i]*lxyy[i]+Gzxx[i]*lxyz[i])
|
||||
+uxy*(Gxxx[i]*lxyy[i]+Gyxx[i]*lyyy[i]+Gzxx[i]*lyzy[i]
|
||||
+Gxxy[i]*lxyx[i]+Gyxy[i]*lyyx[i]+Gzxy[i]*lyzx[i]
|
||||
+Gxxy[i]*lxyx[i]+Gyxy[i]*lxyy[i]+Gzxy[i]*lxyz[i]
|
||||
+Gxxy[i]*lxxy[i]+Gyxy[i]*lxyy[i]+Gzxy[i]*lxzy[i]
|
||||
+Gxyy[i]*lxxx[i]+Gyyy[i]*lxyx[i]+Gzyy[i]*lxzx[i]
|
||||
+Gxxx[i]*lyyx[i]+Gyxx[i]*lyyy[i]+Gzxx[i]*lyyz[i])
|
||||
+uxz*(Gxxx[i]*lxzy[i]+Gyxx[i]*lyzy[i]+Gzxx[i]*lzzy[i]
|
||||
+Gxxy[i]*lxzx[i]+Gyxy[i]*lyzx[i]+Gzxy[i]*lzzx[i]
|
||||
+Gxxz[i]*lxyx[i]+Gyxz[i]*lxyy[i]+Gzxz[i]*lxyz[i]
|
||||
+Gxxz[i]*lxxy[i]+Gyxz[i]*lxyy[i]+Gzxz[i]*lxzy[i]
|
||||
+Gxyz[i]*lxxx[i]+Gyyz[i]*lxyx[i]+Gzyz[i]*lxzx[i]
|
||||
+Gxxx[i]*lyzx[i]+Gyxx[i]*lyzy[i]+Gzxx[i]*lyzz[i])
|
||||
+uyy*(Gxxy[i]*lxyy[i]+Gyxy[i]*lyyy[i]+Gzxy[i]*lyzy[i]
|
||||
+Gxyy[i]*lxyx[i]+Gyyy[i]*lyyx[i]+Gzyy[i]*lyzx[i]
|
||||
+Gxxy[i]*lyyx[i]+Gyxy[i]*lyyy[i]+Gzxy[i]*lyyz[i])
|
||||
+uyz*(Gxxy[i]*lxzy[i]+Gyxy[i]*lyzy[i]+Gzxy[i]*lzzy[i]
|
||||
+Gxyy[i]*lxzx[i]+Gyyy[i]*lyzx[i]+Gzyy[i]*lzzx[i]
|
||||
+Gxxz[i]*lyyx[i]+Gyxz[i]*lyyy[i]+Gzxz[i]*lyyz[i]
|
||||
+Gxxz[i]*lxyy[i]+Gyxz[i]*lyyy[i]+Gzxz[i]*lyzy[i]
|
||||
+Gxyz[i]*lxyx[i]+Gyyz[i]*lyyx[i]+Gzyz[i]*lyzx[i]
|
||||
+Gxxy[i]*lyzx[i]+Gyxy[i]*lyzy[i]+Gzxy[i]*lyzz[i])
|
||||
+uzz*(Gxxz[i]*lxzy[i]+Gyxz[i]*lyzy[i]+Gzxz[i]*lzzy[i]
|
||||
+Gxyz[i]*lxzx[i]+Gyyz[i]*lyzx[i]+Gzyz[i]*lzzx[i]
|
||||
+Gxxz[i]*lyzx[i]+Gyxz[i]*lyzy[i]+Gzxz[i]*lyzz[i]);
|
||||
|
||||
Rxz[i] = H*(
|
||||
-Rxz[i]
|
||||
+gxx[i]*Gamxz[i]+gxy[i]*Gamyz_d[i]+gxz[i]*Gamzz_d[i]
|
||||
+gxz[i]*Gamxx[i]+gyz[i]*Gamyx[i]+gzz[i]*Gamzx[i]
|
||||
+Gamxa[i]*lxzx[i]+Gamya[i]*lyzx[i]+Gamza[i]*lzzx[i]
|
||||
+Gamxa[i]*lxxz[i]+Gamya[i]*lxyz[i]+Gamza[i]*lxzz[i])
|
||||
+uxx*(Gxxx[i]*lxxz[i]+Gyxx[i]*lxyz[i]+Gzxx[i]*lxzz[i]
|
||||
+Gxxz[i]*lxxx[i]+Gyxz[i]*lxyx[i]+Gzxz[i]*lxzx[i]
|
||||
+Gxxx[i]*lxzx[i]+Gyxx[i]*lxzy[i]+Gzxx[i]*lxzz[i])
|
||||
+uxy*(Gxxx[i]*lxyz[i]+Gyxx[i]*lyyz[i]+Gzxx[i]*lyzz[i]
|
||||
+Gxxz[i]*lxyx[i]+Gyxz[i]*lyyx[i]+Gzxz[i]*lyzx[i]
|
||||
+Gxxy[i]*lxzx[i]+Gyxy[i]*lxzy[i]+Gzxy[i]*lxzz[i]
|
||||
+Gxxy[i]*lxxz[i]+Gyxy[i]*lxyz[i]+Gzxy[i]*lxzz[i]
|
||||
+Gxyz[i]*lxxx[i]+Gyyz[i]*lxyx[i]+Gzyz[i]*lxzx[i]
|
||||
+Gxxx[i]*lyzx[i]+Gyxx[i]*lyzy[i]+Gzxx[i]*lyzz[i])
|
||||
+uxz*(Gxxx[i]*lxzz[i]+Gyxx[i]*lyzz[i]+Gzxx[i]*lzzz[i]
|
||||
+Gxxz[i]*lxzx[i]+Gyxz[i]*lyzx[i]+Gzxz[i]*lzzx[i]
|
||||
+Gxxz[i]*lxzx[i]+Gyxz[i]*lxzy[i]+Gzxz[i]*lxzz[i]
|
||||
+Gxxz[i]*lxxz[i]+Gyxz[i]*lxyz[i]+Gzxz[i]*lxzz[i]
|
||||
+Gxzz[i]*lxxx[i]+Gyzz[i]*lxyx[i]+Gzzz[i]*lxzx[i]
|
||||
+Gxxx[i]*lzzx[i]+Gyxx[i]*lzzy[i]+Gzxx[i]*lzzz[i])
|
||||
+uyy*(Gxxy[i]*lxyz[i]+Gyxy[i]*lyyz[i]+Gzxy[i]*lyzz[i]
|
||||
+Gxyz[i]*lxyx[i]+Gyyz[i]*lyyx[i]+Gzyz[i]*lyzx[i]
|
||||
+Gxxy[i]*lyzx[i]+Gyxy[i]*lyzy[i]+Gzxy[i]*lyzz[i])
|
||||
+uyz*(Gxxy[i]*lxzz[i]+Gyxy[i]*lyzz[i]+Gzxy[i]*lzzz[i]
|
||||
+Gxyz[i]*lxzx[i]+Gyyz[i]*lyzx[i]+Gzyz[i]*lzzx[i]
|
||||
+Gxxz[i]*lyzx[i]+Gyxz[i]*lyzy[i]+Gzxz[i]*lyzz[i]
|
||||
+Gxxz[i]*lxyz[i]+Gyxz[i]*lyyz[i]+Gzxz[i]*lyzz[i]
|
||||
+Gxzz[i]*lxyx[i]+Gyzz[i]*lyyx[i]+Gzzz[i]*lyzx[i]
|
||||
+Gxxy[i]*lzzx[i]+Gyxy[i]*lzzy[i]+Gzxy[i]*lzzz[i])
|
||||
+uzz*(Gxxz[i]*lxzz[i]+Gyxz[i]*lyzz[i]+Gzxz[i]*lzzz[i]
|
||||
+Gxzz[i]*lxzx[i]+Gyzz[i]*lyzx[i]+Gzzz[i]*lzzx[i]
|
||||
+Gxxz[i]*lzzx[i]+Gyxz[i]*lzzy[i]+Gzxz[i]*lzzz[i]);
|
||||
|
||||
Ryz[i] = H*(
|
||||
-Ryz[i]
|
||||
+gxy[i]*Gamxz[i]+gyy[i]*Gamyz_d[i]+gyz[i]*Gamzz_d[i]
|
||||
+gxz[i]*Gamxy[i]+gyz[i]*Gamyy_d[i]+gzz[i]*Gamzy[i]
|
||||
+Gamxa[i]*lxzy[i]+Gamya[i]*lyzy[i]+Gamza[i]*lzzy[i]
|
||||
+Gamxa[i]*lxyz[i]+Gamya[i]*lyyz[i]+Gamza[i]*lyzz[i])
|
||||
+uxx*(Gxxy[i]*lxxz[i]+Gyxy[i]*lxyz[i]+Gzxy[i]*lxzz[i]
|
||||
+Gxxz[i]*lxxy[i]+Gyxz[i]*lxyy[i]+Gzxz[i]*lxzy[i]
|
||||
+Gxxy[i]*lxzx[i]+Gyxy[i]*lxzy[i]+Gzxy[i]*lxzz[i])
|
||||
+uxy*(Gxxy[i]*lxyz[i]+Gyxy[i]*lyyz[i]+Gzxy[i]*lyzz[i]
|
||||
+Gxxz[i]*lxyy[i]+Gyxz[i]*lyyy[i]+Gzxz[i]*lyzy[i]
|
||||
+Gxyy[i]*lxzx[i]+Gyyy[i]*lxzy[i]+Gzyy[i]*lxzz[i]
|
||||
+Gxyy[i]*lxxz[i]+Gyyy[i]*lxyz[i]+Gzyy[i]*lxzz[i]
|
||||
+Gxyz[i]*lxxy[i]+Gyyz[i]*lxyy[i]+Gzyz[i]*lxzy[i]
|
||||
+Gxxy[i]*lyzx[i]+Gyxy[i]*lyzy[i]+Gzxy[i]*lyzz[i])
|
||||
+uxz*(Gxxy[i]*lxzz[i]+Gyxy[i]*lyzz[i]+Gzxy[i]*lzzz[i]
|
||||
+Gxxz[i]*lxzy[i]+Gyxz[i]*lyzy[i]+Gzxz[i]*lzzy[i]
|
||||
+Gxyz[i]*lxzx[i]+Gyyz[i]*lxzy[i]+Gzyz[i]*lxzz[i]
|
||||
+Gxyz[i]*lxxz[i]+Gyyz[i]*lxyz[i]+Gzyz[i]*lxzz[i]
|
||||
+Gxzz[i]*lxxy[i]+Gyzz[i]*lxyy[i]+Gzzz[i]*lxzy[i]
|
||||
+Gxxy[i]*lzzx[i]+Gyxy[i]*lzzy[i]+Gzxy[i]*lzzz[i])
|
||||
+uyy*(Gxyy[i]*lxyz[i]+Gyyy[i]*lyyz[i]+Gzyy[i]*lyzz[i]
|
||||
+Gxyz[i]*lxyy[i]+Gyyz[i]*lyyy[i]+Gzyz[i]*lyzy[i]
|
||||
+Gxyy[i]*lyzx[i]+Gyyy[i]*lyzy[i]+Gzyy[i]*lyzz[i])
|
||||
+uyz*(Gxyy[i]*lxzz[i]+Gyyy[i]*lyzz[i]+Gzyy[i]*lzzz[i]
|
||||
+Gxyz[i]*lxzy[i]+Gyyz[i]*lyzy[i]+Gzyz[i]*lzzy[i]
|
||||
+Gxyz[i]*lyzx[i]+Gyyz[i]*lyzy[i]+Gzyz[i]*lyzz[i]
|
||||
+Gxyz[i]*lxyz[i]+Gyyz[i]*lyyz[i]+Gzyz[i]*lyzz[i]
|
||||
+Gxzz[i]*lxyy[i]+Gyzz[i]*lyyy[i]+Gzzz[i]*lyzy[i]
|
||||
+Gxyy[i]*lzzx[i]+Gyyy[i]*lzzy[i]+Gzyy[i]*lzzz[i])
|
||||
+uzz*(Gxyz[i]*lxzz[i]+Gyyz[i]*lyzz[i]+Gzyz[i]*lzzz[i]
|
||||
+Gxzz[i]*lxzy[i]+Gyzz[i]*lyzy[i]+Gzzz[i]*lzzy[i]
|
||||
+Gxyz[i]*lzzx[i]+Gyyz[i]*lzzy[i]+Gzyz[i]*lzzz[i]);
|
||||
}
|
||||
}
|
||||
|
||||
/* Phase 13: chi correction to Ricci tensor
|
||||
* After fdderivs(chi), subtract Christoffel*chi_deriv, compute conformal factor f,
|
||||
* then add chi contribution to Rxx..Rzz.
|
||||
@@ -3629,8 +3842,8 @@ void kern_phase13_chi_correction(
|
||||
}
|
||||
}
|
||||
|
||||
/* Phase 15: trK_rhs, Aij_rhs, gauge (after fdderivs(Lap) and fderivs(chi))
|
||||
* Also updates Christoffel with physical chi correction, computes Lap_rhs, beta_rhs, dtSf_rhs.
|
||||
/* Phase 15: trK_rhs, Aij_rhs, gauge.
|
||||
* Also updates Christoffel with physical chi correction and computes Lap second derivatives on the fly.
|
||||
*/
|
||||
__global__ __launch_bounds__(128, 4)
|
||||
void kern_phase15_trK_Aij_gauge(
|
||||
@@ -3674,11 +3887,6 @@ void kern_phase15_trK_Aij_gauge(
|
||||
double* __restrict__ Gzxx, double* __restrict__ Gzxy,
|
||||
double* __restrict__ Gzxz, double* __restrict__ Gzyy,
|
||||
double* __restrict__ Gzyz, double* __restrict__ Gzzz,
|
||||
/* fxx..fzz = fdderivs(Lap) output */
|
||||
double* __restrict__ fxx, double* __restrict__ fxy,
|
||||
double* __restrict__ fxz, double* __restrict__ fyy,
|
||||
double* __restrict__ fyz, double* __restrict__ fzz,
|
||||
/* dtSfx_rhs..dtSfz_rhs = fderivs(chi) output, then overwritten */
|
||||
double* __restrict__ dtSfx_rhs, double* __restrict__ dtSfy_rhs,
|
||||
double* __restrict__ dtSfz_rhs,
|
||||
double* __restrict__ trK_rhs,
|
||||
@@ -3697,11 +3905,148 @@ void kern_phase15_trK_Aij_gauge(
|
||||
const double PI_V=3.14159265358979323846;
|
||||
const double F16=16.0, F8=8.0;
|
||||
for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < d_gp.all; i += blockDim.x*gridDim.x) {
|
||||
const int nx = d_gp.ex[0], ny = d_gp.ex[1], nz = d_gp.ex[2];
|
||||
const int i0 = i % nx;
|
||||
const int j0 = (i / nx) % ny;
|
||||
const int k0 = i / (nx * ny);
|
||||
const int iF = i0 + 1;
|
||||
const int jF = j0 + 1;
|
||||
const int kF = k0 + 1;
|
||||
const int imaxF = d_gp.imaxF, jmaxF = d_gp.jmaxF, kmaxF = d_gp.kmaxF;
|
||||
const int iminF = d_gp.iminF, jminF = d_gp.jminF, kminF = d_gp.kminF;
|
||||
double uxx=gupxx[i],uxy=gupxy[i],uxz=gupxz[i];
|
||||
double uyy=gupyy[i],uyz=gupyz[i],uzz=gupzz[i];
|
||||
double a=alpn1[i], c1=chin1[i];
|
||||
double cx=chix[i],cy=chiy[i],cz=chiz[i];
|
||||
double lx=Lapx[i],ly=Lapy[i],lz=Lapz[i];
|
||||
double fxx_v = 0.0, fxy_v = 0.0, fxz_v = 0.0;
|
||||
double fyy_v = 0.0, fyz_v = 0.0, fzz_v = 0.0;
|
||||
|
||||
if (!(i0 > nx - 2 || j0 > ny - 2 || k0 > nz - 2)) {
|
||||
if ((iF + 2) <= imaxF && (iF - 2) >= iminF &&
|
||||
(jF + 2) <= jmaxF && (jF - 2) >= jminF &&
|
||||
(kF + 2) <= kmaxF && (kF - 2) >= kminF)
|
||||
{
|
||||
const double c = fetch_sym_ord2_direct(alpn1, iF, jF, kF, 1, 1, 1);
|
||||
fxx_v = d_gp.Fdxdx * (
|
||||
-fetch_sym_ord2_direct(alpn1, iF - 2, jF, kF, 1, 1, 1)
|
||||
+16.0 * fetch_sym_ord2_direct(alpn1, iF - 1, jF, kF, 1, 1, 1)
|
||||
-30.0 * c
|
||||
+16.0 * fetch_sym_ord2_direct(alpn1, iF + 1, jF, kF, 1, 1, 1)
|
||||
- fetch_sym_ord2_direct(alpn1, iF + 2, jF, kF, 1, 1, 1));
|
||||
fyy_v = d_gp.Fdydy * (
|
||||
-fetch_sym_ord2_direct(alpn1, iF, jF - 2, kF, 1, 1, 1)
|
||||
+16.0 * fetch_sym_ord2_direct(alpn1, iF, jF - 1, kF, 1, 1, 1)
|
||||
-30.0 * c
|
||||
+16.0 * fetch_sym_ord2_direct(alpn1, iF, jF + 1, kF, 1, 1, 1)
|
||||
- fetch_sym_ord2_direct(alpn1, iF, jF + 2, kF, 1, 1, 1));
|
||||
fzz_v = d_gp.Fdzdz * (
|
||||
-fetch_sym_ord2_direct(alpn1, iF, jF, kF - 2, 1, 1, 1)
|
||||
+16.0 * fetch_sym_ord2_direct(alpn1, iF, jF, kF - 1, 1, 1, 1)
|
||||
-30.0 * c
|
||||
+16.0 * fetch_sym_ord2_direct(alpn1, iF, jF, kF + 1, 1, 1, 1)
|
||||
- fetch_sym_ord2_direct(alpn1, iF, jF, kF + 2, 1, 1, 1));
|
||||
|
||||
const double t_jm2 =
|
||||
fetch_sym_ord2_direct(alpn1, iF - 2, jF - 2, kF, 1, 1, 1)
|
||||
- 8.0 * fetch_sym_ord2_direct(alpn1, iF - 1, jF - 2, kF, 1, 1, 1)
|
||||
+ 8.0 * fetch_sym_ord2_direct(alpn1, iF + 1, jF - 2, kF, 1, 1, 1)
|
||||
- fetch_sym_ord2_direct(alpn1, iF + 2, jF - 2, kF, 1, 1, 1);
|
||||
const double t_jm1 =
|
||||
fetch_sym_ord2_direct(alpn1, iF - 2, jF - 1, kF, 1, 1, 1)
|
||||
- 8.0 * fetch_sym_ord2_direct(alpn1, iF - 1, jF - 1, kF, 1, 1, 1)
|
||||
+ 8.0 * fetch_sym_ord2_direct(alpn1, iF + 1, jF - 1, kF, 1, 1, 1)
|
||||
- fetch_sym_ord2_direct(alpn1, iF + 2, jF - 1, kF, 1, 1, 1);
|
||||
const double t_jp1 =
|
||||
fetch_sym_ord2_direct(alpn1, iF - 2, jF + 1, kF, 1, 1, 1)
|
||||
- 8.0 * fetch_sym_ord2_direct(alpn1, iF - 1, jF + 1, kF, 1, 1, 1)
|
||||
+ 8.0 * fetch_sym_ord2_direct(alpn1, iF + 1, jF + 1, kF, 1, 1, 1)
|
||||
- fetch_sym_ord2_direct(alpn1, iF + 2, jF + 1, kF, 1, 1, 1);
|
||||
const double t_jp2 =
|
||||
fetch_sym_ord2_direct(alpn1, iF - 2, jF + 2, kF, 1, 1, 1)
|
||||
- 8.0 * fetch_sym_ord2_direct(alpn1, iF - 1, jF + 2, kF, 1, 1, 1)
|
||||
+ 8.0 * fetch_sym_ord2_direct(alpn1, iF + 1, jF + 2, kF, 1, 1, 1)
|
||||
- fetch_sym_ord2_direct(alpn1, iF + 2, jF + 2, kF, 1, 1, 1);
|
||||
fxy_v = d_gp.Fdxdy * (t_jm2 - 8.0 * t_jm1 + 8.0 * t_jp1 - t_jp2);
|
||||
|
||||
const double t_km2_x =
|
||||
fetch_sym_ord2_direct(alpn1, iF - 2, jF, kF - 2, 1, 1, 1)
|
||||
- 8.0 * fetch_sym_ord2_direct(alpn1, iF - 1, jF, kF - 2, 1, 1, 1)
|
||||
+ 8.0 * fetch_sym_ord2_direct(alpn1, iF + 1, jF, kF - 2, 1, 1, 1)
|
||||
- fetch_sym_ord2_direct(alpn1, iF + 2, jF, kF - 2, 1, 1, 1);
|
||||
const double t_km1_x =
|
||||
fetch_sym_ord2_direct(alpn1, iF - 2, jF, kF - 1, 1, 1, 1)
|
||||
- 8.0 * fetch_sym_ord2_direct(alpn1, iF - 1, jF, kF - 1, 1, 1, 1)
|
||||
+ 8.0 * fetch_sym_ord2_direct(alpn1, iF + 1, jF, kF - 1, 1, 1, 1)
|
||||
- fetch_sym_ord2_direct(alpn1, iF + 2, jF, kF - 1, 1, 1, 1);
|
||||
const double t_kp1_x =
|
||||
fetch_sym_ord2_direct(alpn1, iF - 2, jF, kF + 1, 1, 1, 1)
|
||||
- 8.0 * fetch_sym_ord2_direct(alpn1, iF - 1, jF, kF + 1, 1, 1, 1)
|
||||
+ 8.0 * fetch_sym_ord2_direct(alpn1, iF + 1, jF, kF + 1, 1, 1, 1)
|
||||
- fetch_sym_ord2_direct(alpn1, iF + 2, jF, kF + 1, 1, 1, 1);
|
||||
const double t_kp2_x =
|
||||
fetch_sym_ord2_direct(alpn1, iF - 2, jF, kF + 2, 1, 1, 1)
|
||||
- 8.0 * fetch_sym_ord2_direct(alpn1, iF - 1, jF, kF + 2, 1, 1, 1)
|
||||
+ 8.0 * fetch_sym_ord2_direct(alpn1, iF + 1, jF, kF + 2, 1, 1, 1)
|
||||
- fetch_sym_ord2_direct(alpn1, iF + 2, jF, kF + 2, 1, 1, 1);
|
||||
fxz_v = d_gp.Fdxdz * (t_km2_x - 8.0 * t_km1_x + 8.0 * t_kp1_x - t_kp2_x);
|
||||
|
||||
const double t_km2_y =
|
||||
fetch_sym_ord2_direct(alpn1, iF, jF - 2, kF - 2, 1, 1, 1)
|
||||
- 8.0 * fetch_sym_ord2_direct(alpn1, iF, jF - 1, kF - 2, 1, 1, 1)
|
||||
+ 8.0 * fetch_sym_ord2_direct(alpn1, iF, jF + 1, kF - 2, 1, 1, 1)
|
||||
- fetch_sym_ord2_direct(alpn1, iF, jF + 2, kF - 2, 1, 1, 1);
|
||||
const double t_km1_y =
|
||||
fetch_sym_ord2_direct(alpn1, iF, jF - 2, kF - 1, 1, 1, 1)
|
||||
- 8.0 * fetch_sym_ord2_direct(alpn1, iF, jF - 1, kF - 1, 1, 1, 1)
|
||||
+ 8.0 * fetch_sym_ord2_direct(alpn1, iF, jF + 1, kF - 1, 1, 1, 1)
|
||||
- fetch_sym_ord2_direct(alpn1, iF, jF + 2, kF - 1, 1, 1, 1);
|
||||
const double t_kp1_y =
|
||||
fetch_sym_ord2_direct(alpn1, iF, jF - 2, kF + 1, 1, 1, 1)
|
||||
- 8.0 * fetch_sym_ord2_direct(alpn1, iF, jF - 1, kF + 1, 1, 1, 1)
|
||||
+ 8.0 * fetch_sym_ord2_direct(alpn1, iF, jF + 1, kF + 1, 1, 1, 1)
|
||||
- fetch_sym_ord2_direct(alpn1, iF, jF + 2, kF + 1, 1, 1, 1);
|
||||
const double t_kp2_y =
|
||||
fetch_sym_ord2_direct(alpn1, iF, jF - 2, kF + 2, 1, 1, 1)
|
||||
- 8.0 * fetch_sym_ord2_direct(alpn1, iF, jF - 1, kF + 2, 1, 1, 1)
|
||||
+ 8.0 * fetch_sym_ord2_direct(alpn1, iF, jF + 1, kF + 2, 1, 1, 1)
|
||||
- fetch_sym_ord2_direct(alpn1, iF, jF + 2, kF + 2, 1, 1, 1);
|
||||
fyz_v = d_gp.Fdydz * (t_km2_y - 8.0 * t_km1_y + 8.0 * t_kp1_y - t_kp2_y);
|
||||
}
|
||||
else if ((iF + 1) <= imaxF && (iF - 1) >= iminF &&
|
||||
(jF + 1) <= jmaxF && (jF - 1) >= jminF &&
|
||||
(kF + 1) <= kmaxF && (kF - 1) >= kminF)
|
||||
{
|
||||
const double c = fetch_sym_ord2_direct(alpn1, iF, jF, kF, 1, 1, 1);
|
||||
fxx_v = d_gp.Sdxdx * (
|
||||
fetch_sym_ord2_direct(alpn1, iF - 1, jF, kF, 1, 1, 1)
|
||||
- 2.0 * c
|
||||
+ fetch_sym_ord2_direct(alpn1, iF + 1, jF, kF, 1, 1, 1));
|
||||
fyy_v = d_gp.Sdydy * (
|
||||
fetch_sym_ord2_direct(alpn1, iF, jF - 1, kF, 1, 1, 1)
|
||||
- 2.0 * c
|
||||
+ fetch_sym_ord2_direct(alpn1, iF, jF + 1, kF, 1, 1, 1));
|
||||
fzz_v = d_gp.Sdzdz * (
|
||||
fetch_sym_ord2_direct(alpn1, iF, jF, kF - 1, 1, 1, 1)
|
||||
- 2.0 * c
|
||||
+ fetch_sym_ord2_direct(alpn1, iF, jF, kF + 1, 1, 1, 1));
|
||||
fxy_v = d_gp.Sdxdy * (
|
||||
fetch_sym_ord2_direct(alpn1, iF - 1, jF - 1, kF, 1, 1, 1)
|
||||
- fetch_sym_ord2_direct(alpn1, iF + 1, jF - 1, kF, 1, 1, 1)
|
||||
- fetch_sym_ord2_direct(alpn1, iF - 1, jF + 1, kF, 1, 1, 1)
|
||||
+ fetch_sym_ord2_direct(alpn1, iF + 1, jF + 1, kF, 1, 1, 1));
|
||||
fxz_v = d_gp.Sdxdz * (
|
||||
fetch_sym_ord2_direct(alpn1, iF - 1, jF, kF - 1, 1, 1, 1)
|
||||
- fetch_sym_ord2_direct(alpn1, iF + 1, jF, kF - 1, 1, 1, 1)
|
||||
- fetch_sym_ord2_direct(alpn1, iF - 1, jF, kF + 1, 1, 1, 1)
|
||||
+ fetch_sym_ord2_direct(alpn1, iF + 1, jF, kF + 1, 1, 1, 1));
|
||||
fyz_v = d_gp.Sdydz * (
|
||||
fetch_sym_ord2_direct(alpn1, iF, jF - 1, kF - 1, 1, 1, 1)
|
||||
- fetch_sym_ord2_direct(alpn1, iF, jF + 1, kF - 1, 1, 1, 1)
|
||||
- fetch_sym_ord2_direct(alpn1, iF, jF - 1, kF + 1, 1, 1, 1)
|
||||
+ fetch_sym_ord2_direct(alpn1, iF, jF + 1, kF + 1, 1, 1, 1));
|
||||
}
|
||||
}
|
||||
|
||||
/* raised chi/chi */
|
||||
double gx=(uxx*cx+uxy*cy+uxz*cz)/c1;
|
||||
@@ -3728,17 +4073,17 @@ void kern_phase15_trK_Aij_gauge(
|
||||
Gyyz[i]-=(cz/c1-gyz[i]*gy)*H;
|
||||
Gzyz[i]-=(cy/c1-gyz[i]*gz)*H;
|
||||
|
||||
/* fxx..fzz correction: subtract Gamma*Lap_deriv */
|
||||
fxx[i]-=Gxxx[i]*lx+Gyxx[i]*ly+Gzxx[i]*lz;
|
||||
fyy[i]-=Gxyy[i]*lx+Gyyy[i]*ly+Gzyy[i]*lz;
|
||||
fzz[i]-=Gxzz[i]*lx+Gyzz[i]*ly+Gzzz[i]*lz;
|
||||
fxy[i]-=Gxxy[i]*lx+Gyxy[i]*ly+Gzxy[i]*lz;
|
||||
fxz[i]-=Gxxz[i]*lx+Gyxz[i]*ly+Gzxz[i]*lz;
|
||||
fyz[i]-=Gxyz_o[i]*lx+Gyyz[i]*ly+Gzyz[i]*lz;
|
||||
/* Lap second-derivative correction: subtract Gamma*Lap_deriv */
|
||||
fxx_v -= Gxxx[i]*lx+Gyxx[i]*ly+Gzxx[i]*lz;
|
||||
fyy_v -= Gxyy[i]*lx+Gyyy[i]*ly+Gzyy[i]*lz;
|
||||
fzz_v -= Gxzz[i]*lx+Gyzz[i]*ly+Gzzz[i]*lz;
|
||||
fxy_v -= Gxxy[i]*lx+Gyxy[i]*ly+Gzxy[i]*lz;
|
||||
fxz_v -= Gxxz[i]*lx+Gyxz[i]*ly+Gzxz[i]*lz;
|
||||
fyz_v -= Gxyz_o[i]*lx+Gyyz[i]*ly+Gzyz[i]*lz;
|
||||
|
||||
/* D^i D_i alpha */
|
||||
double DDA = uxx*fxx[i]+uyy*fyy[i]+uzz*fzz[i]
|
||||
+TWO*(uxy*fxy[i]+uxz*fxz[i]+uyz*fyz[i]);
|
||||
double DDA = uxx*fxx_v+uyy*fyy_v+uzz*fzz_v
|
||||
+TWO*(uxy*fxy_v+uxz*fxz_v+uyz*fyz_v);
|
||||
|
||||
/* trace of S_ij (physical) */
|
||||
double S_v = c1*(uxx*Sxx_m[i]+uyy*Syy_m[i]+uzz*Szz_m[i]
|
||||
@@ -3774,25 +4119,25 @@ void kern_phase15_trK_Aij_gauge(
|
||||
|
||||
/* f_arr = -(1/3) * (DDA + alpha/chi * (2/3*K^2 - AijAij - 16pi*rho + 8pi*S)) */
|
||||
double f_v = F2o3*trK_v*trK_v - AijAij - F16*PI_V*rho[i] + EIGHT*PI_V*S_v;
|
||||
f_arr[i] = -F1o3*(uxx*fxx[i]+uyy*fyy[i]+uzz*fzz[i]
|
||||
+TWO*(uxy*fxy[i]+uxz*fxz[i]+uyz*fyz[i])
|
||||
f_arr[i] = -F1o3*(uxx*fxx_v+uyy*fyy_v+uzz*fzz_v
|
||||
+TWO*(uxy*fxy_v+uxz*fxz_v+uyz*fyz_v)
|
||||
+(a/c1)*f_v);
|
||||
|
||||
/* fij = alpha*(Rij - 8pi*Sij) - D_iD_j alpha */
|
||||
double fxx_v=a*(Rxx[i]-EIGHT*PI_V*Sxx_m[i])-fxx[i];
|
||||
double fxy_v=a*(Rxy[i]-EIGHT*PI_V*Sxy_m[i])-fxy[i];
|
||||
double fxz_v=a*(Rxz[i]-EIGHT*PI_V*Sxz_m[i])-fxz[i];
|
||||
double fyy_v=a*(Ryy[i]-EIGHT*PI_V*Syy_m[i])-fyy[i];
|
||||
double fyz_v=a*(Ryz[i]-EIGHT*PI_V*Syz_m[i])-fyz[i];
|
||||
double fzz_v=a*(Rzz[i]-EIGHT*PI_V*Szz_m[i])-fzz[i];
|
||||
double fij_xx=a*(Rxx[i]-EIGHT*PI_V*Sxx_m[i])-fxx_v;
|
||||
double fij_xy=a*(Rxy[i]-EIGHT*PI_V*Sxy_m[i])-fxy_v;
|
||||
double fij_xz=a*(Rxz[i]-EIGHT*PI_V*Sxz_m[i])-fxz_v;
|
||||
double fij_yy=a*(Ryy[i]-EIGHT*PI_V*Syy_m[i])-fyy_v;
|
||||
double fij_yz=a*(Ryz[i]-EIGHT*PI_V*Syz_m[i])-fyz_v;
|
||||
double fij_zz=a*(Rzz[i]-EIGHT*PI_V*Szz_m[i])-fzz_v;
|
||||
|
||||
/* Aij_rhs = chi*(fij - gij*f) */
|
||||
Axx_rhs[i]=fxx_v-gxx[i]*f_arr[i];
|
||||
Ayy_rhs[i]=fyy_v-gyy[i]*f_arr[i];
|
||||
Azz_rhs[i]=fzz_v-gzz[i]*f_arr[i];
|
||||
Axy_rhs[i]=fxy_v-gxy[i]*f_arr[i];
|
||||
Axz_rhs[i]=fxz_v-gxz[i]*f_arr[i];
|
||||
Ayz_rhs[i]=fyz_v-gyz[i]*f_arr[i];
|
||||
Axx_rhs[i]=fij_xx-gxx[i]*f_arr[i];
|
||||
Ayy_rhs[i]=fij_yy-gyy[i]*f_arr[i];
|
||||
Azz_rhs[i]=fij_zz-gzz[i]*f_arr[i];
|
||||
Axy_rhs[i]=fij_xy-gxy[i]*f_arr[i];
|
||||
Axz_rhs[i]=fij_xz-gxz[i]*f_arr[i];
|
||||
Ayz_rhs[i]=fij_yz-gyz[i]*f_arr[i];
|
||||
|
||||
/* A_il A^l_j */
|
||||
double AA_xx=uxx*Axx[i]*Axx[i]+uyy*Axy[i]*Axy[i]+uzz*Axz[i]*Axz[i]
|
||||
@@ -4327,7 +4672,7 @@ static void launch_rhs_pipeline(int all, double eps, int co)
|
||||
src_fields, dst_fields, soa_signs, all);
|
||||
}
|
||||
|
||||
kern_phase11_ricci_diag<<<grid(all),BLK>>>(
|
||||
kern_phase11_ricci_fused<<<grid(all),BLK>>>(
|
||||
D(S_gxx),D(S_gxy),D(S_gxz),D(S_gyy),D(S_gyz),D(S_gzz),
|
||||
D(S_gupxx),D(S_gupxy),D(S_gupxz),D(S_gupyy),D(S_gupyz),D(S_gupzz),
|
||||
D(S_Gamxa),D(S_Gamya),D(S_Gamza),
|
||||
@@ -4343,25 +4688,8 @@ static void launch_rhs_pipeline(int all, double eps, int co)
|
||||
D(S_gxxx),D(S_gxyx),D(S_gxzx),D(S_gyyx),D(S_gyzx),D(S_gzzx),
|
||||
D(S_gxxy),D(S_gxyy),D(S_gxzy),D(S_gyyy),D(S_gyzy),D(S_gzzy),
|
||||
D(S_gxxz),D(S_gxyz),D(S_gxzz),D(S_gyyz),D(S_gyzz),D(S_gzzz),
|
||||
D(S_Rxx),D(S_Ryy),D(S_Rzz));
|
||||
|
||||
kern_phase11_ricci_offdiag<<<grid(all),BLK>>>(
|
||||
D(S_gxx),D(S_gxy),D(S_gxz),D(S_gyy),D(S_gyz),D(S_gzz),
|
||||
D(S_gupxx),D(S_gupxy),D(S_gupxz),D(S_gupyy),D(S_gupyz),D(S_gupzz),
|
||||
D(S_Gamxa),D(S_Gamya),D(S_Gamza),
|
||||
D(S_Gamxx),D(S_Gamxy),D(S_Gamxz),
|
||||
D(S_Gamyx),D(S_Gamyy_t),D(S_Gamyz_t),
|
||||
D(S_Gamzx),D(S_Gamzy),D(S_Gamzz_t),
|
||||
D(S_Gamxxx),D(S_Gamxxy),D(S_Gamxxz),
|
||||
D(S_Gamxyy),D(S_Gamxyz),D(S_Gamxzz),
|
||||
D(S_Gamyxx),D(S_Gamyxy),D(S_Gamyxz),
|
||||
D(S_Gamyyy),D(S_Gamyyz),D(S_Gamyzz),
|
||||
D(S_Gamzxx),D(S_Gamzxy),D(S_Gamzxz),
|
||||
D(S_Gamzyy),D(S_Gamzyz),D(S_Gamzzz),
|
||||
D(S_gxxx),D(S_gxyx),D(S_gxzx),D(S_gyyx),D(S_gyzx),D(S_gzzx),
|
||||
D(S_gxxy),D(S_gxyy),D(S_gxzy),D(S_gyyy),D(S_gyzy),D(S_gzzy),
|
||||
D(S_gxxz),D(S_gxyz),D(S_gxzz),D(S_gyyz),D(S_gyzz),D(S_gzzz),
|
||||
D(S_Rxy),D(S_Rxz),D(S_Ryz));
|
||||
D(S_Rxx),D(S_Rxy),D(S_Rxz),
|
||||
D(S_Ryy),D(S_Ryz),D(S_Rzz));
|
||||
|
||||
kern_phase12_13_chi_correction_fused<<<grid((size_t)all),BLK>>>(
|
||||
D(S_chi), D(S_chin1),
|
||||
@@ -4378,12 +4706,6 @@ static void launch_rhs_pipeline(int all, double eps, int co)
|
||||
D(S_Rxx), D(S_Rxy), D(S_Rxz),
|
||||
D(S_Ryy), D(S_Ryz), D(S_Rzz));
|
||||
|
||||
kern_phase14_lap_chi_derivs<<<grid((size_t)all),BLK>>>(
|
||||
D(S_Lap), D(S_chi),
|
||||
D(S_fxx), D(S_fxy), D(S_fxz),
|
||||
D(S_fyy), D(S_fyz), D(S_fzz),
|
||||
D(S_dtSfx_rhs), D(S_dtSfy_rhs), D(S_dtSfz_rhs));
|
||||
|
||||
kern_phase15_trK_Aij_gauge<<<grid(all),BLK>>>(
|
||||
D(S_alpn1), D(S_chin1),
|
||||
D(S_chix), D(S_chiy), D(S_chiz),
|
||||
@@ -4407,8 +4729,6 @@ static void launch_rhs_pipeline(int all, double eps, int co)
|
||||
D(S_Gamyyy), D(S_Gamyyz), D(S_Gamyzz),
|
||||
D(S_Gamzxx), D(S_Gamzxy), D(S_Gamzxz),
|
||||
D(S_Gamzyy), D(S_Gamzyz), D(S_Gamzzz),
|
||||
D(S_fxx), D(S_fxy), D(S_fxz),
|
||||
D(S_fyy), D(S_fyz), D(S_fzz),
|
||||
D(S_dtSfx_rhs), D(S_dtSfy_rhs), D(S_dtSfz_rhs),
|
||||
D(S_trK_rhs),
|
||||
D(S_Axx_rhs), D(S_Axy_rhs), D(S_Axz_rhs),
|
||||
@@ -4961,8 +5281,8 @@ int f_compute_rhs_bssn(int *ex, double &T,
|
||||
src_fields, dst_fields, soa_signs, all);
|
||||
}
|
||||
|
||||
/* Phase 11: Ricci assembly (diagonal + off-diagonal) */
|
||||
kern_phase11_ricci_diag<<<grid(all),BLK>>>(
|
||||
/* Phase 11: fused Ricci assembly */
|
||||
kern_phase11_ricci_fused<<<grid(all),BLK>>>(
|
||||
D(S_gxx),D(S_gxy),D(S_gxz),D(S_gyy),D(S_gyz),D(S_gzz),
|
||||
D(S_gupxx),D(S_gupxy),D(S_gupxz),D(S_gupyy),D(S_gupyz),D(S_gupzz),
|
||||
D(S_Gamxa),D(S_Gamya),D(S_Gamza),
|
||||
@@ -4978,25 +5298,8 @@ int f_compute_rhs_bssn(int *ex, double &T,
|
||||
D(S_gxxx),D(S_gxyx),D(S_gxzx),D(S_gyyx),D(S_gyzx),D(S_gzzx),
|
||||
D(S_gxxy),D(S_gxyy),D(S_gxzy),D(S_gyyy),D(S_gyzy),D(S_gzzy),
|
||||
D(S_gxxz),D(S_gxyz),D(S_gxzz),D(S_gyyz),D(S_gyzz),D(S_gzzz),
|
||||
D(S_Rxx),D(S_Ryy),D(S_Rzz));
|
||||
|
||||
kern_phase11_ricci_offdiag<<<grid(all),BLK>>>(
|
||||
D(S_gxx),D(S_gxy),D(S_gxz),D(S_gyy),D(S_gyz),D(S_gzz),
|
||||
D(S_gupxx),D(S_gupxy),D(S_gupxz),D(S_gupyy),D(S_gupyz),D(S_gupzz),
|
||||
D(S_Gamxa),D(S_Gamya),D(S_Gamza),
|
||||
D(S_Gamxx),D(S_Gamxy),D(S_Gamxz),
|
||||
D(S_Gamyx),D(S_Gamyy_t),D(S_Gamyz_t),
|
||||
D(S_Gamzx),D(S_Gamzy),D(S_Gamzz_t),
|
||||
D(S_Gamxxx),D(S_Gamxxy),D(S_Gamxxz),
|
||||
D(S_Gamxyy),D(S_Gamxyz),D(S_Gamxzz),
|
||||
D(S_Gamyxx),D(S_Gamyxy),D(S_Gamyxz),
|
||||
D(S_Gamyyy),D(S_Gamyyz),D(S_Gamyzz),
|
||||
D(S_Gamzxx),D(S_Gamzxy),D(S_Gamzxz),
|
||||
D(S_Gamzyy),D(S_Gamzyz),D(S_Gamzzz),
|
||||
D(S_gxxx),D(S_gxyx),D(S_gxzx),D(S_gyyx),D(S_gyzx),D(S_gzzx),
|
||||
D(S_gxxy),D(S_gxyy),D(S_gxzy),D(S_gyyy),D(S_gyzy),D(S_gzzy),
|
||||
D(S_gxxz),D(S_gxyz),D(S_gxzz),D(S_gyyz),D(S_gyzz),D(S_gzzz),
|
||||
D(S_Rxy),D(S_Rxz),D(S_Ryz));
|
||||
D(S_Rxx),D(S_Rxy),D(S_Rxz),
|
||||
D(S_Ryy),D(S_Ryz),D(S_Rzz));
|
||||
|
||||
/* ============================================================ */
|
||||
/* Phase 12/13: chi fdderivs + chi correction */
|
||||
@@ -5017,16 +5320,7 @@ int f_compute_rhs_bssn(int *ex, double &T,
|
||||
D(S_Ryy), D(S_Ryz), D(S_Rzz));
|
||||
|
||||
/* ============================================================ */
|
||||
/* Phase 14: fdderivs(Lap) + fderivs(chi) */
|
||||
/* ============================================================ */
|
||||
kern_phase14_lap_chi_derivs<<<grid((size_t)all),BLK>>>(
|
||||
D(S_Lap), D(S_chi),
|
||||
D(S_fxx), D(S_fxy), D(S_fxz),
|
||||
D(S_fyy), D(S_fyz), D(S_fzz),
|
||||
D(S_dtSfx_rhs), D(S_dtSfy_rhs), D(S_dtSfz_rhs));
|
||||
|
||||
/* ============================================================ */
|
||||
/* Phase 15: trK_rhs, Aij_rhs, gauge */
|
||||
/* Phase 14/15: fused trK_rhs, Aij_rhs, gauge */
|
||||
/* ============================================================ */
|
||||
kern_phase15_trK_Aij_gauge<<<grid(all),BLK>>>(
|
||||
D(S_alpn1), D(S_chin1),
|
||||
@@ -5051,8 +5345,6 @@ int f_compute_rhs_bssn(int *ex, double &T,
|
||||
D(S_Gamyyy), D(S_Gamyyz), D(S_Gamyzz),
|
||||
D(S_Gamzxx), D(S_Gamzxy), D(S_Gamzxz),
|
||||
D(S_Gamzyy), D(S_Gamzyz), D(S_Gamzzz),
|
||||
D(S_fxx), D(S_fxy), D(S_fxz),
|
||||
D(S_fyy), D(S_fyz), D(S_fzz),
|
||||
D(S_dtSfx_rhs), D(S_dtSfy_rhs), D(S_dtSfz_rhs),
|
||||
D(S_trK_rhs),
|
||||
D(S_Axx_rhs), D(S_Axy_rhs), D(S_Axz_rhs),
|
||||
|
||||
Reference in New Issue
Block a user