compute div_beta on-the-fly to remove temp array
This commit is contained in:
@@ -201,7 +201,7 @@ enum {
|
|||||||
S_Gamyx, S_Gamyy_t, S_Gamyz_t,
|
S_Gamyx, S_Gamyy_t, S_Gamyz_t,
|
||||||
S_Gamzx, S_Gamzy, S_Gamzz_t,
|
S_Gamzx, S_Gamzy, S_Gamzz_t,
|
||||||
S_Kx, S_Ky, S_Kz,
|
S_Kx, S_Ky, S_Kz,
|
||||||
S_div_beta, S_S_arr, S_f_arr,
|
S_S_arr, S_f_arr,
|
||||||
S_fxx, S_fxy, S_fxz, S_fyy, S_fyz, S_fzz,
|
S_fxx, S_fxy, S_fxz, S_fyy, S_fyz, S_fzz,
|
||||||
S_Gamxa, S_Gamya, S_Gamza,
|
S_Gamxa, S_Gamya, S_Gamza,
|
||||||
S_gupxx, S_gupxy, S_gupxz,
|
S_gupxx, S_gupxy, S_gupxz,
|
||||||
@@ -959,7 +959,7 @@ __global__ void kern_phase1_prep(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Phase 2a: chi_rhs, gij_rhs, div_beta */
|
/* Phase 2a: chi_rhs, gij_rhs */
|
||||||
__global__ void kern_phase2_metric_rhs(
|
__global__ void kern_phase2_metric_rhs(
|
||||||
const double* __restrict__ alpn1, const double* __restrict__ chin1,
|
const double* __restrict__ alpn1, const double* __restrict__ chin1,
|
||||||
const double* __restrict__ gxx, const double* __restrict__ gxy,
|
const double* __restrict__ gxx, const double* __restrict__ gxy,
|
||||||
@@ -974,7 +974,6 @@ __global__ void kern_phase2_metric_rhs(
|
|||||||
const double* __restrict__ betayy, const double* __restrict__ betayz,
|
const double* __restrict__ betayy, const double* __restrict__ betayz,
|
||||||
const double* __restrict__ betazx, const double* __restrict__ betazy,
|
const double* __restrict__ betazx, const double* __restrict__ betazy,
|
||||||
const double* __restrict__ betazz,
|
const double* __restrict__ betazz,
|
||||||
double* __restrict__ div_beta,
|
|
||||||
double* __restrict__ chi_rhs, double* __restrict__ gxx_rhs,
|
double* __restrict__ chi_rhs, double* __restrict__ gxx_rhs,
|
||||||
double* __restrict__ gyy_rhs, double* __restrict__ gzz_rhs,
|
double* __restrict__ gyy_rhs, double* __restrict__ gzz_rhs,
|
||||||
double* __restrict__ gxy_rhs, double* __restrict__ gyz_rhs,
|
double* __restrict__ gxy_rhs, double* __restrict__ gyz_rhs,
|
||||||
@@ -983,7 +982,6 @@ __global__ void kern_phase2_metric_rhs(
|
|||||||
const double F2o3 = 2.0/3.0, F1o3 = 1.0/3.0, TWO = 2.0;
|
const double F2o3 = 2.0/3.0, F1o3 = 1.0/3.0, TWO = 2.0;
|
||||||
for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < d_gp.all; i += blockDim.x*gridDim.x) {
|
for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < d_gp.all; i += blockDim.x*gridDim.x) {
|
||||||
double db = betaxx[i] + betayy[i] + betazz[i];
|
double db = betaxx[i] + betayy[i] + betazz[i];
|
||||||
div_beta[i] = db;
|
|
||||||
chi_rhs[i] = F2o3 * chin1[i] * (alpn1[i] * trK[i] - db);
|
chi_rhs[i] = F2o3 * chin1[i] * (alpn1[i] * trK[i] - db);
|
||||||
gxx_rhs[i] = -TWO*alpn1[i]*Axx[i] - F2o3*gxx[i]*db
|
gxx_rhs[i] = -TWO*alpn1[i]*Axx[i] - F2o3*gxx[i]*db
|
||||||
+ TWO*(gxx[i]*betaxx[i] + gxy[i]*betayx[i] + gxz[i]*betazx[i]);
|
+ TWO*(gxx[i]*betaxx[i] + gxy[i]*betayx[i] + gxz[i]*betazx[i]);
|
||||||
@@ -1244,7 +1242,6 @@ void kern_phase8_gamma_rhs_part2(
|
|||||||
const double* __restrict__ gupxx, const double* __restrict__ gupxy,
|
const double* __restrict__ gupxx, const double* __restrict__ gupxy,
|
||||||
const double* __restrict__ gupxz, const double* __restrict__ gupyy,
|
const double* __restrict__ gupxz, const double* __restrict__ gupyy,
|
||||||
const double* __restrict__ gupyz, const double* __restrict__ gupzz,
|
const double* __restrict__ gupyz, const double* __restrict__ gupzz,
|
||||||
const double* __restrict__ div_beta,
|
|
||||||
/* fdderivs(betax) -> gxxx,gxyx,gxzx,gyyx,gyzx,gzzx */
|
/* fdderivs(betax) -> gxxx,gxyx,gxzx,gyyx,gyzx,gzzx */
|
||||||
const double* __restrict__ bxx_xx, const double* __restrict__ bxx_xy,
|
const double* __restrict__ bxx_xx, const double* __restrict__ bxx_xy,
|
||||||
const double* __restrict__ bxx_xz, const double* __restrict__ bxx_yy,
|
const double* __restrict__ bxx_xz, const double* __restrict__ bxx_yy,
|
||||||
@@ -1303,7 +1300,7 @@ void kern_phase8_gamma_rhs_part2(
|
|||||||
double Ga_z = uxx*Gzxx[i]+uyy*Gzyy[i]+uzz*Gzzz[i]
|
double Ga_z = uxx*Gzxx[i]+uyy*Gzyy[i]+uzz*Gzzz[i]
|
||||||
+TWO*(uxy*Gzxy[i]+uxz*Gzxz[i]+uyz*Gzyz[i]);
|
+TWO*(uxy*Gzxy[i]+uxz*Gzxz[i]+uyz*Gzyz[i]);
|
||||||
Gamxa_out[i]=Ga_x; Gamya_out[i]=Ga_y; Gamza_out[i]=Ga_z;
|
Gamxa_out[i]=Ga_x; Gamya_out[i]=Ga_y; Gamza_out[i]=Ga_z;
|
||||||
double db = div_beta[i];
|
double db = betaxx[i] + betayy[i] + betazz[i];
|
||||||
Gamx_rhs[i] += F2o3*Ga_x*db
|
Gamx_rhs[i] += F2o3*Ga_x*db
|
||||||
- Ga_x*betaxx[i] - Ga_y*betaxy[i] - Ga_z*betaxz[i]
|
- Ga_x*betaxx[i] - Ga_y*betaxy[i] - Ga_z*betaxz[i]
|
||||||
+ F1o3*(uxx*fxx_v+uxy*fxy_v+uxz*fxz_v)
|
+ F1o3*(uxx*fxx_v+uxy*fxy_v+uxz*fxz_v)
|
||||||
@@ -1738,7 +1735,6 @@ void kern_phase15_trK_Aij_gauge(
|
|||||||
const double* __restrict__ Ayz, const double* __restrict__ Azz,
|
const double* __restrict__ Ayz, const double* __restrict__ Azz,
|
||||||
const double* __restrict__ Lapx, const double* __restrict__ Lapy,
|
const double* __restrict__ Lapx, const double* __restrict__ Lapy,
|
||||||
const double* __restrict__ Lapz,
|
const double* __restrict__ Lapz,
|
||||||
const double* __restrict__ div_beta,
|
|
||||||
const double* __restrict__ betaxx, const double* __restrict__ betaxy,
|
const double* __restrict__ betaxx, const double* __restrict__ betaxy,
|
||||||
const double* __restrict__ betaxz, const double* __restrict__ betayx,
|
const double* __restrict__ betaxz, const double* __restrict__ betayx,
|
||||||
const double* __restrict__ betayy, const double* __restrict__ betayz,
|
const double* __restrict__ betayy, const double* __restrict__ betayz,
|
||||||
@@ -1857,7 +1853,7 @@ void kern_phase15_trK_Aij_gauge(
|
|||||||
+uyz*(Ayy[i]*Azz[i]+Ayz[i]*Ayz[i])));
|
+uyz*(Ayy[i]*Azz[i]+Ayz[i]*Ayz[i])));
|
||||||
|
|
||||||
double trK_v = trK[i];
|
double trK_v = trK[i];
|
||||||
double db = div_beta[i];
|
double db = betaxx[i] + betayy[i] + betazz[i];
|
||||||
|
|
||||||
/* trK_rhs step 1: store D^iD_i alpha * chin1 */
|
/* trK_rhs step 1: store D^iD_i alpha * chin1 */
|
||||||
trK_rhs[i] = c1 * DDA;
|
trK_rhs[i] = c1 * DDA;
|
||||||
@@ -2207,7 +2203,6 @@ int f_compute_rhs_bssn(int *ex, double &T,
|
|||||||
D(S_betaxx), D(S_betaxy), D(S_betaxz),
|
D(S_betaxx), D(S_betaxy), D(S_betaxz),
|
||||||
D(S_betayx), D(S_betayy), D(S_betayz),
|
D(S_betayx), D(S_betayy), D(S_betayz),
|
||||||
D(S_betazx), D(S_betazy), D(S_betazz),
|
D(S_betazx), D(S_betazy), D(S_betazz),
|
||||||
D(S_div_beta),
|
|
||||||
D(S_chi_rhs), D(S_gxx_rhs), D(S_gyy_rhs), D(S_gzz_rhs),
|
D(S_chi_rhs), D(S_gxx_rhs), D(S_gyy_rhs), D(S_gzz_rhs),
|
||||||
D(S_gxy_rhs), D(S_gyz_rhs), D(S_gxz_rhs));
|
D(S_gxy_rhs), D(S_gyz_rhs), D(S_gxz_rhs));
|
||||||
|
|
||||||
@@ -2283,7 +2278,6 @@ int f_compute_rhs_bssn(int *ex, double &T,
|
|||||||
kern_phase8_gamma_rhs_part2<<<grid(all),BLK>>>(
|
kern_phase8_gamma_rhs_part2<<<grid(all),BLK>>>(
|
||||||
D(S_gupxx), D(S_gupxy), D(S_gupxz),
|
D(S_gupxx), D(S_gupxy), D(S_gupxz),
|
||||||
D(S_gupyy), D(S_gupyz), D(S_gupzz),
|
D(S_gupyy), D(S_gupyz), D(S_gupzz),
|
||||||
D(S_div_beta),
|
|
||||||
D(S_gxxx),D(S_gxyx),D(S_gxzx),D(S_gyyx),D(S_gyzx),D(S_gzzx),
|
D(S_gxxx),D(S_gxyx),D(S_gxzx),D(S_gyyx),D(S_gyzx),D(S_gzzx),
|
||||||
D(S_gxxy),D(S_gxyy),D(S_gxzy),D(S_gyyy),D(S_gyzy),D(S_gzzy),
|
D(S_gxxy),D(S_gxyy),D(S_gxzy),D(S_gyyy),D(S_gyzy),D(S_gzzy),
|
||||||
D(S_gxxz),D(S_gxyz),D(S_gxzz),D(S_gyyz),D(S_gyzz),D(S_gzzz),
|
D(S_gxxz),D(S_gxyz),D(S_gxzz),D(S_gyyz),D(S_gyzz),D(S_gzzz),
|
||||||
@@ -2435,7 +2429,6 @@ int f_compute_rhs_bssn(int *ex, double &T,
|
|||||||
D(S_trK),
|
D(S_trK),
|
||||||
D(S_Axx), D(S_Axy), D(S_Axz), D(S_Ayy), D(S_Ayz), D(S_Azz),
|
D(S_Axx), D(S_Axy), D(S_Axz), D(S_Ayy), D(S_Ayz), D(S_Azz),
|
||||||
D(S_Lapx), D(S_Lapy), D(S_Lapz),
|
D(S_Lapx), D(S_Lapy), D(S_Lapz),
|
||||||
D(S_div_beta),
|
|
||||||
D(S_betaxx), D(S_betaxy), D(S_betaxz),
|
D(S_betaxx), D(S_betaxy), D(S_betaxz),
|
||||||
D(S_betayx), D(S_betayy), D(S_betayz),
|
D(S_betayx), D(S_betayy), D(S_betayz),
|
||||||
D(S_betazx), D(S_betazy), D(S_betazz),
|
D(S_betazx), D(S_betazy), D(S_betazz),
|
||||||
|
|||||||
Reference in New Issue
Block a user