From f754aa1ec203aefc5c85faf5fb21e4bb52e5306b Mon Sep 17 00:00:00 2001 From: CGH0S7 <776459475@qq.com> Date: Sun, 10 May 2026 13:52:48 +0800 Subject: [PATCH] Add Z4C Shell-Patch GPU acceleration (Phase 3 complete) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Create z4c_gpu_rhs_ss.cu (reusing BSSN shell FD/chain-rule kernels): - Uploads trKd = trK + 2*TZ to GPU so existing BSSN algebraic kernels compute correct Z4C physical equations without modification - New kern_z4c_post applies TZ_rhs = alpn1 * Hcon / 2, kappa1/kappa2 constraint damping, TZ advection (lopsided), and dissipation (kodis) - Adds TZ/TZ_rhs to Meta struct, alloc/upload/download/free lifecycle Add cuda_compute_rhs_z4c_ss() wrapper in Z4c_class.C matching the Fortran f_compute_rhs_Z4c_ss signature, with #define redirection for Step/SHStep call sites and #undef before analysis functions. Add z4c_gpu_rhs_ss.o to ABE_CUDA_CFILES and build rule in makefile. Add kappa1_c/kappa2_c constants to gpu_rhsSS_mem.h. Build verified with USE_CUDA_Z4C=1 + WithShell — compiles and links cleanly. All three Shell GPU files now coexist: bssn_gpu_rhs_ss.o (BSSN), z4c_gpu_rhs_ss.o (Z4C), both sharing FD/chain-rule kernels. Co-Authored-By: Claude Opus 4.7 --- AMSS_NCKU_source/Z4c_class.C | 83 + AMSS_NCKU_source/bssn_gpu.h | 4 + AMSS_NCKU_source/gpu_rhsSS_mem.h | 4 + AMSS_NCKU_source/makefile | 6 +- AMSS_NCKU_source/z4c_gpu_rhs_ss.cu | 2565 ++++++++++++++++++++++++++++ 5 files changed, 2661 insertions(+), 1 deletion(-) create mode 100644 AMSS_NCKU_source/z4c_gpu_rhs_ss.cu diff --git a/AMSS_NCKU_source/Z4c_class.C b/AMSS_NCKU_source/Z4c_class.C index 1775761..a110f2a 100644 --- a/AMSS_NCKU_source/Z4c_class.C +++ b/AMSS_NCKU_source/Z4c_class.C @@ -38,6 +38,9 @@ using namespace std; #endif #if USE_CUDA_BSSN #include "bssn_rhs_cuda.h" +#ifdef WithShell +#include "bssn_gpu.h" +#endif #endif #ifdef With_AHF @@ -49,6 +52,81 @@ using namespace std; // Define Z4c_class +#if USE_CUDA_Z4C && (ABEtype == 2) && defined(WithShell) +// GPU-accelerated Z4C shell RHS: same parameter signature as f_compute_rhs_Z4c_ss. +// Internally calls gpu_rhs_z4c_ss which modifies trK→trKd before upload, +// runs BSSN algebraic kernels, then applies Z4C post-processing (TZ_rhs, damping). +extern "C" { +static int cuda_compute_rhs_z4c_ss( + int *ex, double &T, double *crho, double *sigma, double *R, + double *X, double *Y, double *Z, + double *drhodx, double *drhody, double *drhodz, + double *dsigmadx, double *dsigmady, double *dsigmadz, + double *dRdx, double *dRdy, double *dRdz, + double *drhodxx, double *drhodxy, double *drhodxz, double *drhodyy, double *drhodyz, double *drhodzz, + double *dsigmadxx, double *dsigmadxy, double *dsigmadxz, double *dsigmadyy, double *dsigmadyz, double *dsigmadzz, + double *dRdxx, double *dRdxy, double *dRdxz, double *dRdyy, double *dRdyz, double *dRdzz, + double *chi, double *trK, + double *gxx, double *gxy, double *gxz, double *gyy, double *gyz, double *gzz, + double *Axx, double *Axy, double *Axz, double *Ayy, double *Ayz, double *Azz, + double *Gamx, double *Gamy, double *Gamz, + double *Lap, double *betax, double *betay, double *betaz, + double *dtSfx, double *dtSfy, double *dtSfz, + double *TZ, + double *chi_rhs, double *trK_rhs, + double *gxx_rhs, double *gxy_rhs, double *gxz_rhs, double *gyy_rhs, double *gyz_rhs, double *gzz_rhs, + double *Axx_rhs, double *Axy_rhs, double *Axz_rhs, double *Ayy_rhs, double *Ayz_rhs, double *Azz_rhs, + double *Gamx_rhs, double *Gamy_rhs, double *Gamz_rhs, + double *Lap_rhs, double *betax_rhs, double *betay_rhs, double *betaz_rhs, + double *dtSfx_rhs, double *dtSfy_rhs, double *dtSfz_rhs, + double *TZ_rhs, + double *rho_mat, double *Sx, double *Sy, double *Sz, + double *Sxx, double *Sxy, double *Sxz, double *Syy, double *Syz, double *Szz, + double *Gamxxx, double *Gamxxy, double *Gamxxz, double *Gamxyy, double *Gamxyz, double *Gamxzz, + double *Gamyxx, double *Gamyxy, double *Gamyxz, double *Gamyyy, double *Gamyyz, double *Gamyzz, + double *Gamzxx, double *Gamzxy, double *Gamzxz, double *Gamzyy, double *Gamzyz, double *Gamzzz, + double *Rxx, double *Rxy, double *Rxz, double *Ryy, double *Ryz, double *Rzz, + double *ham_Res, double *movx_Res, double *movy_Res, double *movz_Res, + double *Gmx_Res, double *Gmy_Res, double *Gmz_Res, + int &Symmetry, int &Lev, double &eps, int &sst, int &co) +{ + return gpu_rhs_z4c_ss(0, 0, // calledby=ABE_main, mpi_rank=device_0 + ex, T, crho, sigma, R, X, Y, Z, + drhodx, drhody, drhodz, + dsigmadx, dsigmady, dsigmadz, + dRdx, dRdy, dRdz, + drhodxx, drhodxy, drhodxz, drhodyy, drhodyz, drhodzz, + dsigmadxx, dsigmadxy, dsigmadxz, dsigmadyy, dsigmadyz, dsigmadzz, + dRdxx, dRdxy, dRdxz, dRdyy, dRdyz, dRdzz, + chi, trK, + gxx, gxy, gxz, gyy, gyz, gzz, + Axx, Axy, Axz, Ayy, Ayz, Azz, + Gamx, Gamy, Gamz, + Lap, betax, betay, betaz, + dtSfx, dtSfy, dtSfz, + TZ, + chi_rhs, trK_rhs, + gxx_rhs, gxy_rhs, gxz_rhs, gyy_rhs, gyz_rhs, gzz_rhs, + Axx_rhs, Axy_rhs, Axz_rhs, Ayy_rhs, Ayz_rhs, Azz_rhs, + Gamx_rhs, Gamy_rhs, Gamz_rhs, + Lap_rhs, betax_rhs, betay_rhs, betaz_rhs, + dtSfx_rhs, dtSfy_rhs, dtSfz_rhs, + TZ_rhs, + rho_mat, Sx, Sy, Sz, + Sxx, Sxy, Sxz, Syy, Syz, Szz, + Gamxxx, Gamxxy, Gamxxz, Gamxyy, Gamxyz, Gamxzz, + Gamyxx, Gamyxy, Gamyxz, Gamyyy, Gamyyz, Gamyzz, + Gamzxx, Gamzxy, Gamzxz, Gamzyy, Gamzyz, Gamzzz, + Rxx, Rxy, Rxz, Ryy, Ryz, Rzz, + ham_Res, movx_Res, movy_Res, movz_Res, + Gmx_Res, Gmy_Res, Gmz_Res, + Symmetry, Lev, eps, sst, co); +} +} +// Redirect all Z4C shell RHS calls in Step/SHStep to GPU +#define f_compute_rhs_Z4c_ss cuda_compute_rhs_z4c_ss +#endif + // This class inherits some members and methods from the parent `bssn_class` and modifies others. // The modified members and methods are defined below (and in the header Z4c_class.h). // The remaining members/methods are inherited from `bssn_class` (declared in bssn_class.h). @@ -3005,6 +3083,11 @@ void Z4c_class::Check_extrop() //================================================================================================ +#if USE_CUDA_Z4C && (ABEtype == 2) && defined(WithShell) +#undef f_compute_rhs_Z4c_ss +#define f_compute_rhs_Z4c_ss compute_rhs_z4c_ss_ +#endif + // this member function is used to compute and output constraint violation //================================================================================================ diff --git a/AMSS_NCKU_source/bssn_gpu.h b/AMSS_NCKU_source/bssn_gpu.h index 97d7c74..ae2fc0d 100644 --- a/AMSS_NCKU_source/bssn_gpu.h +++ b/AMSS_NCKU_source/bssn_gpu.h @@ -49,4 +49,8 @@ int gpu_rhs(int calledby, int mpi_rank, int *ex, double &T, int gpu_rhs_ss(RHS_SS_PARA); +#define Z4C_SS_PARA int calledby, int mpi_rank, int *ex, double &T, double *crho, double *sigma, double *R, double *X, double *Y, double *Z, double *drhodx, double *drhody, double *drhodz, double *dsigmadx, double *dsigmady, double *dsigmadz, double *dRdx, double *dRdy, double *dRdz, double *drhodxx, double *drhodxy, double *drhodxz, double *drhodyy, double *drhodyz, double *drhodzz, double *dsigmadxx, double *dsigmadxy, double *dsigmadxz, double *dsigmadyy, double *dsigmadyz, double *dsigmadzz, double *dRdxx, double *dRdxy, double *dRdxz, double *dRdyy, double *dRdyz, double *dRdzz, double *chi, double *trK, double *dxx, double *gxy, double *gxz, double *dyy, double *gyz, double *dzz, double *Axx, double *Axy, double *Axz, double *Ayy, double *Ayz, double *Azz, double *Gamx, double *Gamy, double *Gamz, double *Lap, double *betax, double *betay, double *betaz, double *dtSfx, double *dtSfy, double *dtSfz, double *TZ, double *chi_rhs, double *trK_rhs, double *gxx_rhs, double *gxy_rhs, double *gxz_rhs, double *gyy_rhs, double *gyz_rhs, double *gzz_rhs, double *Axx_rhs, double *Axy_rhs, double *Axz_rhs, double *Ayy_rhs, double *Ayz_rhs, double *Azz_rhs, double *Gamx_rhs, double *Gamy_rhs, double *Gamz_rhs, double *Lap_rhs, double *betax_rhs, double *betay_rhs, double *betaz_rhs, double *dtSfx_rhs, double *dtSfy_rhs, double *dtSfz_rhs, double *TZ_rhs, double *rho, double *Sx, double *Sy, double *Sz, double *Sxx, double *Sxy, double *Sxz, double *Syy, double *Syz, double *Szz, double *Gamxxx, double *Gamxxy, double *Gamxxz, double *Gamxyy, double *Gamxyz, double *Gamxzz, double *Gamyxx, double *Gamyxy, double *Gamyxz, double *Gamyyy, double *Gamyyz, double *Gamyzz, double *Gamzxx, double *Gamzxy, double *Gamzxz, double *Gamzyy, double *Gamzyz, double *Gamzzz, double *Rxx, double *Rxy, double *Rxz, double *Ryy, double *Ryz, double *Rzz, double *ham_Res, double *movx_Res, double *movy_Res, double *movz_Res, double *Gmx_Res, double *Gmy_Res, double *Gmz_Res, int &Symmetry, int &Lev, double &eps, int &sst, int &co + +int gpu_rhs_z4c_ss(Z4C_SS_PARA); + #endif diff --git a/AMSS_NCKU_source/gpu_rhsSS_mem.h b/AMSS_NCKU_source/gpu_rhsSS_mem.h index c2b4c2b..6c2c040 100644 --- a/AMSS_NCKU_source/gpu_rhsSS_mem.h +++ b/AMSS_NCKU_source/gpu_rhsSS_mem.h @@ -48,6 +48,8 @@ struct Meta double * Gamx_rhs,*Gamy_rhs,*Gamz_rhs;//out double * Lap_rhs, *betax_rhs, *betay_rhs, *betaz_rhs;//out double * dtSfx_rhs,*dtSfy_rhs,*dtSfz_rhs;//out + double * TZ; //in (Z4C) + double * TZ_rhs; //out (Z4C) double * rho,*Sx,*Sy,*Sz ; //in double * Sxx,*Sxy,*Sxz,*Syy,*Syz,*Szz; //in @@ -132,6 +134,8 @@ __constant__ double SYM = 1.0; __constant__ double ANTI = -1.0; __constant__ double FF = 0.75; __constant__ double eta = 2.0; +__constant__ double kappa1_c = 0.02; +__constant__ double kappa2_c = 0.0; __constant__ double F1o3; __constant__ double F2o3; __constant__ double F3o2 = 1.5; diff --git a/AMSS_NCKU_source/makefile b/AMSS_NCKU_source/makefile index b3f6914..d777710 100644 --- a/AMSS_NCKU_source/makefile +++ b/AMSS_NCKU_source/makefile @@ -74,6 +74,10 @@ bssn_rhs_cuda.o: bssn_rhs_cuda.cu bssn_rhs.h macrodef.h fd_cuda_helpers.cuh bssn_gpu_rhs_ss.o: bssn_gpu_rhs_ss.cu bssn_gpu.h gpu_rhsSS_mem.h bssn_macro.h macrodef.fh $(Cu) $(CUDA_APP_FLAGS) -c $< -o $@ $(CUDA_LIB_PATH) +# CUDA rewrite of Z4C Shell-Patch RHS (extends BSSN shells with TZ + constraint damping) +z4c_gpu_rhs_ss.o: z4c_gpu_rhs_ss.cu bssn_gpu.h gpu_rhsSS_mem.h bssn_macro.h macrodef.fh + $(Cu) $(CUDA_APP_FLAGS) -c $< -o $@ $(CUDA_LIB_PATH) + # CUDA rewrite of Z4C Cartesian RHS z4c_rhs_cuda.o: z4c_rhs_cuda.cu z4c_rhs_cuda.h bssn_rhs.h macrodef.h ricci_gamma.h fd_cuda_helpers.cuh $(Cu) $(CUDA_APP_FLAGS) -c $< -o $@ $(CUDA_LIB_PATH) @@ -159,7 +163,7 @@ RK4_F90_OBJ = rungekutta4_rout.o endif CFILES += $(RK4_C_OBJ) -ABE_CUDA_CFILES = $(CFILES_CUDA_BSSN) z4c_rhs_cuda.o $(RK4_C_OBJ) +ABE_CUDA_CFILES = $(CFILES_CUDA_BSSN) z4c_rhs_cuda.o z4c_gpu_rhs_ss.o $(RK4_C_OBJ) ABE_LDLIBS = $(LDLIBS) ifeq ($(USE_CUDA_BSSN),1) diff --git a/AMSS_NCKU_source/z4c_gpu_rhs_ss.cu b/AMSS_NCKU_source/z4c_gpu_rhs_ss.cu new file mode 100644 index 0000000..f008a2a --- /dev/null +++ b/AMSS_NCKU_source/z4c_gpu_rhs_ss.cu @@ -0,0 +1,2565 @@ +// includes, system +#include +#include +#include +#include +#include +#include +#include +//#include "cutil.h" +#ifdef RESULT_CHECK +#include +#endif +using namespace std; + +//includes, bssn +#include "gpu_rhsSS_mem.h" +#include "bssn_gpu.h" + +#ifdef WithShell + +__device__ volatile unsigned int global_count = 0; + +#ifdef RESULT_CHECK +void compare_result_gpu(int ftag1,double * datac,int data_num){ + double * data = (double*)malloc(sizeof(double)*data_num); + cudaMemcpy(data, datac, data_num * sizeof(double), cudaMemcpyDeviceToHost); + compare_result(ftag1,data,data_num); + free(data); +} +#endif + +__global__ void sub_symmetry_bd_ss_partF(int ord, double * func, double *funcc) +{ + int curr = blockIdx.x*blockDim.x+threadIdx.x; + int ps; //TOTRY: i,j,k; double value; + + while(curr < _3D_SIZE[0]) + { + int k = curr / _2D_SIZE[0]; + ps = curr - (_2D_SIZE[0] * k); //TOTRY: = curr % _2D_SIZE[0]; + int j = ps / ex_c[0]; + int i = ps - (j * ex_c[0]); //= ps % ex_c[0]; + + funcc[i+ ord + (ord +j)* _1D_SIZE[ord] + k * _2D_SIZE[ord]] = func[curr]; + + curr += STEP_SIZE; + } +} + +#ifdef Vertex +__global__ void sub_symmetry_bd_ss_partI(int ord, double * func, double * funcc,double S1){ + //for i + int curr = blockIdx.x*blockDim.x+threadIdx.x; + int ps,ps2; + int m; + while(curr < (ex_c[1]+ord*2)*ex_c[2] ){ + m = ord * 2; + ps = curr * _1D_SIZE[ord]; + ps2 = ps + _1D_SIZE[ord] - 1; + for(int i = 0;i < ord; ++i){ + //funcc(-i,1:extc(2),1:extc(3)) = funcc(i+2,1:extc(2),1:extc(3))*SoA(1) + + //funcc(extc(1)+1+i,1:extc(2),1:extc(3)) = funcc(extc(1)-1-i,1:extc(2),1:extc(3))*SoA(1) + funcc[ps] = funcc [ps + m] * S1; + funcc[ps2] = funcc[ps2 - m] * S1; + ps ++; + ps2 --; + m -= 2; + } + curr+= STEP_SIZE; + } + __syncthreads(); +} +__global__ void sub_symmetry_bd_ss_partJ(int ord,double * func, double * funcc,double S2){ + //for j + int curr = blockIdx.x*blockDim.x+threadIdx.x; + int ps,ps2; + int m; + + while(curr < (ex_c[0]+ord*2)*ex_c[2]) + { + m = (2 * ord) * _1D_SIZE[ord]; + ps = (curr/_1D_SIZE[ord])*_2D_SIZE[ord] + (curr % _1D_SIZE[ord]); + //noticed that length_j == length_i, + //in other words, (ex[2]+ord*2) == (ex[2]+ord*2) == 1D_size[ord] + //so here we use "(_1D_SIZE[ord] - 1)" instead of "(ex[2]+ord*2) - 1" + ps2 = ps + (_1D_SIZE[ord] - 1) * _1D_SIZE[ord]; + for(int i = 0;i>>(ord,func,funcc); + cudaDeviceSynchronize(); + sub_symmetry_bd_ss_partI<<>>(ord,func,funcc,SoA[0]); + cudaDeviceSynchronize(); + sub_symmetry_bd_ss_partJ<<>>(ord,func,funcc,SoA[1]); + cudaDeviceSynchronize(); +} + +__global__ void sub_fderivs_shc_part1(double *fx,double *fy,double *fz){ + int tid = blockIdx.x*blockDim.x+threadIdx.x; + int t_ = tid; + while(t_ < _3D_SIZE[0]) + { + fx[t_] = Ms_ dRdx[t_] * Ms_ gz[t_] + Ms_ drhodx[t_] * Ms_ gx[t_] + Ms_ dsigmadx[t_] * Ms_ gy[t_]; + + fy[t_] = Ms_ dRdy[t_] * Ms_ gz[t_] + Ms_ drhody[t_] * Ms_ gx[t_] + Ms_ dsigmady[t_] * Ms_ gy[t_]; + + fz[t_] = Ms_ dRdz[t_] * Ms_ gz[t_] + Ms_ drhodz[t_] * Ms_ gx[t_] + Ms_ dsigmadz[t_] * Ms_ gy[t_]; + + t_ += STEP_SIZE; + } +} + +__global__ void sub_fderivs_sh(double * fh,double *fx,double *fy,double *fz ) +{ + int curr = blockIdx.x*blockDim.x+threadIdx.x; + int ps; //TOTRY: i,j,k; double value; + + while(curr < _3D_SIZE[0]) + { + int k = curr / _2D_SIZE[0]; + ps = curr - (_2D_SIZE[0] * k); //TOTRY: = curr % _2D_SIZE[0]; + int j = ps / ex_c[0]; + int i = ps - (j * ex_c[0]); + + if(k == ex_c[2] || i == ex_c[0] || j == ex_c[1]){ + curr += STEP_SIZE; + continue; + } + + //X-- + if(i+2 <= ijk_max[0] && i-2 >= ijk_min[0]) + fx[curr] = d12dxyz[0]*(fh[i+(j+2)*_1D_SIZE[2]+(k)*_2D_SIZE[2]] - + 8*fh[i+1+(j+2)*_1D_SIZE[2]+(k)*_2D_SIZE[2]] + + 8*fh[i+3+(j+2)*_1D_SIZE[2]+(k)*_2D_SIZE[2]] - + fh[i+4+(j+2)*_1D_SIZE[2]+(k)*_2D_SIZE[2]] ); + + else if(i+1 <= ijk_max[0] && i-1 >= ijk_min[0]) + fx[curr] = d2dxyz[0]*(-fh[i+1+(j+2)*_1D_SIZE[2]+(k)*_2D_SIZE[2]] + + fh[i+3+(j+2)*_1D_SIZE[2]+(k)*_2D_SIZE[2]] ); + + //Y-- + if(j+2 <= ijk_max[1] && j-2 >= ijk_min[1]) + fy[curr]=d12dxyz[1]*(fh[i+2+j*_1D_SIZE[2]+(k)*_2D_SIZE[2]]- + 8*fh[i+2+(j+1)*_1D_SIZE[2]+(k)*_2D_SIZE[2]] + + 8*fh[i+2+(j+3)*_1D_SIZE[2]+(k)*_2D_SIZE[2]] - + fh[i+2+(j+4)*_1D_SIZE[2]+(k)*_2D_SIZE[2]]); + + else if(j+1 <= ijk_max[1] && j-1 >= ijk_min[1]) + fy[curr]=d2dxyz[1]*(-fh[i+2+(j+1)*_1D_SIZE[2]+(k)*_2D_SIZE[2]] + + fh[i+2+(j+3)*_1D_SIZE[2]+(k)*_2D_SIZE[2]]); + //Z-- + + if(k+2 <= ijk_max[2] && k-2 >= ijk_min[2]) + fz[curr]=d12dxyz[2]*( fh[i+2+(j+2)*_1D_SIZE[2]+(k-2) *_2D_SIZE[2]] - + 8* fh[i+2+(j+2)*_1D_SIZE[2]+(k-1)*_2D_SIZE[2]] + + 8* fh[i+2+(j+2)*_1D_SIZE[2]+(k+1)*_2D_SIZE[2]] - + fh[i+2+(j+2)*_1D_SIZE[2]+(k+2)*_2D_SIZE[2]]); + + else if(k+1 <= ijk_max[2] && k-1 >= ijk_min[2]) + fz[curr]=d2dxyz[2]*(-fh[i+2+(j+2)*_1D_SIZE[2]+(k-1)*_2D_SIZE[2]]+ + fh[i+2+(j+2)*_1D_SIZE[2]+(k+1)*_2D_SIZE[2]]); + + curr += STEP_SIZE; + } +} +inline void sub_fderivs_shc(int& sst,double * f,double * fh,double *fx,double *fy,double *fz, double * SoA) +{ + double SoA1[2]; + if(sst == 0){ + SoA1[0] = SoA[0]; + SoA1[1] = SoA[1]; + } + else if(sst == 2 || sst == 3 ){ + SoA1[0] = SoA[1]; + SoA1[1] = SoA[2]; + } + else if(sst == 4 || sst==5){ + SoA1[0] = SoA[0]; + SoA1[1] = SoA[2]; + } + //cudaMemset(Msh_ gx,0,h_3D_SIZE[0] * sizeof(double)); + //cudaMemset(Msh_ gy,0,h_3D_SIZE[0] * sizeof(double)); + //cudaMemset(Msh_ gz,0,h_3D_SIZE[0] * sizeof(double)); + sub_symmetry_bd_ss(2,f,fh,SoA1); + cudaDeviceSynchronize(); + //compare_result_gpu(0,fh,h_3D_SIZE[2]); + sub_fderivs_sh<<>>(fh,Msh_ gx,Msh_ gy,Msh_ gz); + cudaDeviceSynchronize(); + + sub_fderivs_shc_part1<<>>(fx,fy,fz); + cudaDeviceSynchronize(); + //compare_result_gpu(1,fx,h_3D_SIZE[0]); + //compare_result_gpu(2,fy,h_3D_SIZE[0]); + //compare_result_gpu(3,fz,h_3D_SIZE[0]); +} +__global__ void compute_rhs_ss_part1() +{ + int tid = blockIdx.x*blockDim.x+threadIdx.x; + int t_ = tid; + while(t_ < _3D_SIZE[0]) + { + metac.alpn1[t_] = metac.Lap[t_] + 1; + metac.chin1[t_] = metac.chi[t_] + 1; + metac.gxx[t_] = metac.dxx[t_] + 1; + metac.gyy[t_] = metac.dyy[t_] + 1; + metac.gzz[t_] = metac.dzz[t_] + 1; + + t_ += STEP_SIZE; + } +} + +__global__ void sub_fdderivs_shc_part1(double *fxx,double *fxy,double *fxz,double *fyy,double *fyz,double *fzz) +{ + int tid = blockIdx.x*blockDim.x+threadIdx.x; + int t_ = tid; + while(t_ < _3D_SIZE[0]) + { + fxx[t_] = Ms_ dRdxx[t_] * Ms_ gz[t_] + Ms_ drhodxx[t_] * Ms_ gx[t_] + Ms_ dsigmadxx[t_] * Ms_ gy[t_] + + + Ms_ dRdx[t_] * Ms_ dRdx[t_] * Ms_ gzz[t_] + Ms_ drhodx[t_] * Ms_ drhodx[t_] * Ms_ gxx[t_] + Ms_ dsigmadx[t_] * Ms_ dsigmadx[t_] * Ms_ gyy[t_] + + + 2 * (Ms_ dRdx[t_] * Ms_ drhodx[t_] * Ms_ gxz[t_] + Ms_ dRdx[t_] * Ms_ dsigmadx[t_] * Ms_ gyz[t_] + Ms_ drhodx[t_] * Ms_ dsigmadx[t_] * Ms_ gxy[t_]); + + + fyy[t_] = Ms_ dRdyy[t_] * Ms_ gz[t_] + Ms_ drhodyy[t_] * Ms_ gx[t_] + Ms_ dsigmadyy[t_] * Ms_ gy[t_] + + + Ms_ dRdy[t_] * Ms_ dRdy[t_] * Ms_ gzz[t_] + Ms_ drhody[t_] * Ms_ drhody[t_] * Ms_ gxx[t_] + Ms_ dsigmady[t_] * Ms_ dsigmady[t_] * Ms_ gyy[t_] + + + 2 * (Ms_ dRdy[t_] * Ms_ drhody[t_] * Ms_ gxz[t_] + Ms_ dRdy[t_] * Ms_ dsigmady[t_] * Ms_ gyz[t_] + Ms_ drhody[t_] * Ms_ dsigmady[t_] * Ms_ gxy[t_]); + + + fzz[t_] = Ms_ dRdzz[t_] * Ms_ gz[t_] + Ms_ drhodzz[t_] * Ms_ gx[t_] + Ms_ dsigmadzz[t_] * Ms_ gy[t_] + + + Ms_ dRdz[t_] * Ms_ dRdz[t_] * Ms_ gzz[t_] + Ms_ drhodz[t_] * Ms_ drhodz[t_] * Ms_ gxx[t_] + Ms_ dsigmadz[t_] * Ms_ dsigmadz[t_] * Ms_ gyy[t_] + + + 2 * (Ms_ dRdz[t_] * Ms_ drhodz[t_] * Ms_ gxz[t_] + Ms_ dRdz[t_] * Ms_ dsigmadz[t_] * Ms_ gyz[t_] + Ms_ drhodz[t_] * Ms_ dsigmadz[t_] * Ms_ gxy[t_]); + + + fxy[t_] = Ms_ dRdxy[t_] * Ms_ gz[t_] + Ms_ drhodxy[t_] * Ms_ gx[t_] + Ms_ dsigmadxy[t_] * Ms_ gy[t_] + + + Ms_ dRdx[t_] * Ms_ drhody[t_] * Ms_ gxz[t_] + Ms_ dRdx[t_] * Ms_ dsigmady[t_] * Ms_ gyz[t_] + Ms_ drhodx[t_] * Ms_ dsigmady[t_] * Ms_ gxy[t_] + + + Ms_ dRdy[t_] * Ms_ drhodx[t_] * Ms_ gxz[t_] + Ms_ dRdy[t_] * Ms_ dsigmadx[t_] * Ms_ gyz[t_] + Ms_ drhody[t_] * Ms_ dsigmadx[t_] * Ms_ gxy[t_] + + + Ms_ dRdx[t_] * Ms_ dRdy[t_] * Ms_ gzz[t_] + Ms_ drhodx[t_] * Ms_ drhody[t_] * Ms_ gxx[t_] + Ms_ dsigmadx[t_] * Ms_ dsigmady[t_] * Ms_ gyy[t_]; + + + fxz[t_] = Ms_ dRdxz[t_] * Ms_ gz[t_] + Ms_ drhodxz[t_] * Ms_ gx[t_] + Ms_ dsigmadxz[t_] * Ms_ gy[t_] + + + Ms_ dRdx[t_] * Ms_ drhodz[t_] * Ms_ gxz[t_] + Ms_ dRdx[t_] * Ms_ dsigmadz[t_] * Ms_ gyz[t_] + Ms_ drhodx[t_] * Ms_ dsigmadz[t_] * Ms_ gxy[t_] + + + Ms_ dRdz[t_] * Ms_ drhodx[t_] * Ms_ gxz[t_] + Ms_ dRdz[t_] * Ms_ dsigmadx[t_] * Ms_ gyz[t_] + Ms_ drhodz[t_] * Ms_ dsigmadx[t_] * Ms_ gxy[t_] + + + Ms_ dRdx[t_] * Ms_ dRdz[t_] * Ms_ gzz[t_] + Ms_ drhodx[t_] * Ms_ drhodz[t_] * Ms_ gxx[t_] + Ms_ dsigmadx[t_] * Ms_ dsigmadz[t_] * Ms_ gyy[t_]; + + + fyz[t_] = Ms_ dRdyz[t_] * Ms_ gz[t_] + Ms_ drhodyz[t_] * Ms_ gx[t_] + Ms_ dsigmadyz[t_] * Ms_ gy[t_] + + + Ms_ dRdz[t_] * Ms_ drhody[t_] * Ms_ gxz[t_] + Ms_ dRdz[t_] * Ms_ dsigmady[t_] * Ms_ gyz[t_] + Ms_ drhodz[t_] * Ms_ dsigmady[t_] * Ms_ gxy[t_] + + + Ms_ dRdy[t_] * Ms_ drhodz[t_] * Ms_ gxz[t_] + Ms_ dRdy[t_] * Ms_ dsigmadz[t_] * Ms_ gyz[t_] + Ms_ drhody[t_] * Ms_ dsigmadz[t_] * Ms_ gxy[t_] + + + Ms_ dRdz[t_] * Ms_ dRdy[t_] * Ms_ gzz[t_] + Ms_ drhodz[t_] * Ms_ drhody[t_] * Ms_ gxx[t_] + Ms_ dsigmadz[t_] * Ms_ dsigmady[t_] * Ms_ gyy[t_]; + + t_ += STEP_SIZE; + } +} + +__global__ void sub_fdderivs_sh(double *fh,double *fxx,double *fxy,double *fxz,double *fyy,double *fyz,double *fzz) + { + int curr = blockIdx.x*blockDim.x+threadIdx.x; + int ps; //TOTRY: i,j,k; double value; + + while(curr < _3D_SIZE[0]) + { + int k = curr / _2D_SIZE[0]; + ps = curr - (_2D_SIZE[0] * k); //TOTRY: = curr % _2D_SIZE[0]; + int j = ps / ex_c[0]; + int i = ps - (j * ex_c[0]); + + if(k == ex_c[2] || i == ex_c[0] || j == ex_c[1]){ + curr += STEP_SIZE; + continue; + } + else + { + //xx + if(i+2 <= ijk_max[0] && i-2 >= ijk_min[0]){ + fxx[curr] = Fdxdx*(-_FH2_(i,(j+2),(k))+16*_FH2_((i+1),(j+2),(k))-30*_FH2_((i+2),(j+2),(k)) + -_FH2_((i+4),(j+2),(k))+16*_FH2_((i+3),(j+2),(k)) ); + + } + else if(i+1 <= ijk_max[0] && i-1 >= ijk_min[0]){ + fxx[curr] = Sdxdx*(_FH2_((i+1),(j+2),(k))-2*_FH2_((i+2),(j+2),(k)) + +_FH2_(i+3,(j+2),(k)) ); + } + + + + //zz-- + if(k+2 <= ijk_max[2] && k-2 >= ijk_min[2]){ + fzz[curr] = Fdzdz * (-_FH2_((i+2),(j+2),(k-2)) + 16 *_FH2_((i+2),(j+2),(k-1))- 30*_FH2_((i+2),(j+2),(k)) + -_FH2_((i+2),(j+2),(k+2))+ 16*_FH2_((i+2),(j+2),(k+1))); + + } + else if(k+1 <= ijk_max[2] && k-1 >= ijk_min[2]){ + fzz[curr] = Sdzdz*(_FH2_((i+2),(j+2),(k-1))- 2 * _FH2_((i+2),(j+2),(k)) + + _FH2_((i+2),(j+2),(k+1)) ); + + //fzz[curr] = 256; + } + + //yy-- + if(j+2 <= ijk_max[1] && j-2 >= ijk_min[1]){ + fyy[curr] = Fdydy*(-_FH2_((i+2),j,(k))+16*_FH2_((i+2),(j+1),(k))-30*_FH2_((i+2),(j+2),(k)) + -_FH2_((i+2),(j+4),(k))+16*_FH2_((i+2),(j+3),(k)) ); + } + else if(j+1 <= ijk_max[1] && j-1 >= ijk_min[1]){ + fyy[curr] = Sdydy*(_FH2_((i+2),(j+1),(k))-2*_FH2_((i+2),(j+2),(k)) + +_FH2_((i+2),(j+3),(k)) ); + } + + + + //xy + if(i+2 <= ijk_max[0] && i-2 >= ijk_min[0] && j+2 <= ijk_max[1] && j-2 >= ijk_min[1]) + fxy[curr] = Fdxdy*((_FH2_(i,j,(k))-8*_FH2_((i+1),j,(k))+8*_FH2_((i+3),j,(k))-_FH2_((i+4),j,(k))) + -8 *(_FH2_(i,(j+1),(k))-8*_FH2_((i+1),(j+1),(k))+8*_FH2_((i+3),(j+1),(k))-_FH2_((i+4),(j+1),(k))) + +8 *(_FH2_(i,(j+3),(k))-8*_FH2_((i+1),(j+3),(k))+8*_FH2_((i+3),(j+3),(k))-_FH2_((i+4),(j+3),(k))) + - (_FH2_(i,(j+4),(k))-8*_FH2_((i+1),(j+4),(k))+8*_FH2_((i+3),(j+4),(k))-_FH2_((i+4),(j+4),(k)))); + + else if(i+1 <= ijk_max[0] && i-1 >= ijk_min[0] && j+1 <= ijk_max[1] && j-1 >= ijk_min[1]) + + fxy[curr] = Sdxdy*(_FH2_((i+1),(j+1),(k))-_FH2_((i+3),(j+1),(k))-_FH2_((i+1),(j+3),(k))+_FH2_((i+3),(j+3),(k))); + //xz + if(i+2 <= ijk_max[0] && i-2 >= ijk_min[0] && k+2 <= ijk_max[2] && k-2 >= ijk_min[2]) + fxz[curr] = Fdxdz*((_FH2_(i,(j+2),(k-2))-8*_FH2_((i+1),(j+2),(k-2))+8*_FH2_((i+3),(j+2),(k-2))-_FH2_((i+4),(j+2),(k-2))) + -8 *(_FH2_(i,(j+2),(k-1))-8*_FH2_((i+1),(j+2),(k-1))+8*_FH2_((i+3),(j+2),(k-1))-_FH2_((i+4),(j+2),(k-1))) + +8 *(_FH2_(i,(j+2),(k+1))-8*_FH2_((i+1),(j+2),(k+1))+8*_FH2_((i+3),(j+2),(k+1))-_FH2_((i+4),(j+2),(k+1))) + - (_FH2_(i,(j+2),(k+2))-8*_FH2_((i+1),(j+2),(k+2))+8*_FH2_((i+3),(j+2),(k+2))-_FH2_((i+4),(j+2),(k+2)))); + + else if(i+1 <= ijk_max[0] && i-1 >= ijk_min[0] && k+1 <= ijk_max[2] && k-1 >= ijk_min[2]) + fxz[curr] = Sdxdz*(_FH2_((i+1),(j+2),(k-1))-_FH2_((i+3),(j+2),(k-1))-_FH2_((i+1),(j+2),(k+1))+_FH2_((i+3),(j+2),(k+1))); + //yz + if(j+2 <= ijk_max[1] && j-2 >= ijk_min[1] && k+2 <= ijk_max[2] && k-2 >= ijk_min[2]) + fyz[curr] = Fdydz*( (_FH2_((i+2),j,(k-2))-8*_FH2_((i+2),(j+1),(k-2))+8*_FH2_((i+2),(j+3),(k-2))-_FH2_((i+2),(j+4),(k-2))) + -8 *(_FH2_((i+2),j,(k-1))-8*_FH2_((i+2),(j+1),(k-1))+8*_FH2_((i+2),(j+3),(k-1))-_FH2_((i+2),(j+4),(k-1))) + +8 *(_FH2_((i+2),j,(k+1))-8*_FH2_((i+2),(j+1),(k+1))+8*_FH2_((i+2),(j+3),(k+1))-_FH2_((i+2),(j+4),(k+1))) + - (_FH2_((i+2),j,(k+2))-8*_FH2_((i+2),(j+1),(k+2))+8*_FH2_((i+2),(j+3),(k+2))-_FH2_((i+2),(j+4),(k+2)))); + + else if(j+1 <= ijk_max[1] && j-1 >= ijk_min[1] && k+1 <= ijk_max[2] && k-1 >= ijk_min[2]) + fyz[curr] = Sdydz*(_FH2_((i+2),(j+1),(k-1))-_FH2_((i+2),(j+3),(k-1))-_FH2_((i+2),(j+1),(k+1))+_FH2_((i+2),(j+3),(k+1))); + + curr += STEP_SIZE; + } + } + __syncthreads(); + } + +inline void sub_fdderivs_shc(int& sst,double * f,double * fh, + double * fxx,double * fxy,double * fxz, + double * fyy,double * fyz,double * fzz, double * SoA) +{ + double SoA1[2]; + if(sst == 0){ + SoA1[0] = SoA[0]; + SoA1[1] = SoA[1]; + } + else if(sst == 2 || sst == 3 ){ + SoA1[0] = SoA[1]; + SoA1[1] = SoA[2]; + } + else if(sst == 4 || sst==5){ + SoA1[0] = SoA[0]; + SoA1[1] = SoA[2]; + } + cudaMemset(Msh_ gx,0,h_3D_SIZE[0] * sizeof(double)); + cudaMemset(Msh_ gy,0,h_3D_SIZE[0] * sizeof(double)); + cudaMemset(Msh_ gz,0,h_3D_SIZE[0] * sizeof(double)); + cudaMemset(Msh_ gxx,0,h_3D_SIZE[0] * sizeof(double)); + cudaMemset(Msh_ gxy,0,h_3D_SIZE[0] * sizeof(double)); + cudaMemset(Msh_ gxz,0,h_3D_SIZE[0] * sizeof(double)); + cudaMemset(Msh_ gyy,0,h_3D_SIZE[0] * sizeof(double)); + cudaMemset(Msh_ gyz,0,h_3D_SIZE[0] * sizeof(double)); + cudaMemset(Msh_ gzz,0,h_3D_SIZE[0] * sizeof(double)); + + //fderivs_sh + sub_symmetry_bd_ss(2,f,fh,SoA1); + cudaDeviceSynchronize(); + //compare_result_gpu(1,fh,h_3D_SIZE[2]); + sub_fderivs_sh<<>>(fh,Msh_ gx,Msh_ gy,Msh_ gz); + cudaDeviceSynchronize(); + + //fdderivs_sh + sub_symmetry_bd_ss(2,f,fh,SoA1); + cudaDeviceSynchronize(); + //compare_result_gpu(21,fh,h_3D_SIZE[2]); + sub_fdderivs_sh<<>>(fh,Msh_ gxx,Msh_ gxy,Msh_ gxz,Msh_ gyy,Msh_ gyz,Msh_ gzz); + cudaDeviceSynchronize(); + /*compare_result_gpu(11,Msh_ gx,h_3D_SIZE[0]); + compare_result_gpu(12,Msh_ gy,h_3D_SIZE[0]); + compare_result_gpu(13,Msh_ gz,h_3D_SIZE[0]); + compare_result_gpu(1,Msh_ gxx,h_3D_SIZE[0]); + compare_result_gpu(2,Msh_ gxy,h_3D_SIZE[0]); + compare_result_gpu(3,Msh_ gxz,h_3D_SIZE[0]); + compare_result_gpu(4,Msh_ gyy,h_3D_SIZE[0]); + compare_result_gpu(5,Msh_ gyz,h_3D_SIZE[0]); + compare_result_gpu(6,Msh_ gzz,h_3D_SIZE[0]);*/ + sub_fdderivs_shc_part1<<>>(fxx,fxy,fxz,fyy,fyz,fzz); + cudaDeviceSynchronize(); + /*compare_result_gpu(1,fxx,h_3D_SIZE[0]); + compare_result_gpu(2,fxy,h_3D_SIZE[0]); + compare_result_gpu(3,fxz,h_3D_SIZE[0]); + compare_result_gpu(4,fyy,h_3D_SIZE[0]); + compare_result_gpu(5,fyz,h_3D_SIZE[0]); + compare_result_gpu(6,fzz,h_3D_SIZE[0]);*/ +} + +__global__ void computeRicci_ss_part1(double * dst) +{ + int _t = blockIdx.x*blockDim.x+threadIdx.x; + while(_t < _3D_SIZE[0]) + { + dst[_t] = M_ gupxx [_t]* M_ fxx [_t]+ M_ gupyy[_t]* M_ fyy[_t]+ M_ gupzz[_t]* M_ fzz[_t]+ + ( M_ gupxy[_t]* M_ fxy[_t]+ M_ gupxz[_t]* M_ fxz[_t]+ M_ gupyz[_t]* M_ fyz[_t]) * 2; + + _t += STEP_SIZE; + } +} + + inline void computeRicci_ss(int &sst,double * src,double* dst,double * SoA, Meta* meta) +{ + sub_fdderivs_shc(sst,src,Mh_ fh,Mh_ fxx,Mh_ fxy,Mh_ fxz,Mh_ fyy,Mh_ fyz,Mh_ fzz,SoA); + cudaDeviceSynchronize(); + computeRicci_ss_part1<<>>(dst); + cudaDeviceSynchronize(); + +} +__global__ void sub_lopsided_ss_part1(double * dst) +{ + int _t = blockIdx.x*blockDim.x+threadIdx.x; + while(_t < _3D_SIZE[0]) + { + dst[_t] += M_ betax[_t] * M_ fxx[_t] + + M_ betay[_t] * M_ fxy[_t] + + M_ betaz[_t] * M_ fxz[_t]; + + _t += STEP_SIZE; + } +} +inline void sub_lopsided_ss(int& sst,double *src,double* dst,double *SoA) +{ + sub_fderivs_shc(sst,src,Mh_ fh,Mh_ fxx,Mh_ fxy,Mh_ fxz,SoA); + cudaDeviceSynchronize(); + sub_lopsided_ss_part1<<>>(dst); + cudaDeviceSynchronize(); +} + +__global__ void sub_kodis_sh_part1(double *f,double *fh,double *f_rhs) +{ + int _t = blockIdx.x*blockDim.x+threadIdx.x; + int ps; //TOTRY: i,j,k; double value; + double inc_f_rhs; + while(_t < _3D_SIZE[0]) + { + int k = _t / _2D_SIZE[0]; + ps = _t - (_2D_SIZE[0] * k); //TOTRY: = curr % _2D_SIZE[0]; + int j = ps / ex_c[0]; + int i = ps - (j * ex_c[0]); + + if(k == ex_c[2] && i == ex_c[0] && j == ex_c[1]){ + _t += STEP_SIZE; + continue; + } + + if(i-3 >= ijk_min3[0] && i+3 <= ijk_max3[0] && + j-3 >= ijk_min3[1] && j+3 <= ijk_max3[1] && + k-3 >= ijk_min3[2] && k+3 <= ijk_max3[2]) + { + + // x direction + inc_f_rhs = ( (_FH3_(i,(j+3),(k))+_FH3_((i+6),(j+3),(k))) - + 6*(_FH3_((i+1),(j+3),(k))+_FH3_((i+5),(j+3),(k))) + + 15*(_FH3_((i+2),(j+3),(k))+_FH3_((i+4),(j+3),(k))) - + 20* _FH3_((i+3),(j+3),(k)) ) /dX; + + + // y direction + + inc_f_rhs += ( (_FH3_((i+3),j,(k))+_FH3_((i+3),(j+6),(k))) - + 6*(_FH3_((i+3),(j+1),(k))+_FH3_((i+3),(j+5),(k))) + + 15*(_FH3_((i+3),(j+2),(k))+_FH3_((i+3),(j+4),(k))) - + 20* _FH3_((i+3),(j+3),(k)) )/dY; + + // z direction + + inc_f_rhs += ( (_FH3_((i+3),(j+3),(k-3))+_FH3_((i+3),(j+3),(k+3))) - + 6*(_FH3_((i+3),(j+3),(k-2))+_FH3_((i+3),(j+3),(k+2))) + + 15*(_FH3_((i+3),(j+3),(k-1))+_FH3_((i+3),(j+3),(k+1))) - + 20* _FH3_((i+3),(j+3),(k)) )/dZ; + inc_f_rhs *= eps_c; + inc_f_rhs /= 64; + + f_rhs[_t] += inc_f_rhs; //be careful the mark is "+=" not "==" ! + } + + _t += STEP_SIZE; + } +} + +inline void sub_kodis_ss(int &sst,double *f,double *fh,double *f_rhs,double *SoA) +{ + double SoA1[2]; + if(sst == 0){ + SoA1[0] = SoA[0]; + SoA1[1] = SoA[1]; + } + else if(sst == 2 || sst == 3 ){ + SoA1[0] = SoA[1]; + SoA1[1] = SoA[2]; + } + else if(sst == 4 || sst==5){ + SoA1[0] = SoA[0]; + SoA1[1] = SoA[2]; + } + //compare_result_gpu(10,f,h_3D_SIZE[0]); + sub_symmetry_bd_ss(3,f,fh,SoA1); + cudaDeviceSynchronize(); + //compare_result_gpu(0,fh,h_3D_SIZE[3]); + + sub_kodis_sh_part1<<>>(f,fh,f_rhs); + cudaDeviceSynchronize(); + //compare_result_gpu(1,f_rhs,h_3D_SIZE[0]); +} + +__global__ void compute_rhs_ss_part2() +{ + //__shared__ int judge = 1; + int _t = blockIdx.x*blockDim.x+threadIdx.x; + while(_t < _3D_SIZE[0]) + { + if(co_c == 0) + { + // M_ Gam^i_Res = M_ Gam^i + M_ gup^ij_,j + M_ Gmx_Res[_t] = M_ Gamx[_t] - (M_ gupxx[_t]*(M_ gupxx[_t]*M_ gxxx[_t]+M_ gupxy[_t]*M_ gxyx[_t]+M_ gupxz[_t]*M_ gxzx[_t]) + +M_ gupxy[_t]*(M_ gupxx[_t]*M_ gxyx[_t]+M_ gupxy[_t]*M_ gyyx[_t]+M_ gupxz[_t]*M_ gyzx[_t]) + +M_ gupxz[_t]*(M_ gupxx[_t]*M_ gxzx[_t]+M_ gupxy[_t]*M_ gyzx[_t]+M_ gupxz[_t]*M_ gzzx[_t]) + +M_ gupxx[_t]*(M_ gupxy[_t]*M_ gxxy[_t]+M_ gupyy[_t]*M_ gxyy[_t]+M_ gupyz[_t]*M_ gxzy[_t]) + +M_ gupxy[_t]*(M_ gupxy[_t]*M_ gxyy[_t]+M_ gupyy[_t]*M_ gyyy[_t]+M_ gupyz[_t]*M_ gyzy[_t]) + +M_ gupxz[_t]*(M_ gupxy[_t]*M_ gxzy[_t]+M_ gupyy[_t]*M_ gyzy[_t]+M_ gupyz[_t]*M_ gzzy[_t]) + +M_ gupxx[_t]*(M_ gupxz[_t]*M_ gxxz[_t]+M_ gupyz[_t]*M_ gxyz[_t]+M_ gupzz[_t]*M_ gxzz[_t]) + +M_ gupxy[_t]*(M_ gupxz[_t]*M_ gxyz[_t]+M_ gupyz[_t]*M_ gyyz[_t]+M_ gupzz[_t]*M_ gyzz[_t]) + +M_ gupxz[_t]*(M_ gupxz[_t]*M_ gxzz[_t]+M_ gupyz[_t]*M_ gyzz[_t]+M_ gupzz[_t]*M_ gzzz[_t])); + M_ Gmy_Res[_t] = M_ Gamy[_t] - (M_ gupxx[_t]*(M_ gupxy[_t]*M_ gxxx[_t]+M_ gupyy[_t]*M_ gxyx[_t]+M_ gupyz[_t]*M_ gxzx[_t]) + +M_ gupxy[_t]*(M_ gupxy[_t]*M_ gxyx[_t]+M_ gupyy[_t]*M_ gyyx[_t]+M_ gupyz[_t]*M_ gyzx[_t]) + +M_ gupxz[_t]*(M_ gupxy[_t]*M_ gxzx[_t]+M_ gupyy[_t]*M_ gyzx[_t]+M_ gupyz[_t]*M_ gzzx[_t]) + +M_ gupxy[_t]*(M_ gupxy[_t]*M_ gxxy[_t]+M_ gupyy[_t]*M_ gxyy[_t]+M_ gupyz[_t]*M_ gxzy[_t]) + +M_ gupyy[_t]*(M_ gupxy[_t]*M_ gxyy[_t]+M_ gupyy[_t]*M_ gyyy[_t]+M_ gupyz[_t]*M_ gyzy[_t]) + +M_ gupyz[_t]*(M_ gupxy[_t]*M_ gxzy[_t]+M_ gupyy[_t]*M_ gyzy[_t]+M_ gupyz[_t]*M_ gzzy[_t]) + +M_ gupxy[_t]*(M_ gupxz[_t]*M_ gxxz[_t]+M_ gupyz[_t]*M_ gxyz[_t]+M_ gupzz[_t]*M_ gxzz[_t]) + +M_ gupyy[_t]*(M_ gupxz[_t]*M_ gxyz[_t]+M_ gupyz[_t]*M_ gyyz[_t]+M_ gupzz[_t]*M_ gyzz[_t]) + +M_ gupyz[_t]*(M_ gupxz[_t]*M_ gxzz[_t]+M_ gupyz[_t]*M_ gyzz[_t]+M_ gupzz[_t]*M_ gzzz[_t])); + M_ Gmz_Res[_t] = M_ Gamz[_t] - (M_ gupxx[_t]*(M_ gupxz[_t]*M_ gxxx[_t]+M_ gupyz[_t]*M_ gxyx[_t]+M_ gupzz[_t]*M_ gxzx[_t]) + +M_ gupxy[_t]*(M_ gupxz[_t]*M_ gxyx[_t]+M_ gupyz[_t]*M_ gyyx[_t]+M_ gupzz[_t]*M_ gyzx[_t]) + +M_ gupxz[_t]*(M_ gupxz[_t]*M_ gxzx[_t]+M_ gupyz[_t]*M_ gyzx[_t]+M_ gupzz[_t]*M_ gzzx[_t]) + +M_ gupxy[_t]*(M_ gupxz[_t]*M_ gxxy[_t]+M_ gupyz[_t]*M_ gxyy[_t]+M_ gupzz[_t]*M_ gxzy[_t]) + +M_ gupyy[_t]*(M_ gupxz[_t]*M_ gxyy[_t]+M_ gupyz[_t]*M_ gyyy[_t]+M_ gupzz[_t]*M_ gyzy[_t]) + +M_ gupyz[_t]*(M_ gupxz[_t]*M_ gxzy[_t]+M_ gupyz[_t]*M_ gyzy[_t]+M_ gupzz[_t]*M_ gzzy[_t]) + +M_ gupxz[_t]*(M_ gupxz[_t]*M_ gxxz[_t]+M_ gupyz[_t]*M_ gxyz[_t]+M_ gupzz[_t]*M_ gxzz[_t]) + +M_ gupyz[_t]*(M_ gupxz[_t]*M_ gxyz[_t]+M_ gupyz[_t]*M_ gyyz[_t]+M_ gupzz[_t]*M_ gyzz[_t]) + +M_ gupzz[_t]*(M_ gupxz[_t]*M_ gxzz[_t]+M_ gupyz[_t]*M_ gyzz[_t]+M_ gupzz[_t]*M_ gzzz[_t])); + }//if(co == 0) + + M_ div_beta[_t] = M_ betaxx[_t] + M_ betayy[_t] + M_ betazz[_t]; + M_ chi_rhs[_t] = F2o3 *M_ chin1[_t]*( M_ alpn1[_t] * M_ trK[_t] - M_ div_beta[_t] ); //rhs[_t] for M_ chi + + M_ gxx_rhs[_t] = - 2 * M_ alpn1[_t] * M_ Axx[_t] - F2o3 * M_ gxx[_t]* M_ div_beta[_t] + + 2 *( M_ gxx[_t]* M_ betaxx[_t]+ M_ gxy[_t]* M_ betayx[_t]+ M_ gxz[_t]* M_ betazx[_t]); + M_ gyy_rhs[_t] = - 2 * M_ alpn1[_t] * M_ Ayy[_t] - F2o3 * M_ gyy[_t]* M_ div_beta[_t] + + 2 *( M_ gxy[_t]* M_ betaxy[_t]+ M_ gyy[_t]* M_ betayy[_t]+ M_ gyz[_t]* M_ betazy[_t]); + + M_ gzz_rhs[_t] = - 2 * M_ alpn1[_t] * M_ Azz[_t] - F2o3 * M_ gzz[_t]* M_ div_beta[_t] + + 2 *( M_ gxz[_t]* M_ betaxz[_t]+ M_ gyz[_t]* M_ betayz[_t]+ M_ gzz[_t]* M_ betazz[_t]); + + M_ gxy_rhs[_t] = - 2 * M_ alpn1[_t] * M_ Axy[_t] + F1o3 * M_ gxy[_t] * M_ div_beta[_t] + + M_ gxx[_t]* M_ betaxy[_t] + M_ gxz[_t]* M_ betazy[_t]+ + M_ gyy[_t]* M_ betayx[_t]+ M_ gyz[_t]* M_ betazx[_t] + - M_ gxy[_t]* M_ betazz[_t]; + + M_ gyz_rhs[_t] = - 2 * M_ alpn1[_t] * M_ Ayz[_t] + F1o3 * M_ gyz[_t] * M_ div_beta[_t] + + M_ gxy[_t]* M_ betaxz[_t]+ M_ gyy[_t]* M_ betayz[_t] + + M_ gxz[_t]* M_ betaxy[_t] + M_ gzz[_t]* M_ betazy[_t] + - M_ gyz[_t]* M_ betaxx[_t]; + + M_ gxz_rhs[_t] = - 2 * M_ alpn1[_t] * M_ Axz[_t] + F1o3 * M_ gxz[_t] * M_ div_beta[_t] + + M_ gxx[_t]* M_ betaxz[_t]+ M_ gxy[_t]* M_ betayz[_t] + + M_ gyz[_t]* M_ betayx[_t]+ M_ gzz[_t]* M_ betazx[_t] + - M_ gxz[_t]* M_ betayy[_t]; //rhs[_t] for gij + + // invert tilted metric + M_ gupzz[_t]= M_ gxx[_t]* M_ gyy[_t]* M_ gzz[_t]+ M_ gxy[_t]* M_ gyz[_t]* M_ gxz[_t]+ M_ gxz[_t]* M_ gxy[_t]* M_ gyz[_t]- + M_ gxz[_t]* M_ gyy[_t]* M_ gxz[_t]- M_ gxy[_t]* M_ gxy[_t]* M_ gzz[_t]- M_ gxx[_t]* M_ gyz[_t]* M_ gyz[_t]; + M_ gupxx[_t]= ( M_ gyy[_t]* M_ gzz[_t]- M_ gyz[_t]* M_ gyz[_t]) / M_ gupzz[_t]; + M_ gupxy[_t]= - ( M_ gxy[_t]* M_ gzz[_t]- M_ gyz[_t]* M_ gxz[_t]) / M_ gupzz[_t]; + M_ gupxz[_t]= ( M_ gxy[_t]* M_ gyz[_t]- M_ gyy[_t]* M_ gxz[_t]) / M_ gupzz[_t]; + M_ gupyy[_t]= ( M_ gxx[_t]* M_ gzz[_t]- M_ gxz[_t]* M_ gxz[_t]) / M_ gupzz[_t]; + M_ gupyz[_t]= - ( M_ gxx[_t]* M_ gyz[_t]- M_ gxy[_t]* M_ gxz[_t]) / M_ gupzz[_t]; + M_ gupzz[_t]= ( M_ gxx[_t]* M_ gyy[_t]- M_ gxy[_t]* M_ gxy[_t]) / M_ gupzz[_t]; + //if(threadIdx.x == 0){ + // judge = co_c; + //} + //__syncthreads(); + + + + // second kind of connection + M_ Gamxxx[_t]=HALF*( M_ gupxx[_t]*M_ gxxx[_t]+ M_ gupxy[_t]*(2*M_ gxyx[_t]- M_ gxxy[_t]) + M_ gupxz[_t]*(2*M_ gxzx[_t]- M_ gxxz[_t])); + M_ Gamyxx[_t]=HALF*( M_ gupxy[_t]*M_ gxxx[_t]+ M_ gupyy[_t]*(2*M_ gxyx[_t]- M_ gxxy[_t]) + M_ gupyz[_t]*(2*M_ gxzx[_t]- M_ gxxz[_t])); + M_ Gamzxx[_t]=HALF*( M_ gupxz[_t]*M_ gxxx[_t]+ M_ gupyz[_t]*(2*M_ gxyx[_t]- M_ gxxy[_t]) + M_ gupzz[_t]*(2*M_ gxzx[_t]- M_ gxxz[_t])); + + M_ Gamxyy[_t]=HALF*( M_ gupxx[_t]*(2*M_ gxyy[_t]- M_ gyyx[_t]) + M_ gupxy[_t]*M_ gyyy[_t]+ M_ gupxz[_t]*(2*M_ gyzy[_t]- M_ gyyz[_t])); + M_ Gamyyy[_t]=HALF*( M_ gupxy[_t]*(2*M_ gxyy[_t]- M_ gyyx[_t]) + M_ gupyy[_t]*M_ gyyy[_t]+ M_ gupyz[_t]*(2*M_ gyzy[_t]- M_ gyyz[_t])); + M_ Gamzyy[_t]=HALF*( M_ gupxz[_t]*(2*M_ gxyy[_t]- M_ gyyx[_t]) + M_ gupyz[_t]*M_ gyyy[_t]+ M_ gupzz[_t]*(2*M_ gyzy[_t]- M_ gyyz[_t])); + + M_ Gamxzz[_t]=HALF*( M_ gupxx[_t]*(2*M_ gxzz[_t]- M_ gzzx[_t]) + M_ gupxy[_t]*(2*M_ gyzz[_t]- M_ gzzy[_t]) + M_ gupxz[_t]*M_ gzzz[_t]); + M_ Gamyzz[_t]=HALF*( M_ gupxy[_t]*(2*M_ gxzz[_t]- M_ gzzx[_t]) + M_ gupyy[_t]*(2*M_ gyzz[_t]- M_ gzzy[_t]) + M_ gupyz[_t]*M_ gzzz[_t]); + M_ Gamzzz[_t]=HALF*( M_ gupxz[_t]*(2*M_ gxzz[_t]- M_ gzzx[_t]) + M_ gupyz[_t]*(2*M_ gyzz[_t]- M_ gzzy[_t]) + M_ gupzz[_t]*M_ gzzz[_t]); + + M_ Gamxxy[_t]=HALF*( M_ gupxx[_t]*M_ gxxy[_t]+ M_ gupxy[_t]*M_ gyyx[_t]+ M_ gupxz[_t]*( M_ gxzy[_t]+ M_ gyzx[_t]- M_ gxyz[_t]) ); + M_ Gamyxy[_t]=HALF*( M_ gupxy[_t]*M_ gxxy[_t]+ M_ gupyy[_t]*M_ gyyx[_t]+ M_ gupyz[_t]*( M_ gxzy[_t]+ M_ gyzx[_t]- M_ gxyz[_t]) ); + M_ Gamzxy[_t]=HALF*( M_ gupxz[_t]*M_ gxxy[_t]+ M_ gupyz[_t]*M_ gyyx[_t]+ M_ gupzz[_t]*( M_ gxzy[_t]+ M_ gyzx[_t]- M_ gxyz[_t]) ); + + M_ Gamxxz[_t]=HALF*( M_ gupxx[_t]*M_ gxxz[_t]+ M_ gupxy[_t]*( M_ gxyz[_t]+ M_ gyzx[_t]- M_ gxzy[_t]) + M_ gupxz[_t]*M_ gzzx[_t]); + M_ Gamyxz[_t]=HALF*( M_ gupxy[_t]*M_ gxxz[_t]+ M_ gupyy[_t]*( M_ gxyz[_t]+ M_ gyzx[_t]- M_ gxzy[_t]) + M_ gupyz[_t]*M_ gzzx[_t]); + M_ Gamzxz[_t]=HALF*( M_ gupxz[_t]*M_ gxxz[_t]+ M_ gupyz[_t]*( M_ gxyz[_t]+ M_ gyzx[_t]- M_ gxzy[_t]) + M_ gupzz[_t]*M_ gzzx[_t]); + + M_ Gamxyz[_t]=HALF*( M_ gupxx[_t]*( M_ gxyz[_t]+ M_ gxzy[_t]- M_ gyzx[_t]) + M_ gupxy[_t]*M_ gyyz[_t]+ M_ gupxz[_t]*M_ gzzy[_t]); + M_ Gamyyz[_t]=HALF*( M_ gupxy[_t]*( M_ gxyz[_t]+ M_ gxzy[_t]- M_ gyzx[_t]) + M_ gupyy[_t]*M_ gyyz[_t]+ M_ gupyz[_t]*M_ gzzy[_t]); + M_ Gamzyz[_t]=HALF*( M_ gupxz[_t]*( M_ gxyz[_t]+ M_ gxzy[_t]- M_ gyzx[_t]) + M_ gupyz[_t]*M_ gyyz[_t]+ M_ gupzz[_t]*M_ gzzy[_t]); + // Raise indices of \tilde A_{ij} and store in R_ij + + M_ Rxx[_t]= M_ gupxx[_t]* M_ gupxx[_t]* M_ Axx[_t]+ M_ gupxy[_t]* M_ gupxy[_t]* M_ Ayy[_t]+ M_ gupxz[_t]* M_ gupxz[_t]* M_ Azz[_t]+ + 2*(M_ gupxx[_t]* M_ gupxy[_t]* M_ Axy[_t]+ M_ gupxx[_t]* M_ gupxz[_t]* M_ Axz[_t]+ M_ gupxy[_t]* M_ gupxz[_t]* M_ Ayz[_t]); + + M_ Ryy[_t]= M_ gupxy[_t]* M_ gupxy[_t]* M_ Axx[_t]+ M_ gupyy[_t]* M_ gupyy[_t]* M_ Ayy[_t]+ M_ gupyz[_t]* M_ gupyz[_t]* M_ Azz[_t]+ + 2*(M_ gupxy[_t]* M_ gupyy[_t]* M_ Axy[_t]+ M_ gupxy[_t]* M_ gupyz[_t]* M_ Axz[_t]+ M_ gupyy[_t]* M_ gupyz[_t]* M_ Ayz[_t]); + + M_ Rzz[_t]= M_ gupxz[_t]* M_ gupxz[_t]* M_ Axx[_t]+ M_ gupyz[_t]* M_ gupyz[_t]* M_ Ayy[_t]+ M_ gupzz[_t]* M_ gupzz[_t]* M_ Azz[_t]+ + 2*(M_ gupxz[_t]* M_ gupyz[_t]* M_ Axy[_t]+ M_ gupxz[_t]* M_ gupzz[_t]* M_ Axz[_t]+ M_ gupyz[_t]* M_ gupzz[_t]* M_ Ayz[_t]); + + M_ Rxy[_t]= M_ gupxx[_t]* M_ gupxy[_t]* M_ Axx[_t]+ M_ gupxy[_t]* M_ gupyy[_t]* M_ Ayy[_t]+ M_ gupxz[_t]* M_ gupyz[_t]* M_ Azz[_t]+ + (M_ gupxx[_t]* M_ gupyy[_t] + M_ gupxy[_t]* M_ gupxy[_t])* M_ Axy[_t] + + (M_ gupxx[_t]* M_ gupyz[_t] + M_ gupxz[_t]* M_ gupxy[_t])* M_ Axz[_t] + + (M_ gupxy[_t]* M_ gupyz[_t] + M_ gupxz[_t]* M_ gupyy[_t])* M_ Ayz[_t]; + + M_ Rxz[_t]= M_ gupxx[_t]* M_ gupxz[_t]* M_ Axx[_t]+ M_ gupxy[_t]* M_ gupyz[_t]* M_ Ayy[_t]+ M_ gupxz[_t]* M_ gupzz[_t]* M_ Azz[_t]+ + (M_ gupxx[_t]* M_ gupyz[_t] + M_ gupxy[_t]* M_ gupxz[_t])* M_ Axy[_t] + + (M_ gupxx[_t]* M_ gupzz[_t] + M_ gupxz[_t]* M_ gupxz[_t])* M_ Axz[_t] + + (M_ gupxy[_t]* M_ gupzz[_t] + M_ gupxz[_t]* M_ gupyz[_t])* M_ Ayz[_t]; + + M_ Ryz[_t]= M_ gupxy[_t]* M_ gupxz[_t]* M_ Axx[_t]+ M_ gupyy[_t]* M_ gupyz[_t]* M_ Ayy[_t]+ M_ gupyz[_t]* M_ gupzz[_t]* M_ Azz[_t]+ + (M_ gupxy[_t]* M_ gupyz[_t] + M_ gupyy[_t]* M_ gupxz[_t])* M_ Axy[_t] + + (M_ gupxy[_t]* M_ gupzz[_t] + M_ gupyz[_t]* M_ gupxz[_t])* M_ Axz[_t] + + (M_ gupyy[_t]* M_ gupzz[_t] + M_ gupyz[_t]* M_ gupyz[_t])* M_ Ayz[_t]; + + // Right hand side for M_ Gam^i without shift terms... + + M_ Gamx_rhs[_t] = - 2 * ( M_ Lapx[_t] * M_ Rxx[_t]+ M_ Lapy[_t] * M_ Rxy[_t]+ M_ Lapz[_t] * M_ Rxz[_t]) + + 2 * M_ alpn1[_t] * ( + -F3o2/M_ chin1[_t] * ( M_ chix[_t] * M_ Rxx[_t]+ M_ chiy[_t] * M_ Rxy[_t]+ M_ chiz[_t] * M_ Rxz[_t]) - + M_ gupxx[_t]* ( F2o3 * M_ Kx[_t] + 8 * PI * M_ Sx[_t] ) - + M_ gupxy[_t]* ( F2o3 * M_ Ky[_t] + 8 * PI * M_ Sy[_t] ) - + M_ gupxz[_t]* ( F2o3 * M_ Kz[_t] + 8 * PI * M_ Sz[_t] ) + + M_ Gamxxx[_t]* M_ Rxx[_t]+ M_ Gamxyy[_t]* M_ Ryy[_t]+ M_ Gamxzz[_t]* M_ Rzz[_t] + + 2 * ( M_ Gamxxy[_t]* M_ Rxy[_t]+ M_ Gamxxz[_t]* M_ Rxz[_t]+ M_ Gamxyz[_t]* M_ Ryz[_t]) ); + + M_ Gamy_rhs[_t] = - 2 * ( M_ Lapx[_t] * M_ Rxy[_t]+ M_ Lapy[_t] * M_ Ryy[_t]+ M_ Lapz[_t] * M_ Ryz[_t]) + + 2 * M_ alpn1[_t] * ( + -F3o2/M_ chin1[_t] * ( M_ chix[_t] * M_ Rxy[_t]+ M_ chiy[_t] * M_ Ryy[_t]+ M_ chiz[_t] * M_ Ryz[_t]) - + M_ gupxy[_t]* ( F2o3 * M_ Kx[_t] + 8 * PI * M_ Sx[_t] ) - + M_ gupyy[_t]* ( F2o3 * M_ Ky[_t] + 8 * PI * M_ Sy[_t] ) - + M_ gupyz[_t]* ( F2o3 * M_ Kz [_t] + 8 * PI * M_ Sz[_t] ) + + M_ Gamyxx[_t]* M_ Rxx[_t]+ M_ Gamyyy[_t]* M_ Ryy[_t]+ M_ Gamyzz[_t]* M_ Rzz[_t] + + 2 * ( M_ Gamyxy[_t]* M_ Rxy[_t]+ M_ Gamyxz[_t]* M_ Rxz[_t]+ M_ Gamyyz[_t]* M_ Ryz[_t]) ); + + M_ Gamz_rhs[_t] = - 2 * ( M_ Lapx[_t] * M_ Rxz[_t]+ M_ Lapy[_t] * M_ Ryz[_t]+ M_ Lapz[_t] * M_ Rzz[_t]) + + 2 * M_ alpn1[_t] * ( + -F3o2/M_ chin1[_t] * ( M_ chix[_t] * M_ Rxz[_t]+ M_ chiy[_t] * M_ Ryz[_t]+ M_ chiz[_t] * M_ Rzz[_t]) - + M_ gupxz[_t]* ( F2o3 * M_ Kx[_t] + 8 * PI * M_ Sx[_t] ) - + M_ gupyz[_t]* ( F2o3 * M_ Ky[_t] + 8 * PI * M_ Sy[_t] ) - + M_ gupzz[_t]* ( F2o3 * M_ Kz[_t] + 8 * PI * M_ Sz[_t] ) + + M_ Gamzxx[_t]* M_ Rxx[_t]+ M_ Gamzyy[_t]* M_ Ryy[_t]+ M_ Gamzzz[_t]* M_ Rzz[_t] + + 2 * ( M_ Gamzxy[_t]* M_ Rxy[_t]+ M_ Gamzxz[_t]* M_ Rxz[_t]+ M_ Gamzyz[_t]* M_ Ryz[_t]) ); + + _t += STEP_SIZE; + } +} + +__global__ void compute_rhs_ss_part3() +{ + int _t = blockIdx.x*blockDim.x+threadIdx.x; + while(_t < _3D_SIZE[0]) + { + M_ fxx [_t]= M_ gxxx[_t]+ M_ gxyy[_t]+ M_ gxzz[_t]; + M_ fxy[_t]= M_ gxyx[_t]+ M_ gyyy[_t]+ M_ gyzz[_t]; + M_ fxz[_t]= M_ gxzx[_t]+ M_ gyzy[_t]+ M_ gzzz[_t]; + + M_ Gamxa[_t]= M_ gupxx [_t]* M_ Gamxxx [_t]+ M_ gupyy[_t]* M_ Gamxyy[_t]+ M_ gupzz[_t]* M_ Gamxzz[_t]+ + 2*( M_ gupxy[_t]* M_ Gamxxy[_t]+ M_ gupxz[_t]* M_ Gamxxz[_t]+ M_ gupyz[_t]* M_ Gamxyz[_t]); + M_ Gamya[_t]= M_ gupxx [_t]* M_ Gamyxx [_t]+ M_ gupyy[_t]* M_ Gamyyy[_t]+ M_ gupzz[_t]* M_ Gamyzz[_t]+ + 2*( M_ gupxy[_t]* M_ Gamyxy[_t]+ M_ gupxz[_t]* M_ Gamyxz[_t]+ M_ gupyz[_t]* M_ Gamyyz[_t]); + M_ Gamza[_t]= M_ gupxx [_t]* M_ Gamzxx [_t]+ M_ gupyy[_t]* M_ Gamzyy[_t]+ M_ gupzz[_t]* M_ Gamzzz[_t]+ + 2*( M_ gupxy[_t]* M_ Gamzxy[_t]+ M_ gupxz[_t]* M_ Gamzxz[_t]+ M_ gupyz[_t]* M_ Gamzyz[_t]); + + + + M_ Gamx_rhs[_t] = M_ Gamx_rhs[_t] + F2o3 * M_ Gamxa[_t]* M_ div_beta[_t] - + M_ Gamxa[_t]* M_ betaxx [_t]- M_ Gamya[_t]* M_ betaxy[_t]- M_ Gamza[_t]* M_ betaxz[_t] + + F1o3 * (M_ gupxx [_t]* M_ fxx [_t] + M_ gupxy[_t]* M_ fxy[_t] + M_ gupxz[_t]* M_ fxz[_t] ) + + M_ gupxx [_t]* M_ gxxx [_t] + M_ gupyy[_t]* M_ gyyx [_t] + M_ gupzz[_t]* M_ gzzx [_t] + + 2 * (M_ gupxy[_t]* M_ gxyx [_t] + M_ gupxz[_t]* M_ gxzx [_t] + M_ gupyz[_t]* M_ gyzx [_t] ); + + M_ Gamy_rhs[_t] = M_ Gamy_rhs[_t] + F2o3 * M_ Gamya[_t]* M_ div_beta[_t] - + M_ Gamxa[_t]* M_ betayx [_t]- M_ Gamya[_t]* M_ betayy[_t]- M_ Gamza[_t]* M_ betayz[_t] + + F1o3 * (M_ gupxy[_t]* M_ fxx [_t] + M_ gupyy[_t]* M_ fxy[_t] + M_ gupyz[_t]* M_ fxz[_t] ) + + M_ gupxx [_t]* M_ gxxy[_t] + M_ gupyy[_t]* M_ gyyy[_t] + M_ gupzz[_t]* M_ gzzy[_t] + + 2 * (M_ gupxy[_t]* M_ gxyy[_t] + M_ gupxz[_t]* M_ gxzy[_t] + M_ gupyz[_t]* M_ gyzy[_t] ); + + M_ Gamz_rhs[_t] = M_ Gamz_rhs[_t] + F2o3 * M_ Gamza[_t]* M_ div_beta[_t] - + M_ Gamxa[_t]* M_ betazx [_t]- M_ Gamya[_t]* M_ betazy[_t]- M_ Gamza[_t]* M_ betazz[_t] + + F1o3 * (M_ gupxz[_t]* M_ fxx [_t] + M_ gupyz[_t]* M_ fxy[_t] + M_ gupzz[_t]* M_ fxz[_t] ) + + M_ gupxx [_t]* M_ gxxz[_t] + M_ gupyy[_t]* M_ gyyz[_t] + M_ gupzz[_t]* M_ gzzz[_t] + + 2 * (M_ gupxy[_t]* M_ gxyz[_t] + M_ gupxz[_t]* M_ gxzz[_t] + M_ gupyz[_t]* M_ gyzz[_t] ) ; //rhs M_ for M_ Gam^i + + //first kind of connection stored in M_ gij,k + M_ gxxx [_t]= M_ gxx [_t]* M_ Gamxxx [_t]+ M_ gxy[_t]* M_ Gamyxx [_t]+ M_ gxz[_t]* M_ Gamzxx[_t]; + M_ gxyx [_t]= M_ gxx [_t]* M_ Gamxxy[_t]+ M_ gxy[_t]* M_ Gamyxy[_t]+ M_ gxz[_t]* M_ Gamzxy[_t]; + M_ gxzx [_t]= M_ gxx [_t]* M_ Gamxxz[_t]+ M_ gxy[_t]* M_ Gamyxz[_t]+ M_ gxz[_t]* M_ Gamzxz[_t]; + M_ gyyx [_t]= M_ gxx [_t]* M_ Gamxyy[_t]+ M_ gxy[_t]* M_ Gamyyy[_t]+ M_ gxz[_t]* M_ Gamzyy[_t]; + M_ gyzx [_t]= M_ gxx [_t]* M_ Gamxyz[_t]+ M_ gxy[_t]* M_ Gamyyz[_t]+ M_ gxz[_t]* M_ Gamzyz[_t]; + M_ gzzx [_t]= M_ gxx [_t]* M_ Gamxzz[_t]+ M_ gxy[_t]* M_ Gamyzz[_t]+ M_ gxz[_t]* M_ Gamzzz[_t]; + M_ gxxy[_t]= M_ gxy[_t]* M_ Gamxxx [_t]+ M_ gyy[_t]* M_ Gamyxx [_t]+ M_ gyz[_t]* M_ Gamzxx[_t]; + M_ gxyy[_t]= M_ gxy[_t]* M_ Gamxxy[_t]+ M_ gyy[_t]* M_ Gamyxy[_t]+ M_ gyz[_t]* M_ Gamzxy[_t]; + M_ gxzy[_t]= M_ gxy[_t]* M_ Gamxxz[_t]+ M_ gyy[_t]* M_ Gamyxz[_t]+ M_ gyz[_t]* M_ Gamzxz[_t]; + M_ gyyy[_t]= M_ gxy[_t]* M_ Gamxyy[_t]+ M_ gyy[_t]* M_ Gamyyy[_t]+ M_ gyz[_t]* M_ Gamzyy[_t]; + M_ gyzy[_t]= M_ gxy[_t]* M_ Gamxyz[_t]+ M_ gyy[_t]* M_ Gamyyz[_t]+ M_ gyz[_t]* M_ Gamzyz[_t]; + M_ gzzy[_t]= M_ gxy[_t]* M_ Gamxzz[_t]+ M_ gyy[_t]* M_ Gamyzz[_t]+ M_ gyz[_t]* M_ Gamzzz[_t]; + M_ gxxz[_t]= M_ gxz[_t]* M_ Gamxxx [_t]+ M_ gyz[_t]* M_ Gamyxx [_t]+ M_ gzz[_t]* M_ Gamzxx[_t]; + M_ gxyz[_t]= M_ gxz[_t]* M_ Gamxxy[_t]+ M_ gyz[_t]* M_ Gamyxy[_t]+ M_ gzz[_t]* M_ Gamzxy[_t]; + M_ gxzz[_t]= M_ gxz[_t]* M_ Gamxxz[_t]+ M_ gyz[_t]* M_ Gamyxz[_t]+ M_ gzz[_t]* M_ Gamzxz[_t]; + M_ gyyz[_t]= M_ gxz[_t]* M_ Gamxyy[_t]+ M_ gyz[_t]* M_ Gamyyy[_t]+ M_ gzz[_t]* M_ Gamzyy[_t]; + M_ gyzz[_t]= M_ gxz[_t]* M_ Gamxyz[_t]+ M_ gyz[_t]* M_ Gamyyz[_t]+ M_ gzz[_t]* M_ Gamzyz[_t]; + M_ gzzz[_t]= M_ gxz[_t]* M_ Gamxzz[_t]+ M_ gyz[_t]* M_ Gamyzz[_t]+ M_ gzz[_t]* M_ Gamzzz[_t]; + + + _t += STEP_SIZE; + } +} + +__global__ void compute_rhs_ss_part4() +{ + int _t = blockIdx.x*blockDim.x+threadIdx.x; + while(_t < _3D_SIZE[0]) + { + M_ Rxx [_t]= - HALF *M_ Rxx [_t] + + M_ gxx [_t]* M_ Gamxx[_t] +M_ gxy[_t]* M_ Gamyx [_t] + M_ gxz[_t]* M_ Gamzx [_t]+ + M_ Gamxa[_t]*M_ gxxx [_t]+ M_ Gamya[_t]*M_ gxyx [_t]+ M_ Gamza[_t]*M_ gxzx [_t] + + M_ gupxx [_t]*( + 2*(M_ Gamxxx [_t]*M_ gxxx [_t]+ M_ Gamyxx [_t]*M_ gxyx [_t]+ M_ Gamzxx [_t]*M_ gxzx[_t]) + + M_ Gamxxx [_t]*M_ gxxx [_t]+ M_ Gamyxx [_t]*M_ gxxy[_t]+ M_ Gamzxx [_t]*M_ gxxz[_t])+ + M_ gupxy[_t]*( + 2*(M_ Gamxxx [_t]*M_ gxyx [_t]+ M_ Gamyxx [_t]*M_ gyyx [_t]+ M_ Gamzxx [_t]*M_ gyzx [_t] + + M_ Gamxxy[_t]*M_ gxxx [_t]+ M_ Gamyxy[_t]*M_ gxyx [_t]+ M_ Gamzxy[_t]*M_ gxzx[_t]) + + M_ Gamxxy[_t]*M_ gxxx [_t]+ M_ Gamyxy[_t]*M_ gxxy[_t]+ M_ Gamzxy[_t]*M_ gxxz[_t] + + M_ Gamxxx [_t]*M_ gxyx [_t]+ M_ Gamyxx [_t]*M_ gxyy[_t]+ M_ Gamzxx [_t]*M_ gxyz[_t])+ + M_ gupxz[_t]*( + 2*(M_ Gamxxx [_t]*M_ gxzx [_t]+ M_ Gamyxx [_t]*M_ gyzx [_t]+ M_ Gamzxx [_t]*M_ gzzx [_t] + + M_ Gamxxz[_t]*M_ gxxx [_t]+ M_ Gamyxz[_t]*M_ gxyx [_t]+ M_ Gamzxz[_t]*M_ gxzx[_t]) + + M_ Gamxxz[_t]*M_ gxxx [_t]+ M_ Gamyxz[_t]*M_ gxxy[_t]+ M_ Gamzxz[_t]*M_ gxxz[_t] + + M_ Gamxxx [_t]*M_ gxzx [_t]+ M_ Gamyxx [_t]*M_ gxzy[_t]+ M_ Gamzxx [_t]*M_ gxzz[_t])+ + M_ gupyy[_t]*( + 2*(M_ Gamxxy[_t]*M_ gxyx [_t]+ M_ Gamyxy[_t]*M_ gyyx [_t]+ M_ Gamzxy[_t]*M_ gyzx[_t]) + + M_ Gamxxy[_t]*M_ gxyx [_t]+ M_ Gamyxy[_t]*M_ gxyy[_t]+ M_ Gamzxy[_t]*M_ gxyz[_t])+ + M_ gupyz[_t]*( + 2*(M_ Gamxxy[_t]*M_ gxzx [_t]+ M_ Gamyxy[_t]*M_ gyzx [_t]+ M_ Gamzxy[_t]*M_ gzzx [_t] + + M_ Gamxxz[_t]*M_ gxyx [_t]+ M_ Gamyxz[_t]*M_ gyyx [_t]+ M_ Gamzxz[_t]*M_ gyzx[_t]) + + M_ Gamxxz[_t]*M_ gxyx [_t]+ M_ Gamyxz[_t]*M_ gxyy[_t]+ M_ Gamzxz[_t]*M_ gxyz[_t] + + M_ Gamxxy[_t]*M_ gxzx [_t]+ M_ Gamyxy[_t]*M_ gxzy[_t]+ M_ Gamzxy[_t]*M_ gxzz[_t])+ + M_ gupzz[_t]*( + 2*(M_ Gamxxz[_t]*M_ gxzx [_t]+ M_ Gamyxz[_t]*M_ gyzx [_t]+ M_ Gamzxz[_t]*M_ gzzx[_t]) + + M_ Gamxxz[_t]*M_ gxzx [_t]+ M_ Gamyxz[_t]*M_ gxzy[_t]+ M_ Gamzxz[_t]*M_ gxzz[_t]); + + M_ Ryy[_t]= - HALF *M_ Ryy[_t] + + M_ gxy[_t]* M_ Gamxy[_t]+ M_ gyy[_t]* M_ Gamyy[_t] + M_ gyz[_t]* M_ Gamzy[_t] + + M_ Gamxa[_t]*M_ gxyy[_t]+ M_ Gamya[_t]*M_ gyyy[_t]+ M_ Gamza[_t]*M_ gyzy[_t] + + M_ gupxx [_t]*( + 2*(M_ Gamxxy[_t]*M_ gxxy[_t]+ M_ Gamyxy[_t]*M_ gxyy[_t]+ M_ Gamzxy[_t]*M_ gxzy[_t]) + + M_ Gamxxy[_t]*M_ gxyx [_t]+ M_ Gamyxy[_t]*M_ gxyy[_t]+ M_ Gamzxy[_t]*M_ gxyz[_t])+ + M_ gupxy[_t]*( + 2*(M_ Gamxxy[_t]*M_ gxyy[_t]+ M_ Gamyxy[_t]*M_ gyyy[_t]+ M_ Gamzxy[_t]*M_ gyzy[_t] + + M_ Gamxyy[_t]*M_ gxxy[_t]+ M_ Gamyyy[_t]*M_ gxyy[_t]+ M_ Gamzyy[_t]*M_ gxzy[_t]) + + M_ Gamxyy[_t]*M_ gxyx [_t]+ M_ Gamyyy[_t]*M_ gxyy[_t]+ M_ Gamzyy[_t]*M_ gxyz[_t] + + M_ Gamxxy[_t]*M_ gyyx [_t]+ M_ Gamyxy[_t]*M_ gyyy[_t]+ M_ Gamzxy[_t]*M_ gyyz[_t])+ + M_ gupxz[_t]*( + 2*(M_ Gamxxy[_t]*M_ gxzy[_t]+ M_ Gamyxy[_t]*M_ gyzy[_t]+ M_ Gamzxy[_t]*M_ gzzy[_t] + + M_ Gamxyz[_t]*M_ gxxy[_t]+ M_ Gamyyz[_t]*M_ gxyy[_t]+ M_ Gamzyz[_t]*M_ gxzy[_t]) + + M_ Gamxyz[_t]*M_ gxyx [_t]+ M_ Gamyyz[_t]*M_ gxyy[_t]+ M_ Gamzyz[_t]*M_ gxyz[_t] + + M_ Gamxxy[_t]*M_ gyzx [_t]+ M_ Gamyxy[_t]*M_ gyzy[_t]+ M_ Gamzxy[_t]*M_ gyzz[_t])+ + M_ gupyy[_t]*( + 2*(M_ Gamxyy[_t]*M_ gxyy[_t]+ M_ Gamyyy[_t]*M_ gyyy[_t]+ M_ Gamzyy[_t]*M_ gyzy[_t]) + + M_ Gamxyy[_t]*M_ gyyx [_t]+ M_ Gamyyy[_t]*M_ gyyy[_t]+ M_ Gamzyy[_t]*M_ gyyz[_t])+ + M_ gupyz[_t]*( + 2*(M_ Gamxyy[_t]*M_ gxzy[_t]+ M_ Gamyyy[_t]*M_ gyzy[_t]+ M_ Gamzyy[_t]*M_ gzzy[_t] + + M_ Gamxyz[_t]*M_ gxyy[_t]+ M_ Gamyyz[_t]*M_ gyyy[_t]+ M_ Gamzyz[_t]*M_ gyzy[_t]) + + M_ Gamxyz[_t]*M_ gyyx [_t]+ M_ Gamyyz[_t]*M_ gyyy[_t]+ M_ Gamzyz[_t]*M_ gyyz[_t] + + M_ Gamxyy[_t]*M_ gyzx [_t]+ M_ Gamyyy[_t]*M_ gyzy[_t]+ M_ Gamzyy[_t]*M_ gyzz[_t])+ + M_ gupzz[_t]*( + 2*(M_ Gamxyz[_t]*M_ gxzy[_t]+ M_ Gamyyz[_t]*M_ gyzy[_t]+ M_ Gamzyz[_t]*M_ gzzy[_t]) + + M_ Gamxyz[_t]*M_ gyzx [_t]+ M_ Gamyyz[_t]*M_ gyzy[_t]+ M_ Gamzyz[_t]*M_ gyzz[_t]); + + M_ Rzz[_t]= - HALF *M_ Rzz[_t] + + M_ gxz[_t]* M_ Gamxz[_t] +M_ gyz[_t]* M_ Gamyz[_t] + M_ gzz[_t]* M_ Gamzz[_t] + + M_ Gamxa[_t]*M_ gxzz[_t]+ M_ Gamya[_t]*M_ gyzz[_t]+ M_ Gamza[_t]*M_ gzzz[_t] + + M_ gupxx [_t]*( + 2*(M_ Gamxxz[_t]*M_ gxxz[_t]+ M_ Gamyxz[_t]*M_ gxyz[_t]+ M_ Gamzxz[_t]*M_ gxzz[_t]) + + M_ Gamxxz[_t]*M_ gxzx [_t]+ M_ Gamyxz[_t]*M_ gxzy[_t]+ M_ Gamzxz[_t]*M_ gxzz[_t])+ + M_ gupxy[_t]*( + 2*(M_ Gamxxz[_t]*M_ gxyz[_t]+ M_ Gamyxz[_t]*M_ gyyz[_t]+ M_ Gamzxz[_t]*M_ gyzz[_t] + + M_ Gamxyz[_t]*M_ gxxz[_t]+ M_ Gamyyz[_t]*M_ gxyz[_t]+ M_ Gamzyz[_t]*M_ gxzz[_t]) + + M_ Gamxyz[_t]*M_ gxzx [_t]+ M_ Gamyyz[_t]*M_ gxzy[_t]+ M_ Gamzyz[_t]*M_ gxzz[_t] + + M_ Gamxxz[_t]*M_ gyzx [_t]+ M_ Gamyxz[_t]*M_ gyzy[_t]+ M_ Gamzxz[_t]*M_ gyzz[_t])+ + M_ gupxz[_t]*( + 2*(M_ Gamxxz[_t]*M_ gxzz[_t]+ M_ Gamyxz[_t]*M_ gyzz[_t]+ M_ Gamzxz[_t]*M_ gzzz[_t] + + M_ Gamxzz[_t]*M_ gxxz[_t]+ M_ Gamyzz[_t]*M_ gxyz[_t]+ M_ Gamzzz[_t]*M_ gxzz[_t]) + + M_ Gamxzz[_t]*M_ gxzx [_t]+ M_ Gamyzz[_t]*M_ gxzy[_t]+ M_ Gamzzz[_t]*M_ gxzz[_t] + + M_ Gamxxz[_t]*M_ gzzx [_t]+ M_ Gamyxz[_t]*M_ gzzy[_t]+ M_ Gamzxz[_t]*M_ gzzz[_t])+ + M_ gupyy[_t]*( + 2*(M_ Gamxyz[_t]*M_ gxyz[_t]+ M_ Gamyyz[_t]*M_ gyyz[_t]+ M_ Gamzyz[_t]*M_ gyzz[_t]) + + M_ Gamxyz[_t]*M_ gyzx [_t]+ M_ Gamyyz[_t]*M_ gyzy[_t]+ M_ Gamzyz[_t]*M_ gyzz[_t])+ + M_ gupyz[_t]*( + 2*(M_ Gamxyz[_t]*M_ gxzz[_t]+ M_ Gamyyz[_t]*M_ gyzz[_t]+ M_ Gamzyz[_t]*M_ gzzz[_t] + + M_ Gamxzz[_t]*M_ gxyz[_t]+ M_ Gamyzz[_t]*M_ gyyz[_t]+ M_ Gamzzz[_t]*M_ gyzz[_t]) + + M_ Gamxzz[_t]*M_ gyzx [_t]+ M_ Gamyzz[_t]*M_ gyzy[_t]+ M_ Gamzzz[_t]*M_ gyzz[_t] + + M_ Gamxyz[_t]*M_ gzzx [_t]+ M_ Gamyyz[_t]*M_ gzzy[_t]+ M_ Gamzyz[_t]*M_ gzzz[_t])+ + M_ gupzz[_t]*( + 2*(M_ Gamxzz[_t]*M_ gxzz[_t]+ M_ Gamyzz[_t]*M_ gyzz[_t]+ M_ Gamzzz[_t]*M_ gzzz[_t]) + + M_ Gamxzz[_t]*M_ gzzx [_t]+ M_ Gamyzz[_t]*M_ gzzy[_t]+ M_ Gamzzz[_t]*M_ gzzz[_t]); + + M_ Rxy[_t]= HALF*( -M_ Rxy[_t] + + M_ gxx [_t]* M_ Gamxy[_t]+ M_ gxy[_t]* M_ Gamyy[_t]+M_ gxz[_t]* M_ Gamzy[_t] + + M_ gxy[_t]* M_ Gamxx [_t]+ M_ gyy[_t]* M_ Gamyx [_t]+M_ gyz[_t]* M_ Gamzx [_t] + + M_ Gamxa[_t]*M_ gxyx [_t]+ M_ Gamya[_t]*M_ gyyx [_t]+ M_ Gamza[_t]*M_ gyzx [_t] + + M_ Gamxa[_t]*M_ gxxy[_t]+ M_ Gamya[_t]*M_ gxyy[_t]+ M_ Gamza[_t]*M_ gxzy[_t])+ + M_ gupxx [_t]*( + M_ Gamxxx [_t]*M_ gxxy[_t]+ M_ Gamyxx [_t]*M_ gxyy[_t]+ M_ Gamzxx [_t]*M_ gxzy[_t] + + M_ Gamxxy[_t]*M_ gxxx [_t]+ M_ Gamyxy[_t]*M_ gxyx [_t]+ M_ Gamzxy[_t]*M_ gxzx [_t] + + M_ Gamxxx [_t]*M_ gxyx [_t]+ M_ Gamyxx [_t]*M_ gxyy[_t]+ M_ Gamzxx [_t]*M_ gxyz[_t])+ + M_ gupxy[_t]*( + M_ Gamxxx [_t]*M_ gxyy[_t]+ M_ Gamyxx [_t]*M_ gyyy[_t]+ M_ Gamzxx [_t]*M_ gyzy[_t] + + M_ Gamxxy[_t]*M_ gxyx [_t]+ M_ Gamyxy[_t]*M_ gyyx [_t]+ M_ Gamzxy[_t]*M_ gyzx [_t] + + M_ Gamxxy[_t]*M_ gxyx [_t]+ M_ Gamyxy[_t]*M_ gxyy[_t]+ M_ Gamzxy[_t]*M_ gxyz[_t] + + M_ Gamxxy[_t]*M_ gxxy[_t]+ M_ Gamyxy[_t]*M_ gxyy[_t]+ M_ Gamzxy[_t]*M_ gxzy[_t] + + M_ Gamxyy[_t]*M_ gxxx [_t]+ M_ Gamyyy[_t]*M_ gxyx [_t]+ M_ Gamzyy[_t]*M_ gxzx [_t] + + M_ Gamxxx [_t]*M_ gyyx [_t]+ M_ Gamyxx [_t]*M_ gyyy[_t]+ M_ Gamzxx [_t]*M_ gyyz[_t])+ + M_ gupxz[_t]*( + M_ Gamxxx [_t]*M_ gxzy[_t]+ M_ Gamyxx [_t]*M_ gyzy[_t]+ M_ Gamzxx [_t]*M_ gzzy[_t] + + M_ Gamxxy[_t]*M_ gxzx [_t]+ M_ Gamyxy[_t]*M_ gyzx [_t]+ M_ Gamzxy[_t]*M_ gzzx [_t] + + M_ Gamxxz[_t]*M_ gxyx [_t]+ M_ Gamyxz[_t]*M_ gxyy[_t]+ M_ Gamzxz[_t]*M_ gxyz[_t] + + M_ Gamxxz[_t]*M_ gxxy[_t]+ M_ Gamyxz[_t]*M_ gxyy[_t]+ M_ Gamzxz[_t]*M_ gxzy[_t] + + M_ Gamxyz[_t]*M_ gxxx [_t]+ M_ Gamyyz[_t]*M_ gxyx [_t]+ M_ Gamzyz[_t]*M_ gxzx [_t] + + M_ Gamxxx [_t]*M_ gyzx [_t]+ M_ Gamyxx [_t]*M_ gyzy[_t]+ M_ Gamzxx [_t]*M_ gyzz[_t])+ + M_ gupyy[_t]*( + M_ Gamxxy[_t]*M_ gxyy[_t]+ M_ Gamyxy[_t]*M_ gyyy[_t]+ M_ Gamzxy[_t]*M_ gyzy[_t] + + M_ Gamxyy[_t]*M_ gxyx [_t]+ M_ Gamyyy[_t]*M_ gyyx [_t]+ M_ Gamzyy[_t]*M_ gyzx [_t] + + M_ Gamxxy[_t]*M_ gyyx [_t]+ M_ Gamyxy[_t]*M_ gyyy[_t]+ M_ Gamzxy[_t]*M_ gyyz[_t])+ + M_ gupyz[_t]*( + M_ Gamxxy[_t]*M_ gxzy[_t]+ M_ Gamyxy[_t]*M_ gyzy[_t]+ M_ Gamzxy[_t]*M_ gzzy[_t] + + M_ Gamxyy[_t]*M_ gxzx [_t]+ M_ Gamyyy[_t]*M_ gyzx [_t]+ M_ Gamzyy[_t]*M_ gzzx [_t] + + M_ Gamxxz[_t]*M_ gyyx [_t]+ M_ Gamyxz[_t]*M_ gyyy[_t]+ M_ Gamzxz[_t]*M_ gyyz[_t] + + M_ Gamxxz[_t]*M_ gxyy[_t]+ M_ Gamyxz[_t]*M_ gyyy[_t]+ M_ Gamzxz[_t]*M_ gyzy[_t] + + M_ Gamxyz[_t]*M_ gxyx [_t]+ M_ Gamyyz[_t]*M_ gyyx [_t]+ M_ Gamzyz[_t]*M_ gyzx [_t] + + M_ Gamxxy[_t]*M_ gyzx [_t]+ M_ Gamyxy[_t]*M_ gyzy[_t]+ M_ Gamzxy[_t]*M_ gyzz[_t])+ + M_ gupzz[_t]*( + M_ Gamxxz[_t]*M_ gxzy[_t]+ M_ Gamyxz[_t]*M_ gyzy[_t]+ M_ Gamzxz[_t]*M_ gzzy[_t] + + M_ Gamxyz[_t]*M_ gxzx [_t]+ M_ Gamyyz[_t]*M_ gyzx [_t]+ M_ Gamzyz[_t]*M_ gzzx [_t] + + M_ Gamxxz[_t]*M_ gyzx [_t]+ M_ Gamyxz[_t]*M_ gyzy[_t]+ M_ Gamzxz[_t]*M_ gyzz[_t]); + + M_ Rxz[_t]= HALF*( -M_ Rxz[_t] + + M_ gxx [_t]* M_ Gamxz[_t]+ M_ gxy[_t]* M_ Gamyz[_t]+M_ gxz[_t]* M_ Gamzz[_t] + + M_ gxz[_t]* M_ Gamxx [_t]+ M_ gyz[_t]* M_ Gamyx [_t]+M_ gzz[_t]* M_ Gamzx [_t] + + M_ Gamxa[_t]*M_ gxzx [_t]+ M_ Gamya[_t]*M_ gyzx [_t]+ M_ Gamza[_t]*M_ gzzx [_t] + + M_ Gamxa[_t]*M_ gxxz[_t]+ M_ Gamya[_t]*M_ gxyz[_t]+ M_ Gamza[_t]*M_ gxzz[_t])+ + M_ gupxx [_t]*( + M_ Gamxxx [_t]*M_ gxxz[_t]+ M_ Gamyxx [_t]*M_ gxyz[_t]+ M_ Gamzxx [_t]*M_ gxzz[_t] + + M_ Gamxxz[_t]*M_ gxxx [_t]+ M_ Gamyxz[_t]*M_ gxyx [_t]+ M_ Gamzxz[_t]*M_ gxzx [_t] + + M_ Gamxxx [_t]*M_ gxzx [_t]+ M_ Gamyxx [_t]*M_ gxzy[_t]+ M_ Gamzxx [_t]*M_ gxzz[_t])+ + M_ gupxy[_t]*( + M_ Gamxxx [_t]*M_ gxyz[_t]+ M_ Gamyxx [_t]*M_ gyyz[_t]+ M_ Gamzxx [_t]*M_ gyzz[_t] + + M_ Gamxxz[_t]*M_ gxyx [_t]+ M_ Gamyxz[_t]*M_ gyyx [_t]+ M_ Gamzxz[_t]*M_ gyzx [_t] + + M_ Gamxxy[_t]*M_ gxzx [_t]+ M_ Gamyxy[_t]*M_ gxzy[_t]+ M_ Gamzxy[_t]*M_ gxzz[_t] + + M_ Gamxxy[_t]*M_ gxxz[_t]+ M_ Gamyxy[_t]*M_ gxyz[_t]+ M_ Gamzxy[_t]*M_ gxzz[_t] + + M_ Gamxyz[_t]*M_ gxxx [_t]+ M_ Gamyyz[_t]*M_ gxyx [_t]+ M_ Gamzyz[_t]*M_ gxzx [_t] + + M_ Gamxxx [_t]*M_ gyzx [_t]+ M_ Gamyxx [_t]*M_ gyzy[_t]+ M_ Gamzxx [_t]*M_ gyzz[_t])+ + M_ gupxz[_t]*( + M_ Gamxxx [_t]*M_ gxzz[_t]+ M_ Gamyxx [_t]*M_ gyzz[_t]+ M_ Gamzxx [_t]*M_ gzzz[_t] + + M_ Gamxxz[_t]*M_ gxzx [_t]+ M_ Gamyxz[_t]*M_ gyzx [_t]+ M_ Gamzxz[_t]*M_ gzzx [_t] + + M_ Gamxxz[_t]*M_ gxzx [_t]+ M_ Gamyxz[_t]*M_ gxzy[_t]+ M_ Gamzxz[_t]*M_ gxzz[_t] + + M_ Gamxxz[_t]*M_ gxxz[_t]+ M_ Gamyxz[_t]*M_ gxyz[_t]+ M_ Gamzxz[_t]*M_ gxzz[_t] + + M_ Gamxzz[_t]*M_ gxxx [_t]+ M_ Gamyzz[_t]*M_ gxyx [_t]+ M_ Gamzzz[_t]*M_ gxzx [_t] + + M_ Gamxxx [_t]*M_ gzzx [_t]+ M_ Gamyxx [_t]*M_ gzzy[_t]+ M_ Gamzxx [_t]*M_ gzzz[_t])+ + M_ gupyy[_t]*( + M_ Gamxxy[_t]*M_ gxyz[_t]+ M_ Gamyxy[_t]*M_ gyyz[_t]+ M_ Gamzxy[_t]*M_ gyzz[_t] + + M_ Gamxyz[_t]*M_ gxyx [_t]+ M_ Gamyyz[_t]*M_ gyyx [_t]+ M_ Gamzyz[_t]*M_ gyzx [_t] + + M_ Gamxxy[_t]*M_ gyzx [_t]+ M_ Gamyxy[_t]*M_ gyzy[_t]+ M_ Gamzxy[_t]*M_ gyzz[_t])+ + M_ gupyz[_t]*( + M_ Gamxxy[_t]*M_ gxzz[_t]+ M_ Gamyxy[_t]*M_ gyzz[_t]+ M_ Gamzxy[_t]*M_ gzzz[_t] + + M_ Gamxyz[_t]*M_ gxzx [_t]+ M_ Gamyyz[_t]*M_ gyzx [_t]+ M_ Gamzyz[_t]*M_ gzzx [_t] + + M_ Gamxxz[_t]*M_ gyzx [_t]+ M_ Gamyxz[_t]*M_ gyzy[_t]+ M_ Gamzxz[_t]*M_ gyzz[_t] + + M_ Gamxxz[_t]*M_ gxyz[_t]+ M_ Gamyxz[_t]*M_ gyyz[_t]+ M_ Gamzxz[_t]*M_ gyzz[_t] + + M_ Gamxzz[_t]*M_ gxyx [_t]+ M_ Gamyzz[_t]*M_ gyyx [_t]+ M_ Gamzzz[_t]*M_ gyzx [_t] + + M_ Gamxxy[_t]*M_ gzzx [_t]+ M_ Gamyxy[_t]*M_ gzzy[_t]+ M_ Gamzxy[_t]*M_ gzzz[_t])+ + M_ gupzz[_t]*( + M_ Gamxxz[_t]*M_ gxzz[_t]+ M_ Gamyxz[_t]*M_ gyzz[_t]+ M_ Gamzxz[_t]*M_ gzzz[_t] + + M_ Gamxzz[_t]*M_ gxzx [_t]+ M_ Gamyzz[_t]*M_ gyzx [_t]+ M_ Gamzzz[_t]*M_ gzzx [_t] + + M_ Gamxxz[_t]*M_ gzzx [_t]+ M_ Gamyxz[_t]*M_ gzzy[_t]+ M_ Gamzxz[_t]*M_ gzzz[_t]); + + M_ Ryz[_t]= HALF*( -M_ Ryz[_t] + + M_ gxy[_t]* M_ Gamxz[_t]+M_ gyy[_t]* M_ Gamyz[_t]+M_ gyz[_t]* M_ Gamzz[_t] + + M_ gxz[_t]* M_ Gamxy[_t]+M_ gyz[_t]* M_ Gamyy[_t]+M_ gzz[_t]* M_ Gamzy[_t] + + M_ Gamxa[_t]*M_ gxzy[_t]+ M_ Gamya[_t]*M_ gyzy[_t]+ M_ Gamza[_t]*M_ gzzy[_t] + + M_ Gamxa[_t]*M_ gxyz[_t]+ M_ Gamya[_t]*M_ gyyz[_t]+ M_ Gamza[_t]*M_ gyzz[_t])+ + M_ gupxx [_t]*( + M_ Gamxxy[_t]*M_ gxxz[_t]+ M_ Gamyxy[_t]*M_ gxyz[_t]+ M_ Gamzxy[_t]*M_ gxzz[_t] + + M_ Gamxxz[_t]*M_ gxxy[_t]+ M_ Gamyxz[_t]*M_ gxyy[_t]+ M_ Gamzxz[_t]*M_ gxzy[_t] + + M_ Gamxxy[_t]*M_ gxzx [_t]+ M_ Gamyxy[_t]*M_ gxzy[_t]+ M_ Gamzxy[_t]*M_ gxzz[_t])+ + M_ gupxy[_t]*( + M_ Gamxxy[_t]*M_ gxyz[_t]+ M_ Gamyxy[_t]*M_ gyyz[_t]+ M_ Gamzxy[_t]*M_ gyzz[_t] + + M_ Gamxxz[_t]*M_ gxyy[_t]+ M_ Gamyxz[_t]*M_ gyyy[_t]+ M_ Gamzxz[_t]*M_ gyzy[_t] + + M_ Gamxyy[_t]*M_ gxzx [_t]+ M_ Gamyyy[_t]*M_ gxzy[_t]+ M_ Gamzyy[_t]*M_ gxzz[_t] + + M_ Gamxyy[_t]*M_ gxxz[_t]+ M_ Gamyyy[_t]*M_ gxyz[_t]+ M_ Gamzyy[_t]*M_ gxzz[_t] + + M_ Gamxyz[_t]*M_ gxxy[_t]+ M_ Gamyyz[_t]*M_ gxyy[_t]+ M_ Gamzyz[_t]*M_ gxzy[_t] + + M_ Gamxxy[_t]*M_ gyzx [_t]+ M_ Gamyxy[_t]*M_ gyzy[_t]+ M_ Gamzxy[_t]*M_ gyzz[_t])+ + M_ gupxz[_t]*( + M_ Gamxxy[_t]*M_ gxzz[_t]+ M_ Gamyxy[_t]*M_ gyzz[_t]+ M_ Gamzxy[_t]*M_ gzzz[_t] + + M_ Gamxxz[_t]*M_ gxzy[_t]+ M_ Gamyxz[_t]*M_ gyzy[_t]+ M_ Gamzxz[_t]*M_ gzzy[_t] + + M_ Gamxyz[_t]*M_ gxzx [_t]+ M_ Gamyyz[_t]*M_ gxzy[_t]+ M_ Gamzyz[_t]*M_ gxzz[_t] + + M_ Gamxyz[_t]*M_ gxxz[_t]+ M_ Gamyyz[_t]*M_ gxyz[_t]+ M_ Gamzyz[_t]*M_ gxzz[_t] + + M_ Gamxzz[_t]*M_ gxxy[_t]+ M_ Gamyzz[_t]*M_ gxyy[_t]+ M_ Gamzzz[_t]*M_ gxzy[_t] + + M_ Gamxxy[_t]*M_ gzzx [_t]+ M_ Gamyxy[_t]*M_ gzzy[_t]+ M_ Gamzxy[_t]*M_ gzzz[_t])+ + M_ gupyy[_t]*( + M_ Gamxyy[_t]*M_ gxyz[_t]+ M_ Gamyyy[_t]*M_ gyyz[_t]+ M_ Gamzyy[_t]*M_ gyzz[_t] + + M_ Gamxyz[_t]*M_ gxyy[_t]+ M_ Gamyyz[_t]*M_ gyyy[_t]+ M_ Gamzyz[_t]*M_ gyzy[_t] + + M_ Gamxyy[_t]*M_ gyzx [_t]+ M_ Gamyyy[_t]*M_ gyzy[_t]+ M_ Gamzyy[_t]*M_ gyzz[_t])+ + M_ gupyz[_t]*( + M_ Gamxyy[_t]*M_ gxzz[_t]+ M_ Gamyyy[_t]*M_ gyzz[_t]+ M_ Gamzyy[_t]*M_ gzzz[_t] + + M_ Gamxyz[_t]*M_ gxzy[_t]+ M_ Gamyyz[_t]*M_ gyzy[_t]+ M_ Gamzyz[_t]*M_ gzzy[_t] + + M_ Gamxyz[_t]*M_ gyzx [_t]+ M_ Gamyyz[_t]*M_ gyzy[_t]+ M_ Gamzyz[_t]*M_ gyzz[_t] + + M_ Gamxyz[_t]*M_ gxyz[_t]+ M_ Gamyyz[_t]*M_ gyyz[_t]+ M_ Gamzyz[_t]*M_ gyzz[_t] + + M_ Gamxzz[_t]*M_ gxyy[_t]+ M_ Gamyzz[_t]*M_ gyyy[_t]+ M_ Gamzzz[_t]*M_ gyzy[_t] + + M_ Gamxyy[_t]*M_ gzzx [_t]+ M_ Gamyyy[_t]*M_ gzzy[_t]+ M_ Gamzyy[_t]*M_ gzzz[_t])+ + M_ gupzz[_t]*( + M_ Gamxyz[_t]*M_ gxzz[_t]+ M_ Gamyyz[_t]*M_ gyzz[_t]+ M_ Gamzyz[_t]*M_ gzzz[_t] + + M_ Gamxzz[_t]*M_ gxzy[_t]+ M_ Gamyzz[_t]*M_ gyzy[_t]+ M_ Gamzzz[_t]*M_ gzzy[_t] + + M_ Gamxyz[_t]*M_ gzzx [_t]+ M_ Gamyyz[_t]*M_ gzzy[_t]+ M_ Gamzyz[_t]*M_ gzzz[_t]); + + _t += STEP_SIZE; + } +} +__global__ void compute_rhs_ss_part5() +{ + int _t = blockIdx.x*blockDim.x+threadIdx.x; + while(_t < _3D_SIZE[0]) + { + M_ fxx [_t]=M_ fxx [_t]- M_ Gamxxx [_t]* M_ chix [_t]- M_ Gamyxx [_t]* M_ chiy[_t]- M_ Gamzxx [_t]* M_ chiz[_t]; + M_ fxy[_t]=M_ fxy[_t]- M_ Gamxxy[_t]* M_ chix [_t]- M_ Gamyxy[_t]* M_ chiy[_t]- M_ Gamzxy[_t]* M_ chiz[_t]; + M_ fxz[_t]=M_ fxz[_t]- M_ Gamxxz[_t]* M_ chix [_t]- M_ Gamyxz[_t]* M_ chiy[_t]- M_ Gamzxz[_t]* M_ chiz[_t]; + M_ fyy[_t]=M_ fyy[_t]- M_ Gamxyy[_t]* M_ chix [_t]- M_ Gamyyy[_t]* M_ chiy[_t]- M_ Gamzyy[_t]* M_ chiz[_t]; + M_ fyz[_t]=M_ fyz[_t]- M_ Gamxyz[_t]* M_ chix [_t]- M_ Gamyyz[_t]* M_ chiy[_t]- M_ Gamzyz[_t]* M_ chiz[_t]; + M_ fzz[_t]=M_ fzz[_t]- M_ Gamxzz[_t]* M_ chix [_t]- M_ Gamyzz[_t]* M_ chiy[_t]- M_ Gamzzz[_t]* M_ chiz[_t]; + // M_ Store D^l D_l M_ chi - 3/(2*M_ chi) D^l M_ chi D_l M_ chi inM_ f[_t] + + M_ f[_t] = M_ gupxx [_t]* (M_ fxx [_t]- F3o2/M_ chin1[_t] * M_ chix [_t]* M_ chix [_t]) + + M_ gupyy[_t]* (M_ fyy[_t]- F3o2/M_ chin1[_t] * M_ chiy[_t]* M_ chiy[_t]) + + M_ gupzz[_t]* (M_ fzz[_t]- F3o2/M_ chin1[_t] * M_ chiz[_t]* M_ chiz[_t]) + + 2 *M_ gupxy[_t]* (M_ fxy[_t]- F3o2/M_ chin1[_t] * M_ chix [_t]* M_ chiy[_t]) + + 2 *M_ gupxz[_t]* (M_ fxz[_t]- F3o2/M_ chin1[_t] * M_ chix [_t]* M_ chiz[_t]) + + 2 *M_ gupyz[_t]* (M_ fyz[_t]- F3o2/M_ chin1[_t] * M_ chiy[_t]* M_ chiz[_t]); + // M_ Add M_ chi part toM_ Ricci tensor: + + M_ Rxx [_t]=M_ Rxx [_t]+ (M_ fxx [_t]- M_ chix[_t]*M_ chix[_t]/M_ chin1[_t]/2 +M_ gxx [_t]*M_ f[_t])/M_ chin1[_t]/2; + M_ Ryy[_t]=M_ Ryy[_t]+ (M_ fyy[_t]- M_ chiy[_t]*M_ chiy[_t]/M_ chin1[_t]/2 +M_ gyy[_t]*M_ f[_t])/M_ chin1[_t]/2; + M_ Rzz[_t]=M_ Rzz[_t]+ (M_ fzz[_t]- M_ chiz[_t]*M_ chiz[_t]/M_ chin1[_t]/2 +M_ gzz[_t]*M_ f[_t])/M_ chin1[_t]/2; + M_ Rxy[_t]=M_ Rxy[_t]+ (M_ fxy[_t]- M_ chix[_t]*M_ chiy[_t]/M_ chin1[_t]/2 +M_ gxy[_t]*M_ f[_t])/M_ chin1[_t]/2; + M_ Rxz[_t]=M_ Rxz[_t]+ (M_ fxz[_t]- M_ chix[_t]*M_ chiz[_t]/M_ chin1[_t]/2 +M_ gxz[_t]*M_ f[_t])/M_ chin1[_t]/2; + M_ Ryz[_t]=M_ Ryz[_t]+ (M_ fyz[_t]- M_ chiy[_t]*M_ chiz[_t]/M_ chin1[_t]/2 +M_ gyz[_t]*M_ f[_t])/M_ chin1[_t]/2; + + + _t += STEP_SIZE; + } +} + +__global__ void compute_rhs_ss_part6() +{ + int _t = blockIdx.x*blockDim.x+threadIdx.x; + while(_t < _3D_SIZE[0]) + { + M_ gxxx [_t]= (M_ gupxx [_t]* M_ chix [_t]+M_ gupxy[_t]* M_ chiy[_t]+M_ gupxz[_t]* M_ chiz[_t])/M_ chin1[_t]; + M_ gxxy[_t]= (M_ gupxy[_t]* M_ chix [_t]+M_ gupyy[_t]* M_ chiy[_t]+M_ gupyz[_t]* M_ chiz[_t])/M_ chin1[_t]; + M_ gxxz[_t]= (M_ gupxz[_t]* M_ chix [_t]+M_ gupyz[_t]* M_ chiy[_t]+M_ gupzz[_t]* M_ chiz[_t])/M_ chin1[_t]; + // nowM_ get physical second kind of connection + M_ Gamxxx [_t]= M_ Gamxxx [_t]- ( (M_ chix [_t]+ M_ chix[_t])/M_ chin1[_t] -M_ gxx [_t]*M_ gxxx [_t])*HALF; + M_ Gamyxx [_t]= M_ Gamyxx [_t]- ( -M_ gxx [_t]*M_ gxxy[_t])*HALF; + M_ Gamzxx [_t]= M_ Gamzxx [_t]- ( -M_ gxx [_t]*M_ gxxz[_t])*HALF; + M_ Gamxyy[_t]= M_ Gamxyy[_t]- ( -M_ gyy[_t]*M_ gxxx [_t])*HALF; + M_ Gamyyy[_t]= M_ Gamyyy[_t]- ( (M_ chiy[_t]+ M_ chiy[_t])/M_ chin1[_t] -M_ gyy[_t]*M_ gxxy[_t])*HALF; + M_ Gamzyy[_t]= M_ Gamzyy[_t]- ( -M_ gyy[_t]*M_ gxxz[_t])*HALF; + M_ Gamxzz[_t]= M_ Gamxzz[_t]- ( -M_ gzz[_t]*M_ gxxx [_t])*HALF; + M_ Gamyzz[_t]= M_ Gamyzz[_t]- ( -M_ gzz[_t]*M_ gxxy[_t])*HALF; + M_ Gamzzz[_t]= M_ Gamzzz[_t]- ( (M_ chiz[_t]+ M_ chiz[_t])/M_ chin1[_t] -M_ gzz[_t]*M_ gxxz[_t])*HALF; + M_ Gamxxy[_t]= M_ Gamxxy[_t]- ( M_ chiy[_t] /M_ chin1[_t] -M_ gxy[_t]*M_ gxxx [_t])*HALF; + M_ Gamyxy[_t]= M_ Gamyxy[_t]- ( M_ chix [_t]/M_ chin1[_t] -M_ gxy[_t]*M_ gxxy[_t])*HALF; + M_ Gamzxy[_t]= M_ Gamzxy[_t]- ( -M_ gxy[_t]*M_ gxxz[_t])*HALF; + M_ Gamxxz[_t]= M_ Gamxxz[_t]- ( M_ chiz[_t] /M_ chin1[_t] -M_ gxz[_t]*M_ gxxx [_t])*HALF; + M_ Gamyxz[_t]= M_ Gamyxz[_t]- ( -M_ gxz[_t]*M_ gxxy[_t])*HALF; + M_ Gamzxz[_t]= M_ Gamzxz[_t]- ( M_ chix [_t]/M_ chin1[_t] -M_ gxz[_t]*M_ gxxz[_t])*HALF; + M_ Gamxyz[_t]= M_ Gamxyz[_t]- ( -M_ gyz[_t]*M_ gxxx [_t])*HALF; + M_ Gamyyz[_t]= M_ Gamyyz[_t]- ( M_ chiz[_t] /M_ chin1[_t] -M_ gyz[_t]*M_ gxxy[_t])*HALF; + M_ Gamzyz[_t]= M_ Gamzyz[_t]- ( M_ chiy[_t]/M_ chin1[_t] -M_ gyz[_t]*M_ gxxz[_t])*HALF; + + M_ fxx [_t]=M_ fxx [_t]- M_ Gamxxx[_t]*M_ Lapx [_t]- M_ Gamyxx[_t]*M_ Lapy[_t]- M_ Gamzxx[_t]*M_ Lapz[_t]; + M_ fyy[_t]=M_ fyy[_t]- M_ Gamxyy[_t]*M_ Lapx [_t]- M_ Gamyyy[_t]*M_ Lapy[_t]- M_ Gamzyy[_t]*M_ Lapz[_t]; + M_ fzz[_t]=M_ fzz[_t]- M_ Gamxzz[_t]*M_ Lapx [_t]- M_ Gamyzz[_t]*M_ Lapy[_t]- M_ Gamzzz[_t]*M_ Lapz[_t]; + M_ fxy[_t]=M_ fxy[_t]- M_ Gamxxy[_t]*M_ Lapx [_t]- M_ Gamyxy[_t]*M_ Lapy[_t]- M_ Gamzxy[_t]*M_ Lapz[_t]; + M_ fxz[_t]=M_ fxz[_t]- M_ Gamxxz[_t]*M_ Lapx [_t]- M_ Gamyxz[_t]*M_ Lapy[_t]- M_ Gamzxz[_t]*M_ Lapz[_t]; + M_ fyz[_t]=M_ fyz[_t]- M_ Gamxyz[_t]*M_ Lapx [_t]- M_ Gamyyz[_t]*M_ Lapy[_t]- M_ Gamzyz[_t]*M_ Lapz[_t]; + + // store D^i D_i Lap in M_ trK_rhs[_t] upto M_ chi + M_ trK_rhs[_t] = M_ gupxx [_t]*M_ fxx [_t]+M_ gupyy[_t]*M_ fyy[_t]+M_ gupzz[_t]*M_ fzz[_t]+ + 2* (M_ gupxy[_t]*M_ fxy[_t]+M_ gupxz[_t]*M_ fxz[_t]+M_ gupyz[_t]*M_ fyz[_t]); + // M_ Add lapse and M_ S_ij parts toM_ Ricci tensor: + + M_ fxx [_t]= M_ alpn1[_t]* (M_ Rxx [_t]- 8 * PI * M_ Sxx[_t]) -M_ fxx[_t]; + M_ fxy[_t]= M_ alpn1[_t]* (M_ Rxy[_t]- 8 * PI * M_ Sxy[_t]) -M_ fxy[_t]; + M_ fxz[_t]= M_ alpn1[_t]* (M_ Rxz[_t]- 8 * PI * M_ Sxz[_t]) -M_ fxz[_t]; + M_ fyy[_t]= M_ alpn1[_t]* (M_ Ryy[_t]- 8 * PI * M_ Syy[_t]) -M_ fyy[_t]; + M_ fyz[_t]= M_ alpn1[_t]* (M_ Ryz[_t]- 8 * PI * M_ Syz[_t]) -M_ fyz[_t]; + M_ fzz[_t]= M_ alpn1[_t]* (M_ Rzz[_t]- 8 * PI * M_ Szz[_t]) -M_ fzz[_t]; + + // Compute trace-free part (note: M_ chi^-1 and M_ chi cancel//): + + M_ f[_t] = F1o3 *( M_ gupxx [_t]*M_ fxx [_t]+M_ gupyy[_t]*M_ fyy[_t]+M_ gupzz[_t]*M_ fzz[_t]+ + 2* (M_ gupxy[_t]*M_ fxy[_t]+M_ gupxz[_t]*M_ fxz[_t]+M_ gupyz[_t]*M_ fyz[_t]) ); + + M_ Axx_rhs[_t] =M_ fxx [_t]-M_ gxx [_t]*M_ f[_t]; + M_ Ayy_rhs[_t] =M_ fyy[_t]-M_ gyy[_t]*M_ f[_t]; + M_ Azz_rhs[_t] =M_ fzz[_t]-M_ gzz[_t]*M_ f[_t]; + M_ Axy_rhs[_t] =M_ fxy[_t]-M_ gxy[_t]*M_ f[_t]; + M_ Axz_rhs[_t] =M_ fxz[_t]-M_ gxz[_t]*M_ f[_t]; + M_ Ayz_rhs[_t] =M_ fyz[_t]-M_ gyz[_t]*M_ f[_t]; + + // Now: store M_ A_il M_ A^l_j intoM_ fij: + + M_ fxx [_t]= M_ gupxx [_t]* M_ Axx [_t]* M_ Axx [_t]+M_ gupyy[_t]* M_ Axy[_t]* M_ Axy[_t]+M_ gupzz[_t]* M_ Axz[_t]* M_ Axz[_t]+ + 2 * (M_ gupxy[_t]* M_ Axx [_t]* M_ Axy[_t]+M_ gupxz[_t]* M_ Axx [_t]* M_ Axz[_t]+M_ gupyz[_t]* M_ Axy[_t]* M_ Axz[_t]); + + M_ fyy[_t]= M_ gupxx [_t]* M_ Axy[_t]* M_ Axy[_t]+M_ gupyy[_t]* M_ Ayy[_t]* M_ Ayy[_t]+M_ gupzz[_t]* M_ Ayz[_t]* M_ Ayz[_t]+ + 2 * (M_ gupxy[_t]* M_ Axy[_t]* M_ Ayy[_t]+M_ gupxz[_t]* M_ Axy[_t]* M_ Ayz[_t]+M_ gupyz[_t]* M_ Ayy[_t]* M_ Ayz[_t]); + + M_ fzz[_t]= M_ gupxx [_t]* M_ Axz[_t]* M_ Axz[_t]+M_ gupyy[_t]* M_ Ayz[_t]* M_ Ayz[_t]+M_ gupzz[_t]* M_ Azz[_t]* M_ Azz[_t]+ + 2 * (M_ gupxy[_t]* M_ Axz[_t]* M_ Ayz[_t]+M_ gupxz[_t]* M_ Axz[_t]* M_ Azz[_t]+M_ gupyz[_t]* M_ Ayz[_t]* M_ Azz[_t]); + + M_ fxy[_t]= M_ gupxx [_t]* M_ Axx [_t]* M_ Axy[_t]+M_ gupyy[_t]* M_ Axy[_t]* M_ Ayy[_t]+M_ gupzz[_t]* M_ Axz[_t]* M_ Ayz[_t]+ + M_ gupxy[_t]*(M_ Axx [_t]* M_ Ayy[_t]+ M_ Axy[_t]* M_ Axy[_t]) + + M_ gupxz[_t]*(M_ Axx [_t]* M_ Ayz[_t]+ M_ Axz[_t]* M_ Axy[_t]) + + M_ gupyz[_t]*(M_ Axy[_t]* M_ Ayz[_t]+ M_ Axz[_t]* M_ Ayy[_t]); + M_ fxz[_t]= M_ gupxx [_t]* M_ Axx [_t]* M_ Axz[_t]+M_ gupyy[_t]* M_ Axy[_t]* M_ Ayz[_t]+M_ gupzz[_t]* M_ Axz[_t]* M_ Azz[_t]+ + M_ gupxy[_t]*(M_ Axx [_t]* M_ Ayz[_t]+ M_ Axy[_t]* M_ Axz[_t]) + + M_ gupxz[_t]*(M_ Axx [_t]* M_ Azz[_t]+ M_ Axz[_t]* M_ Axz[_t]) + + M_ gupyz[_t]*(M_ Axy[_t]* M_ Azz[_t]+ M_ Axz[_t]* M_ Ayz[_t]); + M_ fyz[_t]= M_ gupxx [_t]* M_ Axy[_t]* M_ Axz[_t]+M_ gupyy[_t]* M_ Ayy[_t]* M_ Ayz[_t]+M_ gupzz[_t]* M_ Ayz[_t]* M_ Azz[_t]+ + M_ gupxy[_t]*(M_ Axy[_t]* M_ Ayz[_t]+ M_ Ayy[_t]* M_ Axz[_t]) + + M_ gupxz[_t]*(M_ Axy[_t]* M_ Azz[_t]+ M_ Ayz[_t]* M_ Axz[_t]) + + M_ gupyz[_t]*(M_ Ayy[_t]* M_ Azz[_t]+ M_ Ayz[_t]* M_ Ayz[_t]); + + M_ f[_t] = M_ chin1[_t]; + // store D^i D_i Lap in M_ trK_rhs[_t] + M_ trK_rhs[_t] =M_ f[_t]*M_ trK_rhs[_t]; + + M_ Axx_rhs[_t] = M_ f[_t] * M_ Axx_rhs[_t]+ M_ alpn1[_t]* (M_ trK[_t]* M_ Axx [_t]- 2 *M_ fxx[_t]) + + 2 * ( M_ Axx [_t]* M_ betaxx [_t]+ M_ Axy[_t]* M_ betayx [_t]+ M_ Axz[_t]* M_ betazx [_t])- + F2o3 * M_ Axx [_t]* M_ div_beta[_t]; + + M_ Ayy_rhs[_t] = M_ f[_t] * M_ Ayy_rhs[_t]+ M_ alpn1[_t]* (M_ trK[_t]* M_ Ayy[_t]- 2 *M_ fyy[_t]) + + 2 * ( M_ Axy[_t]* M_ betaxy[_t]+ M_ Ayy[_t]* M_ betayy[_t]+ M_ Ayz[_t]* M_ betazy[_t])- + F2o3 * M_ Ayy[_t]* M_ div_beta[_t]; + + M_ Azz_rhs[_t] = M_ f[_t] * M_ Azz_rhs[_t]+ M_ alpn1[_t]* (M_ trK[_t]* M_ Azz[_t]- 2 *M_ fzz[_t]) + + 2 * ( M_ Axz[_t]* M_ betaxz[_t]+ M_ Ayz[_t]* M_ betayz[_t]+ M_ Azz[_t]* M_ betazz[_t])- + F2o3 * M_ Azz[_t]* M_ div_beta[_t]; + + M_ Axy_rhs[_t] = M_ f[_t] * M_ Axy_rhs[_t]+ M_ alpn1[_t]*( M_ trK[_t]* M_ Axy[_t] - 2 *M_ fxy[_t])+ + M_ Axx [_t]* M_ betaxy[_t] + M_ Axz[_t]* M_ betazy[_t] + + M_ Ayy[_t]* M_ betayx [_t]+ M_ Ayz[_t]* M_ betazx [_t] + + F1o3 * M_ Axy[_t]* M_ div_beta[_t] - M_ Axy[_t]* M_ betazz[_t]; + + M_ Ayz_rhs[_t] = M_ f[_t] * M_ Ayz_rhs[_t]+ M_ alpn1[_t]*( M_ trK[_t]* M_ Ayz[_t] - 2 *M_ fyz[_t])+ + M_ Axy[_t]* M_ betaxz[_t]+ M_ Ayy[_t]* M_ betayz[_t] + + M_ Axz[_t]* M_ betaxy[_t] + M_ Azz[_t]* M_ betazy[_t] + + F1o3 * M_ Ayz[_t]* M_ div_beta[_t] - M_ Ayz[_t]* M_ betaxx[_t]; + + M_ Axz_rhs[_t] = M_ f[_t] * M_ Axz_rhs[_t]+ M_ alpn1[_t]*( M_ trK[_t]* M_ Axz[_t] - 2 *M_ fxz[_t])+ + M_ Axx [_t]* M_ betaxz[_t]+ M_ Axy[_t]* M_ betayz[_t] + + M_ Ayz[_t]* M_ betayx [_t]+ M_ Azz[_t]* M_ betazx [_t] + + F1o3 * M_ Axz[_t]* M_ div_beta[_t] - M_ Axz[_t]* M_ betayy[_t] ; //rhsM_ for M_ Aij + + // Compute trace of M_ S_ij + + M_ S[_t] = M_ f[_t] * (M_ gupxx [_t]* M_ Sxx [_t]+M_ gupyy[_t]* M_ Syy[_t]+M_ gupzz[_t]* M_ Szz[_t]+ + 2 * (M_ gupxy[_t]* M_ Sxy[_t]+M_ gupxz[_t]* M_ Sxz[_t]+M_ gupyz[_t]* M_ Syz[_t]) ); + + M_ trK_rhs[_t] = - M_ trK_rhs[_t] + M_ alpn1[_t]*( F1o3 * M_ trK[_t]* M_ trK[_t] + + M_ gupxx [_t]*M_ fxx [_t]+M_ gupyy[_t]*M_ fyy[_t]+M_ gupzz[_t]*M_ fzz[_t] + + 2 * (M_ gupxy[_t]*M_ fxy[_t]+M_ gupxz[_t]*M_ fxz[_t]+M_ gupyz[_t]*M_ fyz[_t]) + + 4 * PI * ( M_ rho[_t] + M_ S[_t] )) ; //rhsM_ for M_ trK[_t] + + ////////M_ gauge variable part + + M_ Lap_rhs[_t] = -2*M_ alpn1[_t] * M_ trK[_t]; + +#if (GAUGE == 0) + M_ betax_rhs[_t] =0.75*M_ dtSfx[_t]; + M_ betay_rhs[_t] =0.75*M_ dtSfy[_t]; + M_ betaz_rhs[_t] =0.75*M_ dtSfz[_t]; + + M_ dtSfx_rhs[_t] = M_ Gamx_rhs[_t] -2*M_ dtSfx[_t]; + M_ dtSfy_rhs[_t] = M_ Gamy_rhs[_t] -2*M_ dtSfy[_t]; + M_ dtSfz_rhs[_t] = M_ Gamz_rhs[_t] -2*M_ dtSfz[_t]; + +#elif (GAUGE == 1) + M_ betax_rhs[_t] =M_ Gamx[_t] - 2 * M_ betax[_t] ; + + M_ betay_rhs[_t] =M_ Gamy[_t] - 2 * M_ betay[_t] ; + + M_ betaz_rhs[_t] =M_ Gamz[_t] - 2 * M_ betaz[_t] ; + + M_ dtSfx_rhs[_t] = 0; + M_ dtSfy_rhs[_t] = 0; + M_ dtSfz_rhs[_t] = 0; + +#elif (GAUGE == 2 || GAUGE == 3) + + M_ betax_rhs[_t] = 0.75* M_ dtSfx[_t]; + + M_ betay_rhs[_t] = 0.75* M_ dtSfy[_t]; + + M_ betaz_rhs[_t] = 0.75* M_ dtSfz[_t]; + +#elif (GAUGE == 6) + if(BHN==2) + { + int k = _t / _2D_SIZE[0]; + int ps = _t - (_2D_SIZE[0] * k); //TOTRY: = curr % _2D_SIZE[0]; + int j = ps / ex_c[0]; + int i = ps - (j * ex_c[0]); + + r1 = ( pow2((Porg[0]-X[i]))+ pow2((Porg[1]-Y[j]))+ pow2((Porg[2]-Z[k])) ) / + + ( pow2((Porg[0]-Porg[3]))+ pow2((Porg[1]-Porg[4])) + pow2((Porg[2]-Porg[5])) ); + + + r2 = ( pow2((Porg[3]-X[i])) + pow2((Porg[4]-Y[j])) + pow2((Porg[5]-Z[k])) )/ + + ( pow2((Porg[0]-Porg[3])) + pow2((Porg[1]-Porg[4])) + pow2((Porg[2]-Porg[5])) ); + + + reta[i+ j*_1D_SIZE[0]+ k*_2D_SIZE[0] ] = A + C1/(1 + 12 * r1) + C2/(1 + 12 *r2); + }//BHN == 2 + + M_ betax_rhs[_t] = 0.75*M_ dtSfx[_t]; + + M_ betay_rhs[_t] = 0.75*M_ dtSfy[_t]; + + M_ betaz_rhs[_t] = 0.75*M_ dtSfz[_t]; + + + + M_ dtSfx_rhs[_t] = M_ Gamx_rhs[_t] - M_ reta[_t] * M_ dtSfx[_t]; + + M_ dtSfy_rhs[_t] = M_ Gamy_rhs[_t] - M_ reta[_t] * M_ dtSfy[_t]; + + M_ dtSfz_rhs[_t] = M_ Gamz_rhs[_t] - M_ reta[_t] * M_ dtSfz[_t]; + +#elif (GAUGE == 7) + if(BHN==2){ + int k = _t / _2D_SIZE[0]; + int ps = _t - (_2D_SIZE[0] * k); //TOTRY: = curr % _2D_SIZE[0]; + int j = ps / ex_c[0]; + int i = ps - (j * ex_c[0]); + + r1 = ( pow2((Porg[0]-X[i])) + pow2((Porg[1]-Y[j])) + pow2((Porg[2]-Z[k])) )/ + + ( pow2((Porg[0]-Porg[3])) + pow2((Porg[1]-Porg[4])) + pow2((Porg[2]-Porg[5])) ); + + + r2 = ( pow2((Porg[3]-X[i])) + pow2((Porg[4]-Y[j])) + pow2((Porg[5]-Z[k])) )/ + + ( pow2((Porg[0]-Porg[3])) + pow2((Porg[1]-Porg[4])) + pow2((Porg[2]-Porg[5])) ); + + + M_ reta[_t][i+ j*_1D_SIZE[0]+ k*_2D_SIZE[0] ] = A + C1* exp(-12 *r1) + C2*exp(- 12*r2); + }//BHN ==2 + + M_ betax_rhs[_t] = 0.75*M_ dtSfx[_t]; + + M_ betay_rhs[_t] = 0.75*M_ dtSfy[_t]; + + M_ betaz_rhs[_t] = 0.75*M_ dtSfz[_t]; + + + + M_ dtSfx_rhs[_t] = M_ Gamx_rhs[_t] - M_ reta[_t]*M_ dtSfx[_t]; + + M_ dtSfy_rhs[_t] = M_ Gamy_rhs[_t] - M_ reta[_t]*M_ dtSfy[_t]; + + M_ dtSfz_rhs[_t] = M_ Gamz_rhs[_t] - M_ reta[_t]*M_ dtSfz[_t]; + +#endif //if (GAUGE == ?) + + _t += STEP_SIZE; + } +} + +__global__ void compute_rhs_bssn_ss_part6_gauge() +{ + int _t = blockIdx.x*blockDim.x+threadIdx.x; + while(_t < _3D_SIZE[0]) + { +#if (GAUGE == 2) + M_ reta[_t] = M_ gupxx[_t] * M_ dtSfx_rhs[_t] * M_ dtSfx_rhs[_t] + M_ gupyy[_t] * M_ dtSfy_rhs[_t] * M_ dtSfy_rhs[_t] + M_ gupzz[_t] * M_ dtSfz_rhs[_t] * M_ dtSfz_rhs[_t] + + + 2 * ( M_ gupxy[_t] * M_ dtSfx_rhs[_t] * M_ dtSfy_rhs[_t] + M_ gupxz[_t] * M_ dtSfx_rhs[_t] * M_ dtSfz_rhs[_t] + M_ gupyz[_t] * M_ dtSfy_rhs[_t] * M_ dtSfz_rhs[_t]); + + + M_ reta[_t] = 1.13 / 2 * sqrt( M_ reta[_t]/M_ chin1[_t])/ pow2( ( 1-sqrt(M_ chin1[_t]) ) ); + + + M_ dtSfx_rhs[_t] = M_ Gamx_rhs[_t] - M_ reta[_t]* M_ dtSfx[_t]; + + M_ dtSfy_rhs[_t] = M_ Gamy_rhs[_t] - M_ reta[_t]* M_ dtSfy[_t]; + + M_ dtSfz_rhs[_t] = M_ Gamz_rhs[_t] - M_ reta[_t]* M_ dtSfz[_t]; + +#elif (GAUGE == 3) + M_ reta[_t] = M_ gupxx[_t] * M_ dtSfx_rhs[_t] * M_ dtSfx_rhs[_t] + M_ gupyy[_t] * M_ dtSfy_rhs[_t] * M_ dtSfy_rhs[_t] + + M_ gupzz[_t] * M_ dtSfz_rhs[_t] * M_ dtSfz_rhs[_t] + + + 2 * ( M_ gupxy[_t] * M_ dtSfx_rhs[_t] * M_ dtSfy_rhs[_t] + + M_ gupxz[_t] * M_ dtSfx_rhs[_t] * M_ dtSfz_rhs[_t] + + M_ gupyz[_t] * M_ dtSfy_rhs[_t] * M_ dtSfz_rhs[_t]); + + + M_ reta[_t] = 1.13/2 * sqrt( M_ reta[_t]/ M_ chin1[_t])/ pow2((1-M_ chin1[_t])); + + M_ dtSfx_rhs[_t] = M_ Gamx_rhs[_t] - M_ reta[_t]* M_ dtSfx[_t]; + + M_ dtSfy_rhs[_t] = M_ Gamy_rhs[_t] - M_ reta[_t]* M_ dtSfy[_t]; + + M_ dtSfz_rhs[_t] = M_ Gamz_rhs[_t] - M_ reta[_t]* M_ dtSfz[_t]; + +#elif (GAUGE == 4) + M_ reta[_t] = M_ gupxx[_t] * M_ dtSfx_rhs[_t] * M_ dtSfx_rhs[_t] + M_ gupyy[_t] * M_ dtSfy_rhs[_t] * + M_ dtSfy_rhs[_t] + M_ gupzz[_t] * M_ dtSfz_rhs[_t] * M_ dtSfz_rhs[_t] + + + 2 * ( M_ gupxy[_t] * M_ dtSfx_rhs[_t] * M_ dtSfy_rhs[_t] + M_ gupxz[_t] * + M_ dtSfx_rhs[_t] * M_ dtSfz_rhs[_t] + M_ gupyz[_t] * M_ dtSfy_rhs[_t] * M_ dtSfz_rhs[_t]); + + + M_ reta[_t] = 1.13 / 2 * sqrt( M_ reta[_t]/M_ chin1[_t])/ pow( (1-sqrt(M_ chin1[_t]))); + + + M_ betax_rhs[_t] = 0.75* M_ Gamx[_t] - M_ reta[_t]*M_ betax[_t]; + + M_ betay_rhs[_t] = 0.75* M_ Gamy[_t] - M_ reta[_t]*M_ betay[_t]; + + M_ betaz_rhs[_t] = 0.75* M_ Gamz[_t] - M_ reta[_t]*M_ betaz[_t]; + +#elif (GAUGE == 5) + M_ reta[_t] = M_ gupxx[_t] * M_ dtSfx_rhs[_t] * M_ dtSfx_rhs[_t] + M_ gupyy[_t] * M_ dtSfy_rhs[_t] * M_ dtSfy_rhs[_t] + M_ gupzz[_t] * M_ dtSfz_rhs[_t] * M_ dtSfz_rhs[_t] + + + 2 * ( M_ gupxy[_t] * M_ dtSfx_rhs[_t] * M_ dtSfy_rhs[_t] + M_ gupxz[_t] * M_ dtSfx_rhs[_t] * M_ dtSfz_rhs[_t] + M_ gupyz[_t] * M_ dtSfy_rhs[_t] * M_ dtSfz_rhs[_t]); + + + M_ reta[_t] = 1.13 / 2 * sqrt( M_ reta[_t]/M_ chin1)/ pow( (1-M_ chin1[_t]) ); + + M_ betax_rhs[_t] = 0.75* M_ Gamx[_t] - M_ reta[_t]*M_ betax[_t]; + + M_ betay_rhs[_t] = 0.75* M_ Gamy[_t] - M_ reta[_t]*M_ betay[_t]; + + M_ betaz_rhs[_t] = 0.75* M_ Gamz[_t] - M_ reta[_t]*M_ betaz[_t]; + + + + M_ dtSfx_rhs[_t] = 0; + + M_ dtSfy_rhs[_t] = 0; + + M_ dtSfz_rhs[_t] = 0; +#endif + _t += STEP_SIZE; + } +} + +__global__ void compute_rhs_ss_part7() +{ + int _t = blockIdx.x*blockDim.x+threadIdx.x; + while(_t < _3D_SIZE[0]) + { + M_ ham_Res[_t] = M_ gupxx [_t]* M_ Rxx [_t]+ M_ gupyy[_t]* M_ Ryy[_t]+ M_ gupzz[_t]* M_ Rzz[_t]+ + 2* ( M_ gupxy[_t]* M_ Rxy[_t]+ M_ gupxz[_t]* M_ Rxz[_t]+ M_ gupyz[_t]* M_ Ryz[_t]); + + M_ ham_Res[_t] = M_ chin1[_t]*M_ ham_Res[_t] + F2o3 * M_ trK[_t] * M_ trK[_t] -( + M_ gupxx [_t]* ( + M_ gupxx [_t]* M_ Axx [_t]* M_ Axx [_t]+ M_ gupyy[_t]* M_ Axy[_t]* M_ Axy[_t]+ M_ gupzz[_t]* M_ Axz[_t]* M_ Axz[_t]+ + 2 * (M_ gupxy[_t]* M_ Axx [_t]* M_ Axy[_t]+ M_ gupxz[_t]* M_ Axx [_t]* M_ Axz[_t]+ M_ gupyz[_t]* M_ Axy[_t]* M_ Axz[_t]) ) + + M_ gupyy[_t]* ( + M_ gupxx [_t]* M_ Axy[_t]* M_ Axy[_t]+ M_ gupyy[_t]* M_ Ayy[_t]* M_ Ayy[_t]+ M_ gupzz[_t]* M_ Ayz[_t]* M_ Ayz[_t]+ + 2 * (M_ gupxy[_t]* M_ Axy[_t]* M_ Ayy[_t]+ M_ gupxz[_t]* M_ Axy[_t]* M_ Ayz[_t]+ M_ gupyz[_t]* M_ Ayy[_t]* M_ Ayz[_t]) ) + + M_ gupzz[_t]* ( + M_ gupxx [_t]* M_ Axz[_t]* M_ Axz[_t]+ M_ gupyy[_t]* M_ Ayz[_t]* M_ Ayz[_t]+ M_ gupzz[_t]* M_ Azz[_t]* M_ Azz[_t]+ + 2 * (M_ gupxy[_t]* M_ Axz[_t]* M_ Ayz[_t]+ M_ gupxz[_t]* M_ Axz[_t]* M_ Azz[_t]+ M_ gupyz[_t]* M_ Ayz[_t]* M_ Azz[_t]) ) + + 2 * ( + M_ gupxy[_t]* ( + M_ gupxx [_t]* M_ Axx [_t]* M_ Axy[_t]+ M_ gupyy[_t]* M_ Axy[_t]* M_ Ayy[_t]+ M_ gupzz[_t]* M_ Axz[_t]* M_ Ayz[_t]+ + M_ gupxy[_t]* (M_ Axx [_t]* M_ Ayy[_t]+ M_ Axy[_t]* M_ Axy[_t]) + + M_ gupxz[_t]* (M_ Axx [_t]* M_ Ayz[_t]+ M_ Axz[_t]* M_ Axy[_t]) + + M_ gupyz[_t]* (M_ Axy[_t]* M_ Ayz[_t]+ M_ Axz[_t]* M_ Ayy[_t]) ) + + M_ gupxz[_t]* ( + M_ gupxx [_t]* M_ Axx [_t]* M_ Axz[_t]+ M_ gupyy[_t]* M_ Axy[_t]* M_ Ayz[_t]+ M_ gupzz[_t]* M_ Axz[_t]* M_ Azz[_t]+ + M_ gupxy[_t]* (M_ Axx [_t]* M_ Ayz[_t]+ M_ Axy[_t]* M_ Axz[_t]) + + M_ gupxz[_t]* (M_ Axx [_t]* M_ Azz[_t]+ M_ Axz[_t]* M_ Axz[_t]) + + M_ gupyz[_t]* (M_ Axy[_t]* M_ Azz[_t]+ M_ Axz[_t]* M_ Ayz[_t]) ) + + M_ gupyz[_t]* ( + M_ gupxx [_t]* M_ Axy[_t]* M_ Axz[_t]+ M_ gupyy[_t]* M_ Ayy[_t]* M_ Ayz[_t]+ M_ gupzz[_t]* M_ Ayz[_t]* M_ Azz[_t]+ + M_ gupxy[_t]* (M_ Axy[_t]* M_ Ayz[_t]+ M_ Ayy[_t]* M_ Axz[_t]) + + M_ gupxz[_t]* (M_ Axy[_t]* M_ Azz[_t]+ M_ Ayz[_t]* M_ Axz[_t]) + + M_ gupyz[_t]* (M_ Ayy[_t]* M_ Azz[_t]+ M_ Ayz[_t]* M_ Ayz[_t]) ) ))- 16 * PI * M_ rho[_t]; + + _t += STEP_SIZE; + } +} +__global__ void compute_rhs_ss_part8() +{ + int _t = blockIdx.x*blockDim.x+threadIdx.x; + while(_t < _3D_SIZE[0]) + { + M_ gxxx [_t]= M_ gxxx [_t]- ( M_ Gamxxx [_t]* M_ Axx [_t]+ M_ Gamyxx [_t]* M_ Axy[_t]+ M_ Gamzxx [_t]* M_ Axz[_t] + + M_ Gamxxx [_t]* M_ Axx [_t]+ M_ Gamyxx [_t]* M_ Axy[_t]+ M_ Gamzxx [_t]* M_ Axz[_t]) - M_ chix[_t]*M_ Axx[_t]/M_ chin1[_t]; + + M_ gxyx [_t]= M_ gxyx [_t]- ( M_ Gamxxy[_t]* M_ Axx [_t]+ M_ Gamyxy[_t]* M_ Axy[_t]+ M_ Gamzxy[_t]* M_ Axz[_t] + + M_ Gamxxx [_t]* M_ Axy[_t]+ M_ Gamyxx [_t]* M_ Ayy[_t]+ M_ Gamzxx [_t]* M_ Ayz[_t]) - M_ chix[_t]*M_ Axy[_t]/M_ chin1[_t]; + + M_ gxzx [_t]= M_ gxzx [_t]- ( M_ Gamxxz[_t]* M_ Axx [_t]+ M_ Gamyxz[_t]* M_ Axy[_t]+ M_ Gamzxz[_t]* M_ Axz[_t] + + M_ Gamxxx [_t]* M_ Axz[_t]+ M_ Gamyxx [_t]* M_ Ayz[_t]+ M_ Gamzxx [_t]* M_ Azz[_t]) - M_ chix[_t]*M_ Axz[_t]/M_ chin1[_t]; + + M_ gyyx [_t]= M_ gyyx [_t]- ( M_ Gamxxy[_t]* M_ Axy[_t]+ M_ Gamyxy[_t]* M_ Ayy[_t]+ M_ Gamzxy[_t]* M_ Ayz[_t] + + M_ Gamxxy[_t]* M_ Axy[_t]+ M_ Gamyxy[_t]* M_ Ayy[_t]+ M_ Gamzxy[_t]* M_ Ayz[_t]) - M_ chix[_t]*M_ Ayy[_t]/M_ chin1[_t]; + + M_ gyzx [_t]= M_ gyzx [_t]- ( M_ Gamxxz[_t]* M_ Axy[_t]+ M_ Gamyxz[_t]* M_ Ayy[_t]+ M_ Gamzxz[_t]* M_ Ayz[_t] + + M_ Gamxxy[_t]* M_ Axz[_t]+ M_ Gamyxy[_t]* M_ Ayz[_t]+ M_ Gamzxy[_t]* M_ Azz[_t]) - M_ chix[_t]*M_ Ayz[_t]/M_ chin1[_t]; + + M_ gzzx [_t]= M_ gzzx [_t]- ( M_ Gamxxz[_t]* M_ Axz[_t]+ M_ Gamyxz[_t]* M_ Ayz[_t]+ M_ Gamzxz[_t]* M_ Azz[_t] + + M_ Gamxxz[_t]* M_ Axz[_t]+ M_ Gamyxz[_t]* M_ Ayz[_t]+ M_ Gamzxz[_t]* M_ Azz[_t]) - M_ chix[_t]*M_ Azz[_t]/M_ chin1[_t]; + + M_ gxxy[_t]= M_ gxxy[_t]- ( M_ Gamxxy[_t]* M_ Axx [_t]+ M_ Gamyxy[_t]* M_ Axy[_t]+ M_ Gamzxy[_t]* M_ Axz[_t] + + M_ Gamxxy[_t]* M_ Axx [_t]+ M_ Gamyxy[_t]* M_ Axy[_t]+ M_ Gamzxy[_t]* M_ Axz[_t]) - M_ chiy[_t]*M_ Axx[_t]/M_ chin1[_t]; + + M_ gxyy[_t]= M_ gxyy[_t]- ( M_ Gamxyy[_t]* M_ Axx [_t]+ M_ Gamyyy[_t]* M_ Axy[_t]+ M_ Gamzyy[_t]* M_ Axz[_t] + + M_ Gamxxy[_t]* M_ Axy[_t]+ M_ Gamyxy[_t]* M_ Ayy[_t]+ M_ Gamzxy[_t]* M_ Ayz[_t]) - M_ chiy[_t]*M_ Axy[_t]/M_ chin1[_t]; + + M_ gxzy[_t]= M_ gxzy[_t]- ( M_ Gamxyz[_t]* M_ Axx [_t]+ M_ Gamyyz[_t]* M_ Axy[_t]+ M_ Gamzyz[_t]* M_ Axz[_t] + + M_ Gamxxy[_t]* M_ Axz[_t]+ M_ Gamyxy[_t]* M_ Ayz[_t]+ M_ Gamzxy[_t]* M_ Azz[_t]) - M_ chiy[_t]*M_ Axz[_t]/M_ chin1[_t]; + + M_ gyyy[_t]= M_ gyyy[_t]- ( M_ Gamxyy[_t]* M_ Axy[_t]+ M_ Gamyyy[_t]* M_ Ayy[_t]+ M_ Gamzyy[_t]* M_ Ayz[_t] + + M_ Gamxyy[_t]* M_ Axy[_t]+ M_ Gamyyy[_t]* M_ Ayy[_t]+ M_ Gamzyy[_t]* M_ Ayz[_t]) - M_ chiy[_t]*M_ Ayy[_t]/M_ chin1[_t]; + + M_ gyzy[_t]= M_ gyzy[_t]- ( M_ Gamxyz[_t]* M_ Axy[_t]+ M_ Gamyyz[_t]* M_ Ayy[_t]+ M_ Gamzyz[_t]* M_ Ayz[_t] + + M_ Gamxyy[_t]* M_ Axz[_t]+ M_ Gamyyy[_t]* M_ Ayz[_t]+ M_ Gamzyy[_t]* M_ Azz[_t]) - M_ chiy[_t]*M_ Ayz[_t]/M_ chin1[_t]; + + M_ gzzy[_t]= M_ gzzy[_t]- ( M_ Gamxyz[_t]* M_ Axz[_t]+ M_ Gamyyz[_t]* M_ Ayz[_t]+ M_ Gamzyz[_t]* M_ Azz[_t] + + M_ Gamxyz[_t]* M_ Axz[_t]+ M_ Gamyyz[_t]* M_ Ayz[_t]+ M_ Gamzyz[_t]* M_ Azz[_t]) - M_ chiy[_t]*M_ Azz[_t]/M_ chin1[_t]; + + M_ gxxz[_t]= M_ gxxz[_t]- ( M_ Gamxxz[_t]* M_ Axx [_t]+ M_ Gamyxz[_t]* M_ Axy[_t]+ M_ Gamzxz[_t]* M_ Axz[_t] + + M_ Gamxxz[_t]* M_ Axx [_t]+ M_ Gamyxz[_t]* M_ Axy[_t]+ M_ Gamzxz[_t]* M_ Axz[_t]) - M_ chiz[_t]*M_ Axx[_t]/M_ chin1[_t]; + + M_ gxyz[_t]= M_ gxyz[_t]- ( M_ Gamxyz[_t]* M_ Axx [_t]+ M_ Gamyyz[_t]* M_ Axy[_t]+ M_ Gamzyz[_t]* M_ Axz[_t] + + M_ Gamxxz[_t]* M_ Axy[_t]+ M_ Gamyxz[_t]* M_ Ayy[_t]+ M_ Gamzxz[_t]* M_ Ayz[_t]) - M_ chiz[_t]*M_ Axy[_t]/M_ chin1[_t]; + + M_ gxzz[_t]= M_ gxzz[_t]- ( M_ Gamxzz[_t]* M_ Axx [_t]+ M_ Gamyzz[_t]* M_ Axy[_t]+ M_ Gamzzz[_t]* M_ Axz[_t] + + M_ Gamxxz[_t]* M_ Axz[_t]+ M_ Gamyxz[_t]* M_ Ayz[_t]+ M_ Gamzxz[_t]* M_ Azz[_t]) - M_ chiz[_t]*M_ Axz[_t]/M_ chin1[_t]; + + M_ gyyz[_t]= M_ gyyz[_t]- ( M_ Gamxyz[_t]* M_ Axy[_t]+ M_ Gamyyz[_t]* M_ Ayy[_t]+ M_ Gamzyz[_t]* M_ Ayz[_t] + + M_ Gamxyz[_t]* M_ Axy[_t]+ M_ Gamyyz[_t]* M_ Ayy[_t]+ M_ Gamzyz[_t]* M_ Ayz[_t]) - M_ chiz[_t]*M_ Ayy[_t]/M_ chin1[_t]; + + M_ gyzz[_t]= M_ gyzz[_t]- ( M_ Gamxzz[_t]* M_ Axy[_t]+ M_ Gamyzz[_t]* M_ Ayy[_t]+ M_ Gamzzz[_t]* M_ Ayz[_t] + + M_ Gamxyz[_t]* M_ Axz[_t]+ M_ Gamyyz[_t]* M_ Ayz[_t]+ M_ Gamzyz[_t]* M_ Azz[_t]) - M_ chiz[_t]*M_ Ayz[_t]/M_ chin1[_t]; + + M_ gzzz[_t]= M_ gzzz[_t]- ( M_ Gamxzz[_t]* M_ Axz[_t]+ M_ Gamyzz[_t]* M_ Ayz[_t]+ M_ Gamzzz[_t]* M_ Azz[_t] + + M_ Gamxzz[_t]* M_ Axz[_t]+ M_ Gamyzz[_t]* M_ Ayz[_t]+ M_ Gamzzz[_t]* M_ Azz[_t]) - M_ chiz[_t]*M_ Azz[_t]/M_ chin1[_t]; + + M_ movx_Res[_t] = M_ gupxx[_t]*M_ gxxx [_t]+ M_ gupyy[_t]*M_ gxyy[_t]+ M_ gupzz[_t]*M_ gxzz[_t] + +M_ gupxy[_t]*M_ gxyx [_t]+ M_ gupxz[_t]*M_ gxzx [_t]+ M_ gupyz[_t]*M_ gxzy[_t] + +M_ gupxy[_t]*M_ gxxy[_t]+ M_ gupxz[_t]*M_ gxxz[_t]+ M_ gupyz[_t]*M_ gxyz[_t]; + M_ movy_Res[_t] = M_ gupxx[_t]*M_ gxyx [_t]+ M_ gupyy[_t]*M_ gyyy[_t]+ M_ gupzz[_t]*M_ gyzz[_t] + +M_ gupxy[_t]*M_ gyyx [_t]+ M_ gupxz[_t]*M_ gyzx [_t]+ M_ gupyz[_t]*M_ gyzy[_t] + +M_ gupxy[_t]*M_ gxyy[_t]+ M_ gupxz[_t]*M_ gxyz[_t]+ M_ gupyz[_t]*M_ gyyz[_t]; + + M_ movz_Res[_t] = M_ gupxx[_t]*M_ gxzx [_t]+ M_ gupyy[_t]*M_ gyzy[_t]+ M_ gupzz[_t]*M_ gzzz[_t] + +M_ gupxy[_t]*M_ gyzx [_t]+ M_ gupxz[_t]*M_ gzzx [_t]+ M_ gupyz[_t]*M_ gzzy[_t] + +M_ gupxy[_t]*M_ gxzy[_t]+ M_ gupxz[_t]*M_ gxzz[_t]+ M_ gupyz[_t]*M_ gyzz[_t]; + + M_ movx_Res[_t] = M_ movx_Res[_t] - F2o3*M_ Kx [_t]- 8*PI*M_ Sx[_t]; + M_ movy_Res[_t] = M_ movy_Res[_t] - F2o3*M_ Ky[_t]- 8*PI*M_ Sy[_t]; + M_ movz_Res[_t] = M_ movz_Res[_t] - F2o3*M_ Kz[_t]- 8*PI*M_ Sz[_t]; + + _t += STEP_SIZE; + } +} + +void destroy_meta(Meta *meta,Metass *metass) +{ + if(Mh_ X) cudaFree(Mh_ X); + if(Mh_ Y) cudaFree(Mh_ Y); + if(Mh_ Z) cudaFree(Mh_ Z); + if(Mh_ chi) cudaFree(Mh_ chi); + if(Mh_ dxx) cudaFree(Mh_ dxx); + if(Mh_ dyy) cudaFree(Mh_ dyy); + if(Mh_ dzz) cudaFree(Mh_ dzz); + if(Mh_ trK) cudaFree(Mh_ trK); + if(Mh_ gxy) cudaFree(Mh_ gxy); + if(Mh_ gxz) cudaFree(Mh_ gxz); + if(Mh_ gyz) cudaFree(Mh_ gyz); + if(Mh_ Axx) cudaFree(Mh_ Axx); + if(Mh_ Axy) cudaFree(Mh_ Axy); + if(Mh_ Axz) cudaFree(Mh_ Axz); + if(Mh_ Ayz) cudaFree(Mh_ Ayz); + if(Mh_ Ayy) cudaFree(Mh_ Ayy); + if(Mh_ Azz) cudaFree(Mh_ Azz); + if(Mh_ Gamx) cudaFree(Mh_ Gamx); + if(Mh_ Gamy) cudaFree(Mh_ Gamy); + if(Mh_ Gamz) cudaFree(Mh_ Gamz); + if(Mh_ Lap) cudaFree(Mh_ Lap); + if(Mh_ betax) cudaFree(Mh_ betax); + if(Mh_ betay) cudaFree(Mh_ betay); + if(Mh_ betaz) cudaFree(Mh_ betaz); + if(Mh_ dtSfx) cudaFree(Mh_ dtSfx); + if(Mh_ dtSfy) cudaFree(Mh_ dtSfy); + if(Mh_ dtSfz) cudaFree(Mh_ dtSfz); + if(Mh_ TZ) cudaFree(Mh_ TZ); + if(Mh_ chi_rhs) cudaFree(Mh_ chi_rhs); + if(Mh_ trK_rhs) cudaFree(Mh_ trK_rhs); + if(Mh_ gxy_rhs) cudaFree(Mh_ gxy_rhs); + if(Mh_ gxz_rhs) cudaFree(Mh_ gxz_rhs); + if(Mh_ gyz_rhs) cudaFree(Mh_ gyz_rhs); + if(Mh_ Axx_rhs) cudaFree(Mh_ Axx_rhs); + if(Mh_ Axy_rhs) cudaFree(Mh_ Axy_rhs); + if(Mh_ Axz_rhs) cudaFree(Mh_ Axz_rhs); + if(Mh_ Ayz_rhs) cudaFree(Mh_ Ayz_rhs); + if(Mh_ Ayy_rhs) cudaFree(Mh_ Ayy_rhs); + if(Mh_ Azz_rhs) cudaFree(Mh_ Azz_rhs); + if(Mh_ Gamx_rhs) cudaFree(Mh_ Gamx_rhs); + if(Mh_ Gamy_rhs) cudaFree(Mh_ Gamy_rhs); + if(Mh_ Gamz_rhs) cudaFree(Mh_ Gamz_rhs); + if(Mh_ Lap_rhs) cudaFree(Mh_ Lap_rhs); + if(Mh_ betax_rhs) cudaFree(Mh_ betax_rhs); + if(Mh_ betay_rhs) cudaFree(Mh_ betay_rhs); + if(Mh_ betaz_rhs) cudaFree(Mh_ betaz_rhs); + if(Mh_ dtSfx_rhs) cudaFree(Mh_ dtSfx_rhs); + if(Mh_ dtSfy_rhs) cudaFree(Mh_ dtSfy_rhs); + if(Mh_ dtSfz_rhs) cudaFree(Mh_ dtSfz_rhs); + if(Mh_ TZ_rhs) cudaFree(Mh_ TZ_rhs); + if(Mh_ rho) cudaFree(Mh_ rho); + if(Mh_ Sx) cudaFree(Mh_ Sx); + if(Mh_ Sy) cudaFree(Mh_ Sy); + if(Mh_ Sz) cudaFree(Mh_ Sz); + if(Mh_ Sxx) cudaFree(Mh_ Sxx); + if(Mh_ Sxy) cudaFree(Mh_ Sxy); + if(Mh_ Sxz) cudaFree(Mh_ Sxz); + if(Mh_ Syz) cudaFree(Mh_ Syz); + if(Mh_ Syy) cudaFree(Mh_ Syy); + if(Mh_ Szz) cudaFree(Mh_ Szz); + if(Mh_ Gamxxx) cudaFree(Mh_ Gamxxx); + if(Mh_ Gamxxy) cudaFree(Mh_ Gamxxy); + if(Mh_ Gamxxz) cudaFree(Mh_ Gamxxz); + if(Mh_ Gamxyy) cudaFree(Mh_ Gamxyy); + if(Mh_ Gamxyz) cudaFree(Mh_ Gamxyz); + if(Mh_ Gamxzz) cudaFree(Mh_ Gamxzz); + if(Mh_ Gamyxx) cudaFree(Mh_ Gamyxx); + if(Mh_ Gamyxy) cudaFree(Mh_ Gamyxy); + if(Mh_ Gamyxz) cudaFree(Mh_ Gamyxz); + if(Mh_ Gamyyy) cudaFree(Mh_ Gamyyy); + if(Mh_ Gamyyz) cudaFree(Mh_ Gamyyz); + if(Mh_ Gamyzz) cudaFree(Mh_ Gamyzz); + if(Mh_ Gamzxx) cudaFree(Mh_ Gamzxx); + if(Mh_ Gamzxy) cudaFree(Mh_ Gamzxy); + if(Mh_ Gamzxz) cudaFree(Mh_ Gamzxz); + if(Mh_ Gamzyz) cudaFree(Mh_ Gamzyz); + if(Mh_ Gamzyy) cudaFree(Mh_ Gamzyy); + if(Mh_ Gamzzz) cudaFree(Mh_ Gamzzz); + if(Mh_ Rxx) cudaFree(Mh_ Rxx); + if(Mh_ Rxy) cudaFree(Mh_ Rxy); + if(Mh_ Rxz) cudaFree(Mh_ Rxz); + if(Mh_ Ryy) cudaFree(Mh_ Ryy); + if(Mh_ Ryz) cudaFree(Mh_ Ryz); + if(Mh_ Rzz) cudaFree(Mh_ Rzz); + if(Mh_ ham_Res) cudaFree(Mh_ ham_Res); + if(Mh_ movx_Res) cudaFree(Mh_ movx_Res); + if(Mh_ movy_Res) cudaFree(Mh_ movy_Res); + if(Mh_ movz_Res) cudaFree(Mh_ movz_Res); + if(Mh_ Gmx_Res) cudaFree(Mh_ Gmx_Res); + if(Mh_ Gmy_Res) cudaFree(Mh_ Gmy_Res); + if(Mh_ Gmz_Res) cudaFree(Mh_ Gmz_Res); + if(Mh_ gxx) cudaFree(Mh_ gxx); + if(Mh_ gyy) cudaFree(Mh_ gyy); + if(Mh_ gzz) cudaFree(Mh_ gzz); + if(Mh_ chix) cudaFree(Mh_ chix); + if(Mh_ chiy) cudaFree(Mh_ chiy); + if(Mh_ chiz) cudaFree(Mh_ chiz); + if(Mh_ gxxx) cudaFree(Mh_ gxxx); + if(Mh_ gxyx) cudaFree(Mh_ gxyx); + if(Mh_ gxzx) cudaFree(Mh_ gxzx); + if(Mh_ gyyx) cudaFree(Mh_ gyyx); + if(Mh_ gyzx) cudaFree(Mh_ gyzx); + if(Mh_ gzzx) cudaFree(Mh_ gzzx); + if(Mh_ gxxy) cudaFree(Mh_ gxxy); + if(Mh_ gxyy) cudaFree(Mh_ gxyy); + if(Mh_ gxzy) cudaFree(Mh_ gxzy); + if(Mh_ gyyy) cudaFree(Mh_ gyyy); + if(Mh_ gyzy) cudaFree(Mh_ gyzy); + if(Mh_ gzzy) cudaFree(Mh_ gzzy); + if(Mh_ gxxz) cudaFree(Mh_ gxxz); + if(Mh_ gxyz) cudaFree(Mh_ gxyz); + if(Mh_ gxzz) cudaFree(Mh_ gxzz); + if(Mh_ gyyz) cudaFree(Mh_ gyyz); + if(Mh_ gyzz) cudaFree(Mh_ gyzz); + if(Mh_ gzzz) cudaFree(Mh_ gzzz); + if(Mh_ Lapx) cudaFree(Mh_ Lapx); + if(Mh_ Lapy) cudaFree(Mh_ Lapy); + if(Mh_ Lapz) cudaFree(Mh_ Lapz); + if(Mh_ betaxx) cudaFree(Mh_ betaxx); + if(Mh_ betaxy) cudaFree(Mh_ betaxy); + if(Mh_ betaxz) cudaFree(Mh_ betaxz); + if(Mh_ betayy) cudaFree(Mh_ betayy); + if(Mh_ betayz) cudaFree(Mh_ betayz); + if(Mh_ betazz) cudaFree(Mh_ betazz); + if(Mh_ betayx) cudaFree(Mh_ betayx); + if(Mh_ betazy) cudaFree(Mh_ betazy); + if(Mh_ betazx) cudaFree(Mh_ betazx); + if(Mh_ Kx) cudaFree(Mh_ Kx); + if(Mh_ Ky) cudaFree(Mh_ Ky); + if(Mh_ Kz) cudaFree(Mh_ Kz); + if(Mh_ Gamxx) cudaFree(Mh_ Gamxx); + if(Mh_ Gamxy) cudaFree(Mh_ Gamxy); + if(Mh_ Gamxz) cudaFree(Mh_ Gamxz); + if(Mh_ Gamyy) cudaFree(Mh_ Gamyy); + if(Mh_ Gamyz) cudaFree(Mh_ Gamyz); + if(Mh_ Gamzz) cudaFree(Mh_ Gamzz); + if(Mh_ Gamyx) cudaFree(Mh_ Gamyx); + if(Mh_ Gamzy) cudaFree(Mh_ Gamzy); + if(Mh_ Gamzx) cudaFree(Mh_ Gamzx); + if(Mh_ div_beta) cudaFree(Mh_ div_beta); + if(Mh_ S) cudaFree(Mh_ S); + if(Mh_ f) cudaFree(Mh_ f); + if(Mh_ fxx) cudaFree(Mh_ fxx); + if(Mh_ fxy) cudaFree(Mh_ fxy); + if(Mh_ fxz) cudaFree(Mh_ fxz); + if(Mh_ fyy) cudaFree(Mh_ fyy); + if(Mh_ fyz) cudaFree(Mh_ fyz); + if(Mh_ fzz) cudaFree(Mh_ fzz); + if(Mh_ gupxx) cudaFree(Mh_ gupxx); + if(Mh_ gupxy) cudaFree(Mh_ gupxy); + if(Mh_ gupxz) cudaFree(Mh_ gupxz); + if(Mh_ gupyy) cudaFree(Mh_ gupyy); + if(Mh_ gupyz) cudaFree(Mh_ gupyz); + if(Mh_ gupzz) cudaFree(Mh_ gupzz); + if(Mh_ Gamxa) cudaFree(Mh_ Gamxa); + if(Mh_ Gamya) cudaFree(Mh_ Gamya); + if(Mh_ Gamza) cudaFree(Mh_ Gamza); + if(Mh_ alpn1) cudaFree(Mh_ alpn1); + if(Mh_ chin1) cudaFree(Mh_ chin1); + if(Mh_ fh) cudaFree(Mh_ fh); + if(Mh_ fh2) cudaFree(Mh_ fh2); + if(Mh_ gxx_rhs) cudaFree(Mh_ gxx_rhs); + if(Mh_ gyy_rhs) cudaFree(Mh_ gyy_rhs); + if(Mh_ gzz_rhs) cudaFree(Mh_ gzz_rhs); + + //-----------SS----------------- + if(Msh_ crho) cudaFree(Msh_ crho); + if(Msh_ sigma) cudaFree(Msh_ sigma); + if(Msh_ R) cudaFree(Msh_ R); + if(Msh_ drhodx) cudaFree(Msh_ drhodx); + if(Msh_ drhody) cudaFree(Msh_ drhody); + if(Msh_ drhodz) cudaFree(Msh_ drhodz); + if(Msh_ dsigmadx) cudaFree(Msh_ dsigmadx); + if(Msh_ dsigmady) cudaFree(Msh_ dsigmady); + if(Msh_ dsigmadz) cudaFree(Msh_ dsigmadz); + if(Msh_ dRdx) cudaFree(Msh_ dRdx); + if(Msh_ dRdy) cudaFree(Msh_ dRdy); + if(Msh_ dRdz) cudaFree(Msh_ dRdz); + if(Msh_ drhodxx) cudaFree(Msh_ drhodxx); + if(Msh_ drhodxy) cudaFree(Msh_ drhodxy); + if(Msh_ drhodxz) cudaFree(Msh_ drhodxz); + if(Msh_ drhodyy) cudaFree(Msh_ drhodyy); + if(Msh_ drhodyz) cudaFree(Msh_ drhodyz); + if(Msh_ drhodzz) cudaFree(Msh_ drhodzz); + if(Msh_ dsigmadxx) cudaFree(Msh_ dsigmadxx); + if(Msh_ dsigmadxy) cudaFree(Msh_ dsigmadxy); + if(Msh_ dsigmadxz) cudaFree(Msh_ dsigmadxz); + if(Msh_ dsigmadyy) cudaFree(Msh_ dsigmadyy); + if(Msh_ dsigmadyz) cudaFree(Msh_ dsigmadyz); + if(Msh_ dsigmadzz) cudaFree(Msh_ dsigmadzz); + if(Msh_ dRdxx) cudaFree(Msh_ dRdxx); + if(Msh_ dRdxy) cudaFree(Msh_ dRdxy); + if(Msh_ dRdxz) cudaFree(Msh_ dRdxz); + if(Msh_ dRdyy) cudaFree(Msh_ dRdyy); + if(Msh_ dRdyz) cudaFree(Msh_ dRdyz); + if(Msh_ dRdzz) cudaFree(Msh_ dRdzz); + if(Msh_ gx) cudaFree(Msh_ gx); + if(Msh_ gy) cudaFree(Msh_ gy); + if(Msh_ gz) cudaFree(Msh_ gz); + + if(Msh_ gxx) cudaFree(Msh_ gxx); + if(Msh_ gxy) cudaFree(Msh_ gxy); + if(Msh_ gxz) cudaFree(Msh_ gxz); + if(Msh_ gyy) cudaFree(Msh_ gyy); + if(Msh_ gyz) cudaFree(Msh_ gyz); + if(Msh_ gzz) cudaFree(Msh_ gzz); + +#if (GAUGE == 2 || GAUGE == 3 || GAUGE == 4 || GAUGE == 5 || GAUGE == 6 || GAUGE == 7) + if(Mh_ reta) cudaFree(Mh_ reta); + +#endif + + //if(Mh_ other_int) cudaFree(Mh_ other_int); + //if(Mh_ other_double) cudaFree(Mh_ other_double); + //cout<<"Address of meta:"<<&meta<>>(); + cudaDeviceSynchronize(); + + sub_fderivs_shc(sst,Mh_ betax,Mh_ fh,Mh_ betaxx,Mh_ betaxy,Mh_ betaxz,ass); + sub_fderivs_shc(sst,Mh_ betay,Mh_ fh,Mh_ betayx,Mh_ betayy,Mh_ betayz,sas); + sub_fderivs_shc(sst,Mh_ betaz,Mh_ fh,Mh_ betazx,Mh_ betazy,Mh_ betazz,ssa); + sub_fderivs_shc(sst,Mh_ chi,Mh_ fh,Mh_ chix,Mh_ chiy,Mh_ chiz, sss); + sub_fderivs_shc(sst,Mh_ Lap,Mh_ fh,Mh_ Lapx,Mh_ Lapy,Mh_ Lapz, sss); + sub_fderivs_shc(sst,Mh_ trK,Mh_ fh,Mh_ Kx,Mh_ Ky,Mh_ Kz, sss); + sub_fderivs_shc(sst,Mh_ dxx,Mh_ fh,Mh_ gxxx,Mh_ gxxy,Mh_ gxxz, sss); + sub_fderivs_shc(sst,Mh_ dyy,Mh_ fh,Mh_ gyyx,Mh_ gyyy,Mh_ gyyz, sss); + sub_fderivs_shc(sst,Mh_ dzz,Mh_ fh,Mh_ gzzx,Mh_ gzzy,Mh_ gzzz, sss); + sub_fderivs_shc(sst,Mh_ gxy,Mh_ fh,Mh_ gxyx,Mh_ gxyy,Mh_ gxyz, aas); + sub_fderivs_shc(sst,Mh_ gxz,Mh_ fh,Mh_ gxzx,Mh_ gxzy,Mh_ gxzz, asa); + sub_fderivs_shc(sst,Mh_ gyz,Mh_ fh,Mh_ gyzx,Mh_ gyzy,Mh_ gyzz, saa); + + compute_rhs_ss_part2<<>>(); + cudaDeviceSynchronize(); + + sub_fdderivs_shc(sst,Mh_ betax,Mh_ fh,Mh_ gxxx,Mh_ gxyx,Mh_ gxzx,Mh_ gyyx,Mh_ gyzx,Mh_ gzzx,ass); + sub_fdderivs_shc(sst,Mh_ betay,Mh_ fh,Mh_ gxxy,Mh_ gxyy,Mh_ gxzy,Mh_ gyyy,Mh_ gyzy,Mh_ gzzy,sas); + sub_fdderivs_shc(sst,Mh_ betaz,Mh_ fh,Mh_ gxxz,Mh_ gxyz,Mh_ gxzz,Mh_ gyyz,Mh_ gyzz,Mh_ gzzz,ssa); + sub_fderivs_shc( sst,Mh_ Gamx, Mh_ fh,Mh_ Gamxx, Mh_ Gamxy, Mh_ Gamxz,ass); + sub_fderivs_shc( sst,Mh_ Gamy, Mh_ fh,Mh_ Gamyx, Mh_ Gamyy, Mh_ Gamyz,sas); + sub_fderivs_shc( sst,Mh_ Gamz, Mh_ fh,Mh_ Gamzx, Mh_ Gamzy, Mh_ Gamzz,ssa); + + compute_rhs_ss_part3<<>>(); + cudaDeviceSynchronize(); + + computeRicci_ss(sst,Mh_ dxx,Mh_ Rxx,sss, meta); + computeRicci_ss(sst,Mh_ dyy,Mh_ Ryy,sss, meta); + computeRicci_ss(sst,Mh_ dzz,Mh_ Rzz,sss, meta); + computeRicci_ss(sst,Mh_ gxy,Mh_ Rxy,aas, meta); + computeRicci_ss(sst,Mh_ gxz,Mh_ Rxz,asa, meta); + computeRicci_ss(sst,Mh_ gyz,Mh_ Ryz,saa, meta); + cudaDeviceSynchronize(); + + compute_rhs_ss_part4<<>>(); + cudaDeviceSynchronize(); + + sub_fdderivs_shc(sst,Mh_ chi,Mh_ fh,Mh_ fxx,Mh_ fxy,Mh_ fxz,Mh_ fyy,Mh_ fyz,Mh_ fzz,sss); + + //cudaDeviceSynchronize(); + //compare_result_gpu(0,Mh_ chi,h_3D_SIZE[0]); + //compare_result_gpu(1,Mh_ chi,h_3D_SIZE[0]); + //compare_result_gpu(2,Mh_ fyz,h_3D_SIZE[0]); + + compute_rhs_ss_part5<<>>(); + cudaDeviceSynchronize(); + + sub_fdderivs_shc(sst,Mh_ Lap,Mh_ fh,Mh_ fxx,Mh_ fxy,Mh_ fxz,Mh_ fyy,Mh_ fyz,Mh_ fzz,sss); + + compute_rhs_ss_part6<<>>(); + cudaDeviceSynchronize(); + +#if (GAUGE == 2 || GAUGE == 3 || GAUGE == 4 || GAUGE == 5) + sub_fderivs_shc(sst,Mh_ chi,Mh_ fh, Mh_ dtSfx_rhs, Mh_ dtSfy_rhs, Mh_ dtSfz_rhs,sss); + compute_rhs_bssn_ss_part6_gauge<<>>(); +#endif + //sub_lopsided_ss(int& sst,double *src,double* dst,double *SOA) + sub_lopsided_ss(sst,Mh_ gxx,Mh_ gxx_rhs,sss); + sub_lopsided_ss(sst,Mh_ gxy,Mh_ gxy_rhs,aas); + sub_lopsided_ss(sst,Mh_ gxz,Mh_ gxz_rhs,asa); + sub_lopsided_ss(sst,Mh_ gyy,Mh_ gyy_rhs,sss); + sub_lopsided_ss(sst,Mh_ gyz,Mh_ gyz_rhs,saa); + sub_lopsided_ss(sst,Mh_ gzz,Mh_ gzz_rhs,sss); + sub_lopsided_ss(sst,Mh_ Axx,Mh_ Axx_rhs,sss); + sub_lopsided_ss(sst,Mh_ Axy,Mh_ Axy_rhs,aas); + sub_lopsided_ss(sst,Mh_ Axz,Mh_ Axz_rhs,asa); + sub_lopsided_ss(sst,Mh_ Ayy,Mh_ Ayy_rhs,sss); + sub_lopsided_ss(sst,Mh_ Ayz,Mh_ Ayz_rhs,saa); + sub_lopsided_ss(sst,Mh_ Azz,Mh_ Azz_rhs,sss); + sub_lopsided_ss(sst,Mh_ chi,Mh_ chi_rhs,sss); + sub_lopsided_ss(sst,Mh_ trK,Mh_ trK_rhs,sss); + sub_lopsided_ss(sst,Mh_ Gamx,Mh_ Gamx_rhs,ass); + sub_lopsided_ss(sst,Mh_ Gamy,Mh_ Gamy_rhs,sas); + sub_lopsided_ss(sst,Mh_ Gamz,Mh_ Gamz_rhs,ssa); + sub_lopsided_ss(sst,Mh_ Lap,Mh_ Lap_rhs,sss); +#if (GAUGE == 0 || GAUGE == 1 || GAUGE == 2 || GAUGE == 3 || GAUGE == 6 || GAUGE == 7) + sub_lopsided_ss(sst,Mh_ betax,Mh_ betax_rhs,ass); + sub_lopsided_ss(sst,Mh_ betay,Mh_ betay_rhs,sas); + sub_lopsided_ss(sst,Mh_ betaz,Mh_ betaz_rhs,ssa); +#endif +#if (GAUGE == 0 || GAUGE == 2 || GAUGE == 3 || GAUGE == 6 || GAUGE == 7) + sub_lopsided_ss(sst,Mh_ dtSfx,Mh_ dtSfx_rhs,ass); + sub_lopsided_ss(sst,Mh_ dtSfy,Mh_ dtSfy_rhs,sas); + sub_lopsided_ss(sst,Mh_ dtSfz,Mh_ dtSfz_rhs,ssa); +#endif + if(eps > 0){ + sub_kodis_ss(sst,Mh_ chi,Mh_ fh2, Mh_ chi_rhs,sss); + sub_kodis_ss(sst,Mh_ trK,Mh_ fh2, Mh_ trK_rhs,sss); + sub_kodis_ss(sst,Mh_ dxx,Mh_ fh2, Mh_ gxx_rhs,sss); + sub_kodis_ss(sst,Mh_ gxy,Mh_ fh2, Mh_ gxy_rhs,aas); + sub_kodis_ss(sst,Mh_ gxz,Mh_ fh2, Mh_ gxz_rhs,asa); + sub_kodis_ss(sst,Mh_ dyy,Mh_ fh2, Mh_ gyy_rhs,sss); + sub_kodis_ss(sst,Mh_ gyz,Mh_ fh2, Mh_ gyz_rhs,saa); + sub_kodis_ss(sst,Mh_ dzz,Mh_ fh2, Mh_ gzz_rhs,sss); + sub_kodis_ss(sst,Mh_ Axx,Mh_ fh2, Mh_ Axx_rhs,sss); + sub_kodis_ss(sst,Mh_ Axy,Mh_ fh2, Mh_ Axy_rhs,aas); + sub_kodis_ss(sst,Mh_ Axz,Mh_ fh2, Mh_ Axz_rhs,asa); + sub_kodis_ss(sst,Mh_ Ayy,Mh_ fh2, Mh_ Ayy_rhs,sss); + sub_kodis_ss(sst,Mh_ Ayz,Mh_ fh2, Mh_ Ayz_rhs,saa); + sub_kodis_ss(sst,Mh_ Azz,Mh_ fh2, Mh_ Azz_rhs,sss); + sub_kodis_ss(sst,Mh_ Gamx,Mh_ fh2, Mh_ Gamx_rhs,ass); + sub_kodis_ss(sst,Mh_ Gamy,Mh_ fh2, Mh_ Gamy_rhs,sas); + sub_kodis_ss(sst,Mh_ Gamz,Mh_ fh2, Mh_ Gamz_rhs,ssa); + sub_kodis_ss(sst,Mh_ Lap,Mh_ fh2, Mh_ Lap_rhs,sss); + sub_kodis_ss(sst,Mh_ betax,Mh_ fh2, Mh_ betax_rhs,ass); + sub_kodis_ss(sst,Mh_ betay,Mh_ fh2, Mh_ betay_rhs,sas); + sub_kodis_ss(sst,Mh_ betaz,Mh_ fh2, Mh_ betaz_rhs,ssa); +#if (GAUGE == 0 || GAUGE == 2 || GAUGE == 3 || GAUGE == 6 || GAUGE == 7) + sub_kodis_ss(sst,Mh_ dtSfx,Mh_ fh2, Mh_ dtSfx_rhs,ass); + sub_kodis_ss(sst,Mh_ dtSfy,Mh_ fh2, Mh_ dtSfy_rhs,sas); + sub_kodis_ss(sst,Mh_ dtSfz,Mh_ fh2, Mh_ dtSfz_rhs,ssa); +#endif + } + if(co == 0){ + compute_rhs_ss_part7<<>>(); + cudaDeviceSynchronize(); + + sub_fderivs_shc(sst,Mh_ Axx,Mh_ fh,Mh_ gxxx,Mh_ gxxy,Mh_ gxxz,sss); + sub_fderivs_shc(sst,Mh_ Axy,Mh_ fh,Mh_ gxyx,Mh_ gxyy,Mh_ gxyz,aas); + sub_fderivs_shc(sst,Mh_ Axz,Mh_ fh,Mh_ gxzx,Mh_ gxzy,Mh_ gxzz,asa); + sub_fderivs_shc(sst,Mh_ Ayy,Mh_ fh,Mh_ gyyx,Mh_ gyyy,Mh_ gyyz,sss); + sub_fderivs_shc(sst,Mh_ Ayz,Mh_ fh,Mh_ gyzx,Mh_ gyzy,Mh_ gyzz,saa); + sub_fderivs_shc(sst,Mh_ Azz,Mh_ fh,Mh_ gzzx,Mh_ gzzy,Mh_ gzzz,sss); + compute_rhs_ss_part8<<>>(); + cudaDeviceSynchronize(); + } + + // Z4C: set TZ_rhs = alpn1 * Hcon / 2 + constraint damping + kern_z4c_post<<>>(matrix_size); + cudaDeviceSynchronize(); + // Z4C: TZ advection (lopsided) and dissipation (kodis) + sub_lopsided_ss(sst,Mh_ TZ,Mh_ TZ_rhs,sss); + if(eps > ZEO) sub_kodis_ss(sst,Mh_ TZ,Mh_ fh2, Mh_ TZ_rhs,sss); + +#if (ABV == 1) + cout<<"TODO: bssn_gpu.cu::2373 (ABV == 1)"<