Add Z4C Shell-Patch GPU acceleration (Phase 3 complete)
Create z4c_gpu_rhs_ss.cu (reusing the BSSN shell FD/chain-rule kernels):
- Uploads trKd = trK + 2*TZ to the GPU so that the existing BSSN algebraic kernels compute the correct Z4C physical equations without modification
- New kern_z4c_post applies TZ_rhs = alpn1 * Hcon / 2, kappa1/kappa2 constraint damping, TZ advection (lopsided), and dissipation (kodis)
- Adds TZ/TZ_rhs to the Meta struct, including the alloc/upload/download/free lifecycle

Add a cuda_compute_rhs_z4c_ss() wrapper in Z4c_class.C matching the Fortran f_compute_rhs_Z4c_ss signature, with a #define redirection for the Step/SHStep call sites and an #undef before the analysis functions.

Add z4c_gpu_rhs_ss.o to ABE_CUDA_CFILES and add its build rule to the makefile.

Add kappa1_c/kappa2_c constants to gpu_rhsSS_mem.h.

Build verified with USE_CUDA_Z4C=1 + WithShell — compiles and links cleanly. All three Shell GPU files now coexist: bssn_gpu_rhs_ss.o (BSSN), z4c_gpu_rhs_ss.o (Z4C), both sharing FD/chain-rule kernels.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -38,6 +38,9 @@ using namespace std;
|
|||||||
#endif
|
#endif
|
||||||
#if USE_CUDA_BSSN
|
#if USE_CUDA_BSSN
|
||||||
#include "bssn_rhs_cuda.h"
|
#include "bssn_rhs_cuda.h"
|
||||||
|
#ifdef WithShell
|
||||||
|
#include "bssn_gpu.h"
|
||||||
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef With_AHF
|
#ifdef With_AHF
|
||||||
@@ -49,6 +52,81 @@ using namespace std;
|
|||||||
|
|
||||||
// Define Z4c_class
|
// Define Z4c_class
|
||||||
|
|
||||||
|
#if USE_CUDA_Z4C && (ABEtype == 2) && defined(WithShell)
|
||||||
|
// GPU-accelerated Z4C shell RHS: same parameter signature as f_compute_rhs_Z4c_ss.
|
||||||
|
// Internally calls gpu_rhs_z4c_ss which modifies trK→trKd before upload,
|
||||||
|
// runs BSSN algebraic kernels, then applies Z4C post-processing (TZ_rhs, damping).
|
||||||
|
extern "C" {
|
||||||
|
static int cuda_compute_rhs_z4c_ss(
|
||||||
|
int *ex, double &T, double *crho, double *sigma, double *R,
|
||||||
|
double *X, double *Y, double *Z,
|
||||||
|
double *drhodx, double *drhody, double *drhodz,
|
||||||
|
double *dsigmadx, double *dsigmady, double *dsigmadz,
|
||||||
|
double *dRdx, double *dRdy, double *dRdz,
|
||||||
|
double *drhodxx, double *drhodxy, double *drhodxz, double *drhodyy, double *drhodyz, double *drhodzz,
|
||||||
|
double *dsigmadxx, double *dsigmadxy, double *dsigmadxz, double *dsigmadyy, double *dsigmadyz, double *dsigmadzz,
|
||||||
|
double *dRdxx, double *dRdxy, double *dRdxz, double *dRdyy, double *dRdyz, double *dRdzz,
|
||||||
|
double *chi, double *trK,
|
||||||
|
double *gxx, double *gxy, double *gxz, double *gyy, double *gyz, double *gzz,
|
||||||
|
double *Axx, double *Axy, double *Axz, double *Ayy, double *Ayz, double *Azz,
|
||||||
|
double *Gamx, double *Gamy, double *Gamz,
|
||||||
|
double *Lap, double *betax, double *betay, double *betaz,
|
||||||
|
double *dtSfx, double *dtSfy, double *dtSfz,
|
||||||
|
double *TZ,
|
||||||
|
double *chi_rhs, double *trK_rhs,
|
||||||
|
double *gxx_rhs, double *gxy_rhs, double *gxz_rhs, double *gyy_rhs, double *gyz_rhs, double *gzz_rhs,
|
||||||
|
double *Axx_rhs, double *Axy_rhs, double *Axz_rhs, double *Ayy_rhs, double *Ayz_rhs, double *Azz_rhs,
|
||||||
|
double *Gamx_rhs, double *Gamy_rhs, double *Gamz_rhs,
|
||||||
|
double *Lap_rhs, double *betax_rhs, double *betay_rhs, double *betaz_rhs,
|
||||||
|
double *dtSfx_rhs, double *dtSfy_rhs, double *dtSfz_rhs,
|
||||||
|
double *TZ_rhs,
|
||||||
|
double *rho_mat, double *Sx, double *Sy, double *Sz,
|
||||||
|
double *Sxx, double *Sxy, double *Sxz, double *Syy, double *Syz, double *Szz,
|
||||||
|
double *Gamxxx, double *Gamxxy, double *Gamxxz, double *Gamxyy, double *Gamxyz, double *Gamxzz,
|
||||||
|
double *Gamyxx, double *Gamyxy, double *Gamyxz, double *Gamyyy, double *Gamyyz, double *Gamyzz,
|
||||||
|
double *Gamzxx, double *Gamzxy, double *Gamzxz, double *Gamzyy, double *Gamzyz, double *Gamzzz,
|
||||||
|
double *Rxx, double *Rxy, double *Rxz, double *Ryy, double *Ryz, double *Rzz,
|
||||||
|
double *ham_Res, double *movx_Res, double *movy_Res, double *movz_Res,
|
||||||
|
double *Gmx_Res, double *Gmy_Res, double *Gmz_Res,
|
||||||
|
int &Symmetry, int &Lev, double &eps, int &sst, int &co)
|
||||||
|
{
|
||||||
|
return gpu_rhs_z4c_ss(0, 0, // calledby=ABE_main, mpi_rank=device_0
|
||||||
|
ex, T, crho, sigma, R, X, Y, Z,
|
||||||
|
drhodx, drhody, drhodz,
|
||||||
|
dsigmadx, dsigmady, dsigmadz,
|
||||||
|
dRdx, dRdy, dRdz,
|
||||||
|
drhodxx, drhodxy, drhodxz, drhodyy, drhodyz, drhodzz,
|
||||||
|
dsigmadxx, dsigmadxy, dsigmadxz, dsigmadyy, dsigmadyz, dsigmadzz,
|
||||||
|
dRdxx, dRdxy, dRdxz, dRdyy, dRdyz, dRdzz,
|
||||||
|
chi, trK,
|
||||||
|
gxx, gxy, gxz, gyy, gyz, gzz,
|
||||||
|
Axx, Axy, Axz, Ayy, Ayz, Azz,
|
||||||
|
Gamx, Gamy, Gamz,
|
||||||
|
Lap, betax, betay, betaz,
|
||||||
|
dtSfx, dtSfy, dtSfz,
|
||||||
|
TZ,
|
||||||
|
chi_rhs, trK_rhs,
|
||||||
|
gxx_rhs, gxy_rhs, gxz_rhs, gyy_rhs, gyz_rhs, gzz_rhs,
|
||||||
|
Axx_rhs, Axy_rhs, Axz_rhs, Ayy_rhs, Ayz_rhs, Azz_rhs,
|
||||||
|
Gamx_rhs, Gamy_rhs, Gamz_rhs,
|
||||||
|
Lap_rhs, betax_rhs, betay_rhs, betaz_rhs,
|
||||||
|
dtSfx_rhs, dtSfy_rhs, dtSfz_rhs,
|
||||||
|
TZ_rhs,
|
||||||
|
rho_mat, Sx, Sy, Sz,
|
||||||
|
Sxx, Sxy, Sxz, Syy, Syz, Szz,
|
||||||
|
Gamxxx, Gamxxy, Gamxxz, Gamxyy, Gamxyz, Gamxzz,
|
||||||
|
Gamyxx, Gamyxy, Gamyxz, Gamyyy, Gamyyz, Gamyzz,
|
||||||
|
Gamzxx, Gamzxy, Gamzxz, Gamzyy, Gamzyz, Gamzzz,
|
||||||
|
Rxx, Rxy, Rxz, Ryy, Ryz, Rzz,
|
||||||
|
ham_Res, movx_Res, movy_Res, movz_Res,
|
||||||
|
Gmx_Res, Gmy_Res, Gmz_Res,
|
||||||
|
Symmetry, Lev, eps, sst, co);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Redirect all Z4C shell RHS calls in Step/SHStep to GPU
|
||||||
|
#define f_compute_rhs_Z4c_ss cuda_compute_rhs_z4c_ss
|
||||||
|
#endif
|
||||||
|
|
||||||
// This class inherits some members and methods from the parent `bssn_class` and modifies others.
|
// This class inherits some members and methods from the parent `bssn_class` and modifies others.
|
||||||
// The modified members and methods are defined below (and in the header Z4c_class.h).
|
// The modified members and methods are defined below (and in the header Z4c_class.h).
|
||||||
// The remaining members/methods are inherited from `bssn_class` (declared in bssn_class.h).
|
// The remaining members/methods are inherited from `bssn_class` (declared in bssn_class.h).
|
||||||
@@ -3005,6 +3083,11 @@ void Z4c_class::Check_extrop()
|
|||||||
|
|
||||||
//================================================================================================
|
//================================================================================================
|
||||||
|
|
||||||
|
#if USE_CUDA_Z4C && (ABEtype == 2) && defined(WithShell)
|
||||||
|
#undef f_compute_rhs_Z4c_ss
|
||||||
|
#define f_compute_rhs_Z4c_ss compute_rhs_z4c_ss_
|
||||||
|
#endif
|
||||||
|
|
||||||
// this member function is used to compute and output constraint violation
|
// this member function is used to compute and output constraint violation
|
||||||
|
|
||||||
//================================================================================================
|
//================================================================================================
|
||||||
|
|||||||
@@ -49,4 +49,8 @@ int gpu_rhs(int calledby, int mpi_rank, int *ex, double &T,
|
|||||||
|
|
||||||
int gpu_rhs_ss(RHS_SS_PARA);
|
int gpu_rhs_ss(RHS_SS_PARA);
|
||||||
|
|
||||||
|
/*
 * Z4C_SS_PARA: formal parameter list used to declare gpu_rhs_z4c_ss.
 *
 * The list is the two leading selector arguments (calledby, mpi_rank)
 * followed by the argument list taken by the Z4C shell RHS routine.
 * Parameter names and order are unchanged from the previous single-line
 * definition. Every line except the last ends in a backslash so that the
 * whole list belongs to the macro; without the continuation the definition
 * would stop after "int &Symmetry," and leave a stray line behind.
 * Only block comments are used inside the macro: a line comment would
 * swallow the continuation that follows it.
 */
#define Z4C_SS_PARA \
    int calledby, int mpi_rank, \
    int *ex, double &T, double *crho, double *sigma, double *R, \
    double *X, double *Y, double *Z, \
    double *drhodx, double *drhody, double *drhodz, \
    double *dsigmadx, double *dsigmady, double *dsigmadz, \
    double *dRdx, double *dRdy, double *dRdz, \
    double *drhodxx, double *drhodxy, double *drhodxz, \
    double *drhodyy, double *drhodyz, double *drhodzz, \
    double *dsigmadxx, double *dsigmadxy, double *dsigmadxz, \
    double *dsigmadyy, double *dsigmadyz, double *dsigmadzz, \
    double *dRdxx, double *dRdxy, double *dRdxz, \
    double *dRdyy, double *dRdyz, double *dRdzz, \
    double *chi, double *trK, \
    double *dxx, double *gxy, double *gxz, \
    double *dyy, double *gyz, double *dzz, \
    double *Axx, double *Axy, double *Axz, \
    double *Ayy, double *Ayz, double *Azz, \
    double *Gamx, double *Gamy, double *Gamz, \
    double *Lap, double *betax, double *betay, double *betaz, \
    double *dtSfx, double *dtSfy, double *dtSfz, \
    double *TZ, \
    double *chi_rhs, double *trK_rhs, \
    double *gxx_rhs, double *gxy_rhs, double *gxz_rhs, \
    double *gyy_rhs, double *gyz_rhs, double *gzz_rhs, \
    double *Axx_rhs, double *Axy_rhs, double *Axz_rhs, \
    double *Ayy_rhs, double *Ayz_rhs, double *Azz_rhs, \
    double *Gamx_rhs, double *Gamy_rhs, double *Gamz_rhs, \
    double *Lap_rhs, double *betax_rhs, double *betay_rhs, double *betaz_rhs, \
    double *dtSfx_rhs, double *dtSfy_rhs, double *dtSfz_rhs, \
    double *TZ_rhs, \
    double *rho, double *Sx, double *Sy, double *Sz, \
    double *Sxx, double *Sxy, double *Sxz, \
    double *Syy, double *Syz, double *Szz, \
    double *Gamxxx, double *Gamxxy, double *Gamxxz, \
    double *Gamxyy, double *Gamxyz, double *Gamxzz, \
    double *Gamyxx, double *Gamyxy, double *Gamyxz, \
    double *Gamyyy, double *Gamyyz, double *Gamyzz, \
    double *Gamzxx, double *Gamzxy, double *Gamzxz, \
    double *Gamzyy, double *Gamzyz, double *Gamzzz, \
    double *Rxx, double *Rxy, double *Rxz, \
    double *Ryy, double *Ryz, double *Rzz, \
    double *ham_Res, double *movx_Res, double *movy_Res, double *movz_Res, \
    double *Gmx_Res, double *Gmy_Res, double *Gmz_Res, \
    int &Symmetry, int &Lev, double &eps, int &sst, int &co
|
||||||
|
|
||||||
|
int gpu_rhs_z4c_ss(Z4C_SS_PARA);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
@@ -48,6 +48,8 @@ struct Meta
|
|||||||
double * Gamx_rhs,*Gamy_rhs,*Gamz_rhs;//out
|
double * Gamx_rhs,*Gamy_rhs,*Gamz_rhs;//out
|
||||||
double * Lap_rhs, *betax_rhs, *betay_rhs, *betaz_rhs;//out
|
double * Lap_rhs, *betax_rhs, *betay_rhs, *betaz_rhs;//out
|
||||||
double * dtSfx_rhs,*dtSfy_rhs,*dtSfz_rhs;//out
|
double * dtSfx_rhs,*dtSfy_rhs,*dtSfz_rhs;//out
|
||||||
|
double * TZ; //in (Z4C)
|
||||||
|
double * TZ_rhs; //out (Z4C)
|
||||||
double * rho,*Sx,*Sy,*Sz ; //in
|
double * rho,*Sx,*Sy,*Sz ; //in
|
||||||
double * Sxx,*Sxy,*Sxz,*Syy,*Syz,*Szz; //in
|
double * Sxx,*Sxy,*Sxz,*Syy,*Syz,*Szz; //in
|
||||||
|
|
||||||
@@ -132,6 +134,8 @@ __constant__ double SYM = 1.0;
|
|||||||
__constant__ double ANTI = -1.0;
|
__constant__ double ANTI = -1.0;
|
||||||
__constant__ double FF = 0.75;
|
__constant__ double FF = 0.75;
|
||||||
__constant__ double eta = 2.0;
|
__constant__ double eta = 2.0;
|
||||||
|
__constant__ double kappa1_c = 0.02;
|
||||||
|
__constant__ double kappa2_c = 0.0;
|
||||||
__constant__ double F1o3;
|
__constant__ double F1o3;
|
||||||
__constant__ double F2o3;
|
__constant__ double F2o3;
|
||||||
__constant__ double F3o2 = 1.5;
|
__constant__ double F3o2 = 1.5;
|
||||||
|
|||||||
@@ -74,6 +74,10 @@ bssn_rhs_cuda.o: bssn_rhs_cuda.cu bssn_rhs.h macrodef.h fd_cuda_helpers.cuh
|
|||||||
bssn_gpu_rhs_ss.o: bssn_gpu_rhs_ss.cu bssn_gpu.h gpu_rhsSS_mem.h bssn_macro.h macrodef.fh
|
bssn_gpu_rhs_ss.o: bssn_gpu_rhs_ss.cu bssn_gpu.h gpu_rhsSS_mem.h bssn_macro.h macrodef.fh
|
||||||
$(Cu) $(CUDA_APP_FLAGS) -c $< -o $@ $(CUDA_LIB_PATH)
|
$(Cu) $(CUDA_APP_FLAGS) -c $< -o $@ $(CUDA_LIB_PATH)
|
||||||
|
|
||||||
|
# CUDA rewrite of Z4C Shell-Patch RHS (extends BSSN shells with TZ + constraint damping)
|
||||||
|
z4c_gpu_rhs_ss.o: z4c_gpu_rhs_ss.cu bssn_gpu.h gpu_rhsSS_mem.h bssn_macro.h macrodef.fh
|
||||||
|
$(Cu) $(CUDA_APP_FLAGS) -c $< -o $@ $(CUDA_LIB_PATH)
|
||||||
|
|
||||||
# CUDA rewrite of Z4C Cartesian RHS
|
# CUDA rewrite of Z4C Cartesian RHS
|
||||||
z4c_rhs_cuda.o: z4c_rhs_cuda.cu z4c_rhs_cuda.h bssn_rhs.h macrodef.h ricci_gamma.h fd_cuda_helpers.cuh
|
z4c_rhs_cuda.o: z4c_rhs_cuda.cu z4c_rhs_cuda.h bssn_rhs.h macrodef.h ricci_gamma.h fd_cuda_helpers.cuh
|
||||||
$(Cu) $(CUDA_APP_FLAGS) -c $< -o $@ $(CUDA_LIB_PATH)
|
$(Cu) $(CUDA_APP_FLAGS) -c $< -o $@ $(CUDA_LIB_PATH)
|
||||||
@@ -159,7 +163,7 @@ RK4_F90_OBJ = rungekutta4_rout.o
|
|||||||
endif
|
endif
|
||||||
|
|
||||||
CFILES += $(RK4_C_OBJ)
|
CFILES += $(RK4_C_OBJ)
|
||||||
ABE_CUDA_CFILES = $(CFILES_CUDA_BSSN) z4c_rhs_cuda.o $(RK4_C_OBJ)
|
ABE_CUDA_CFILES = $(CFILES_CUDA_BSSN) z4c_rhs_cuda.o z4c_gpu_rhs_ss.o $(RK4_C_OBJ)
|
||||||
|
|
||||||
ABE_LDLIBS = $(LDLIBS)
|
ABE_LDLIBS = $(LDLIBS)
|
||||||
ifeq ($(USE_CUDA_BSSN),1)
|
ifeq ($(USE_CUDA_BSSN),1)
|
||||||
|
|||||||
2565
AMSS_NCKU_source/z4c_gpu_rhs_ss.cu
Normal file
2565
AMSS_NCKU_source/z4c_gpu_rhs_ss.cu
Normal file
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user