Integrate CUDA support into RK4 substep execution

This commit is contained in:
2026-04-12 22:11:44 +08:00
parent 86a683de26
commit 4fa12a2009
4 changed files with 1161 additions and 279 deletions

View File

@@ -42,16 +42,16 @@ endif
.for.o:
$(f77) -c $< -o $@
.cu.o:
$(Cu) $(CUDA_APP_FLAGS) -c $< -o $@ $(CUDA_LIB_PATH)
# CUDA rewrite of BSSN RHS (drop-in replacement for bssn_rhs_c + stencil helpers)
bssn_rhs_cuda.o: bssn_rhs_cuda.cu bssn_rhs.h macrodef.h
$(Cu) $(CUDA_APP_FLAGS) -c $< -o $@ $(CUDA_LIB_PATH)
# C rewrite of BSSN RHS kernel and helpers
bssn_rhs_c.o: bssn_rhs_c.C
${CXX} $(CXXAPPFLAGS) -c $< $(filein) -o $@
.cu.o:
$(Cu) $(CUDA_APP_FLAGS) -c $< -o $@ $(CUDA_LIB_PATH)
# CUDA rewrite of BSSN RHS (drop-in replacement for bssn_rhs_c + stencil helpers)
bssn_rhs_cuda.o: bssn_rhs_cuda.cu bssn_rhs.h macrodef.h
$(Cu) $(CUDA_APP_FLAGS) -c $< -o $@ $(CUDA_LIB_PATH)
# C rewrite of BSSN RHS kernel and helpers
bssn_rhs_c.o: bssn_rhs_c.C
${CXX} $(CXXAPPFLAGS) -c $< $(filein) -o $@
fderivs_c.o: fderivs_c.C
${CXX} $(CXXAPPFLAGS) -c $< $(filein) -o $@
@@ -83,46 +83,49 @@ TwoPunctures.o: TwoPunctures.C
TwoPunctureABE.o: TwoPunctureABE.C
${CXX} $(TP_OPTFLAGS) -qopenmp -c $< -o $@
# Input files
## CUDA BSSN RHS switch
## 1 : use the rewritten CUDA bssn_rhs backend
## 0 : keep the normal CPU/Fortran selection below
USE_CUDA_BSSN ?= 0
## Kernel implementation switch (set USE_CXX_KERNELS=0 to fall back to Fortran)
ifeq ($(USE_CXX_KERNELS),0)
# Fortran mode: no C rewrite files; bssn_rhs.o is included via F90FILES below
CFILES_CPU =
else
# C++ mode (default): C rewrite of bssn_rhs and helper kernels
CFILES_CPU = bssn_rhs_c.o fderivs_c.o fdderivs_c.o kodiss_c.o lopsided_c.o lopsided_kodis_c.o
endif
CFILES_CUDA_BSSN = bssn_rhs_cuda.o
ifeq ($(USE_CUDA_BSSN),1)
CFILES = $(CFILES_CUDA_BSSN)
else
CFILES = $(CFILES_CPU)
endif
## RK4 kernel switch (independent from USE_CXX_KERNELS)
ifeq ($(USE_CXX_RK4),1)
RK4_C_OBJ = rungekutta4_rout_c.o
RK4_F90_OBJ =
else
RK4_C_OBJ =
RK4_F90_OBJ = rungekutta4_rout.o
endif
CFILES += $(RK4_C_OBJ)
ABE_CUDA_CFILES = $(CFILES_CUDA_BSSN) $(RK4_C_OBJ)
ABE_LDLIBS = $(LDLIBS)
ifeq ($(USE_CUDA_BSSN),1)
ABE_LDLIBS += -lcudart $(CUDA_LIB_PATH)
endif
# Input files
## CUDA BSSN RHS switch
## 1 : use the rewritten CUDA bssn_rhs backend
## 0 : keep the normal CPU/Fortran selection below
USE_CUDA_BSSN ?= 1
CXXAPPFLAGS += -DUSE_CUDA_BSSN=$(USE_CUDA_BSSN)
CUDA_APP_FLAGS += -DUSE_CUDA_BSSN=$(USE_CUDA_BSSN)
## Kernel implementation switch (set USE_CXX_KERNELS=0 to fall back to Fortran)
ifeq ($(USE_CXX_KERNELS),0)
# Fortran mode: no C rewrite files; bssn_rhs.o is included via F90FILES below
CFILES_CPU =
else
# C++ mode (default): C rewrite of bssn_rhs and helper kernels
CFILES_CPU = bssn_rhs_c.o fderivs_c.o fdderivs_c.o kodiss_c.o lopsided_c.o lopsided_kodis_c.o
endif
CFILES_CUDA_BSSN = bssn_rhs_cuda.o
ifeq ($(USE_CUDA_BSSN),1)
CFILES = $(CFILES_CUDA_BSSN)
else
CFILES = $(CFILES_CPU)
endif
## RK4 kernel switch (independent from USE_CXX_KERNELS)
ifeq ($(USE_CXX_RK4),1)
RK4_C_OBJ = rungekutta4_rout_c.o
RK4_F90_OBJ =
else
RK4_C_OBJ =
RK4_F90_OBJ = rungekutta4_rout.o
endif
CFILES += $(RK4_C_OBJ)
ABE_CUDA_CFILES = $(CFILES_CUDA_BSSN) $(RK4_C_OBJ)
ABE_LDLIBS = $(LDLIBS)
ifeq ($(USE_CUDA_BSSN),1)
ABE_LDLIBS += -lcudart $(CUDA_LIB_PATH)
endif
C++FILES = ABE.o Ansorg.o Block.o misc.o monitor.o Parallel.o MPatch.o var.o\
cgh.o bssn_class.o surface_integral.o ShellPatch.o\
@@ -171,8 +174,8 @@ TwoPunctureFILES = TwoPunctureABE.o TwoPunctures.o
#CUDAFILES = bssn_gpu.o bssn_gpu_rhs_ss.o
# file dependences
$(C++FILES) $(C++FILES_GPU) $(F90FILES) $(CFILES) $(ABE_CUDA_CFILES) $(AHFDOBJS) $(CUDAFILES): macrodef.fh
# file dependences
$(C++FILES) $(C++FILES_GPU) $(F90FILES) $(CFILES) $(ABE_CUDA_CFILES) $(AHFDOBJS) $(CUDAFILES): macrodef.fh
$(C++FILES): Block.h enforce_algebra.h fmisc.h initial_puncture.h macrodef.h\
misc.h monitor.h MyList.h Parallel.h MPatch.h prolongrestrict.h\
@@ -195,7 +198,7 @@ $(C++FILES): Block.h enforce_algebra.h fmisc.h initial_puncture.h macrodef.h\
$(AHFDOBJS): cctk.h cctk_Config.h cctk_Types.h cctk_Constants.h myglobal.h
$(C++FILES) $(C++FILES_GPU) $(CFILES) $(ABE_CUDA_CFILES) $(AHFDOBJS) $(CUDAFILES): macrodef.h
$(C++FILES) $(C++FILES_GPU) $(CFILES) $(ABE_CUDA_CFILES) $(AHFDOBJS) $(CUDAFILES): macrodef.h
TwoPunctureFILES: TwoPunctures.h
@@ -203,18 +206,18 @@ $(CUDAFILES): bssn_gpu.h gpu_mem.h gpu_rhsSS_mem.h
misc.o : zbesh.o
# projects
ABE: $(C++FILES) $(CFILES) $(F90FILES) $(F77FILES) $(AHFDOBJS)
$(CLINKER) $(CXXAPPFLAGS) -o $@ $(C++FILES) $(CFILES) $(F90FILES) $(F77FILES) $(AHFDOBJS) $(ABE_LDLIBS)
ABE_CUDA: $(C++FILES) $(ABE_CUDA_CFILES) $(F90FILES) $(F77FILES) $(AHFDOBJS)
$(CLINKER) $(CXXAPPFLAGS) -o $@ $(C++FILES) $(ABE_CUDA_CFILES) $(F90FILES) $(F77FILES) $(AHFDOBJS) $(LDLIBS) -lcudart $(CUDA_LIB_PATH)
#ABEGPU: $(C++FILES_GPU) $(CFILES) $(F90FILES) $(F77FILES) $(AHFDOBJS) $(CUDAFILES)
# $(CLINKER) $(CXXAPPFLAGS) -o $@ $(C++FILES_GPU) $(CFILES) $(F90FILES) $(F77FILES) $(AHFDOBJS) $(CUDAFILES) $(LDLIBS)
# projects
ABE: $(C++FILES) $(CFILES) $(F90FILES) $(F77FILES) $(AHFDOBJS)
$(CLINKER) $(CXXAPPFLAGS) -o $@ $(C++FILES) $(CFILES) $(F90FILES) $(F77FILES) $(AHFDOBJS) $(ABE_LDLIBS)
ABE_CUDA: $(C++FILES) $(ABE_CUDA_CFILES) $(F90FILES) $(F77FILES) $(AHFDOBJS)
$(CLINKER) $(CXXAPPFLAGS) -o $@ $(C++FILES) $(ABE_CUDA_CFILES) $(F90FILES) $(F77FILES) $(AHFDOBJS) $(LDLIBS) -lcudart $(CUDA_LIB_PATH)
#ABEGPU: $(C++FILES_GPU) $(CFILES) $(F90FILES) $(F77FILES) $(AHFDOBJS) $(CUDAFILES)
# $(CLINKER) $(CXXAPPFLAGS) -o $@ $(C++FILES_GPU) $(CFILES) $(F90FILES) $(F77FILES) $(AHFDOBJS) $(CUDAFILES) $(LDLIBS)
TwoPunctureABE: $(TwoPunctureFILES)
$(CLINKER) $(TP_OPTFLAGS) -qopenmp -o $@ $(TwoPunctureFILES) $(LDLIBS)
clean:
rm *.o ABE ABE_CUDA ABEGPU TwoPunctureABE make.log -f
clean:
rm *.o ABE ABE_CUDA ABEGPU TwoPunctureABE make.log -f