Integrate CUDA support into RK4 substep execution
This commit is contained in:
@@ -42,16 +42,16 @@ endif
|
||||
.for.o:
|
||||
$(f77) -c $< -o $@
|
||||
|
||||
.cu.o:
|
||||
$(Cu) $(CUDA_APP_FLAGS) -c $< -o $@ $(CUDA_LIB_PATH)
|
||||
|
||||
# CUDA rewrite of BSSN RHS (drop-in replacement for bssn_rhs_c + stencil helpers)
|
||||
bssn_rhs_cuda.o: bssn_rhs_cuda.cu bssn_rhs.h macrodef.h
|
||||
$(Cu) $(CUDA_APP_FLAGS) -c $< -o $@ $(CUDA_LIB_PATH)
|
||||
|
||||
# C rewrite of BSSN RHS kernel and helpers
|
||||
bssn_rhs_c.o: bssn_rhs_c.C
|
||||
${CXX} $(CXXAPPFLAGS) -c $< $(filein) -o $@
|
||||
# Suffix rule: compile a .cu source into the matching .o with $(Cu).
# NOTE(review): GNU make only treats this as a suffix rule if .cu is listed in
# .SUFFIXES; that declaration is not visible in this hunk -- confirm it exists.
.cu.o:
|
||||
$(Cu) $(CUDA_APP_FLAGS) -c $< -o $@ $(CUDA_LIB_PATH)
|
||||
|
||||
# CUDA rewrite of BSSN RHS (drop-in replacement for bssn_rhs_c + stencil helpers)
|
||||
bssn_rhs_cuda.o: bssn_rhs_cuda.cu bssn_rhs.h macrodef.h
|
||||
$(Cu) $(CUDA_APP_FLAGS) -c $< -o $@ $(CUDA_LIB_PATH)
|
||||
|
||||
# C rewrite of BSSN RHS kernel and helpers
|
||||
bssn_rhs_c.o: bssn_rhs_c.C
|
||||
${CXX} $(CXXAPPFLAGS) -c $< $(filein) -o $@
|
||||
|
||||
fderivs_c.o: fderivs_c.C
|
||||
${CXX} $(CXXAPPFLAGS) -c $< $(filein) -o $@
|
||||
@@ -83,46 +83,49 @@ TwoPunctures.o: TwoPunctures.C
|
||||
TwoPunctureABE.o: TwoPunctureABE.C
|
||||
${CXX} $(TP_OPTFLAGS) -qopenmp -c $< -o $@
|
||||
|
||||
# Input files
|
||||
|
||||
## CUDA BSSN RHS switch
|
||||
## 1 : use the rewritten CUDA bssn_rhs backend
|
||||
## 0 : keep the normal CPU/Fortran selection below
|
||||
USE_CUDA_BSSN ?= 0
|
||||
|
||||
## Kernel implementation switch (set USE_CXX_KERNELS=0 to fall back to Fortran)
|
||||
ifeq ($(USE_CXX_KERNELS),0)
|
||||
# Fortran mode: no C rewrite files; bssn_rhs.o is included via F90FILES below
|
||||
CFILES_CPU =
|
||||
else
|
||||
# C++ mode (default): C rewrite of bssn_rhs and helper kernels
|
||||
CFILES_CPU = bssn_rhs_c.o fderivs_c.o fdderivs_c.o kodiss_c.o lopsided_c.o lopsided_kodis_c.o
|
||||
endif
|
||||
|
||||
CFILES_CUDA_BSSN = bssn_rhs_cuda.o
|
||||
|
||||
ifeq ($(USE_CUDA_BSSN),1)
|
||||
CFILES = $(CFILES_CUDA_BSSN)
|
||||
else
|
||||
CFILES = $(CFILES_CPU)
|
||||
endif
|
||||
|
||||
## RK4 kernel switch (independent from USE_CXX_KERNELS)
|
||||
ifeq ($(USE_CXX_RK4),1)
|
||||
RK4_C_OBJ = rungekutta4_rout_c.o
|
||||
RK4_F90_OBJ =
|
||||
else
|
||||
RK4_C_OBJ =
|
||||
RK4_F90_OBJ = rungekutta4_rout.o
|
||||
endif
|
||||
|
||||
CFILES += $(RK4_C_OBJ)
|
||||
ABE_CUDA_CFILES = $(CFILES_CUDA_BSSN) $(RK4_C_OBJ)
|
||||
|
||||
ABE_LDLIBS = $(LDLIBS)
|
||||
ifeq ($(USE_CUDA_BSSN),1)
|
||||
ABE_LDLIBS += -lcudart $(CUDA_LIB_PATH)
|
||||
endif
|
||||
# Input files
|
||||
|
||||
## CUDA BSSN RHS switch
|
||||
## 1 : use the rewritten CUDA bssn_rhs backend
|
||||
## 0 : keep the normal CPU/Fortran selection below
|
||||
USE_CUDA_BSSN ?= 1
|
||||
|
||||
# Pass the switch value to ${CXX} as the preprocessor macro USE_CUDA_BSSN
# (CXXAPPFLAGS is used by the C++ compile rules and by the link lines).
CXXAPPFLAGS += -DUSE_CUDA_BSSN=$(USE_CUDA_BSSN)
|
||||
# Same macro for the $(Cu) compile rules, so .cu sources see the same value.
CUDA_APP_FLAGS += -DUSE_CUDA_BSSN=$(USE_CUDA_BSSN)
|
||||
|
||||
## Kernel implementation switch (set USE_CXX_KERNELS=0 to fall back to Fortran)
|
||||
ifeq ($(USE_CXX_KERNELS),0)
|
||||
# Fortran mode: no C rewrite files; bssn_rhs.o is included via F90FILES below
|
||||
CFILES_CPU =
|
||||
else
|
||||
# C++ mode (default): C rewrite of bssn_rhs and helper kernels
|
||||
CFILES_CPU = bssn_rhs_c.o fderivs_c.o fdderivs_c.o kodiss_c.o lopsided_c.o lopsided_kodis_c.o
|
||||
endif
|
||||
|
||||
CFILES_CUDA_BSSN = bssn_rhs_cuda.o
|
||||
|
||||
ifeq ($(USE_CUDA_BSSN),1)
|
||||
CFILES = $(CFILES_CUDA_BSSN)
|
||||
else
|
||||
CFILES = $(CFILES_CPU)
|
||||
endif
|
||||
|
||||
## RK4 kernel switch (independent from USE_CXX_KERNELS)
|
||||
ifeq ($(USE_CXX_RK4),1)
|
||||
RK4_C_OBJ = rungekutta4_rout_c.o
|
||||
RK4_F90_OBJ =
|
||||
else
|
||||
RK4_C_OBJ =
|
||||
RK4_F90_OBJ = rungekutta4_rout.o
|
||||
endif
|
||||
|
||||
CFILES += $(RK4_C_OBJ)
|
||||
# Object list for the ABE_CUDA target: the CUDA RHS object plus the selected
# C RK4 object. Assigned outside the USE_CUDA_BSSN conditional, so it does not
# depend on that switch.
ABE_CUDA_CFILES = $(CFILES_CUDA_BSSN) $(RK4_C_OBJ)
|
||||
|
||||
# Link libraries for the ABE target; starts from $(LDLIBS) and gets
# -lcudart $(CUDA_LIB_PATH) appended by the USE_CUDA_BSSN conditional that follows.
ABE_LDLIBS = $(LDLIBS)
|
||||
ifeq ($(USE_CUDA_BSSN),1)
|
||||
ABE_LDLIBS += -lcudart $(CUDA_LIB_PATH)
|
||||
endif
|
||||
|
||||
C++FILES = ABE.o Ansorg.o Block.o misc.o monitor.o Parallel.o MPatch.o var.o\
|
||||
cgh.o bssn_class.o surface_integral.o ShellPatch.o\
|
||||
@@ -171,8 +174,8 @@ TwoPunctureFILES = TwoPunctureABE.o TwoPunctures.o
|
||||
|
||||
#CUDAFILES = bssn_gpu.o bssn_gpu_rhs_ss.o
|
||||
|
||||
# file dependences
|
||||
$(C++FILES) $(C++FILES_GPU) $(F90FILES) $(CFILES) $(ABE_CUDA_CFILES) $(AHFDOBJS) $(CUDAFILES): macrodef.fh
|
||||
# file dependences
|
||||
$(C++FILES) $(C++FILES_GPU) $(F90FILES) $(CFILES) $(ABE_CUDA_CFILES) $(AHFDOBJS) $(CUDAFILES): macrodef.fh
|
||||
|
||||
$(C++FILES): Block.h enforce_algebra.h fmisc.h initial_puncture.h macrodef.h\
|
||||
misc.h monitor.h MyList.h Parallel.h MPatch.h prolongrestrict.h\
|
||||
@@ -195,7 +198,7 @@ $(C++FILES): Block.h enforce_algebra.h fmisc.h initial_puncture.h macrodef.h\
|
||||
|
||||
$(AHFDOBJS): cctk.h cctk_Config.h cctk_Types.h cctk_Constants.h myglobal.h
|
||||
|
||||
$(C++FILES) $(C++FILES_GPU) $(CFILES) $(ABE_CUDA_CFILES) $(AHFDOBJS) $(CUDAFILES): macrodef.h
|
||||
$(C++FILES) $(C++FILES_GPU) $(CFILES) $(ABE_CUDA_CFILES) $(AHFDOBJS) $(CUDAFILES): macrodef.h
|
||||
|
||||
# Every object listed in $(TwoPunctureFILES) must be rebuilt when TwoPunctures.h
# changes. The variable has to be expanded; a bare word would only declare a
# prerequisite for a file literally named "TwoPunctureFILES".
$(TwoPunctureFILES): TwoPunctures.h
|
||||
|
||||
@@ -203,18 +206,18 @@ $(CUDAFILES): bssn_gpu.h gpu_mem.h gpu_rhsSS_mem.h
|
||||
|
||||
misc.o : zbesh.o
|
||||
|
||||
# projects
|
||||
ABE: $(C++FILES) $(CFILES) $(F90FILES) $(F77FILES) $(AHFDOBJS)
|
||||
$(CLINKER) $(CXXAPPFLAGS) -o $@ $(C++FILES) $(CFILES) $(F90FILES) $(F77FILES) $(AHFDOBJS) $(ABE_LDLIBS)
|
||||
|
||||
ABE_CUDA: $(C++FILES) $(ABE_CUDA_CFILES) $(F90FILES) $(F77FILES) $(AHFDOBJS)
|
||||
$(CLINKER) $(CXXAPPFLAGS) -o $@ $(C++FILES) $(ABE_CUDA_CFILES) $(F90FILES) $(F77FILES) $(AHFDOBJS) $(LDLIBS) -lcudart $(CUDA_LIB_PATH)
|
||||
|
||||
#ABEGPU: $(C++FILES_GPU) $(CFILES) $(F90FILES) $(F77FILES) $(AHFDOBJS) $(CUDAFILES)
|
||||
# $(CLINKER) $(CXXAPPFLAGS) -o $@ $(C++FILES_GPU) $(CFILES) $(F90FILES) $(F77FILES) $(AHFDOBJS) $(CUDAFILES) $(LDLIBS)
|
||||
# projects
|
||||
ABE: $(C++FILES) $(CFILES) $(F90FILES) $(F77FILES) $(AHFDOBJS)
|
||||
$(CLINKER) $(CXXAPPFLAGS) -o $@ $(C++FILES) $(CFILES) $(F90FILES) $(F77FILES) $(AHFDOBJS) $(ABE_LDLIBS)
|
||||
|
||||
ABE_CUDA: $(C++FILES) $(ABE_CUDA_CFILES) $(F90FILES) $(F77FILES) $(AHFDOBJS)
|
||||
$(CLINKER) $(CXXAPPFLAGS) -o $@ $(C++FILES) $(ABE_CUDA_CFILES) $(F90FILES) $(F77FILES) $(AHFDOBJS) $(LDLIBS) -lcudart $(CUDA_LIB_PATH)
|
||||
|
||||
#ABEGPU: $(C++FILES_GPU) $(CFILES) $(F90FILES) $(F77FILES) $(AHFDOBJS) $(CUDAFILES)
|
||||
# $(CLINKER) $(CXXAPPFLAGS) -o $@ $(C++FILES_GPU) $(CFILES) $(F90FILES) $(F77FILES) $(AHFDOBJS) $(CUDAFILES) $(LDLIBS)
|
||||
|
||||
TwoPunctureABE: $(TwoPunctureFILES)
|
||||
$(CLINKER) $(TP_OPTFLAGS) -qopenmp -o $@ $(TwoPunctureFILES) $(LDLIBS)
|
||||
|
||||
clean:
|
||||
rm *.o ABE ABE_CUDA ABEGPU TwoPunctureABE make.log -f
|
||||
clean:
|
||||
# -f is placed before the operands: a POSIX/BSD rm would otherwise take a
# trailing "-f" as a file name and fail on build products that do not exist.
rm -f *.o ABE ABE_CUDA ABEGPU TwoPunctureABE make.log
|
||||
|
||||
Reference in New Issue
Block a user