Add C implementations of shell-patch derivative kernels (WithShell support)
New files provide C equivalents of Fortran diff_new_sh.f90 and kodiss_sh.f90: - fderivs_sh_c.C: first derivatives in shell (rho, sigma, R) coords - fdderivs_sh_c.C: second derivatives in shell coords - fderivs_shc_c.C: shell first derivs + chain rule to Cartesian - fdderivs_shc_c.C: shell second derivs + chain rule to Cartesian - kodiss_sh_c.C: Kreiss-Oliger dissipation on shell patches Also add symmetry_stbd() C implementation and shell fh indexing to share_func.h. Controlled by USE_CXX_SHELL_KERNELS switch (default 0, experimental). Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -40,11 +40,12 @@ POLINT6_FLAG = -DPOLINT6_USE_BARYCENTRIC=$(POLINT6_USE_BARY)
|
||||
TRANSFER_CACHE_FLAG = -DBSSN_USE_TRANSFER_CACHE=$(EFFECTIVE_USE_TRANSFER_CACHE)
|
||||
ESCALAR_KERNEL_FLAG = -DBSSN_USE_ESCALAR_C_KERNEL=$(EFFECTIVE_USE_CXX_ESCALAR_KERNEL)
|
||||
|
||||
|
||||
## GCC build flags (optimized for x86-64-v4)
|
||||
## PGO disabled (used negative optimization on Intel; not tested on GCC)
|
||||
CXXAPPFLAGS = -O3 -march=x86-64-v4 -ffast-math -mfma -flto \
|
||||
-Dfortran3 -Dnewc $(INTERP_LB_FLAGS) \
|
||||
$(TRANSFER_CACHE_FLAG) $(ESCALAR_KERNEL_FLAG)
|
||||
$(TRANSFER_CACHE_FLAG) $(ESCALAR_KERNEL_FLAG) $(EM_KERNEL_FLAG)
|
||||
f90appflags = -O3 -march=x86-64-v4 -ffast-math -mfma -flto \
|
||||
-cpp $(POLINT6_FLAG)
|
||||
|
||||
@@ -54,11 +55,11 @@ f90appflags = -O3 -march=x86-64-v4 -ffast-math -mfma -flto \
|
||||
$(f90) $(f90appflags) -c $< -o $@
|
||||
|
||||
.C.o:
|
||||
${CXX} $(CXXAPPFLAGS) -c $< $(filein) -o $@
|
||||
|
||||
# ShellPatch.C uses OpenMP for setupintintstuff search loops
|
||||
ShellPatch.o: ShellPatch.C
|
||||
$(CXX) $(CXXAPPFLAGS) $(OMP_FLAG) -c $< $(filein) -o $@
|
||||
${CXX} $(CXXAPPFLAGS) -c $< $(filein) -o $@
|
||||
${CXX} $(CXXAPPFLAGS) $(OMP_FLAG) -c $< $(filein) -o $@
|
||||
|
||||
.for.o:
|
||||
$(f77) -c $< -o $@
|
||||
@@ -85,21 +86,39 @@ lopsided_c.o: lopsided_c.C
|
||||
lopsided_kodis_c.o: lopsided_kodis_c.C
|
||||
${CXX} $(CXXAPPFLAGS) -c $< $(filein) -o $@
|
||||
|
||||
# C rewrite of shell-patch derivative kernels
|
||||
fderivs_sh_c.o: fderivs_sh_c.C
|
||||
${CXX} $(CXXAPPFLAGS) -c $< $(filein) -o $@
|
||||
|
||||
fdderivs_sh_c.o: fdderivs_sh_c.C
|
||||
${CXX} $(CXXAPPFLAGS) -c $< $(filein) -o $@
|
||||
|
||||
fderivs_shc_c.o: fderivs_shc_c.C
|
||||
${CXX} $(CXXAPPFLAGS) -c $< $(filein) -o $@
|
||||
|
||||
fdderivs_shc_c.o: fdderivs_shc_c.C
|
||||
${CXX} $(CXXAPPFLAGS) -c $< $(filein) -o $@
|
||||
|
||||
kodiss_sh_c.o: kodiss_sh_c.C
|
||||
${CXX} $(CXXAPPFLAGS) -c $< $(filein) -o $@
|
||||
|
||||
z4c_rhs_c.o: z4c_rhs_c.C
|
||||
${CXX} $(CXXAPPFLAGS) -c $< $(filein) -o $@
|
||||
|
||||
#interp_lb_profile.o: interp_lb_profile.C interp_lb_profile.h
|
||||
# ${CXX} $(CXXAPPFLAGS) -c $< $(filein) -o $@
|
||||
|
||||
## TwoPunctureABE flags (no PGO; PGO was negative optimization)
|
||||
TP_OPTFLAGS = -O3 -march=x86-64-v4 -ffast-math -mfma -flto \
|
||||
-Dfortran3 -Dnewc
|
||||
## TwoPunctureABE uses fixed optimal flags with its own PGO profile, independent of CXXAPPFLAGS
|
||||
TP_PROFDATA = /home/$(shell whoami)/AMSS-NCKU/pgo_profile/TwoPunctureABE.profdata
|
||||
TP_OPTFLAGS = -O3 -xHost -fp-model fast=2 -fma -ipo \
|
||||
-fprofile-instr-use=$(TP_PROFDATA) \
|
||||
-Dfortran3 -Dnewc -I$(OMPI_ROOT)/include
|
||||
|
||||
TwoPunctures.o: TwoPunctures.C
|
||||
${CXX} $(TP_OPTFLAGS) -fopenmp -c $< -o $@
|
||||
${CXX} $(TP_OPTFLAGS) -qopenmp -c $< -o $@
|
||||
|
||||
TwoPunctureABE.o: TwoPunctureABE.C
|
||||
${CXX} $(TP_OPTFLAGS) -fopenmp -c $< -o $@
|
||||
${CXX} $(TP_OPTFLAGS) -qopenmp -c $< -o $@
|
||||
|
||||
# Input files
|
||||
|
||||
@@ -130,6 +149,17 @@ else
|
||||
RK4_F90_OBJ = rungekutta4_rout.o
|
||||
endif
|
||||
|
||||
## Shell-patch derivative kernel switch (independent from USE_CXX_KERNELS)
|
||||
## 1 : use C++ rewrite of shell derivative functions (experimental)
|
||||
## 0 : use original Fortran diff_new_sh.o and kodiss_sh.o (default)
|
||||
USE_CXX_SHELL_KERNELS ?= 0
|
||||
ifeq ($(USE_CXX_SHELL_KERNELS),1)
|
||||
CFILES += fderivs_sh_c.o fdderivs_sh_c.o fderivs_shc_c.o fdderivs_shc_c.o kodiss_sh_c.o
|
||||
SH_F90_OBJ =
|
||||
else
|
||||
SH_F90_OBJ = diff_new_sh.o kodiss_sh.o point_diff_new_sh.o
|
||||
endif
|
||||
|
||||
C++FILES = ABE.o Ansorg.o Block.o misc.o monitor.o Parallel.o MPatch.o var.o\
|
||||
cgh.o bssn_class.o surface_integral.o ShellPatch.o\
|
||||
bssnEScalar_class.o perf.o Z4c_class.o NullShellPatch.o\
|
||||
@@ -147,11 +177,11 @@ C++FILES_GPU = ABE.o Ansorg.o Block.o misc.o monitor.o Parallel.o MPatch.o var.o
|
||||
|
||||
F90FILES_BASE = enforce_algebra.o fmisc.o initial_puncture.o prolongrestrict.o\
|
||||
prolongrestrict_cell.o prolongrestrict_vertex.o\
|
||||
$(RK4_F90_OBJ) diff_new.o kodiss.o kodiss_sh.o\
|
||||
lopsidediff.o sommerfeld_rout.o getnp4.o diff_new_sh.o\
|
||||
$(RK4_F90_OBJ) diff_new.o kodiss.o\
|
||||
lopsidediff.o sommerfeld_rout.o getnp4.o $(SH_F90_OBJ)\
|
||||
shellfunctions.o bssn_rhs_ss.o Set_Rho_ADM.o\
|
||||
getnp4EScalar.o bssnEScalar_rhs.o bssn_constraint.o ricci_gamma.o\
|
||||
fadmquantites_bssn.o $(Z4C_F90_OBJ) Z4c_rhs_ss.o point_diff_new_sh.o\
|
||||
fadmquantites_bssn.o $(Z4C_F90_OBJ) Z4c_rhs_ss.o\
|
||||
cpbc.o getnp4old.o NullEvol.o initial_null.o initial_maxwell.o\
|
||||
getnpem2.o empart.o NullNews.o fourdcurvature.o\
|
||||
bssn2adm.o adm_constraint.o adm_ricci_gamma.o\
|
||||
@@ -217,7 +247,7 @@ ABEGPU: $(C++FILES_GPU) $(CFILES) $(F90FILES) $(F77FILES) $(AHFDOBJS) $(CUDAFILE
|
||||
$(CLINKER) $(CXXAPPFLAGS) -o $@ $(C++FILES_GPU) $(CFILES) $(F90FILES) $(F77FILES) $(AHFDOBJS) $(CUDAFILES) $(LDLIBS)
|
||||
|
||||
TwoPunctureABE: $(TwoPunctureFILES)
|
||||
$(CLINKER) $(TP_OPTFLAGS) -fopenmp -o $@ $(TwoPunctureFILES) $(LDLIBS)
|
||||
$(CLINKER) $(TP_OPTFLAGS) -qopenmp -o $@ $(TwoPunctureFILES) $(LDLIBS)
|
||||
|
||||
clean:
|
||||
rm *.o ABE ABEGPU TwoPunctureABE make.log -f
|
||||
|
||||
Reference in New Issue
Block a user