244 lines
8.7 KiB
Makefile
244 lines
8.7 KiB
Makefile
|
|
|
|
include makefile.inc
|
|
|
|
## Kernel selector for polint(ordn=6).
##   POLINT6_USE_BARY=1 (default) : barycentric fast path
##   POLINT6_USE_BARY=0           : fallback to Neville path
## The chosen value is forwarded as the -DPOLINT6_USE_BARYCENTRIC define.
POLINT6_USE_BARY ?= 1
POLINT6_FLAG = -DPOLINT6_USE_BARYCENTRIC=${POLINT6_USE_BARY}
|
|
|
|
## ABE build flags, selected by PGO_MODE (set in makefile.inc, default: opt)
##   make                      -> opt        (default optimized build)
##   make PGO_MODE=instrument  -> instrument (Phase 1: collect fresh profile data)
##
## PROFDATA is the profile that -fprofile-instr-use=$(PROFDATA) would consume.
## None of the flags in this section reference it at the moment (see the opt
## branch), so the $(shell ...) call is only run if something else expands it.
PROFDATA = /home/$(shell whoami)/AMSS-NCKU/pgo_profile/default.profdata

## Only the flags that differ between the two modes live inside the
## conditional; the flags common to both modes are spelled out once below.
ifeq ($(PGO_MODE),instrument)
## Phase 1: instrumentation. -fp-model fast=2 is left out (numerical
## stability) and -fprofile-instr-generate is added; -ipo stays enabled.
## NOTE(review): the previous comment claimed -ipo was omitted here too, but
## the flags have always carried it. Confirm which of the two is intended.
ABE_MODE_FLAGS = -fma -fprofile-instr-generate -ipo
else
## opt (default).
## PGO (-fprofile-instr-use=$(PROFDATA)) has been turned off: it was tested
## and found to be a negative optimization.
## INTERP_LB_FLAGS has been turned off too (also tested and found to be a
## negative optimization). It is still appended to CXXAPPFLAGS below, so it
## only contributes if it is given a value outside this section.
ABE_MODE_FLAGS = -fp-model fast=2 -fma -ipo
endif

## C++ flags for the ABE objects (used by the .C.o rule and the link lines).
CXXAPPFLAGS = -O3 -xHost $(ABE_MODE_FLAGS) \
              -Dfortran3 -Dnewc -I${MKLROOT}/include $(INTERP_LB_FLAGS)

## Fortran 90 flags for the ABE objects (used by the .f90.o rule).
f90appflags = -O3 -xHost $(ABE_MODE_FLAGS) \
              -align array64byte -fpp -I${MKLROOT}/include $(SRC_INC_FLAGS) $(POLINT6_FLAG)
|
|
|
|
## Source suffixes handled by the suffix rules below.
.SUFFIXES: .o .f90 .C .for .cu

## Fortran 90 source -> object, built with f90appflags.
.f90.o:
	${f90} ${f90appflags} -c $< -o $@

## C++ source (.C) -> object, built with CXXAPPFLAGS. $(filein) is not defined
## in this file; it is passed through between the source and the -o option.
.C.o:
	$(CXX) ${CXXAPPFLAGS} -c $< ${filein} -o $@

## Fixed-form Fortran (.for) -> object. No flag variable is passed here.
.for.o:
	${f77} -c $< -o $@

## CUDA source -> object, with CUDA_APP_FLAGS before and CUDA_LIB_PATH after
## the compile arguments.
.cu.o:
	${Cu} ${CUDA_APP_FLAGS} -c $< -o $@ ${CUDA_LIB_PATH}
|
|
|
|
## TwoPunctureABE uses fixed optimal flags with its own PGO profile,
## independent of CXXAPPFLAGS.
##
## TP_PROFDATA is expanded once (:=) so that `whoami` is not re-run every time
## TP_OPTFLAGS is expanded. It can still be overridden on the command line:
##   make TwoPunctureABE TP_PROFDATA=/path/to/TwoPunctureABE.profdata
TP_PROFDATA := /home/$(shell whoami)/AMSS-NCKU/pgo_profile/TwoPunctureABE.profdata

## Pass -fprofile-instr-use only when the profile file really exists; otherwise
## emit a make warning and build without PGO.
## NOTE(review): this assumes the compiler rejects a missing profile (as
## clang-based drivers do), which would otherwise break the build on any
## machine that lacks the hard-coded /home/<user> path. Confirm for $(CXX).
## Kept recursive (=) so the check and the warning only happen when a
## TwoPuncture recipe is actually expanded, not for `make ABE` or `make clean`.
TP_PGO_FLAG = $(if $(wildcard $(TP_PROFDATA)),-fprofile-instr-use=$(TP_PROFDATA),$(warning TwoPunctureABE PGO profile not found at $(TP_PROFDATA); building without PGO))

TP_OPTFLAGS = -O3 -xHost -fp-model fast=2 -fma -ipo \
              $(TP_PGO_FLAG) \
              -Dfortran3 -Dnewc -I${MKLROOT}/include $(SRC_INC_FLAGS)

## Both TwoPuncture objects are compiled the same way (TP_OPTFLAGS plus
## OpenMP), so one static pattern rule covers them.
## NOTE(review): ./Two_Puncture/TwoPunctures.o is the first explicit target
## visible in this file. Unless makefile.inc declares a target or sets
## .DEFAULT_GOAL, a bare `make` builds only that object. The target order is
## kept as it was so this behaviour is unchanged; confirm what is intended.
./Two_Puncture/TwoPunctures.o ./Two_Puncture/TwoPunctureABE.o: %.o: %.C
	$(CXX) $(TP_OPTFLAGS) -qopenmp -c $< -o $@
|
|
|
|
# Input files

## Kernel implementation switch. The C++ kernel objects are selected unless
## USE_CXX_KERNELS is exactly 0 (set USE_CXX_KERNELS=0 to fall back to Fortran).
ifneq ($(USE_CXX_KERNELS),0)
CFILES = ./BSSN/bssn_rhs_c.o \
         ./Derivative/fderivs_c.o ./Derivative/fdderivs_c.o \
         ./KO_dissipation/kodiss_c.o \
         ./BSSN/lopsided_c.o ./BSSN/lopsided_kodis_c.o
else
CFILES =
endif

## RK4 kernel switch (independent from USE_CXX_KERNELS).
## The Fortran RK4 object is the default; only USE_CXX_RK4=1 swaps it for the
## C++ object, which is appended to CFILES while RK4_F90_OBJ is emptied.
RK4_F90_OBJ = ./Runge_Kutta/rungekutta4_rout.o
ifeq ($(USE_CXX_RK4),1)
CFILES += ./Runge_Kutta/rungekutta4_rout_c.o
RK4_F90_OBJ =
endif
|
|
|
|
## C++ objects linked into the CPU executable (ABE). Order is preserved.
C++FILES = ABE.o ./Initial_Data_Solver/Ansorg.o ./cgh/Block.o \
           ./misc/misc.o ./Monitor/monitor.o ./Parallel/Parallel.o \
           ./Patch/MPatch.o ./Variable/var.o ./cgh/cgh.o \
           ./BSSN/bssn_class.o ./Surface_Integral/surface_integral.o \
           ./Shell_Patch/ShellPatch.o ./Scalar/bssnEScalar_class.o \
           ./System_Performance/perf.o ./Z4C/Z4c_class.o \
           ./Null_Evolve/NullShellPatch.o ./BSSN/bssnEM_class.o \
           ./Z4C/cpbc_util.o ./Z4C/z4c_rhs_point.o \
           ./Check_Point/checkpoint.o ./Parallel/Parallel_bam.o \
           ./Scalar/scalar_class.o ./BSSN/transpbh.o \
           ./Null_Evolve/NullShellPatch2.o ./Null_Evolve/NullShellPatch2_Evo.o \
           ./Read_and_Write/writefile_f.o \
           interp_lb_profile.o
|
|
|
|
## C++ objects linked into the GPU executable (ABEGPU). Order is preserved.
## Compared with C++FILES, ./BSSN/bssn_class.o and interp_lb_profile.o are
## absent and the three ./BSSN_GPU host-side objects are present.
C++FILES_GPU = ABE.o ./Initial_Data_Solver/Ansorg.o ./cgh/Block.o \
               ./misc/misc.o ./Monitor/monitor.o ./Parallel/Parallel.o \
               ./Patch/MPatch.o ./Variable/var.o ./cgh/cgh.o \
               ./Surface_Integral/surface_integral.o \
               ./Shell_Patch/ShellPatch.o ./Scalar/bssnEScalar_class.o \
               ./System_Performance/perf.o ./Z4C/Z4c_class.o \
               ./Null_Evolve/NullShellPatch.o ./BSSN/bssnEM_class.o \
               ./Z4C/cpbc_util.o ./Z4C/z4c_rhs_point.o \
               ./Check_Point/checkpoint.o ./Parallel/Parallel_bam.o \
               ./Scalar/scalar_class.o ./BSSN/transpbh.o \
               ./Null_Evolve/NullShellPatch2.o ./Null_Evolve/NullShellPatch2_Evo.o \
               ./BSSN_GPU/bssn_gpu_class.o ./BSSN_GPU/bssn_step_gpu.o \
               ./BSSN_GPU/bssn_macro.o \
               ./Read_and_Write/writefile_f.o
|
|
|
|
## Fortran 90 objects that are always built. $(RK4_F90_OBJ) sits at its
## original position in the list and is empty when the C++ RK4 kernel is used.
F90FILES_BASE = ./BSSN/enforce_algebra.o ./misc/fmisc.o \
                ./Initial_Data_Solver/initial_puncture.o \
                ./BSSN/prolongrestrict.o ./BSSN/prolongrestrict_cell.o \
                ./BSSN/prolongrestrict_vertex.o \
                $(RK4_F90_OBJ) \
                ./Derivative/diff_new.o \
                ./KO_dissipation/kodiss.o ./KO_dissipation/kodiss_sh.o \
                ./BSSN/lopsidediff.o ./BSSN/sommerfeld_rout.o \
                ./Psi4/getnp4.o ./Derivative/diff_new_sh.o \
                ./Shell_Patch/shellfunctions.o ./BSSN/bssn_rhs_ss.o \
                ./Scalar/Set_Rho_ADM.o ./Psi4/getnp4EScalar.o \
                ./Scalar/bssnEScalar_rhs.o ./BSSN/bssn_constraint.o \
                ./Psi4/ricci_gamma.o ./BSSN/fadmquantites_bssn.o \
                ./Z4C/Z4c_rhs.o ./Z4C/Z4c_rhs_ss.o \
                ./Derivative/point_diff_new_sh.o ./Z4C/cpbc.o \
                ./Psi4/getnp4old.o ./Null_Evolve/NullEvol.o \
                ./Initial_Data_Solver/initial_null.o \
                ./Initial_Data_Solver/initial_maxwell.o \
                ./Psi4/getnpem2.o ./BSSN/empart.o \
                ./Null_Evolve/NullNews.o ./BSSN/fourdcurvature.o \
                ./BSSN/bssn2adm.o ./BSSN/adm_constraint.o \
                ./Psi4/adm_ricci_gamma.o ./Scalar/scalar_rhs.o \
                ./Initial_Data_Solver/initial_scalar.o \
                ./Null_Evolve/NullEvol2.o \
                ./Initial_Data_Solver/initial_null2.o \
                ./Null_Evolve/NullNews2.o \
                ./Read_and_Write/tool_f.o

## The Fortran BSSN right-hand side is appended only when the C++ kernels are
## switched off (USE_CXX_KERNELS=0).
F90FILES = $(F90FILES_BASE)
ifeq ($(USE_CXX_KERNELS),0)
F90FILES += ./BSSN/bssn_rhs.o
endif
|
|
|
|
## Fixed-form Fortran object(s).
F77FILES = ./Special_Function/zbesh.o

## AHF_Direct objects, listed by stem and mapped to ./AHF_Direct/<stem>.o.
## The order of the stems is the order of the resulting object list.
ahfd_stems = expansion expansion_Jacobian patch coords \
             patch_info patch_interp patch_system \
             tgrid fd_grid ghost_zone array round norm fuzzy \
             error_exit miscfp linear_map cpm_map \
             BH_diagnostics setup horizon_sequence find_horizons \
             initial_guess Newton Jacobian ilucg IntPnts0 IntPnts
AHFDOBJS = $(patsubst %,./AHF_Direct/%.o,$(ahfd_stems))

## Objects of the standalone TwoPunctureABE executable.
TwoPunctureFILES = ./Two_Puncture/TwoPunctureABE.o ./Two_Puncture/TwoPunctures.o

## Objects produced from CUDA sources.
CUDAFILES = ./BSSN_GPU/bssn_gpu.o ./BSSN_GPU/bssn_gpu_rhs_ss.o
|
|
|
|
## Extra prerequisites (no recipes here; the compile recipes come from the
## suffix rules and the explicit TwoPuncture rules).

## Fortran 90 objects depend on macrodef.fh only.
$(F90FILES): macrodef.fh

## C++, C-kernel, AHF_Direct and CUDA objects depend on both macro headers.
$(C++FILES) $(C++FILES_GPU) $(CFILES) $(AHFDOBJS) $(CUDAFILES): macrodef.fh macrodef.h

$(TwoPunctureFILES): ./Two_Puncture/TwoPunctures.h

$(CUDAFILES): ./BSSN_GPU/bssn_gpu.h ./BSSN_GPU/gpu_mem.h ./BSSN_GPU/gpu_rhsSS_mem.h

## misc.o lists the zbesh object itself as a prerequisite, so zbesh.o is
## brought up to date first and misc.o is rebuilt whenever zbesh.o is newer.
./misc/misc.o: ./Special_Function/zbesh.o
|
|
|
|
# projects

## Each executable's object set is named once and used both as the
## prerequisite list and on the link line, in the same order.
abe_objs    = $(C++FILES) $(CFILES) $(F90FILES) $(F77FILES) $(AHFDOBJS)
abegpu_objs = $(C++FILES_GPU) $(CFILES) $(F90FILES) $(F77FILES) $(AHFDOBJS) $(CUDAFILES)

## CPU executable.
ABE: $(abe_objs)
	$(CLINKER) $(CXXAPPFLAGS) -o $@ $(abe_objs) $(LDLIBS)

## GPU executable: GPU variant of the C++ objects plus the CUDA objects.
ABEGPU: $(abegpu_objs)
	$(CLINKER) $(CXXAPPFLAGS) -o $@ $(abegpu_objs) $(LDLIBS)

## Standalone TwoPuncture executable, linked with its own flags and OpenMP.
TwoPunctureABE: $(TwoPunctureFILES)
	$(CLINKER) $(TP_OPTFLAGS) -qopenmp -o $@ $(TwoPunctureFILES) $(LDLIBS)
|
|
|
|
## Remove every *.o file below the current directory, the three executables
## and make.log. Declared phony so that a file or directory named `clean`
## can never make this target look up to date.
.PHONY: clean
clean:
	find . -name '*.o' -delete
	rm -f ABE ABEGPU TwoPunctureABE make.log
|