include makefile.inc

## ABE build flags selected by PGO_MODE (set in makefile.inc, default: opt)
##   make                      -> opt (PGO-guided, maximum performance)
##   make PGO_MODE=instrument  -> instrument (Phase 1: collect fresh profile data)

# Simply-expanded (:=) so that `whoami` runs once at parse time instead of
# on every expansion of the compiler flag variables below.
PROFDATA := /home/$(shell whoami)/AMSS-NCKU/pgo_profile/default.profdata

ifeq ($(PGO_MODE),instrument)
## Phase 1: instrumentation — -fp-model fast=2 is left out; -ipo is still passed.
## NOTE(review): this comment used to claim -ipo was omitted as well, but both
## flag sets below include it. Confirm which behaviour is intended.
CXXAPPFLAGS = -O3 -xHost -fma -fprofile-instr-generate -ipo \
              -Dfortran3 -Dnewc -I${MKLROOT}/include $(INTERP_LB_FLAGS)
f90appflags = -O3 -xHost -fma -fprofile-instr-generate -ipo \
              -align array64byte -fpp -I${MKLROOT}/include
else
## opt (default): maximum performance with PGO profile data
CXXAPPFLAGS = -O3 -xHost -fp-model fast=2 -fma -ipo \
              -fprofile-instr-use=$(PROFDATA) \
              -Dfortran3 -Dnewc -I${MKLROOT}/include $(INTERP_LB_FLAGS)
f90appflags = -O3 -xHost -fp-model fast=2 -fma -ipo \
              -fprofile-instr-use=$(PROFDATA) \
              -align array64byte -fpp -I${MKLROOT}/include
endif

# Generic compile rules, one per source suffix.
.SUFFIXES: .o .f90 .C .for .cu

.f90.o:
	$(f90) $(f90appflags) -c $< -o $@

.C.o:
	${CXX} $(CXXAPPFLAGS) -c $< $(filein) -o $@

.for.o:
	$(f77) -c $< -o $@

.cu.o:
	$(Cu) $(CUDA_APP_FLAGS) -c $< -o $@ $(CUDA_LIB_PATH)

# CUDA rewrite of BSSN RHS (drop-in replacement for bssn_rhs_c + stencil helpers)
bssn_rhs_cuda.o: bssn_rhs_cuda.cu macrodef.h
	$(Cu) $(CUDA_APP_FLAGS) -c $< -o $@ $(CUDA_LIB_PATH)

# C rewrite of BSSN RHS kernel and helpers.
# Static pattern rule: each listed object is compiled from the .C file of the
# same stem, with the same command as the generic .C.o rule.
bssn_rhs_c.o fderivs_c.o fdderivs_c.o kodiss_c.o lopsided_c.o: %.o: %.C
	${CXX} $(CXXAPPFLAGS) -c $< $(filein) -o $@

interp_lb_profile.o: interp_lb_profile.C interp_lb_profile.h
	${CXX} $(CXXAPPFLAGS) -c $< $(filein) -o $@

## TwoPunctureABE uses fixed optimal flags with its own PGO profile, independent of CXXAPPFLAGS
TP_PROFDATA := /home/$(shell whoami)/AMSS-NCKU/pgo_profile/TwoPunctureABE.profdata
TP_OPTFLAGS = -O3 -xHost -fp-model fast=2 -fma -ipo \
              -fprofile-instr-use=$(TP_PROFDATA) \
              -Dfortran3 -Dnewc -I${MKLROOT}/include

TwoPunctures.o: TwoPunctures.C
	${CXX} $(TP_OPTFLAGS) -qopenmp -c $< -o $@

TwoPunctureABE.o: TwoPunctureABE.C
	${CXX} $(TP_OPTFLAGS) -qopenmp -c $< -o $@

# Input files

## Kernel implementation switch (set USE_CXX_KERNELS=0 to fall back to Fortran)
ifeq ($(USE_CXX_KERNELS),0)
# Fortran mode: no C rewrite files; bssn_rhs.o is included via F90FILES below
CFILES =
else
# C++ mode (default): C rewrite of bssn_rhs and helper kernels
CFILES = bssn_rhs_c.o fderivs_c.o fdderivs_c.o kodiss_c.o lopsided_c.o
endif

# CUDA rewrite: bssn_rhs_cuda.o replaces all CFILES (stencils are built-in)
CFILES_CUDA = bssn_rhs_cuda.o

C++FILES = ABE.o Ansorg.o Block.o misc.o monitor.o Parallel.o MPatch.o var.o \
           cgh.o bssn_class.o surface_integral.o ShellPatch.o \
           bssnEScalar_class.o perf.o Z4c_class.o NullShellPatch.o \
           bssnEM_class.o cpbc_util.o z4c_rhs_point.o checkpoint.o \
           Parallel_bam.o scalar_class.o transpbh.o NullShellPatch2.o \
           NullShellPatch2_Evo.o writefile_f.o interp_lb_profile.o

C++FILES_GPU = ABE.o Ansorg.o Block.o misc.o monitor.o Parallel.o MPatch.o var.o \
               cgh.o surface_integral.o ShellPatch.o \
               bssnEScalar_class.o perf.o Z4c_class.o NullShellPatch.o \
               bssnEM_class.o cpbc_util.o z4c_rhs_point.o checkpoint.o \
               Parallel_bam.o scalar_class.o transpbh.o NullShellPatch2.o \
               NullShellPatch2_Evo.o \
               bssn_gpu_class.o bssn_step_gpu.o bssn_macro.o writefile_f.o

F90FILES_BASE = enforce_algebra.o fmisc.o initial_puncture.o prolongrestrict.o \
                prolongrestrict_cell.o prolongrestrict_vertex.o \
                rungekutta4_rout.o diff_new.o kodiss.o kodiss_sh.o \
                lopsidediff.o sommerfeld_rout.o getnp4.o diff_new_sh.o \
                shellfunctions.o bssn_rhs_ss.o Set_Rho_ADM.o \
                getnp4EScalar.o bssnEScalar_rhs.o bssn_constraint.o ricci_gamma.o \
                fadmquantites_bssn.o Z4c_rhs.o Z4c_rhs_ss.o point_diff_new_sh.o \
                cpbc.o getnp4old.o NullEvol.o initial_null.o initial_maxwell.o \
                getnpem2.o empart.o NullNews.o fourdcurvature.o \
                bssn2adm.o adm_constraint.o adm_ricci_gamma.o \
                scalar_rhs.o initial_scalar.o NullEvol2.o initial_null2.o \
                NullNews2.o tool_f.o

ifeq ($(USE_CXX_KERNELS),0)
# Fortran mode: include original bssn_rhs.o
F90FILES = $(F90FILES_BASE) bssn_rhs.o
else
# C++ mode (default): bssn_rhs.o replaced by C++ kernel
F90FILES = $(F90FILES_BASE)
endif

F77FILES = zbesh.o

AHFDOBJS = expansion.o expansion_Jacobian.o patch.o coords.o patch_info.o patch_interp.o patch_system.o \
           tgrid.o fd_grid.o ghost_zone.o array.o round.o norm.o fuzzy.o error_exit.o miscfp.o \
           linear_map.o cpm_map.o BH_diagnostics.o setup.o horizon_sequence.o find_horizons.o \
           initial_guess.o Newton.o Jacobian.o ilucg.o IntPnts0.o IntPnts.o

TwoPunctureFILES = TwoPunctureABE.o TwoPunctures.o

CUDAFILES = bssn_gpu.o bssn_gpu_rhs_ss.o

# file dependences
$(C++FILES) $(C++FILES_GPU) $(F90FILES) $(CFILES) $(AHFDOBJS) $(CUDAFILES): macrodef.fh

$(C++FILES): Block.h enforce_algebra.h fmisc.h initial_puncture.h macrodef.h \
             misc.h monitor.h MyList.h Parallel.h MPatch.h prolongrestrict.h \
             rungekutta4_rout.h var.h bssn_class.h bssn_rhs.h sommerfeld_rout.h \
             cgh.h surface_integral.h ShellPatch.h shellfunctions.h perf.h \
             fadmquantites_bssn.h cpbc.h getnp4.h initial_null.h NullEvol.h \
             NullShellPatch.h initial_maxwell.h bssnEM_class.h getnpem2.h \
             empart.h NullNews.h kodiss.h Parallel_bam.h ricci_gamma.h \
             initial_null2.h NullShellPatch2.h

$(C++FILES_GPU): Block.h enforce_algebra.h fmisc.h initial_puncture.h macrodef.h \
                 misc.h monitor.h MyList.h Parallel.h MPatch.h prolongrestrict.h \
                 rungekutta4_rout.h var.h bssn_rhs.h sommerfeld_rout.h \
                 cgh.h surface_integral.h ShellPatch.h shellfunctions.h perf.h \
                 fadmquantites_bssn.h cpbc.h getnp4.h initial_null.h NullEvol.h \
                 NullShellPatch.h initial_maxwell.h bssnEM_class.h getnpem2.h \
                 empart.h NullNews.h kodiss.h Parallel_bam.h ricci_gamma.h \
                 initial_null2.h NullShellPatch2.h \
                 bssn_gpu_class.h bssn_macro.h

$(AHFDOBJS): cctk.h cctk_Config.h cctk_Types.h cctk_Constants.h myglobal.h

$(C++FILES) $(C++FILES_GPU) $(CFILES) $(AHFDOBJS) $(CUDAFILES): macrodef.h

# The variable must be expanded here: a bare `TwoPunctureFILES:` would declare
# an unrelated target of that literal name and leave the objects without this
# header dependency.
$(TwoPunctureFILES): TwoPunctures.h

$(CUDAFILES): bssn_gpu.h gpu_mem.h gpu_rhsSS_mem.h

misc.o : zbesh.o

# projects

# ABE and ABE_CUDA are linked from the same objects with the same command;
# both pull in the CUDA kernel object and -lcudart.
ABE ABE_CUDA: $(C++FILES) $(CFILES_CUDA) $(F90FILES) $(F77FILES) $(AHFDOBJS)
	$(CLINKER) $(CXXAPPFLAGS) -o $@ $^ $(LDLIBS) -lcudart $(CUDA_LIB_PATH)

ABEGPU: $(C++FILES_GPU) $(CFILES) $(F90FILES) $(F77FILES) $(AHFDOBJS) $(CUDAFILES)
	$(CLINKER) $(CXXAPPFLAGS) -o $@ $^ $(LDLIBS)

TwoPunctureABE: $(TwoPunctureFILES)
	$(CLINKER) $(TP_OPTFLAGS) -qopenmp -o $@ $^ $(LDLIBS)

# clean is a command, not a file: keep it runnable even if a file named
# "clean" exists. -f goes before the operands so it does not depend on rm
# permuting its options.
.PHONY: clean
clean:
	rm -f *.o ABE ABE_CUDA ABEGPU TwoPunctureABE make.log