PGO build-mode switch for the ABE build (reviewed revision).

AMSS_NCKU_source/makefile
    CXXAPPFLAGS / f90appflags are now chosen by PGO_MODE:
      instrument : -O2 ... -fprofile-instr-generate
      otherwise  : -O3 ... -ipo -fprofile-instr-use=$(PROFDATA)
    TwoPunctures.o, TwoPunctureABE.o and the TwoPunctureABE link step use the
    fixed TP_OPTFLAGS instead of CXXAPPFLAGS.
AMSS_NCKU_source/makefile.inc
    The flag definitions move out of this file; only `PGO_MODE ?= opt` remains.
pgo_profile/
    Binary profile files are updated/added (contents are not part of this text).

Review changes relative to the submitted patch:
    PROFDATA was `/home/$(shell whoami)/...` assigned with `=`, which re-ran
    `whoami` on every expansion of the flags and assumed homes live under /home.
    It is now `:=` and rooted at $(HOME).
    The PGO_MODE comparison is wrapped in $(strip ...) to tolerate whitespace.

NOTE(review): line breaks, blank context lines and leading whitespace inside the
hunks were reconstructed from the hunk line counts; if a hunk does not apply
cleanly, retry with `git apply --ignore-whitespace` and confirm the context.

diff --git a/AMSS_NCKU_source/makefile b/AMSS_NCKU_source/makefile
index f2d4e3c..7a746fa 100644
--- a/AMSS_NCKU_source/makefile
+++ b/AMSS_NCKU_source/makefile
@@ -2,6 +2,29 @@
 
 include makefile.inc
 
+## ABE build flags selected by PGO_MODE (set in makefile.inc, default: opt)
+##   make                      -> opt (PGO-guided, maximum performance)
+##   make PGO_MODE=instrument  -> instrument (Phase 1: collect fresh profile data)
+## PROFDATA: profile data file passed to -fprofile-instr-use. Rooted at $(HOME) (not /home/<user>)
+## and assigned with := so no shell is forked per expansion; override: make PROFDATA=/path/to/file
+PROFDATA := $(HOME)/AMSS-NCKU/pgo_profile/default.profdata
+
+ifeq ($(strip $(PGO_MODE)),instrument)
+## Phase 1: instrumentation — omit -ipo/-fp-model fast=2 for faster build and numerical stability
+CXXAPPFLAGS = -O2 -xHost -fma -fprofile-instr-generate \
+              -Dfortran3 -Dnewc -I${MKLROOT}/include
+f90appflags = -O2 -xHost -fma -fprofile-instr-generate \
+              -align array64byte -fpp -I${MKLROOT}/include
+else
+## opt (default): maximum performance with PGO profile data
+CXXAPPFLAGS = -O3 -xHost -fp-model fast=2 -fma -ipo \
+              -fprofile-instr-use=$(PROFDATA) \
+              -Dfortran3 -Dnewc -I${MKLROOT}/include
+f90appflags = -O3 -xHost -fp-model fast=2 -fma -ipo \
+              -fprofile-instr-use=$(PROFDATA) \
+              -align array64byte -fpp -I${MKLROOT}/include
+endif
+
 .SUFFIXES: .o .f90 .C .for .cu
 
 .f90.o:
@@ -16,11 +39,14 @@ include makefile.inc
 .cu.o:
 	$(Cu) $(CUDA_APP_FLAGS) -c $< -o $@ $(CUDA_LIB_PATH)
 
+## TwoPunctureABE uses fixed optimal flags, independent of CXXAPPFLAGS (which may be PGO-instrumented)
+TP_OPTFLAGS = -O3 -xHost -fp-model fast=2 -fma -ipo -Dfortran3 -Dnewc -I${MKLROOT}/include
+
 TwoPunctures.o: TwoPunctures.C
-	${CXX} $(CXXAPPFLAGS) -qopenmp -c $< -o $@
+	${CXX} $(TP_OPTFLAGS) -qopenmp -c $< -o $@
 
 TwoPunctureABE.o: TwoPunctureABE.C
-	${CXX} $(CXXAPPFLAGS) -qopenmp -c $< -o $@
+	${CXX} $(TP_OPTFLAGS) -qopenmp -c $< -o $@
 
 # Input files
 C++FILES = ABE.o Ansorg.o Block.o misc.o monitor.o Parallel.o MPatch.o var.o\
@@ -102,7 +128,7 @@ ABEGPU: $(C++FILES_GPU) $(F90FILES) $(F77FILES) $(AHFDOBJS) $(CUDAFILES)
 	$(CLINKER) $(CXXAPPFLAGS) -o $@ $(C++FILES_GPU) $(F90FILES) $(F77FILES) $(AHFDOBJS) $(CUDAFILES) $(LDLIBS)
 
 TwoPunctureABE: $(TwoPunctureFILES)
-	$(CLINKER) $(CXXAPPFLAGS) -qopenmp -o $@ $(TwoPunctureFILES) $(LDLIBS)
+	$(CLINKER) $(TP_OPTFLAGS) -qopenmp -o $@ $(TwoPunctureFILES) $(LDLIBS)
 
 clean:
 	rm *.o ABE ABEGPU TwoPunctureABE make.log -f
diff --git a/AMSS_NCKU_source/makefile.inc b/AMSS_NCKU_source/makefile.inc
index 072e113..db28baf 100755
--- a/AMSS_NCKU_source/makefile.inc
+++ b/AMSS_NCKU_source/makefile.inc
@@ -10,16 +10,10 @@ filein = -I/usr/include/ -I${MKLROOT}/include
 ## Added -lifcore for Intel Fortran runtime and -limf for Intel math library
 LDLIBS = -L${MKLROOT}/lib -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lifcore -limf -lpthread -lm -ldl
 
-## Aggressive optimization flags + PGO Phase 2 (profile-guided optimization)
-## -fprofile-instr-use: use collected profile data to guide optimization decisions
-## (branch prediction, basic block layout, inlining, loop unrolling)
-PROFDATA = ../../pgo_profile/default.profdata
-CXXAPPFLAGS = -O3 -xHost -fp-model fast=2 -fma -ipo \
-              -fprofile-instr-use=$(PROFDATA) \
-              -Dfortran3 -Dnewc -I${MKLROOT}/include
-f90appflags = -O3 -xHost -fp-model fast=2 -fma -ipo \
-              -fprofile-instr-use=$(PROFDATA) \
-              -align array64byte -fpp -I${MKLROOT}/include
+## PGO build mode switch (ABE only; TwoPunctureABE always uses opt flags)
+##   opt        : (default) maximum performance with PGO profile-guided optimization
+##   instrument : PGO Phase 1 instrumentation to collect fresh profile data
+PGO_MODE ?= opt
 f90 = ifx
 f77 = ifx
 CXX = icpx
diff --git a/pgo_profile/default.profdata b/pgo_profile/default.profdata
index c09d078..6036a08 100644
Binary files a/pgo_profile/default.profdata and b/pgo_profile/default.profdata differ
diff --git a/pgo_profile/default.profdata.backup2 b/pgo_profile/default.profdata.backup2
new file mode 100644
index 0000000..c09d078
Binary files /dev/null and b/pgo_profile/default.profdata.backup2 differ
diff --git a/pgo_profile/default_9725923726611433605_0.profraw b/pgo_profile/default_9725923726611433605_0.profraw
new file mode 100644
index 0000000..4ffbddd
Binary files /dev/null and b/pgo_profile/default_9725923726611433605_0.profraw differ