Merge branch 'cjy-oneapi-opus-hotfix'

2026-02-27 15:13:40 +08:00
parent e6329b013d e0b5e012df
commit 3cee05f262
31 changed files with 3278 additions and 246 deletions
--- a/AMSS_NCKU_source/makefile.inc
+++ b/AMSS_NCKU_source/makefile.inc
@@ -8,18 +8,31 @@ filein  = -I/usr/include/ -I${MKLROOT}/include

 ## Using sequential MKL (OpenMP disabled for better single-threaded performance)
 ## Added -lifcore for Intel Fortran runtime and -limf for Intel math library
-LDLIBS  = -L${MKLROOT}/lib -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lifcore -limf -lpthread -lm -ldl
+LDLIBS  = -L${MKLROOT}/lib -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lifcore -limf -lpthread -lm -ldl -liomp5

-## Aggressive optimization flags + PGO Phase 2 (profile-guided optimization)
-## -fprofile-instr-use: use collected profile data to guide optimization decisions
-##   (branch prediction, basic block layout, inlining, loop unrolling)
-PROFDATA     = ../../pgo_profile/default.profdata
-CXXAPPFLAGS  = -O3 -xHost -fp-model fast=2 -fma -ipo \
-               -fprofile-instr-use=$(PROFDATA) \
-               -Dfortran3 -Dnewc -I${MKLROOT}/include
-f90appflags  = -O3 -xHost -fp-model fast=2 -fma -ipo \
-               -fprofile-instr-use=$(PROFDATA) \
-               -align array64byte -fpp -I${MKLROOT}/include
+## PGO_MODE: PGO build mode switch (ABE only; TwoPunctureABE always uses opt flags). Override with e.g. `make PGO_MODE=instrument`.
+##   opt        : (default) maximum performance, built with profile-guided optimization
+##   instrument : PGO Phase 1 instrumentation build, used to collect fresh profile data
+PGO_MODE ?= opt
+
+## INTERP_LB_MODE: Interp_Points load balance profiling mode. Override with e.g. `make INTERP_LB_MODE=profile`.
+##   off        : (default) no load balance instrumentation (INTERP_LB_FLAGS stays empty)
+##   profile    : Pass 1 — instrument Interp_Points to collect timing profile (INTERP_LB_FLAGS = -DINTERP_LB_PROFILE)
+##   optimize   : Pass 2 — read profile and apply block rebalancing (INTERP_LB_FLAGS = -DINTERP_LB_OPTIMIZE)
+INTERP_LB_MODE ?= off
+
+ifeq ($(strip $(INTERP_LB_MODE)),profile)        # Pass 1: INTERP_LB_FLAGS carries the profiling define
+INTERP_LB_FLAGS = -DINTERP_LB_PROFILE
+else ifeq ($(strip $(INTERP_LB_MODE)),optimize)  # Pass 2: INTERP_LB_FLAGS carries the rebalancing define
+INTERP_LB_FLAGS = -DINTERP_LB_OPTIMIZE
+else  # off (or empty): no define. Any other value aborts at parse time so a typo cannot silently skip instrumentation.
+INTERP_LB_FLAGS := $(if $(filter-out off,$(strip $(INTERP_LB_MODE))),$(error Unknown INTERP_LB_MODE '$(INTERP_LB_MODE)'; expected one of: off profile optimize))
+endif
+
+## USE_CXX_KERNELS: kernel implementation switch. Override with e.g. `make USE_CXX_KERNELS=0`.
+##   1 (default) : use the C++ rewrite of bssn_rhs and helper kernels (faster)
+##   0           : fall back to the original Fortran kernels
+USE_CXX_KERNELS ?= 1
 f90          = ifx
 f77          = ifx
 CXX          = icpx