Compiler-level and hot-path optimizations for GW150914

- makefile.inc: add -ipo (interprocedural optimization) and -align array64byte (64-byte array alignment for vectorization) - fmisc.f90: remove redundant funcc=0.d0 zeroing from symmetry_bd, symmetry_tbd, symmetry_stbd (~328+ full-array memsets eliminated per timestep) - enforce_algebra.f90: rewrite enforce_ag and enforce_ga as point-wise loops, replacing 12 stack-allocated 3D temporary arrays with scalar locals for better cache locality All changes are mathematically equivalent — no algorithmic modifications. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-06 17:13:39 +08:00
parent 223ec17a54
commit 6738854a9d
3 changed files with 105 additions and 79 deletions
--- a/AMSS_NCKU_source/makefile.inc
+++ b/AMSS_NCKU_source/makefile.inc
@@ -16,10 +16,10 @@ LDLIBS  = -L${MKLROOT}/lib -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lifcore
 ## -fp-model fast=2: Aggressive floating-point optimizations
 ## -fma: Enable fused multiply-add instructions
 ## Note: OpenMP has been disabled (-qopenmp removed) due to performance issues
-CXXAPPFLAGS  = -O3 -xHost -fp-model fast=2 -fma \
+CXXAPPFLAGS  = -O3 -xHost -fp-model fast=2 -fma -ipo \
               -Dfortran3 -Dnewc -I${MKLROOT}/include
-f90appflags  = -O3 -xHost -fp-model fast=2 -fma \
-               -fpp -I${MKLROOT}/include
+f90appflags  = -O3 -xHost -fp-model fast=2 -fma -ipo \
+               -align array64byte -fpp -I${MKLROOT}/include
 f90          = ifx
 f77          = ifx
 CXX          = icpx