Files
AMSS-NCKU/AMSS_NCKU_source/makefile.inc
CGH0S7 9687d9a3dd Switch build system from Intel oneAPI to GCC + OpenMPI
- Replace compilers: ifx→gfortran, icx→gcc, icpx→g++, mpiicpx→mpicxx
- Replace flags: -xHost→-march=x86-64-v4, -ipo→-flto, -fpp→-cpp
- Replace flags: -fp-model fast=2→-ffast-math, -fma→-mfma
- Replace flags: -qopenmp→-fopenmp
- Remove Intel-specific: -align array64byte, -liomp5, -lifcore, -limf
- Switch MKL interface: -lmkl_intel_lp64→-lmkl_gf_lp64 (gfortran)
- Replace TBB malloc with optional jemalloc (default off)
- Disable PGO entirely (was already marked negative optimization)
- TwoPunctureABE and ABE both verified to build successfully

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-04-28 22:00:58 +08:00

65 lines
2.3 KiB
PHP
Executable File

## Toolchain: GCC + OpenMPI, linked against oneMKL.
## Header search path: system includes plus the MKL headers under MKLROOT.
filein = -I/usr/include/ -I$(MKLROOT)/include
## Link line. MKL is pulled in through its gfortran interface library
## (-lmkl_gf_lp64 rather than -lmkl_intel_lp64), followed by the
## gfortran / pthread / math / dl / gomp runtime libraries.
LDLIBS = -L$(MKLROOT)/lib \
         -lmkl_gf_lp64 -lmkl_sequential -lmkl_core \
         -lgfortran -lpthread -lm -ldl -lgomp
## Optional allocator override (USE_JEMALLOC)
##   0 : default; keep the system allocator (ptmalloc)
##   1 : link against jemalloc (the jemalloc-devel package must be installed)
USE_JEMALLOC ?= 0
ifeq "$(USE_JEMALLOC)" "1"
## Prepend so that -ljemalloc comes before every other library on the link line.
LDLIBS := -ljemalloc $(LDLIBS)
endif
## Interp_Points load-balance profiling mode (INTERP_LB_MODE)
##   off      : (default) no load-balance instrumentation
##   profile  : pass 1 - instrument Interp_Points to collect a timing profile
##   optimize : pass 2 - read the profile and apply block rebalancing
## INTERP_LB_FLAGS carries the matching -D preprocessor define; it is left
## empty when the mode is "off".
INTERP_LB_MODE ?= off
ifeq ($(INTERP_LB_MODE),profile)
INTERP_LB_FLAGS = -DINTERP_LB_PROFILE
else ifeq ($(INTERP_LB_MODE),optimize)
INTERP_LB_FLAGS = -DINTERP_LB_OPTIMIZE
else
INTERP_LB_FLAGS =
## A non-empty value other than "off" that lands here is not a documented
## mode (most likely a typo). The build still falls back to "off", but it
## says so instead of silently dropping the requested instrumentation.
ifneq ($(filter-out off,$(INTERP_LB_MODE)),)
$(warning INTERP_LB_MODE='$(INTERP_LB_MODE)' is not one of off/profile/optimize; building with load-balance instrumentation off)
endif
endif
## Switch: BSSN RHS kernel implementation (USE_CXX_KERNELS)
##   1 : default; C++ rewrite of bssn_rhs and its helper kernels (faster)
##   0 : original Fortran kernels
USE_CXX_KERNELS ?= 1
## Switch: Z4C Cartesian RHS kernel (USE_CXX_Z4C_KERNELS)
##   1 : default; C++ rewrite of Z4c_rhs (faster on the main Cartesian path)
##   0 : original Fortran Z4c_rhs.o
USE_CXX_Z4C_KERNELS ?= 1
## Switch: BSSN-EScalar RHS (USE_CXX_ESCALAR_KERNEL)
##   1 : default; BSSN-EScalar C wrapper on the normal patch path
##   0 : original Fortran BSSN-EScalar RHS, for precision-safe runs
##   Requires USE_CXX_KERNELS=1, because the wrapper reuses the C BSSN kernel.
USE_CXX_ESCALAR_KERNEL ?= 1
## Switch: cached transfer (USE_TRANSFER_CACHE)
##   auto : default; enabled for BSSN vacuum, other paths stay on the safe uncached path
##   1    : force cached Sync/Restrict/OutBd transfer on the evolution hot paths
##   0    : force the original uncached transfer path
USE_TRANSFER_CACHE ?= auto
## Switch: RK4 kernel implementation (USE_CXX_RK4)
##   1 : default; C/C++ rewrite of rungekutta4_rout (for optimization experiments)
##   0 : original Fortran rungekutta4_rout.o
USE_CXX_RK4 ?= 1
## Compiler and linker drivers
##   f90 / f77 : gfortran for both Fortran variables
##   CC / CXX  : GCC C and C++ compilers
##   CLINKER   : MPI C++ wrapper (presumably used for the final link - confirm in the main makefile)
f90 = gfortran
f77 = gfortran
CC = gcc
CXX = g++
CLINKER = mpicxx
## CUDA toolchain: nvcc, its library/header search paths, and its compile flags
Cu = nvcc
CUDA_LIB_PATH = -L/usr/lib/cuda/lib64 \
                -I/usr/include \
                -I/usr/lib/cuda/include
CUDA_APP_FLAGS = -c -g -O3 \
                 --ptxas-options=-v \
                 -Dfortran3 -Dnewc