Migrate build system from Intel oneAPI to AMD AOCC/AOCL/OpenMPI

Replace Intel compilers (ifx/icpx/icx) with AOCC (flang/clang++/clang),
Intel MPI (mpiicpx) with AOCC-built OpenMPI (mpicxx), and Intel MKL
with AOCL BLIS/libFLAME. Replace -xHost with -march=znver4, -ipo with
-flto, -fp-model fast=2 with -ffast-math, -qopenmp with -fopenmp.
Remove PGO, TBB allocator, and Intel-specific runtime libraries.
Fix MKL-specific includes in TwoPunctures.C and gaussj.C to use
standard CBLAS/LAPACKE headers from AOCL.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
2026-04-28 02:19:00 +08:00
parent 0db537479b
commit 0992e2219a
4 changed files with 145 additions and 181 deletions

View File

@@ -1,33 +1,17 @@
## GCC version (commented out)
## filein = -I/usr/include -I/usr/lib/x86_64-linux-gnu/mpich/include -I/usr/lib/x86_64-linux-gnu/openmpi/lib/ -I/usr/lib/gcc/x86_64-linux-gnu/11/ -I/usr/include/c++/11/
## filein = -I/usr/include/ -I/usr/include/openmpi-x86_64/ -I/usr/lib/x86_64-linux-gnu/openmpi/include/ -I/usr/lib/x86_64-linux-gnu/openmpi/lib/ -I/usr/lib/gcc/x86_64-linux-gnu/11/ -I/usr/include/c++/11/
## LDLIBS = -L/usr/lib/x86_64-linux-gnu -L/usr/lib64 -L/usr/lib/gcc/x86_64-linux-gnu/11 -lgfortran -lmpi -lgfortran
## AMD AOCC version with AOCL (Optimized for AMD EPYC Zen 4)
## Intel oneAPI version with oneMKL (Optimized for performance)
filein = -I/usr/include/ -I${MKLROOT}/include
## AOCL root path for includes and libraries
AOCL_ROOT ?= /home/gh0s7/AOCC/aocl/5.2.0/aocc
## Using sequential MKL (OpenMP disabled for better single-threaded performance)
## Added -lifcore for Intel Fortran runtime and -limf for Intel math library
LDLIBS = -L${MKLROOT}/lib -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lifcore -limf -lpthread -lm -ldl -liomp5
## AOCC-built OpenMPI prefix
OMPI_PREFIX ?= /home/gh0s7/AOCC/aocc-openmpi
## Memory allocator switch
## 1 (default) : link Intel oneTBB allocator (libtbbmalloc)
## 0 : use system default allocator (ptmalloc)
USE_TBBMALLOC ?= 1
TBBMALLOC_SO ?= /home/intel/oneapi/2025.3/lib/libtbbmalloc.so
ifneq ($(wildcard $(TBBMALLOC_SO)),)
TBBMALLOC_LIBS = -Wl,--no-as-needed $(TBBMALLOC_SO) -Wl,--as-needed
else
TBBMALLOC_LIBS = -Wl,--no-as-needed -ltbbmalloc -Wl,--as-needed
endif
ifeq ($(USE_TBBMALLOC),1)
LDLIBS := $(TBBMALLOC_LIBS) $(LDLIBS)
endif
filein = -I/usr/include/ -I$(AOCL_ROOT)/include
## PGO build mode switch (ABE only; TwoPunctureABE always uses opt flags)
## opt : (default) maximum performance with PGO profile-guided optimization
## instrument : PGO Phase 1 instrumentation to collect fresh profile data
PGO_MODE ?= opt
## Using AOCL BLIS + libFLAME for BLAS/LAPACK
## AOCC Fortran runtime: -lflang (includes FortranRuntime)
## AOCC OpenMP runtime: -lomp (LLVM OpenMP)
LDLIBS = -L$(AOCL_ROOT)/lib -lblis -lflame -lamdlibm -lflang -lpgmath -lpthread -lm -ldl -lomp
## Interp_Points load balance profiling mode
## off : (default) no load balance instrumentation
@@ -65,11 +49,11 @@ USE_TRANSFER_CACHE ?= auto
## 0 : use original Fortran rungekutta4_rout.o
USE_CXX_RK4 ?= 1
f90 = ifx
f77 = ifx
CXX = icpx
CC = icx
CLINKER = mpiicpx
f90 = flang
f77 = flang
CXX = clang++
CC = clang
CLINKER = $(OMPI_PREFIX)/bin/mpicxx
Cu = nvcc
CUDA_LIB_PATH = -L/usr/lib/cuda/lib64 -I/usr/include -I/usr/lib/cuda/include