Migrate build system from Intel oneAPI to AMD AOCC/AOCL toolchain
- Add TOOLCHAIN=aocc option with flang/clang++/mpicxx compilers
- Replace Intel flags (-xHost/-fma/-ipo/-qopenmp) with AOCC flags (-march=znver5/-ffast-math/-flto/-fopenmp) targeting EPYC 9755
- Replace Intel oneMKL with AMD AOCL (BLIS + libFLAME + amdlibm)
- Replace Intel TBBMALLOC with system jemalloc
- Change MKL-specific headers to standard CBLAS/LAPACKE (TwoPunctures.C, gaussj.C)
- Guard TBBMALLOC to Intel toolchain only

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -27,7 +27,7 @@ using namespace std;
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
#include "TwoPunctures.h"
|
#include "TwoPunctures.h"
|
||||||
#include <mkl_cblas.h>
|
#include <cblas.h>
|
||||||
|
|
||||||
TwoPunctures::TwoPunctures(double mp, double mm, double b,
|
TwoPunctures::TwoPunctures(double mp, double mm, double b,
|
||||||
double P_plusx, double P_plusy, double P_plusz,
|
double P_plusx, double P_plusy, double P_plusz,
|
||||||
|
|||||||
@@ -17,8 +17,8 @@ using namespace std;
|
|||||||
#include <math.h>
|
#include <math.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// Intel oneMKL LAPACK interface
|
// LAPACKE interface (AOCL for AOCC, oneMKL for Intel)
|
||||||
#include <mkl_lapacke.h>
|
#include <lapacke.h>
|
||||||
/* Linear equation solution using Intel oneMKL LAPACK.
|
/* Linear equation solution using Intel oneMKL LAPACK.
|
||||||
a[0..n-1][0..n-1] is the input matrix. b[0..n-1] is input
|
a[0..n-1][0..n-1] is the input matrix. b[0..n-1] is input
|
||||||
containing the right-hand side vectors. On output a is
|
containing the right-hand side vectors. On output a is
|
||||||
|
|||||||
@@ -36,6 +36,16 @@ endif
|
|||||||
|
|
||||||
TP_OPTFLAGS = -O3 -xHost -fp-model fast=2 -fma -ipo \
|
TP_OPTFLAGS = -O3 -xHost -fp-model fast=2 -fma -ipo \
|
||||||
-Dfortran3 -Dnewc $(MKL_INC)
|
-Dfortran3 -Dnewc $(MKL_INC)
|
||||||
|
else ifeq ($(TOOLCHAIN),aocc)
|
||||||
|
## AMD AOCC build flags optimized for EPYC Zen 5 (-march=znver5)
|
||||||
|
## PGO_MODE is ignored in this branch.
|
||||||
|
OMP_FLAG = -fopenmp
|
||||||
|
CXXAPPFLAGS = -O3 -march=znver5 -ffast-math -flto \
|
||||||
|
-Dfortran3 -Dnewc -I$(AOCL_ROOT)/include $(INTERP_LB_FLAGS)
|
||||||
|
f90appflags = -O3 -march=znver5 -ffast-math -flto \
|
||||||
|
-cpp -I$(AOCL_ROOT)/include $(POLINT6_FLAG)
|
||||||
|
TP_OPTFLAGS = -O3 -march=znver5 -ffast-math -flto \
|
||||||
|
-Dfortran3 -Dnewc -I$(AOCL_ROOT)/include
|
||||||
else
|
else
|
||||||
## NVHPC defaults: mpicc/mpicxx/mpifort wrappers
|
## NVHPC defaults: mpicc/mpicxx/mpifort wrappers
|
||||||
## PGO_MODE is ignored in this branch.
|
## PGO_MODE is ignored in this branch.
|
||||||
|
|||||||
@@ -1,6 +1,7 @@
|
|||||||
## Toolchain selection
|
## Toolchain selection
|
||||||
## nvhpc : NVIDIA HPC SDK + CUDA-aware MPI (default)
|
## nvhpc : NVIDIA HPC SDK + CUDA-aware MPI
|
||||||
## intel : Intel oneAPI toolchain (legacy path)
|
## intel : Intel oneAPI toolchain (legacy path)
|
||||||
|
## aocc : AMD AOCC + AOCL + OpenMPI (for AMD EPYC Zen 5, with CUDA)
|
||||||
TOOLCHAIN ?= intel
|
TOOLCHAIN ?= intel
|
||||||
|
|
||||||
## PGO build mode switch (ABE only; TwoPunctureABE always uses opt flags)
|
## PGO build mode switch (ABE only; TwoPunctureABE always uses opt flags)
|
||||||
@@ -26,6 +27,10 @@ MKLROOT ?= /home/intel/oneapi/mkl/latest
|
|||||||
MKL_LIBDIR ?= $(MKLROOT)/lib/intel64
|
MKL_LIBDIR ?= $(MKLROOT)/lib/intel64
|
||||||
MKL_INC ?= -I$(MKLROOT)/include
|
MKL_INC ?= -I$(MKLROOT)/include
|
||||||
|
|
||||||
|
## AMD AOCC toolchain paths (used when TOOLCHAIN=aocc)
|
||||||
|
AOCL_ROOT ?= /home/aocc/aocl/5.2.0/aocc
|
||||||
|
OMPI_PREFIX ?= /home/aocc/aocc-openmpi
|
||||||
|
|
||||||
NVHPC_ROOT ?= /home/nvidia/hpc_sdk/Linux_x86_64/25.11
|
NVHPC_ROOT ?= /home/nvidia/hpc_sdk/Linux_x86_64/25.11
|
||||||
CUDA_HOME ?= $(NVHPC_ROOT)/cuda
|
CUDA_HOME ?= $(NVHPC_ROOT)/cuda
|
||||||
CUDA_ARCH ?= sm_80
|
CUDA_ARCH ?= sm_80
|
||||||
@@ -67,6 +72,16 @@ LDLIBS = -L$(MKL_LIBDIR) -Wl,-rpath,$(MKL_LIBDIR) \
|
|||||||
-lmkl_intel_lp64 -lmkl_sequential -lmkl_core \
|
-lmkl_intel_lp64 -lmkl_sequential -lmkl_core \
|
||||||
-lifcore -limf -liomp5 -lpthread -lm -ldl \
|
-lifcore -limf -liomp5 -lpthread -lm -ldl \
|
||||||
-L$(CUDA_HOME)/lib64 -Wl,-rpath,$(CUDA_HOME)/lib64 -lcuda -lcudart
|
-L$(CUDA_HOME)/lib64 -Wl,-rpath,$(CUDA_HOME)/lib64 -lcuda -lcudart
|
||||||
|
else ifeq ($(TOOLCHAIN),aocc)
|
||||||
|
f90 = flang
|
||||||
|
f77 = flang
|
||||||
|
CXX = clang++
|
||||||
|
CC = clang
|
||||||
|
CLINKER = $(OMPI_PREFIX)/bin/mpicxx
|
||||||
|
filein = -I/usr/include/ -I$(AOCL_ROOT)/include -I$(CUDA_HOME)/include
|
||||||
|
LDLIBS = -L$(AOCL_ROOT)/lib -lblis -lflame -lamdlibm -lflang -lpgmath \
|
||||||
|
-ljemalloc -lpthread -lm -ldl -lomp \
|
||||||
|
-L$(CUDA_HOME)/lib64 -Wl,-rpath,$(CUDA_HOME)/lib64 -lcuda -lcudart
|
||||||
else ifeq ($(TOOLCHAIN),nvhpc)
|
else ifeq ($(TOOLCHAIN),nvhpc)
|
||||||
f90 = mpifort
|
f90 = mpifort
|
||||||
f77 = mpifort
|
f77 = mpifort
|
||||||
@@ -82,9 +97,11 @@ LDLIBS = -L$(MKL_LIBDIR) -Wl,-rpath,$(MKL_LIBDIR) \
|
|||||||
-fortranlibs
|
-fortranlibs
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
ifeq ($(TOOLCHAIN),intel)
|
||||||
ifeq ($(USE_TBBMALLOC),1)
|
ifeq ($(USE_TBBMALLOC),1)
|
||||||
LDLIBS := $(TBBMALLOC_LIBS) $(LDLIBS)
|
LDLIBS := $(TBBMALLOC_LIBS) $(LDLIBS)
|
||||||
endif
|
endif
|
||||||
|
endif
|
||||||
|
|
||||||
Cu = $(NVHPC_ROOT)/compilers/bin/nvcc
|
Cu = $(NVHPC_ROOT)/compilers/bin/nvcc
|
||||||
CUDA_LIB_PATH = -L$(CUDA_HOME)/lib64 -I$(CUDA_HOME)/include
|
CUDA_LIB_PATH = -L$(CUDA_HOME)/lib64 -I$(CUDA_HOME)/include
|
||||||
|
|||||||
Reference in New Issue
Block a user