diff --git a/AMSS_NCKU_source/TwoPunctures.C b/AMSS_NCKU_source/TwoPunctures.C index 1b6e590..9101183 100644 --- a/AMSS_NCKU_source/TwoPunctures.C +++ b/AMSS_NCKU_source/TwoPunctures.C @@ -27,7 +27,7 @@ using namespace std; #endif #include "TwoPunctures.h" -#include +#include TwoPunctures::TwoPunctures(double mp, double mm, double b, double P_plusx, double P_plusy, double P_plusz, diff --git a/AMSS_NCKU_source/gaussj.C b/AMSS_NCKU_source/gaussj.C index 86c7777..885284f 100644 --- a/AMSS_NCKU_source/gaussj.C +++ b/AMSS_NCKU_source/gaussj.C @@ -17,8 +17,8 @@ using namespace std; #include #endif -// Intel oneMKL LAPACK interface -#include +// LAPACKE interface (AOCL for AOCC, oneMKL for Intel) +#include /* Linear equation solution using Intel oneMKL LAPACK. a[0..n-1][0..n-1] is the input matrix. b[0..n-1] is input containing the right-hand side vectors. On output a is diff --git a/AMSS_NCKU_source/makefile b/AMSS_NCKU_source/makefile index b3f6914..6842b0f 100644 --- a/AMSS_NCKU_source/makefile +++ b/AMSS_NCKU_source/makefile @@ -36,6 +36,16 @@ endif TP_OPTFLAGS = -O3 -xHost -fp-model fast=2 -fma -ipo \ -Dfortran3 -Dnewc $(MKL_INC) +else ifeq ($(TOOLCHAIN),aocc) +## AMD AOCC build flags optimized for EPYC Zen 5 (-march=znver5) +## PGO_MODE is ignored in this branch. +OMP_FLAG = -fopenmp +CXXAPPFLAGS = -O3 -march=znver5 -ffast-math -flto \ + -Dfortran3 -Dnewc -I$(AOCL_ROOT)/include $(INTERP_LB_FLAGS) +f90appflags = -O3 -march=znver5 -ffast-math -flto \ + -cpp -I$(AOCL_ROOT)/include $(POLINT6_FLAG) +TP_OPTFLAGS = -O3 -march=znver5 -ffast-math -flto \ + -Dfortran3 -Dnewc -I$(AOCL_ROOT)/include else ## NVHPC defaults: mpicc/mpicxx/mpifort wrappers ## PGO_MODE is ignored in this branch. 
diff --git a/AMSS_NCKU_source/makefile.inc b/AMSS_NCKU_source/makefile.inc
index 54bc86c..5ea42a7 100755
--- a/AMSS_NCKU_source/makefile.inc
+++ b/AMSS_NCKU_source/makefile.inc
@@ -1,6 +1,7 @@
 ## Toolchain selection
-## nvhpc : NVIDIA HPC SDK + CUDA-aware MPI (default)
+## nvhpc : NVIDIA HPC SDK + CUDA-aware MPI
 ## intel : Intel oneAPI toolchain (legacy path)
+## aocc : AMD AOCC + AOCL + OpenMPI (for AMD EPYC Zen 5, with CUDA)
 TOOLCHAIN ?= intel
 
 ## PGO build mode switch (ABE only; TwoPunctureABE always uses opt flags)
@@ -26,6 +27,10 @@ MKLROOT ?= /home/intel/oneapi/mkl/latest
 MKL_LIBDIR ?= $(MKLROOT)/lib/intel64
 MKL_INC ?= -I$(MKLROOT)/include
 
+## AMD AOCC toolchain paths (used when TOOLCHAIN=aocc)
+AOCL_ROOT ?= /home/aocc/aocl/5.2.0/aocc
+OMPI_PREFIX ?= /home/aocc/aocc-openmpi
+
 NVHPC_ROOT ?= /home/nvidia/hpc_sdk/Linux_x86_64/25.11
 CUDA_HOME ?= $(NVHPC_ROOT)/cuda
 CUDA_ARCH ?= sm_80
@@ -67,6 +72,17 @@ LDLIBS = -L$(MKL_LIBDIR) -Wl,-rpath,$(MKL_LIBDIR) \
          -lmkl_intel_lp64 -lmkl_sequential -lmkl_core \
          -lifcore -limf -liomp5 -lpthread -lm -ldl \
          -L$(CUDA_HOME)/lib64 -Wl,-rpath,$(CUDA_HOME)/lib64 -lcuda -lcudart
+else ifeq ($(TOOLCHAIN),aocc)
+f90 = flang
+f77 = flang
+CXX = clang++
+CC = clang
+CLINKER = $(OMPI_PREFIX)/bin/mpicxx
+filein = -I/usr/include/ -I$(AOCL_ROOT)/include -I$(CUDA_HOME)/include
+## AOCL_ROOT is not a default loader path: embed an rpath, as done for MKL and CUDA.
+LDLIBS = -L$(AOCL_ROOT)/lib -Wl,-rpath,$(AOCL_ROOT)/lib -lblis -lflame -lamdlibm -lflang -lpgmath \
+         -ljemalloc -lpthread -lm -ldl -lomp \
+         -L$(CUDA_HOME)/lib64 -Wl,-rpath,$(CUDA_HOME)/lib64 -lcuda -lcudart
 else ifeq ($(TOOLCHAIN),nvhpc)
 f90 = mpifort
 f77 = mpifort
@@ -82,9 +98,11 @@ LDLIBS = -L$(MKL_LIBDIR) -Wl,-rpath,$(MKL_LIBDIR) \
          -fortranlibs
 endif
 
+ifeq ($(TOOLCHAIN),intel)
 ifeq ($(USE_TBBMALLOC),1)
 LDLIBS := $(TBBMALLOC_LIBS) $(LDLIBS)
 endif
+endif
 
 Cu = $(NVHPC_ROOT)/compilers/bin/nvcc
 CUDA_LIB_PATH = -L$(CUDA_HOME)/lib64 -I$(CUDA_HOME)/include