## AMD AOCC toolchain configuration with AOCL (optimized for AMD EPYC Zen 4)

## Root of the AOCL installation; its include/ and lib/ subdirectories are used below.
## Override on the command line or in the environment if AOCL lives elsewhere.
AOCL_ROOT ?= /home/aocc/aocl/5.2.0/aocc

## Install prefix of the OpenMPI build that was compiled with AOCC.
OMPI_PREFIX ?= /home/aocc/openmpi-5.0.10

## Include search flags (-I...) for system headers and AOCL headers.
filein = -I/usr/include/ -I$(AOCL_ROOT)/include

## BLAS/LAPACK are provided by AOCL BLIS + libFLAME.
##   AOCC Fortran runtime : -lflang (includes FortranRuntime)
##   AOCC OpenMP runtime  : -lomp   (LLVM OpenMP)
## -lflame is listed before -lblis on purpose: libFLAME's LAPACK routines call
## BLAS routines from BLIS, and a single-pass linker resolves static archives
## left to right, so the dependent library has to come first.
LDLIBS = -L$(AOCL_ROOT)/lib -lflame -lblis -lamdlibm -lflang -lpgmath -lpthread -lm -ldl -lomp

## Interp_Points load-balance profiling mode
##   off      : (default) no load-balance instrumentation
##   profile  : pass 1 - instrument Interp_Points to collect a timing profile
##   optimize : pass 2 - read the profile and apply block rebalancing
INTERP_LB_MODE ?= off

## Map the mode onto a preprocessor define. The value is stripped so stray
## whitespace (e.g. from a quoted command-line argument) does not silently
## select the "off" branch.
ifeq ($(strip $(INTERP_LB_MODE)),profile)
INTERP_LB_FLAGS = -DINTERP_LB_PROFILE
else ifeq ($(strip $(INTERP_LB_MODE)),optimize)
INTERP_LB_FLAGS = -DINTERP_LB_OPTIMIZE
else
## Anything that is not a documented mode still builds without instrumentation
## (as before), but a typo is now reported instead of being swallowed.
ifneq ($(strip $(INTERP_LB_MODE)),off)
$(warning Unrecognized INTERP_LB_MODE='$(INTERP_LB_MODE)' (expected off, profile or optimize); building without load-balance instrumentation)
endif
INTERP_LB_FLAGS =
endif

## Kernel implementation switch
##   1 (default) : use the C++ rewrite of bssn_rhs and helper kernels (faster)
##   0           : fall back to the original Fortran kernels
USE_CXX_KERNELS ?= 1

## Z4C Cartesian RHS kernel switch
##   1 (default) : use the C++ rewrite of Z4c_rhs (main Cartesian path, faster)
##   0           : use the original Fortran Z4c_rhs.o
USE_CXX_Z4C_KERNELS ?= 1

## BSSN-EScalar RHS switch
##   1 (default) : use the BSSN-EScalar C wrapper on the normal patch path
##   0           : keep the original Fortran BSSN-EScalar RHS for precision-safe runs
## Note: this requires USE_CXX_KERNELS=1 because the wrapper reuses the C BSSN kernel.
USE_CXX_ESCALAR_KERNEL ?= 1

## Transfer caching policy
##   auto (default) : cached transfer for BSSN vacuum only; every other path
##                    stays on the safe uncached path
##   1              : force cached Sync/Restrict/OutBd transfer on evolution hot paths
##   0              : force the original uncached transfer path
USE_TRANSFER_CACHE ?= auto

## RK4 kernel implementation switch
##   1 (default) : C/C++ rewrite of rungekutta4_rout (for optimization experiments)
##   0           : original Fortran rungekutta4_rout.o
USE_CXX_RK4 ?= 1

## C and C++ compiler drivers.
CC = clang
CXX = clang++

## Fortran compiler driver; the same command serves both variables.
f77 = flang
f90 = flang

## Final link goes through the mpicxx wrapper found under OMPI_PREFIX.
CLINKER = $(OMPI_PREFIX)/bin/mpicxx

## CUDA settings. CUDA_LIB_PATH carries both the library (-L) and the
## include (-I) search flags for the CUDA installation.
Cu = nvcc
CUDA_LIB_PATH = -L/usr/lib/cuda/lib64 -I/usr/include -I/usr/lib/cuda/include
CUDA_APP_FLAGS = -c -g -O3 --ptxas-options=-v -Dfortran3 -Dnewc