66 lines
2.8 KiB
PHP
Executable File
66 lines
2.8 KiB
PHP
Executable File
## GCC version (commented out)
## filein = -I/usr/include -I/usr/lib/x86_64-linux-gnu/mpich/include -I/usr/lib/x86_64-linux-gnu/openmpi/lib/ -I/usr/lib/gcc/x86_64-linux-gnu/11/ -I/usr/include/c++/11/
## filein = -I/usr/include/ -I/usr/include/openmpi-x86_64/ -I/usr/lib/x86_64-linux-gnu/openmpi/include/ -I/usr/lib/x86_64-linux-gnu/openmpi/lib/ -I/usr/lib/gcc/x86_64-linux-gnu/11/ -I/usr/include/c++/11/
## LDLIBS = -L/usr/lib/x86_64-linux-gnu -L/usr/lib64 -L/usr/lib/gcc/x86_64-linux-gnu/11 -lgfortran -lmpi -lgfortran

## Intel oneAPI version with oneMKL (optimized for performance)
##
## MKLROOT is not assigned in this section, so it has to be provided from
## outside (environment or command line). If it is empty the paths below
## collapse to -I/include and -L/lib and the build only fails much later with
## missing headers or libraries, so report it here. A warning (not an error)
## is used so that goals which never compile or link keep working.
ifeq ($(strip $(MKLROOT)),)
$(warning MKLROOT is not set: the MKL include and library paths below will be wrong)
endif

## Header search path passed to the compilers.
filein = -I/usr/include/ -I$(MKLROOT)/include

## Link line using the sequential MKL layer (-lmkl_sequential).
## -lifcore was added for the Intel Fortran runtime and -limf for the Intel
## math library.
## NOTE(review): -liomp5 is still linked although the sequential MKL layer is
## selected; presumably other objects need it -- confirm before removing.
LDLIBS = -L$(MKLROOT)/lib -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lifcore -limf -lpthread -lm -ldl -liomp5

## Memory allocator switch
##   USE_TBBMALLOC = 1 (default) : link the Intel oneTBB allocator (libtbbmalloc)
##   USE_TBBMALLOC = 0           : use the system default allocator (ptmalloc)
USE_TBBMALLOC ?= 1

## Preferred full path of the shared library. Override it on the command line
## or in the environment for a different installation; when the file does not
## exist the linker is asked to resolve -ltbbmalloc from its search path.
TBBMALLOC_SO ?= /home/intel/oneapi/2025.3/lib/libtbbmalloc.so

## The allocator library must be kept even though no symbol is referenced
## directly, hence --no-as-needed. --push-state/--pop-state restores whatever
## as-needed setting was active before, so that enabling the allocator does not
## change how the libraries that follow (MKL, runtimes, ...) are linked.
ifneq ($(wildcard $(TBBMALLOC_SO)),)
TBBMALLOC_LIBS = -Wl,--push-state,--no-as-needed $(TBBMALLOC_SO) -Wl,--pop-state
else
TBBMALLOC_LIBS = -Wl,--push-state,--no-as-needed -ltbbmalloc -Wl,--pop-state
endif

## Prepend the allocator so it precedes every other library on the link line.
## Note: ':=' expands the current LDLIBS immediately, so anything LDLIBS
## references must already be defined at this point.
ifeq ($(strip $(USE_TBBMALLOC)),1)
LDLIBS := $(TBBMALLOC_LIBS) $(LDLIBS)
endif

## PGO build mode (affects ABE only; TwoPunctureABE always uses the opt flags)
##   opt        : (default) maximum performance with profile-guided optimization
##   instrument : PGO phase 1, instrumented build that collects fresh profile data
PGO_MODE ?= opt

## Interp_Points load balance profiling mode
##   off      : (default) no load balance instrumentation
##   profile  : pass 1 -- instrument Interp_Points to collect a timing profile
##   optimize : pass 2 -- read the profile and apply block rebalancing
INTERP_LB_MODE ?= off

## Translate the mode into the preprocessor define handed to the compilers.
## Surrounding whitespace in the mode is ignored. Any value other than the
## three documented ones still builds without extra flags (as before), but is
## now reported, so a typo such as "optimise" no longer passes unnoticed.
ifeq ($(strip $(INTERP_LB_MODE)),profile)
INTERP_LB_FLAGS = -DINTERP_LB_PROFILE
else ifeq ($(strip $(INTERP_LB_MODE)),optimize)
INTERP_LB_FLAGS = -DINTERP_LB_OPTIMIZE
else
INTERP_LB_FLAGS =
ifneq ($(strip $(INTERP_LB_MODE)),off)
$(warning Unknown INTERP_LB_MODE '$(INTERP_LB_MODE)': expected off / profile / optimize -- building without load balance flags)
endif
endif

## Kernel implementation selector
##   USE_CXX_KERNELS = 1 (default) : C++ rewrite of bssn_rhs and helper kernels (faster)
##   USE_CXX_KERNELS = 0           : fall back to the original Fortran kernels
USE_CXX_KERNELS ?= 1

## RK4 kernel implementation selector
##   USE_CXX_RK4 = 1 (default) : C/C++ rewrite of rungekutta4_rout (for optimization experiments)
##   USE_CXX_RK4 = 0           : original Fortran rungekutta4_rout.o
USE_CXX_RK4 ?= 1

## Toolchain for the Intel oneAPI configuration selected above.
##   CC / CXX  : C and C++ compilers
##   f77 / f90 : Fortran compiler (the same driver serves both)
##   CLINKER   : driver used for the final link
##               NOTE(review): presumably the Intel MPI wrapper around icpx -- confirm
CC      = icx
CXX     = icpx
f77     = ifx
f90     = ifx
CLINKER = mpiicpx

## CUDA compiler driver.
Cu = nvcc
## Search paths for the CUDA build. Despite the name, this holds both library
## (-L) and header (-I) directories.
CUDA_LIB_PATH = -L/usr/lib/cuda/lib64 -I/usr/include -I/usr/lib/cuda/include
## Earlier flag set, kept for reference; it additionally pinned
## -arch compute_13 -code compute_13,sm_13:
#CUDA_APP_FLAGS = -c -g -O3 --ptxas-options=-v -arch compute_13 -code compute_13,sm_13 -Dfortran3 -Dnewc
## Active flag set (no explicit architecture selection).
CUDA_APP_FLAGS = -c -g -O3 --ptxas-options=-v -Dfortran3 -Dnewc
|