## GCC version with OpenMPI and OpenBLAS
OMPI_ROOT = /usr/mpi/gcc/openmpi-4.1.9a1

## Ensure mpicxx and final executables find OpenMPI libs at build- and runtime.
## The inherited LD_LIBRARY_PATH is appended only when it is non-empty: an
## unconditional "...:$(LD_LIBRARY_PATH)" would leave a trailing ':' when the
## variable is unset, and the dynamic loader treats that empty entry as the
## current working directory.
export LD_LIBRARY_PATH := $(OMPI_ROOT)/lib64$(if $(LD_LIBRARY_PATH),:$(LD_LIBRARY_PATH))

## Header search paths: system headers plus the OpenMPI headers
filein = -I/usr/include/ -I$(OMPI_ROOT)/include

## OpenBLAS (OpenMP variant) + gfortran runtime
## -Wl,-rpath ensures ABE / TwoPunctureABE find libmpi at runtime without LD_LIBRARY_PATH
LDLIBS = -Wl,-rpath,$(OMPI_ROOT)/lib64 -lopenblaso -lgfortran -lpthread -lm -ldl -lgomp

# OpenMP flag for selective compilation
OMP_FLAG = -fopenmp

## Memory allocator switch
##   0 (default) : use system default allocator (ptmalloc)
##   1           : use jemalloc (install jemalloc-devel first)
USE_JEMALLOC ?= 0
ifeq ($(USE_JEMALLOC),1)
  # Prepend -ljemalloc so it precedes the other libraries on the link line.
  LDLIBS := -ljemalloc $(LDLIBS)
endif

## Interp_Points load balance profiling mode
##   off      : (default) no load balance instrumentation
##   profile  : Pass 1 - instrument Interp_Points to collect timing profile
##   optimize : Pass 2 - read profile and apply block rebalancing
## Any other value is treated as "off", with a warning so typos do not go unnoticed.
INTERP_LB_MODE ?= off
ifeq ($(INTERP_LB_MODE),profile)
  INTERP_LB_FLAGS = -DINTERP_LB_PROFILE
else ifeq ($(INTERP_LB_MODE),optimize)
  INTERP_LB_FLAGS = -DINTERP_LB_OPTIMIZE
else
  ifneq ($(INTERP_LB_MODE),off)
    $(warning Unknown INTERP_LB_MODE '$(INTERP_LB_MODE)' (expected off, profile or optimize); treating it as off)
  endif
  INTERP_LB_FLAGS =
endif

## Kernel implementation switch
##   1 (default) : use C++ rewrite of bssn_rhs and helper kernels (faster)
##   0           : fall back to original Fortran kernels
USE_CXX_KERNELS ?= 1

## Z4C Cartesian RHS kernel switch
##   1 (default) : use C++ rewrite of Z4c_rhs (main Cartesian path faster)
##   0           : use original Fortran Z4c_rhs.o
USE_CXX_Z4C_KERNELS ?= 1

## BSSN-EScalar RHS switch
##   1 (default) : use BSSN-EScalar C wrapper on the normal patch path
##   0           : keep the original Fortran BSSN-EScalar RHS for precision-safe runs
## Note: this requires USE_CXX_KERNELS=1 because the wrapper reuses the C BSSN kernel.
USE_CXX_ESCALAR_KERNEL ?= 1

## BSSN-EM RHS kernel selection (experimental)
##   0 (default) : EM fields keep the original Fortran RHS from empart.f90
##   1           : normal patch path uses the BSSN-EM C kernel (bssn_em_rhs_c.C)
## Requires USE_CXX_KERNELS=1.
USE_CXX_EM_KERNEL ?= 0

## Cached transfer selection for Sync/Restrict/OutBd on evolution hot paths
##   auto (default) : enabled for BSSN vacuum; other paths stay on the safe uncached path
##   1              : force the cached transfer
##   0              : force the original uncached transfer path
USE_TRANSFER_CACHE ?= auto

## RK4 kernel selection
##   1 (default) : C/C++ rewrite of rungekutta4_rout (for optimization experiments)
##   0           : original Fortran rungekutta4_rout.o
USE_CXX_RK4 ?= 1

## Toolchain: GNU C/C++/Fortran compilers, mpicxx as the linker driver,
## and nvcc as the CUDA compiler.
CC      = gcc
CXX     = g++
f77     = gfortran
f90     = gfortran
CLINKER = mpicxx
Cu      = nvcc

## CUDA library/header search paths and nvcc compile flags
CUDA_LIB_PATH  = -L/usr/lib/cuda/lib64 -I/usr/include -I/usr/lib/cuda/include
CUDA_APP_FLAGS = -c -g -O3 --ptxas-options=-v -Dfortran3 -Dnewc