## GCC version (commented out) ## filein = -I/usr/include -I/usr/lib/x86_64-linux-gnu/mpich/include -I/usr/lib/x86_64-linux-gnu/openmpi/lib/ -I/usr/lib/gcc/x86_64-linux-gnu/11/ -I/usr/include/c++/11/ ## filein = -I/usr/include/ -I/usr/include/openmpi-x86_64/ -I/usr/lib/x86_64-linux-gnu/openmpi/include/ -I/usr/lib/x86_64-linux-gnu/openmpi/lib/ -I/usr/lib/gcc/x86_64-linux-gnu/11/ -I/usr/include/c++/11/ ## LDLIBS = -L/usr/lib/x86_64-linux-gnu -L/usr/lib64 -L/usr/lib/gcc/x86_64-linux-gnu/11 -lgfortran -lmpi -lgfortran ## Intel oneAPI version with oneMKL (Optimized for performance) filein = -I/usr/include/ -I${MKLROOT}/include ## Using sequential MKL (OpenMP disabled for better single-threaded performance) LDLIBS = -L/usr/lib/x86_64-linux-gnu -L/usr/lib64 -lifcore -limf -lmpi \ -L${MKLROOT}/lib -lmkl_intel_lp64 -lmkl_sequential -lmkl_core \ -lpthread -lm -ldl ## Aggressive optimization flags: ## -O3: Maximum optimization ## -xHost: Optimize for the host CPU architecture (Intel/AMD compatible) ## -fp-model fast=2: Aggressive floating-point optimizations ## -fma: Enable fused multiply-add instructions ## Note: OpenMP has been disabled (-qopenmp removed) due to performance issues CXXAPPFLAGS = -O3 -xHost -fp-model fast=2 -fma \ -Dfortran3 -Dnewc -I${MKLROOT}/include f90appflags = -O3 -xHost -fp-model fast=2 -fma \ -fpp -I${MKLROOT}/include f90 = ifx f77 = ifx CXX = icpx CC = icx CLINKER = mpiicpx Cu = nvcc CUDA_LIB_PATH = -L/usr/lib/cuda/lib64 -I/usr/include -I/usr/lib/cuda/include #CUDA_APP_FLAGS = -c -g -O3 --ptxas-options=-v -arch compute_13 -code compute_13,sm_13 -Dfortran3 -Dnewc CUDA_APP_FLAGS = -c -g -O3 --ptxas-options=-v -Dfortran3 -Dnewc