## ---------------------------------------------------------------------------
## Build settings: include paths, link libraries and C/C++ compile flags.
## One assignment per line; long values are continued with a trailing
## backslash immediately followed by a newline.
## ---------------------------------------------------------------------------

## GCC version (commented out; kept for reference)
## filein = -I/usr/include -I/usr/lib/x86_64-linux-gnu/mpich/include -I/usr/lib/x86_64-linux-gnu/openmpi/lib/ -I/usr/lib/gcc/x86_64-linux-gnu/11/ -I/usr/include/c++/11/
## filein = -I/usr/include/ -I/usr/include/openmpi-x86_64/ -I/usr/lib/x86_64-linux-gnu/openmpi/include/ -I/usr/lib/x86_64-linux-gnu/openmpi/lib/ -I/usr/lib/gcc/x86_64-linux-gnu/11/ -I/usr/include/c++/11/
## LDLIBS = -L/usr/lib/x86_64-linux-gnu -L/usr/lib64 -L/usr/lib/gcc/x86_64-linux-gnu/11 -lgfortran -lmpi -lgfortran

## Intel oneAPI version with oneMKL (optimized for performance).
## MKLROOT is not set in this file; it must be provided by the environment
## or by the including makefile.
filein = -I/usr/include/ -I${MKLROOT}/include

## Link against the multi-threaded MKL layer for better scalability with MPI.
## This lets MKL routines (FFT, BLAS, LAPACK) use multiple threads internally
## while the application code stays pure MPI (no OpenMP pragmas in user code).
LDLIBS = -L/usr/lib/x86_64-linux-gnu -L/usr/lib64 -lifcore -limf -lmpi \
         -L${MKLROOT}/lib -lmkl_intel_lp64 -lmkl_intel_thread -lmkl_core \
         -liomp5 -lpthread -lm -ldl

## Aggressive optimization flags for maximum performance:
##   -O3:                         maximum optimization level
##   -xHost:                      optimize for the host CPU architecture
##   -fp-model fast=2:            aggressive floating-point optimizations (allows reassociation)
##   -fma:                        enable fused multiply-add instructions
##   -qopt-report=5:              generate detailed optimization reports
##   -qopt-report-phase=vec,loop: report vectorization and loop optimizations
##   -march=native:               use all CPU instructions available on the build host
##   -mtune=native:               tune for the build host CPU
##   -funroll-loops:              aggressively unroll loops
##   -fno-alias:                  assume no pointer aliasing
##   -qopt-prefetch:              enable aggressive prefetching
## Note: OpenMP has been disabled (-qopenmp removed) due to performance issues.
##
## NOTE(review): -xHost, -march=native and -mtune=native all target the build
##   host; confirm whether all three are needed and that binaries are only run
##   on CPUs matching the build machine.
## NOTE(review): if these flags are consumed by the LLVM-based Intel compilers
##   (icx/icpx/ifx), their documentation lists -qopt-report levels 0-3 only;
##   confirm that level 5 is accepted as intended.
## NOTE(review): -fno-alias is applied to C/C++ sources here; confirm that
##   code does not rely on aliased pointers.
CXXAPPFLAGS = -O3 -xHost -march=native -mtune=native -fp-model fast=2 -fma \
              -qopt-report=5 -qopt-report-phase=vec,loop \
              -funroll-loops -fno-alias -qopt-prefetch \
              -Dfortran3 -Dnewc -I${MKLROOT}/include
## Fortran compile flags. -fpp runs the Fortran preprocessor on the sources;
## MKLROOT is not set in this file and must be provided by the environment
## or by the including makefile.
f90appflags = -O3 -xHost -march=native -mtune=native -fp-model fast=2 -fma \
              -qopt-report=5 -qopt-report-phase=vec,loop \
              -funroll-loops -fno-alias -qopt-prefetch \
              -fpp -I${MKLROOT}/include

## Compiler and linker drivers (one assignment per line).
f90 = ifx
f77 = ifx
CXX = icpx
CC = icx
CLINKER = mpiicpx

## CUDA toolchain.
Cu = nvcc
## Despite its name, CUDA_LIB_PATH carries both library (-L) and include (-I)
## search paths.
CUDA_LIB_PATH = -L/usr/lib/cuda/lib64 -I/usr/include -I/usr/lib/cuda/include
## Earlier setting that pinned -arch compute_13 / -code compute_13,sm_13;
## kept commented out for reference.
#CUDA_APP_FLAGS = -c -g -O3 --ptxas-options=-v -arch compute_13 -code compute_13,sm_13 -Dfortran3 -Dnewc
CUDA_APP_FLAGS = -c -g -O3 --ptxas-options=-v -Dfortran3 -Dnewc