Enable OpenMP threading for the dominant computational kernels: - makefile.inc: add -qopenmp to f90appflags - diff_new.f90: split fderivs/fdderivs into OpenMP interior + serial boundary - kodiss.f90: split kodis into OpenMP interior + serial boundary - lopsidediff.f90: add OMP PARALLEL DO COLLAPSE(2) to lopsided - fmisc.f90: parallelize symmetry_bd bulk array copy - bssn_rhs.f90: add OMP WORKSHARE to array-syntax operations Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
33 lines
1.8 KiB
PHP
Executable File
33 lines
1.8 KiB
PHP
Executable File
## GCC version (commented out)
|
|
## filein = -I/usr/include -I/usr/lib/x86_64-linux-gnu/mpich/include -I/usr/lib/x86_64-linux-gnu/openmpi/lib/ -I/usr/lib/gcc/x86_64-linux-gnu/11/ -I/usr/include/c++/11/
|
|
## filein = -I/usr/include/ -I/usr/include/openmpi-x86_64/ -I/usr/lib/x86_64-linux-gnu/openmpi/include/ -I/usr/lib/x86_64-linux-gnu/openmpi/lib/ -I/usr/lib/gcc/x86_64-linux-gnu/11/ -I/usr/include/c++/11/
|
|
## LDLIBS = -L/usr/lib/x86_64-linux-gnu -L/usr/lib64 -L/usr/lib/gcc/x86_64-linux-gnu/11 -lgfortran -lmpi -lgfortran
|
|
|
|
## Intel oneAPI version with oneMKL (Optimized for performance)
|
|
filein = -I/usr/include/ -I${MKLROOT}/include
|
|
|
|
## Using threaded MKL (-lmkl_intel_thread) with the Intel OpenMP runtime pulled in by -qopenmp at link time
|
|
## Added -lifcore for Intel Fortran runtime and -limf for Intel math library
|
|
LDLIBS = -L${MKLROOT}/lib -lmkl_intel_lp64 -lmkl_intel_thread -lmkl_core -lifcore -limf -lpthread -lm -ldl -qopenmp
|
|
|
|
## Aggressive optimization flags:
|
|
## -O3: Maximum optimization
|
|
## -xHost: Optimize for the host CPU architecture (Intel/AMD compatible)
|
|
## -fp-model fast=2: Aggressive floating-point optimizations
|
|
## -fma: Enable fused multiply-add instructions
|
|
## -qopenmp: Enable OpenMP threading (passed to both the C++ and Fortran compile flags below)
|
|
CXXAPPFLAGS = -O3 -xHost -fp-model fast=2 -fma -ipo -qopenmp \
|
|
-Dfortran3 -Dnewc -I${MKLROOT}/include
|
|
f90appflags = -O3 -xHost -fp-model fast=2 -fma -ipo -qopenmp \
|
|
-align array64byte -fpp -I${MKLROOT}/include
|
|
f90 = ifx
|
|
f77 = ifx
|
|
CXX = icpx
|
|
CC = icx
|
|
CLINKER = mpiicpx
|
|
|
|
Cu = nvcc
|
|
CUDA_LIB_PATH = -L/usr/lib/cuda/lib64 -I/usr/include -I/usr/lib/cuda/include
|
|
#CUDA_APP_FLAGS = -c -g -O3 --ptxas-options=-v -arch compute_13 -code compute_13,sm_13 -Dfortran3 -Dnewc
|
|
CUDA_APP_FLAGS = -c -g -O3 --ptxas-options=-v -Dfortran3 -Dnewc
|