From 3a7bce3af24f52e0982c2b6dd1f0bd409527c98f Mon Sep 17 00:00:00 2001 From: CGH0S7 Date: Sat, 17 Jan 2026 20:41:02 +0800 Subject: [PATCH] Update Intel oneAPI configuration and CPU binding settings - Update makefile.inc with Intel oneAPI compiler flags and oneMKL linking - Configure taskset CPU binding to use nohz_full cores (4-55, 60-111) - Set build parallelism to 104 jobs for faster compilation - Update MPI process count to 48 in input configuration --- AMSS_NCKU_Input.py | 2 +- AMSS_NCKU_source/makefile.inc | 18 ++++++++++++++---- makefile_and_run.py | 23 +++++++++++++++-------- 3 files changed, 30 insertions(+), 13 deletions(-) diff --git a/AMSS_NCKU_Input.py b/AMSS_NCKU_Input.py index 6bf3589..f288e2a 100755 --- a/AMSS_NCKU_Input.py +++ b/AMSS_NCKU_Input.py @@ -16,7 +16,7 @@ import numpy File_directory = "GW150914" ## output file directory Output_directory = "binary_output" ## binary data file directory ## The file directory name should not be too long -MPI_processes = 8 ## number of mpi processes used in the simulation +MPI_processes = 48 ## number of mpi processes used in the simulation GPU_Calculation = "no" ## Use GPU or not ## (prefer "no" in the current version, because the GPU part may have bugs when integrated in this Python interface) diff --git a/AMSS_NCKU_source/makefile.inc b/AMSS_NCKU_source/makefile.inc index a0bd81f..f881737 100755 --- a/AMSS_NCKU_source/makefile.inc +++ b/AMSS_NCKU_source/makefile.inc @@ -3,14 +3,24 @@ ## filein = -I/usr/include/ -I/usr/include/openmpi-x86_64/ -I/usr/lib/x86_64-linux-gnu/openmpi/include/ -I/usr/lib/x86_64-linux-gnu/openmpi/lib/ -I/usr/lib/gcc/x86_64-linux-gnu/11/ -I/usr/include/c++/11/ ## LDLIBS = -L/usr/lib/x86_64-linux-gnu -L/usr/lib64 -L/usr/lib/gcc/x86_64-linux-gnu/11 -lgfortran -lmpi -lgfortran -## Intel oneAPI version with oneMKL +## Intel oneAPI version with oneMKL (Optimized for performance) filein = -I/usr/include/ -I${MKLROOT}/include +## Use Intel OpenMP threading layer for better performance LDLIBS 
= -L/usr/lib/x86_64-linux-gnu -L/usr/lib64 -lifcore -limf -lmpi \ - -L${MKLROOT}/lib -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lpthread -lm -ldl + -L${MKLROOT}/lib -lmkl_intel_lp64 -lmkl_intel_thread -lmkl_core \ + -liomp5 -lpthread -lm -ldl -CXXAPPFLAGS = -O3 -Dfortran3 -Dnewc -I${MKLROOT}/include -f90appflags = -O3 -fpp -I${MKLROOT}/include +## Aggressive optimization flags: +## -O3: Maximum optimization +## -xHost: Optimize for the instruction set of the build host CPU (binaries may not run on CPUs lacking those instructions) +## -qopenmp: Enable OpenMP parallelization +## -fp-model fast=2: Aggressive floating-point optimizations +## -fma: Enable fused multiply-add instructions +CXXAPPFLAGS = -O3 -xHost -qopenmp -fp-model fast=2 -fma \ + -Dfortran3 -Dnewc -I${MKLROOT}/include +f90appflags = -O3 -xHost -qopenmp -fp-model fast=2 -fma \ + -fpp -I${MKLROOT}/include f90 = ifx f77 = ifx CXX = icpx diff --git a/makefile_and_run.py b/makefile_and_run.py index a814dee..6140f99 100755 --- a/makefile_and_run.py +++ b/makefile_and_run.py @@ -11,9 +11,16 @@ import AMSS_NCKU_Input as input_data import subprocess -## CPU core binding configuration using numactl -## Avoid cores 0-3 and 56-59, use cores 4-55 and 60-111 -NUMACTL_CPU_BIND = "numactl --physcpubind=4-55,60-111" +## CPU core binding configuration using taskset +## taskset ensures all child processes inherit the CPU affinity mask +## This forces make and all compiler processes to use only nohz_full cores (4-55, 60-111) +## Format: taskset -c 4-55,60-111 ensures processes only run on these cores +NUMACTL_CPU_BIND = "taskset -c 4-55,60-111" + +## Build parallelism configuration +## Use nohz_full cores (4-55, 60-111) for compilation: 52 + 52 = 104 cores +## Set make -j to utilize available cores for faster builds +BUILD_JOBS = 104 ################################################################## @@ -30,11 +37,11 @@ def makefile_ABE(): print( " Compiling the AMSS-NCKU executable file ABE/ABEGPU " ) print( ) - ## Build command + ## Build command with CPU binding to 
nohz_full cores if (input_data.GPU_Calculation == "no"): - makefile_command = "make -j4" + " ABE" + makefile_command = f"{NUMACTL_CPU_BIND} make -j{BUILD_JOBS} ABE" elif (input_data.GPU_Calculation == "yes"): - makefile_command = "make -j4" + " ABEGPU" + makefile_command = f"{NUMACTL_CPU_BIND} make -j{BUILD_JOBS} ABEGPU" else: print( " CPU/GPU numerical calculation setting is wrong " ) print( ) @@ -71,8 +78,8 @@ def makefile_TwoPunctureABE(): print( " Compiling the AMSS-NCKU executable file TwoPunctureABE " ) print( ) - ## Build command - makefile_command = "make" + " TwoPunctureABE" + ## Build command with CPU binding to nohz_full cores + makefile_command = f"{NUMACTL_CPU_BIND} make -j{BUILD_JOBS} TwoPunctureABE" ## Execute the command with subprocess.Popen and stream output makefile_process = subprocess.Popen(makefile_command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True)