Update Intel oneAPI configuration and CPU binding settings

- Update makefile.inc with Intel oneAPI compiler flags and oneMKL linking
- Configure taskset CPU binding to use nohz_full cores (4-55, 60-111)
- Set build parallelism to 104 jobs for faster compilation
- Update MPI process count to 48 in input configuration
2026-01-17 20:41:02 +08:00
parent c6945bb095
commit 3a7bce3af2
3 changed files with 30 additions and 13 deletions
--- a/AMSS_NCKU_Input.py
+++ b/AMSS_NCKU_Input.py
@@ -16,7 +16,7 @@ import numpy
 File_directory   = "GW150914"                    ## output file directory
 Output_directory = "binary_output"               ## binary data file directory
                                                 ## The file directory name should not be too long
-MPI_processes    = 8                             ## number of mpi processes used in the simulation
+MPI_processes    = 48                             ## number of mpi processes used in the simulation

 GPU_Calculation  = "no"                          ## Use GPU or not 
                                                 ## (prefer "no" in the current version, because the GPU part may have bugs when integrated in this Python interface)
--- a/AMSS_NCKU_source/makefile.inc
+++ b/AMSS_NCKU_source/makefile.inc
@@ -3,14 +3,24 @@
 ## filein  = -I/usr/include/ -I/usr/include/openmpi-x86_64/ -I/usr/lib/x86_64-linux-gnu/openmpi/include/ -I/usr/lib/x86_64-linux-gnu/openmpi/lib/ -I/usr/lib/gcc/x86_64-linux-gnu/11/ -I/usr/include/c++/11/
 ## LDLIBS  = -L/usr/lib/x86_64-linux-gnu -L/usr/lib64 -L/usr/lib/gcc/x86_64-linux-gnu/11 -lgfortran -lmpi -lgfortran

-## Intel oneAPI version with oneMKL
+## Intel oneAPI version with oneMKL (Optimized for performance)
 filein  = -I/usr/include/ -I${MKLROOT}/include

+## Use Intel OpenMP threading layer for better performance
 LDLIBS  = -L/usr/lib/x86_64-linux-gnu -L/usr/lib64 -lifcore -limf -lmpi \
-          -L${MKLROOT}/lib -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lpthread -lm -ldl
+          -L${MKLROOT}/lib -lmkl_intel_lp64 -lmkl_intel_thread -lmkl_core \
+          -liomp5 -lpthread -lm -ldl

-CXXAPPFLAGS  = -O3 -Dfortran3 -Dnewc -I${MKLROOT}/include
-f90appflags  = -O3 -fpp -I${MKLROOT}/include
+## Aggressive optimization flags:
+## -O3: Maximum optimization
+## -xHost: Optimize for the instruction set of the build host's CPU (Intel/AMD compatible); the binary may not run on older CPUs
+## -qopenmp: Enable OpenMP parallelization
+## -fp-model fast=2: Aggressive floating-point optimizations (may change numerical results slightly)
+## -fma: Enable fused multiply-add instructions
+CXXAPPFLAGS  = -O3 -xHost -qopenmp -fp-model fast=2 -fma \
+               -Dfortran3 -Dnewc -I${MKLROOT}/include
+f90appflags  = -O3 -xHost -qopenmp -fp-model fast=2 -fma \
+               -fpp -I${MKLROOT}/include
 f90          = ifx
 f77          = ifx
 CXX          = icpx
--- a/makefile_and_run.py
+++ b/makefile_and_run.py
@@ -11,9 +11,16 @@
 import AMSS_NCKU_Input as input_data
 import subprocess

-## CPU core binding configuration using numactl
-## Avoid cores 0-3 and 56-59, use cores 4-55 and 60-111
-NUMACTL_CPU_BIND = "numactl --physcpubind=4-55,60-111"
+## CPU core binding configuration using taskset
+## taskset ensures all child processes inherit the CPU affinity mask
+## This forces make and all compiler processes to use only nohz_full cores (4-55, 60-111)
+## Format: taskset -c 4-55,60-111 (the constant keeps its legacy NUMACTL_ name from the earlier numactl version)
+NUMACTL_CPU_BIND = "taskset -c 4-55,60-111"
+
+## Build parallelism configuration
+## Use nohz_full cores (4-55, 60-111) for compilation: 52 + 52 = 104 cores
+## Set make -j to utilize available cores for faster builds
+BUILD_JOBS = 104


 ##################################################################
@@ -30,11 +37,11 @@ def makefile_ABE():
    print( " Compiling the AMSS-NCKU executable file ABE/ABEGPU " ) 
    print(                                                        )

-    ## Build command
+    ## Build command with CPU binding to nohz_full cores
    if (input_data.GPU_Calculation == "no"):
-        makefile_command  = "make -j4" + " ABE"
+        makefile_command  = f"{NUMACTL_CPU_BIND} make -j{BUILD_JOBS} ABE"
    elif (input_data.GPU_Calculation == "yes"):
-        makefile_command  = "make -j4" + " ABEGPU"
+        makefile_command  = f"{NUMACTL_CPU_BIND} make -j{BUILD_JOBS} ABEGPU"
    else:
        print( " CPU/GPU numerical calculation setting is wrong " )
        print(                                                    )
@@ -71,8 +78,8 @@ def makefile_TwoPunctureABE():
    print( " Compiling the AMSS-NCKU executable file TwoPunctureABE " )
    print(                                                            )
    
-    ## Build command
-    makefile_command = "make" + " TwoPunctureABE"
+    ## Build command with CPU binding to nohz_full cores
+    makefile_command = f"{NUMACTL_CPU_BIND} make -j{BUILD_JOBS} TwoPunctureABE"

    ## Execute the command with subprocess.Popen and stream output
    makefile_process = subprocess.Popen(makefile_command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True)