Enable multi-threaded MKL for better resource utilization
- Changed from libmkl_sequential to libmkl_intel_thread
- Added automatic MKL thread count configuration (104 cores / MPI_processes)
- Updated runtime scripts to set the MKL_NUM_THREADS environment variable
- Added comprehensive optimization documentation

Expected improvement: 5-15% from better MKL utilization.
Note: The main performance bottleneck is in the computation loops, not in MKL functions.
This commit is contained in:
@@ -22,6 +22,13 @@ NUMACTL_CPU_BIND = "taskset -c 4-55,60-111"
|
||||
## Set make -j to utilize available cores for faster builds
|
||||
BUILD_JOBS = 104
|
||||
|
||||
## MKL threading configuration for hybrid MPI+threaded-MKL execution
|
||||
## Total cores available: 104 (cores 4-55, 60-111)
|
||||
## MPI processes: configured in AMSS_NCKU_Input.py (typically 48)
|
||||
## MKL threads per process: 104 / MPI_processes ≈ 2
|
||||
## Floor division never oversubscribes the 104 cores while MPI_processes <= 104, but it can leave some idle (e.g. 48 processes * 2 threads = 96)
|
||||
MKL_NUM_THREADS = max(1, 104 // input_data.MPI_processes)
|
||||
|
||||
|
||||
##################################################################
|
||||
|
||||
@@ -110,18 +117,24 @@ def makefile_TwoPunctureABE():
|
||||
def run_ABE():
|
||||
|
||||
print( )
|
||||
print( " Running the AMSS-NCKU executable file ABE/ABEGPU " )
|
||||
print( " Running the AMSS-NCKU executable file ABE/ABEGPU " )
|
||||
print( )
|
||||
print( f" MPI processes: {input_data.MPI_processes}" )
|
||||
print( f" MKL threads per process: {MKL_NUM_THREADS}" )
|
||||
print( f" Total threads: {input_data.MPI_processes * MKL_NUM_THREADS}" )
|
||||
print( )
|
||||
|
||||
## Define the command to run; cast other values to strings as needed
|
||||
|
||||
## Set MKL threading environment variables for optimal performance
|
||||
env_vars = f"export MKL_NUM_THREADS={MKL_NUM_THREADS} && export MKL_DYNAMIC=FALSE && "
|
||||
|
||||
if (input_data.GPU_Calculation == "no"):
|
||||
mpi_command = NUMACTL_CPU_BIND + " mpirun -np " + str(input_data.MPI_processes) + " ./ABE"
|
||||
mpi_command = env_vars + NUMACTL_CPU_BIND + " mpirun -np " + str(input_data.MPI_processes) + " ./ABE"
|
||||
mpi_command_outfile = "ABE_out.log"
|
||||
elif (input_data.GPU_Calculation == "yes"):
|
||||
mpi_command = NUMACTL_CPU_BIND + " mpirun -np " + str(input_data.MPI_processes) + " ./ABEGPU"
|
||||
mpi_command = env_vars + NUMACTL_CPU_BIND + " mpirun -np " + str(input_data.MPI_processes) + " ./ABEGPU"
|
||||
mpi_command_outfile = "ABEGPU_out.log"
|
||||
|
||||
|
||||
## Execute the MPI command and stream output
|
||||
mpi_process = subprocess.Popen(mpi_command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True)
|
||||
|
||||
@@ -154,11 +167,12 @@ def run_ABE():
|
||||
def run_TwoPunctureABE():
|
||||
|
||||
print( )
|
||||
print( " Running the AMSS-NCKU executable file TwoPunctureABE " )
|
||||
print( " Running the AMSS-NCKU executable file TwoPunctureABE " )
|
||||
print( )
|
||||
|
||||
## Define the command to run
|
||||
TwoPuncture_command = NUMACTL_CPU_BIND + " ./TwoPunctureABE"
|
||||
|
||||
## Define the command to run with MKL threading configuration
|
||||
env_vars = f"export MKL_NUM_THREADS={MKL_NUM_THREADS} && export MKL_DYNAMIC=FALSE && "
|
||||
TwoPuncture_command = env_vars + NUMACTL_CPU_BIND + " ./TwoPunctureABE"
|
||||
TwoPuncture_command_outfile = "TwoPunctureABE_out.log"
|
||||
|
||||
## Execute the command with subprocess.Popen and stream output
|
||||
|
||||
Reference in New Issue
Block a user