Add MPI+OpenMP hybrid parallelism (48 ranks x 2 threads) for full 96-core utilization

Enable OpenMP threading in finite-difference kernels (diff_new, diff_new_sh, diff_newwb,
lopsidediff, kodiss, kodiss_sh) with collapse(3) directives on 36 triple-nested loops.
Update build flags (-qopenmp), MPI process binding, and runtime configuration.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-06 15:53:15 +08:00
parent 223ec17a54
commit 4eb698f496
9 changed files with 65 additions and 27 deletions

View File

@@ -13,13 +13,9 @@ import subprocess
## CPU core binding configuration using taskset
## taskset ensures all child processes inherit the CPU affinity mask
## This forces make and all compiler processes onto the cores listed in the mask below
## Format: taskset -c 0-111 binds processes to all 112 logical cores (no nohz_full restriction)
NUMACTL_CPU_BIND = "taskset -c 0-111"
## Build parallelism configuration
## Run up to 104 parallel compile jobs (figure historically sized to the 104
## nohz_full cores 4-55,60-111; the current binding above allows all 112 cores)
BUILD_JOBS = 104
@@ -114,12 +110,18 @@ def run_ABE():
print( )
## Define the command to run; cast other values to strings as needed
## MPI+OpenMP hybrid: compute threads per rank from total cores / MPI ranks
omp_threads = max(1, 96 // input_data.MPI_processes)
omp_env = (f" -genv OMP_NUM_THREADS={omp_threads}"
f" -genv OMP_PROC_BIND=close"
f" -genv OMP_PLACES=cores"
f" -genv I_MPI_PIN_DOMAIN=omp")
if (input_data.GPU_Calculation == "no"):
mpi_command = NUMACTL_CPU_BIND + " mpirun -np " + str(input_data.MPI_processes) + " ./ABE"
mpi_command = NUMACTL_CPU_BIND + " mpirun -np " + str(input_data.MPI_processes) + omp_env + " ./ABE"
mpi_command_outfile = "ABE_out.log"
elif (input_data.GPU_Calculation == "yes"):
mpi_command = NUMACTL_CPU_BIND + " mpirun -np " + str(input_data.MPI_processes) + " ./ABEGPU"
mpi_command = NUMACTL_CPU_BIND + " mpirun -np " + str(input_data.MPI_processes) + omp_env + " ./ABEGPU"
mpi_command_outfile = "ABEGPU_out.log"
## Execute the MPI command and stream output