Add two-node MPI launch configuration

2026-03-30 21:13:46 +08:00
parent 60ad63e8cc
commit d96ca6ed2a
3 changed files with 42 additions and 2 deletions
--- a/AMSS_NCKU_Input.py
+++ b/AMSS_NCKU_Input.py
@@ -18,6 +18,8 @@ Output_directory = "binary_output"               ## binary data file directory
                                                 ## The file directory name should not be too long
 MPI_processes    = 64                            ## number of mpi processes used in the simulation
 OMP_Threads      = 3                             ## number of OpenMP threads used by each MPI process
+MPI_hosts        = ["localhost", "192.168.20.102"] ## MPI hosts for multi-node runs
+MPI_processes_per_node = 32                      ## MPI ranks launched on each node in MPI_hosts

 GPU_Calculation  = "no"                          ## Use GPU or not
                                                 ## (prefer "no" in the current version, because the GPU part may have bugs when integrated in this Python interface)
--- a/makefile_and_run.py
+++ b/makefile_and_run.py
@@ -71,6 +71,28 @@ def build_twopuncture_runtime_env():
    return runtime_env


+def build_mpi_launch_args():
+    """Build the optional host-distribution arguments for mpirun.
+
+    Reads MPI_hosts (a list of host names, or one comma-separated string) and MPI_processes_per_node
+    from input_data. Returns "" when no hosts are configured, otherwise "-hosts h1,h2[ -ppn N]".
+    Raises ValueError when ppn > 0 and MPI_processes != len(hosts) * ppn.
+    """
+    hosts = getattr(input_data, "MPI_hosts", []) or []   ## None counts as "no hosts", like the callers' truthiness checks
+    if isinstance(hosts, str):
+        hosts = hosts.split(",")                         ## list("a,b") would split the string into single characters
+    hosts = [str(host).strip() for host in hosts if str(host).strip()]   ## drop blank entries and stray spaces
+    ppn = int(getattr(input_data, "MPI_processes_per_node", 0))
+    if not hosts:
+        return ""
+    if ppn > 0 and int(input_data.MPI_processes) != len(hosts) * ppn:
+        raise ValueError(
+            f"MPI_processes={input_data.MPI_processes} does not match "
+            f"len(MPI_hosts) * MPI_processes_per_node = {len(hosts) * ppn}"
+        )
+    return f"-hosts {','.join(hosts)}" + (f" -ppn {ppn}" if ppn > 0 else "")
+
+
 ##################################################################


@@ -161,16 +183,26 @@ def run_ABE():
    print( " Running the AMSS-NCKU executable file ABE/ABEGPU " ) 
    print(                                                      )
    print( f" MPI processes = {input_data.MPI_processes}, OMP threads per process = {max(1, int(getattr(input_data, 'OMP_Threads', 1)))}" )
+    if getattr(input_data, "MPI_hosts", []):
+        print( f" MPI hosts = {getattr(input_data, 'MPI_hosts', [])}, MPI ranks per node = {int(getattr(input_data, 'MPI_processes_per_node', 0))}" )
+        print( " Multi-node runs require the working directory to be visible on all MPI hosts. " )
    print(                                                                                                      )

    ## Define the command to run; cast other values to strings as needed
+    mpi_launch_args = build_mpi_launch_args()
    
    if (input_data.GPU_Calculation == "no"):
-        mpi_command         = NUMACTL_CPU_BIND + " mpirun -np " + str(input_data.MPI_processes) + " ./ABE"
+        mpi_command         = NUMACTL_CPU_BIND + " mpirun "
+        if mpi_launch_args:
+            mpi_command += mpi_launch_args + " "
+        mpi_command += "-np " + str(input_data.MPI_processes) + " ./ABE"
        #mpi_command         = " mpirun -np " + str(input_data.MPI_processes) + " ./ABE"
        mpi_command_outfile = "ABE_out.log"
    elif (input_data.GPU_Calculation == "yes"):
-        mpi_command         = NUMACTL_CPU_BIND + " mpirun -np " + str(input_data.MPI_processes) + " ./ABEGPU"
+        mpi_command         = NUMACTL_CPU_BIND + " mpirun "
+        if mpi_launch_args:
+            mpi_command += mpi_launch_args + " "
+        mpi_command += "-np " + str(input_data.MPI_processes) + " ./ABEGPU"
        mpi_command_outfile = "ABEGPU_out.log"
 
    ## Execute the MPI command and stream output
--- a/setup.py
+++ b/setup.py
@@ -69,6 +69,9 @@ def print_input_data( File_directory ):
    print(                                                                                           )
    print( " The number of MPI processes in the AMSS-NCKU simulation = ", input_data.MPI_processes   )
    print( " The number of OMP threads per MPI process         = ", input_data.OMP_Threads           )
+    if getattr(input_data, "MPI_hosts", []):
+        print( " The MPI host list in the AMSS-NCKU simulation   = ", input_data.MPI_hosts             )
+        print( " The number of MPI ranks launched per host       = ", input_data.MPI_processes_per_node )
    print(                                                                                           )
    print( " The form of computational equation  = ",            input_data.Equation_Class           )
    print( " The initial data in this simulation = ",            input_data.Initial_Data_Method      )
@@ -145,6 +148,9 @@ def print_input_data( File_directory ):
    print(                                                                                              file=file0 )
    print( " The number of MPI processes in the AMSS-NCKU simulation = ", input_data.MPI_processes,     file=file0 )
    print( " The number of OMP threads per MPI process         = ", input_data.OMP_Threads,             file=file0 )
+    if getattr(input_data, "MPI_hosts", []):
+        print( " The MPI host list in the AMSS-NCKU simulation   = ", input_data.MPI_hosts,               file=file0 )
+        print( " The number of MPI ranks launched per host       = ", input_data.MPI_processes_per_node,   file=file0 )
    print(                                                                                              file=file0 )
    print( " The form of computational equation  = ",            input_data.Equation_Class,             file=file0 )
    print( " The initial data in this simulation = ",            input_data.Initial_Data_Method,        file=file0 )