def build_mpi_launch_args():
    """Build the optional host-distribution arguments for ``mpirun``.

    The following settings are read from ``input_data``:

    * ``MPI_hosts`` -- hosts to launch on.  Either a sequence of host names
      or a single string (one host, or several separated by commas).
      Missing, ``None`` or empty means "no host arguments".
    * ``MPI_processes_per_node`` -- ranks started on each host.  Missing,
      ``None`` or a value <= 0 means "do not pass ``-ppn``".
    * ``MPI_processes`` -- total rank count; only read for the consistency
      check, and only when both of the settings above are in effect.

    Returns:
        str: ``""`` when no hosts are configured, otherwise
        ``"-hosts h1,h2,..."`` optionally followed by ``" -ppn N"``.

    Raises:
        ValueError: if ``MPI_processes`` is not equal to
            ``len(MPI_hosts) * MPI_processes_per_node``.
    """
    raw_hosts = getattr(input_data, "MPI_hosts", None)
    if raw_hosts is None:
        raw_hosts = []
    elif isinstance(raw_hosts, str):
        # A bare string must not be iterated character by character:
        # treat it as one host name or a comma-separated list of them.
        raw_hosts = raw_hosts.split(",")

    # Trim stray whitespace and ignore blank entries so the joined list never
    # contains empty fields such as "a,,b".
    hosts = [name for name in (str(entry).strip() for entry in raw_hosts) if name]
    if not hosts:
        return ""

    # Parsed only after the early return so that a single-node configuration
    # is not affected by an unset (None) per-node value.
    ppn = int(getattr(input_data, "MPI_processes_per_node", 0) or 0)

    # NOTE(review): "-hosts" / "-ppn" are the option spellings of Hydra-based
    # launchers (MPICH, Intel MPI) -- confirm the mpirun in use accepts them.
    launch_args = f"-hosts {','.join(hosts)}"
    if ppn > 0:
        expected = len(hosts) * ppn
        if int(input_data.MPI_processes) != expected:
            raise ValueError(
                f"MPI_processes={input_data.MPI_processes} does not match "
                f"len(MPI_hosts) * MPI_processes_per_node = {expected}"
            )
        launch_args += f" -ppn {ppn}"
    return launch_args
'MPI_processes_per_node', 0))}" ) + print( " Multi-node runs require the working directory to be visible on all MPI hosts. " ) print( ) ## Define the command to run; cast other values to strings as needed + mpi_launch_args = build_mpi_launch_args() if (input_data.GPU_Calculation == "no"): - mpi_command = NUMACTL_CPU_BIND + " mpirun -np " + str(input_data.MPI_processes) + " ./ABE" + mpi_command = NUMACTL_CPU_BIND + " mpirun " + if mpi_launch_args: + mpi_command += mpi_launch_args + " " + mpi_command += "-np " + str(input_data.MPI_processes) + " ./ABE" #mpi_command = " mpirun -np " + str(input_data.MPI_processes) + " ./ABE" mpi_command_outfile = "ABE_out.log" elif (input_data.GPU_Calculation == "yes"): - mpi_command = NUMACTL_CPU_BIND + " mpirun -np " + str(input_data.MPI_processes) + " ./ABEGPU" + mpi_command = NUMACTL_CPU_BIND + " mpirun " + if mpi_launch_args: + mpi_command += mpi_launch_args + " " + mpi_command += "-np " + str(input_data.MPI_processes) + " ./ABEGPU" mpi_command_outfile = "ABEGPU_out.log" ## Execute the MPI command and stream output diff --git a/setup.py b/setup.py index a7ee65f..6e27d22 100755 --- a/setup.py +++ b/setup.py @@ -69,6 +69,9 @@ def print_input_data( File_directory ): print( ) print( " The number of MPI processes in the AMSS-NCKU simulation = ", input_data.MPI_processes ) print( " The number of OMP threads per MPI process = ", input_data.OMP_Threads ) + if getattr(input_data, "MPI_hosts", []): + print( " The MPI host list in the AMSS-NCKU simulation = ", input_data.MPI_hosts ) + print( " The number of MPI ranks launched per host = ", input_data.MPI_processes_per_node ) print( ) print( " The form of computational equation = ", input_data.Equation_Class ) print( " The initial data in this simulation = ", input_data.Initial_Data_Method ) @@ -145,6 +148,9 @@ def print_input_data( File_directory ): print( file=file0 ) print( " The number of MPI processes in the AMSS-NCKU simulation = ", input_data.MPI_processes, file=file0 ) print( " The 
number of OMP threads per MPI process = ", input_data.OMP_Threads, file=file0 ) + if getattr(input_data, "MPI_hosts", []): + print( " The MPI host list in the AMSS-NCKU simulation = ", input_data.MPI_hosts, file=file0 ) + print( " The number of MPI ranks launched per host = ", input_data.MPI_processes_per_node, file=file0 ) print( file=file0 ) print( " The form of computational equation = ", input_data.Equation_Class, file=file0 ) print( " The initial data in this simulation = ", input_data.Initial_Data_Method, file=file0 )