Add two-node MPI launch configuration
This commit is contained in:
@@ -18,6 +18,8 @@ Output_directory = "binary_output" ## binary data file directory
|
|||||||
## The file directory name should not be too long
|
## The file directory name should not be too long
|
||||||
MPI_processes = 64 ## number of mpi processes used in the simulation
|
MPI_processes = 64 ## number of mpi processes used in the simulation
|
||||||
OMP_Threads = 3 ## number of OpenMP threads used by each MPI process
|
OMP_Threads = 3 ## number of OpenMP threads used by each MPI process
|
||||||
|
MPI_hosts = ["localhost", "192.168.20.102"] ## MPI hosts for multi-node runs
|
||||||
|
MPI_processes_per_node = 32 ## MPI ranks launched on each node in MPI_hosts
|
||||||
|
|
||||||
GPU_Calculation = "no" ## Use GPU or not
|
GPU_Calculation = "no" ## Use GPU or not
|
||||||
## (prefer "no" in the current version, because the GPU part may have bugs when integrated in this Python interface)
|
## (prefer "no" in the current version, because the GPU part may have bugs when integrated in this Python interface)
|
||||||
|
|||||||
@@ -71,6 +71,28 @@ def build_twopuncture_runtime_env():
|
|||||||
return runtime_env
|
return runtime_env
|
||||||
|
|
||||||
|
|
||||||
|
def build_mpi_launch_args(config=None):
    """Build optional host-distribution arguments for mpirun.

    Reads ``MPI_hosts``, ``MPI_processes_per_node`` and ``MPI_processes``
    from the configuration object and turns them into the option string
    that is placed between ``mpirun`` and ``-np``.

    Args:
        config: Object exposing the settings as attributes.  Defaults to the
            module-level ``input_data`` when omitted, so existing
            zero-argument calls keep working.

    Returns:
        ``""`` when no hosts are configured (single-node run); otherwise
        ``"-hosts h1,h2,..."`` followed by ``" -ppn N"`` when
        ``MPI_processes_per_node`` is positive.

    Raises:
        ValueError: If ``MPI_processes_per_node`` is positive and
            ``MPI_processes`` differs from
            ``len(MPI_hosts) * MPI_processes_per_node``.

    NOTE(review): ``-hosts`` / ``-ppn`` look like the Hydra-style launcher
    spellings (MPICH, Intel MPI); confirm the installed ``mpirun`` accepts
    them, since other MPI implementations use different option names.
    """
    if config is None:
        config = input_data

    raw_hosts = getattr(config, "MPI_hosts", None) or []
    if isinstance(raw_hosts, str):
        ## A bare string would otherwise be split into single characters;
        ## accept "host1,host2" as a convenience spelling of the list.
        raw_hosts = raw_hosts.split(",")

    ## Normalise every entry to a stripped string and drop blanks so that a
    ## stray "" or " " neither reaches mpirun nor skews the rank count below.
    hosts = [str(entry).strip() for entry in raw_hosts]
    hosts = [entry for entry in hosts if entry]

    ## Treat a missing or None value as "not specified" (0).
    ppn = int(getattr(config, "MPI_processes_per_node", 0) or 0)

    if not hosts:
        return ""

    if ppn > 0:
        expected = len(hosts) * ppn
        if int(config.MPI_processes) != expected:
            raise ValueError(
                f"MPI_processes={config.MPI_processes} does not match "
                f"len(MPI_hosts) * MPI_processes_per_node = {expected}"
            )

    launch_args = f"-hosts {','.join(hosts)}"
    if ppn > 0:
        launch_args += f" -ppn {ppn}"
    return launch_args
|
||||||
|
|
||||||
|
|
||||||
##################################################################
|
##################################################################
|
||||||
|
|
||||||
|
|
||||||
@@ -161,16 +183,26 @@ def run_ABE():
|
|||||||
print( " Running the AMSS-NCKU executable file ABE/ABEGPU " )
|
print( " Running the AMSS-NCKU executable file ABE/ABEGPU " )
|
||||||
print( )
|
print( )
|
||||||
print( f" MPI processes = {input_data.MPI_processes}, OMP threads per process = {max(1, int(getattr(input_data, 'OMP_Threads', 1)))}" )
|
print( f" MPI processes = {input_data.MPI_processes}, OMP threads per process = {max(1, int(getattr(input_data, 'OMP_Threads', 1)))}" )
|
||||||
|
if getattr(input_data, "MPI_hosts", []):
|
||||||
|
print( f" MPI hosts = {getattr(input_data, 'MPI_hosts', [])}, MPI ranks per node = {int(getattr(input_data, 'MPI_processes_per_node', 0))}" )
|
||||||
|
print( " Multi-node runs require the working directory to be visible on all MPI hosts. " )
|
||||||
print( )
|
print( )
|
||||||
|
|
||||||
## Define the command to run; cast other values to strings as needed
|
## Define the command to run; cast other values to strings as needed
|
||||||
|
mpi_launch_args = build_mpi_launch_args()
|
||||||
|
|
||||||
if (input_data.GPU_Calculation == "no"):
|
if (input_data.GPU_Calculation == "no"):
|
||||||
mpi_command = NUMACTL_CPU_BIND + " mpirun -np " + str(input_data.MPI_processes) + " ./ABE"
|
mpi_command = NUMACTL_CPU_BIND + " mpirun "
|
||||||
|
if mpi_launch_args:
|
||||||
|
mpi_command += mpi_launch_args + " "
|
||||||
|
mpi_command += "-np " + str(input_data.MPI_processes) + " ./ABE"
|
||||||
#mpi_command = " mpirun -np " + str(input_data.MPI_processes) + " ./ABE"
|
#mpi_command = " mpirun -np " + str(input_data.MPI_processes) + " ./ABE"
|
||||||
mpi_command_outfile = "ABE_out.log"
|
mpi_command_outfile = "ABE_out.log"
|
||||||
elif (input_data.GPU_Calculation == "yes"):
|
elif (input_data.GPU_Calculation == "yes"):
|
||||||
mpi_command = NUMACTL_CPU_BIND + " mpirun -np " + str(input_data.MPI_processes) + " ./ABEGPU"
|
mpi_command = NUMACTL_CPU_BIND + " mpirun "
|
||||||
|
if mpi_launch_args:
|
||||||
|
mpi_command += mpi_launch_args + " "
|
||||||
|
mpi_command += "-np " + str(input_data.MPI_processes) + " ./ABEGPU"
|
||||||
mpi_command_outfile = "ABEGPU_out.log"
|
mpi_command_outfile = "ABEGPU_out.log"
|
||||||
|
|
||||||
## Execute the MPI command and stream output
|
## Execute the MPI command and stream output
|
||||||
|
|||||||
6
setup.py
6
setup.py
@@ -69,6 +69,9 @@ def print_input_data( File_directory ):
|
|||||||
print( )
|
print( )
|
||||||
print( " The number of MPI processes in the AMSS-NCKU simulation = ", input_data.MPI_processes )
|
print( " The number of MPI processes in the AMSS-NCKU simulation = ", input_data.MPI_processes )
|
||||||
print( " The number of OMP threads per MPI process = ", input_data.OMP_Threads )
|
print( " The number of OMP threads per MPI process = ", input_data.OMP_Threads )
|
||||||
|
if getattr(input_data, "MPI_hosts", []):
|
||||||
|
print( " The MPI host list in the AMSS-NCKU simulation = ", input_data.MPI_hosts )
|
||||||
|
print( " The number of MPI ranks launched per host = ", input_data.MPI_processes_per_node )
|
||||||
print( )
|
print( )
|
||||||
print( " The form of computational equation = ", input_data.Equation_Class )
|
print( " The form of computational equation = ", input_data.Equation_Class )
|
||||||
print( " The initial data in this simulation = ", input_data.Initial_Data_Method )
|
print( " The initial data in this simulation = ", input_data.Initial_Data_Method )
|
||||||
@@ -145,6 +148,9 @@ def print_input_data( File_directory ):
|
|||||||
print( file=file0 )
|
print( file=file0 )
|
||||||
print( " The number of MPI processes in the AMSS-NCKU simulation = ", input_data.MPI_processes, file=file0 )
|
print( " The number of MPI processes in the AMSS-NCKU simulation = ", input_data.MPI_processes, file=file0 )
|
||||||
print( " The number of OMP threads per MPI process = ", input_data.OMP_Threads, file=file0 )
|
print( " The number of OMP threads per MPI process = ", input_data.OMP_Threads, file=file0 )
|
||||||
|
if getattr(input_data, "MPI_hosts", []):
|
||||||
|
print( " The MPI host list in the AMSS-NCKU simulation = ", input_data.MPI_hosts, file=file0 )
|
||||||
|
print( " The number of MPI ranks launched per host = ", input_data.MPI_processes_per_node, file=file0 )
|
||||||
print( file=file0 )
|
print( file=file0 )
|
||||||
print( " The form of computational equation = ", input_data.Equation_Class, file=file0 )
|
print( " The form of computational equation = ", input_data.Equation_Class, file=file0 )
|
||||||
print( " The initial data in this simulation = ", input_data.Initial_Data_Method, file=file0 )
|
print( " The initial data in this simulation = ", input_data.Initial_Data_Method, file=file0 )
|
||||||
|
|||||||
Reference in New Issue
Block a user