Compare commits
1 Commits
cjy-oneapi
...
cjy-oneapi
| Author | SHA1 | Date | |
|---|---|---|---|
| 223ec17a54 |
@@ -16,7 +16,7 @@ import numpy
|
||||
File_directory = "GW150914" ## output file directory
|
||||
Output_directory = "binary_output" ## binary data file directory
|
||||
## The file directory name should not be too long
|
||||
MPI_processes = 48 ## number of mpi processes used in the simulation
|
||||
MPI_processes = 64 ## number of mpi processes used in the simulation
|
||||
|
||||
GPU_Calculation = "no" ## Use GPU or not
|
||||
## (prefer "no" in the current version, because the GPU part may have bugs when integrated in this Python interface)
|
||||
|
||||
@@ -69,7 +69,6 @@
|
||||
fy = ZEO
|
||||
fz = ZEO
|
||||
|
||||
!$omp parallel do collapse(3) schedule(static)
|
||||
do k=1,ex(3)-1
|
||||
do j=1,ex(2)-1
|
||||
do i=1,ex(1)-1
|
||||
@@ -152,7 +151,6 @@
|
||||
|
||||
fx = ZEO
|
||||
|
||||
!$omp parallel do collapse(3) schedule(static)
|
||||
do k=1,ex(3)-1
|
||||
do j=1,ex(2)-1
|
||||
do i=1,ex(1)-1
|
||||
@@ -220,7 +218,6 @@
|
||||
|
||||
fy = ZEO
|
||||
|
||||
!$omp parallel do collapse(3) schedule(static)
|
||||
do k=1,ex(3)-1
|
||||
do j=1,ex(2)-1
|
||||
do i=1,ex(1)-1
|
||||
@@ -285,7 +282,6 @@
|
||||
|
||||
fz = ZEO
|
||||
|
||||
!$omp parallel do collapse(3) schedule(static)
|
||||
do k=1,ex(3)-1
|
||||
do j=1,ex(2)-1
|
||||
do i=1,ex(1)-1
|
||||
@@ -375,7 +371,6 @@
|
||||
fxz = ZEO
|
||||
fyz = ZEO
|
||||
|
||||
!$omp parallel do collapse(3) schedule(static)
|
||||
do k=1,ex(3)-1
|
||||
do j=1,ex(2)-1
|
||||
do i=1,ex(1)-1
|
||||
@@ -474,7 +469,6 @@
|
||||
|
||||
fxx = ZEO
|
||||
|
||||
!$omp parallel do collapse(3) schedule(static)
|
||||
do k=1,ex(3)-1
|
||||
do j=1,ex(2)-1
|
||||
do i=1,ex(1)-1
|
||||
@@ -537,7 +531,6 @@
|
||||
|
||||
fyy = ZEO
|
||||
|
||||
!$omp parallel do collapse(3) schedule(static)
|
||||
do k=1,ex(3)-1
|
||||
do j=1,ex(2)-1
|
||||
do i=1,ex(1)-1
|
||||
@@ -601,7 +594,6 @@
|
||||
|
||||
fzz = ZEO
|
||||
|
||||
!$omp parallel do collapse(3) schedule(static)
|
||||
do k=1,ex(3)-1
|
||||
do j=1,ex(2)-1
|
||||
do i=1,ex(1)-1
|
||||
@@ -665,7 +657,6 @@
|
||||
|
||||
fxy = ZEO
|
||||
|
||||
!$omp parallel do collapse(3) schedule(static)
|
||||
do k=1,ex(3)-1
|
||||
do j=1,ex(2)-1
|
||||
do i=1,ex(1)-1
|
||||
@@ -728,7 +719,6 @@
|
||||
|
||||
fxz = ZEO
|
||||
|
||||
!$omp parallel do collapse(3) schedule(static)
|
||||
do k=1,ex(3)-1
|
||||
do j=1,ex(2)-1
|
||||
do i=1,ex(1)-1
|
||||
@@ -790,7 +780,6 @@
|
||||
|
||||
fyz = ZEO
|
||||
|
||||
!$omp parallel do collapse(3) schedule(static)
|
||||
do k=1,ex(3)-1
|
||||
do j=1,ex(2)-1
|
||||
do i=1,ex(1)-1
|
||||
@@ -877,7 +866,6 @@
|
||||
fxz = ZEO
|
||||
fyz = ZEO
|
||||
|
||||
!$omp parallel do collapse(3) schedule(static)
|
||||
do k=1,ex(3)-1
|
||||
do j=1,ex(2)-1
|
||||
do i=1,ex(1)-1
|
||||
@@ -1009,7 +997,6 @@
|
||||
fy = ZEO
|
||||
fz = ZEO
|
||||
|
||||
!$omp parallel do collapse(3) schedule(static)
|
||||
do k=1,ex(3)-1
|
||||
do j=1,ex(2)-1
|
||||
do i=1,ex(1)-1
|
||||
@@ -1164,7 +1151,6 @@
|
||||
|
||||
fx = ZEO
|
||||
|
||||
!$omp parallel do collapse(3) schedule(static)
|
||||
do k=1,ex(3)-1
|
||||
do j=1,ex(2)-1
|
||||
do i=1,ex(1)-1
|
||||
@@ -1241,7 +1227,6 @@
|
||||
|
||||
fy = ZEO
|
||||
|
||||
!$omp parallel do collapse(3) schedule(static)
|
||||
do k=1,ex(3)-1
|
||||
do j=1,ex(2)-1
|
||||
do i=1,ex(1)-1
|
||||
@@ -1312,7 +1297,6 @@
|
||||
|
||||
fz = ZEO
|
||||
|
||||
!$omp parallel do collapse(3) schedule(static)
|
||||
do k=1,ex(3)-1
|
||||
do j=1,ex(2)-1
|
||||
do i=1,ex(1)-1
|
||||
@@ -1417,7 +1401,6 @@
|
||||
fxz = ZEO
|
||||
fyz = ZEO
|
||||
|
||||
!$omp parallel do collapse(3) schedule(static)
|
||||
do k=1,ex(3)-1
|
||||
do j=1,ex(2)-1
|
||||
do i=1,ex(1)-1
|
||||
@@ -1593,7 +1576,6 @@
|
||||
|
||||
fxx = ZEO
|
||||
|
||||
!$omp parallel do collapse(3) schedule(static)
|
||||
do k=1,ex(3)-1
|
||||
do j=1,ex(2)-1
|
||||
do i=1,ex(1)-1
|
||||
@@ -1661,7 +1643,6 @@
|
||||
|
||||
fyy = ZEO
|
||||
|
||||
!$omp parallel do collapse(3) schedule(static)
|
||||
do k=1,ex(3)-1
|
||||
do j=1,ex(2)-1
|
||||
do i=1,ex(1)-1
|
||||
@@ -1731,7 +1712,6 @@
|
||||
|
||||
fzz = ZEO
|
||||
|
||||
!$omp parallel do collapse(3) schedule(static)
|
||||
do k=1,ex(3)-1
|
||||
do j=1,ex(2)-1
|
||||
do i=1,ex(1)-1
|
||||
@@ -1801,7 +1781,6 @@
|
||||
|
||||
fxy = ZEO
|
||||
|
||||
!$omp parallel do collapse(3) schedule(static)
|
||||
do k=1,ex(3)-1
|
||||
do j=1,ex(2)-1
|
||||
do i=1,ex(1)-1
|
||||
@@ -1872,7 +1851,6 @@
|
||||
|
||||
fxz = ZEO
|
||||
|
||||
!$omp parallel do collapse(3) schedule(static)
|
||||
do k=1,ex(3)-1
|
||||
do j=1,ex(2)-1
|
||||
do i=1,ex(1)-1
|
||||
@@ -1941,7 +1919,6 @@
|
||||
|
||||
fyz = ZEO
|
||||
|
||||
!$omp parallel do collapse(3) schedule(static)
|
||||
do k=1,ex(3)-1
|
||||
do j=1,ex(2)-1
|
||||
do i=1,ex(1)-1
|
||||
@@ -2034,7 +2011,6 @@
|
||||
fy = ZEO
|
||||
fz = ZEO
|
||||
|
||||
!$omp parallel do collapse(3) schedule(static)
|
||||
do k=1,ex(3)-1
|
||||
do j=1,ex(2)-1
|
||||
do i=1,ex(1)-1
|
||||
@@ -2151,7 +2127,6 @@
|
||||
|
||||
fx = ZEO
|
||||
|
||||
!$omp parallel do collapse(3) schedule(static)
|
||||
do k=1,ex(3)-1
|
||||
do j=1,ex(2)-1
|
||||
do i=1,ex(1)-1
|
||||
@@ -2237,7 +2212,6 @@
|
||||
|
||||
fy = ZEO
|
||||
|
||||
!$omp parallel do collapse(3) schedule(static)
|
||||
do k=1,ex(3)-1
|
||||
do j=1,ex(2)-1
|
||||
do i=1,ex(1)-1
|
||||
@@ -2314,7 +2288,6 @@
|
||||
|
||||
fz = ZEO
|
||||
|
||||
!$omp parallel do collapse(3) schedule(static)
|
||||
do k=1,ex(3)-1
|
||||
do j=1,ex(2)-1
|
||||
do i=1,ex(1)-1
|
||||
@@ -2433,7 +2406,6 @@
|
||||
fxz = ZEO
|
||||
fyz = ZEO
|
||||
|
||||
!$omp parallel do collapse(3) schedule(static)
|
||||
do k=1,ex(3)-1
|
||||
do j=1,ex(2)-1
|
||||
do i=1,ex(1)-1
|
||||
@@ -2621,7 +2593,6 @@
|
||||
|
||||
fxx = ZEO
|
||||
|
||||
!$omp parallel do collapse(3) schedule(static)
|
||||
do k=1,ex(3)-1
|
||||
do j=1,ex(2)-1
|
||||
do i=1,ex(1)-1
|
||||
@@ -2694,7 +2665,6 @@
|
||||
|
||||
fyy = ZEO
|
||||
|
||||
!$omp parallel do collapse(3) schedule(static)
|
||||
do k=1,ex(3)-1
|
||||
do j=1,ex(2)-1
|
||||
do i=1,ex(1)-1
|
||||
@@ -2770,7 +2740,6 @@
|
||||
|
||||
fzz = ZEO
|
||||
|
||||
!$omp parallel do collapse(3) schedule(static)
|
||||
do k=1,ex(3)-1
|
||||
do j=1,ex(2)-1
|
||||
do i=1,ex(1)-1
|
||||
@@ -2846,7 +2815,6 @@
|
||||
|
||||
fxy = ZEO
|
||||
|
||||
!$omp parallel do collapse(3) schedule(static)
|
||||
do k=1,ex(3)-1
|
||||
do j=1,ex(2)-1
|
||||
do i=1,ex(1)-1
|
||||
@@ -2927,7 +2895,6 @@
|
||||
|
||||
fxz = ZEO
|
||||
|
||||
!$omp parallel do collapse(3) schedule(static)
|
||||
do k=1,ex(3)-1
|
||||
do j=1,ex(2)-1
|
||||
do i=1,ex(1)-1
|
||||
@@ -3006,7 +2973,6 @@
|
||||
|
||||
fyz = ZEO
|
||||
|
||||
!$omp parallel do collapse(3) schedule(static)
|
||||
do k=1,ex(3)-1
|
||||
do j=1,ex(2)-1
|
||||
do i=1,ex(1)-1
|
||||
@@ -3114,7 +3080,6 @@
|
||||
fy = ZEO
|
||||
fz = ZEO
|
||||
|
||||
!$omp parallel do collapse(3) schedule(static)
|
||||
do k=1,ex(3)-1
|
||||
do j=1,ex(2)-1
|
||||
do i=1,ex(1)-1
|
||||
@@ -3251,7 +3216,6 @@
|
||||
|
||||
fx = ZEO
|
||||
|
||||
!$omp parallel do collapse(3) schedule(static)
|
||||
do k=1,ex(3)-1
|
||||
do j=1,ex(2)-1
|
||||
do i=1,ex(1)-1
|
||||
@@ -3347,7 +3311,6 @@
|
||||
|
||||
fy = ZEO
|
||||
|
||||
!$omp parallel do collapse(3) schedule(static)
|
||||
do k=1,ex(3)-1
|
||||
do j=1,ex(2)-1
|
||||
do i=1,ex(1)-1
|
||||
@@ -3432,7 +3395,6 @@
|
||||
|
||||
fz = ZEO
|
||||
|
||||
!$omp parallel do collapse(3) schedule(static)
|
||||
do k=1,ex(3)-1
|
||||
do j=1,ex(2)-1
|
||||
do i=1,ex(1)-1
|
||||
@@ -3568,7 +3530,6 @@
|
||||
fxz = ZEO
|
||||
fyz = ZEO
|
||||
|
||||
!$omp parallel do collapse(3) schedule(static)
|
||||
do k=1,ex(3)-1
|
||||
do j=1,ex(2)-1
|
||||
do i=1,ex(1)-1
|
||||
@@ -3841,7 +3802,6 @@
|
||||
|
||||
fxx = ZEO
|
||||
|
||||
!$omp parallel do collapse(3) schedule(static)
|
||||
do k=1,ex(3)-1
|
||||
do j=1,ex(2)-1
|
||||
do i=1,ex(1)-1
|
||||
@@ -3923,7 +3883,6 @@
|
||||
|
||||
fyy = ZEO
|
||||
|
||||
!$omp parallel do collapse(3) schedule(static)
|
||||
do k=1,ex(3)-1
|
||||
do j=1,ex(2)-1
|
||||
do i=1,ex(1)-1
|
||||
@@ -4008,7 +3967,6 @@
|
||||
|
||||
fzz = ZEO
|
||||
|
||||
!$omp parallel do collapse(3) schedule(static)
|
||||
do k=1,ex(3)-1
|
||||
do j=1,ex(2)-1
|
||||
do i=1,ex(1)-1
|
||||
@@ -4093,7 +4051,6 @@
|
||||
|
||||
fxy = ZEO
|
||||
|
||||
!$omp parallel do collapse(3) schedule(static)
|
||||
do k=1,ex(3)-1
|
||||
do j=1,ex(2)-1
|
||||
do i=1,ex(1)-1
|
||||
@@ -4196,7 +4153,6 @@
|
||||
|
||||
fxz = ZEO
|
||||
|
||||
!$omp parallel do collapse(3) schedule(static)
|
||||
do k=1,ex(3)-1
|
||||
do j=1,ex(2)-1
|
||||
do i=1,ex(1)-1
|
||||
@@ -4297,7 +4253,6 @@
|
||||
|
||||
fyz = ZEO
|
||||
|
||||
!$omp parallel do collapse(3) schedule(static)
|
||||
do k=1,ex(3)-1
|
||||
do j=1,ex(2)-1
|
||||
do i=1,ex(1)-1
|
||||
|
||||
@@ -15,16 +15,16 @@ LDLIBS = -L${MKLROOT}/lib -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lifcore
|
||||
## -xHost: Optimize for the host CPU architecture (Intel/AMD compatible)
|
||||
## -fp-model fast=2: Aggressive floating-point optimizations
|
||||
## -fma: Enable fused multiply-add instructions
|
||||
## Note: OpenMP enabled for hybrid MPI+OpenMP
|
||||
CXXAPPFLAGS = -O3 -xHost -fp-model fast=2 -fma -qopenmp \
|
||||
## Note: OpenMP has been disabled (-qopenmp removed) due to performance issues
|
||||
CXXAPPFLAGS = -O3 -xHost -fp-model fast=2 -fma \
|
||||
-Dfortran3 -Dnewc -I${MKLROOT}/include
|
||||
f90appflags = -O3 -xHost -fp-model fast=2 -fma -qopenmp \
|
||||
f90appflags = -O3 -xHost -fp-model fast=2 -fma \
|
||||
-fpp -I${MKLROOT}/include
|
||||
f90 = ifx
|
||||
f77 = ifx
|
||||
CXX = icpx
|
||||
CC = icx
|
||||
CLINKER = mpiicpx -qopenmp
|
||||
CLINKER = mpiicpx
|
||||
|
||||
Cu = nvcc
|
||||
CUDA_LIB_PATH = -L/usr/lib/cuda/lib64 -I/usr/include -I/usr/lib/cuda/include
|
||||
|
||||
@@ -11,14 +11,16 @@
|
||||
import AMSS_NCKU_Input as input_data
|
||||
import subprocess
|
||||
|
||||
## CPU core binding configuration
|
||||
## Removed hardcoded taskset to allow full utilization of 96 cores via MPI+OpenMP
|
||||
NUMACTL_CPU_BIND = ""
|
||||
## CPU core binding configuration using taskset
|
||||
## taskset ensures all child processes inherit the CPU affinity mask
|
||||
## This forces make and all compiler processes to use only nohz_full cores (4-55, 60-111)
|
||||
## Format: taskset -c 4-55,60-111 ensures processes only run on these cores
|
||||
NUMACTL_CPU_BIND = "taskset -c 0-111"
|
||||
|
||||
## Build parallelism configuration
|
||||
## Use nohz_full cores (4-55, 60-111) for compilation: 52 + 52 = 104 cores
|
||||
## Set make -j to utilize available cores for faster builds
|
||||
BUILD_JOBS = 96
|
||||
BUILD_JOBS = 104
|
||||
|
||||
|
||||
##################################################################
|
||||
@@ -35,7 +37,7 @@ def makefile_ABE():
|
||||
print( " Compiling the AMSS-NCKU executable file ABE/ABEGPU " )
|
||||
print( )
|
||||
|
||||
## Build command
|
||||
## Build command with CPU binding to nohz_full cores
|
||||
if (input_data.GPU_Calculation == "no"):
|
||||
makefile_command = f"{NUMACTL_CPU_BIND} make -j{BUILD_JOBS} ABE"
|
||||
elif (input_data.GPU_Calculation == "yes"):
|
||||
@@ -76,7 +78,7 @@ def makefile_TwoPunctureABE():
|
||||
print( " Compiling the AMSS-NCKU executable file TwoPunctureABE " )
|
||||
print( )
|
||||
|
||||
## Build command
|
||||
## Build command with CPU binding to nohz_full cores
|
||||
makefile_command = f"{NUMACTL_CPU_BIND} make -j{BUILD_JOBS} TwoPunctureABE"
|
||||
|
||||
## Execute the command with subprocess.Popen and stream output
|
||||
@@ -111,23 +113,13 @@ def run_ABE():
|
||||
print( " Running the AMSS-NCKU executable file ABE/ABEGPU " )
|
||||
print( )
|
||||
|
||||
## Calculate OMP_NUM_THREADS
|
||||
## User has 96 cores. Calculate threads per MPI process.
|
||||
total_physical_cores = 96
|
||||
omp_num_threads = total_physical_cores // input_data.MPI_processes
|
||||
if omp_num_threads < 1:
|
||||
omp_num_threads = 1
|
||||
|
||||
print( f" Configuration: {input_data.MPI_processes} MPI processes, {omp_num_threads} OpenMP threads per process." )
|
||||
print( f" Total cores utilized: {input_data.MPI_processes * omp_num_threads}" )
|
||||
|
||||
## Define the command to run; cast other values to strings as needed
|
||||
|
||||
if (input_data.GPU_Calculation == "no"):
|
||||
mpi_command = f"{NUMACTL_CPU_BIND} mpirun -genv OMP_NUM_THREADS {omp_num_threads} -np {input_data.MPI_processes} ./ABE"
|
||||
mpi_command = NUMACTL_CPU_BIND + " mpirun -np " + str(input_data.MPI_processes) + " ./ABE"
|
||||
mpi_command_outfile = "ABE_out.log"
|
||||
elif (input_data.GPU_Calculation == "yes"):
|
||||
mpi_command = f"{NUMACTL_CPU_BIND} mpirun -genv OMP_NUM_THREADS {omp_num_threads} -np {input_data.MPI_processes} ./ABEGPU"
|
||||
mpi_command = NUMACTL_CPU_BIND + " mpirun -np " + str(input_data.MPI_processes) + " ./ABEGPU"
|
||||
mpi_command_outfile = "ABEGPU_out.log"
|
||||
|
||||
## Execute the MPI command and stream output
|
||||
|
||||
Reference in New Issue
Block a user