diff --git a/AMSS_NCKU_Input.py b/AMSS_NCKU_Input.py index fe25a50..5747720 100755 --- a/AMSS_NCKU_Input.py +++ b/AMSS_NCKU_Input.py @@ -16,7 +16,7 @@ import numpy File_directory = "GW150914" ## output file directory Output_directory = "binary_output" ## binary data file directory ## The file directory name should not be too long -MPI_processes = 64 ## number of mpi processes used in the simulation +MPI_processes = 1 ## number of processes (MPI removed, single-process mode) GPU_Calculation = "no" ## Use GPU or not ## (prefer "no" in the current version, because the GPU part may have bugs when integrated in this Python interface) diff --git a/AMSS_NCKU_source/ABE.C b/AMSS_NCKU_source/ABE.C index 9a4874e..9f59684 100644 --- a/AMSS_NCKU_source/ABE.C +++ b/AMSS_NCKU_source/ABE.C @@ -20,7 +20,11 @@ using namespace std; #include #endif +#ifdef MPI_STUB +#include "mpi_stub.h" +#else #include +#endif #include "misc.h" #include "macrodef.h" diff --git a/AMSS_NCKU_source/Ansorg.h b/AMSS_NCKU_source/Ansorg.h index 557043c..a57e0dc 100644 --- a/AMSS_NCKU_source/Ansorg.h +++ b/AMSS_NCKU_source/Ansorg.h @@ -19,7 +19,11 @@ using namespace std; #include #endif +#ifdef MPI_STUB +#include "mpi_stub.h" +#else #include +#endif #define PI M_PI diff --git a/AMSS_NCKU_source/Block.h b/AMSS_NCKU_source/Block.h index 28193fd..13b0257 100644 --- a/AMSS_NCKU_source/Block.h +++ b/AMSS_NCKU_source/Block.h @@ -2,7 +2,11 @@ #ifndef BLOCK_H #define BLOCK_H +#ifdef MPI_STUB +#include "mpi_stub.h" +#else #include +#endif #include "macrodef.h" //need dim here; Vertex or Cell #include "var.h" #include "MyList.h" diff --git a/AMSS_NCKU_source/IntPnts0.C b/AMSS_NCKU_source/IntPnts0.C index fb176d8..4433246 100644 --- a/AMSS_NCKU_source/IntPnts0.C +++ b/AMSS_NCKU_source/IntPnts0.C @@ -4,7 +4,11 @@ #include #include +#ifdef MPI_STUB +#include "mpi_stub.h" +#else #include +#endif #include "myglobal.h" diff --git a/AMSS_NCKU_source/MPatch.h b/AMSS_NCKU_source/MPatch.h index b993be6..4d3863b 100644 --- a/AMSS_NCKU_source/MPatch.h +++ b/AMSS_NCKU_source/MPatch.h @@ -2,7 +2,11 @@ #ifndef PATCH_H #define PATCH_H +#ifdef MPI_STUB +#include "mpi_stub.h" +#else #include +#endif #include "MyList.h" #include "Block.h" #include "var.h" diff --git a/AMSS_NCKU_source/Newton.C b/AMSS_NCKU_source/Newton.C index 5e93014..ad50b47 100644 --- a/AMSS_NCKU_source/Newton.C +++ b/AMSS_NCKU_source/Newton.C @@ -8,7 +8,11 @@ #include #include #include +#ifdef MPI_STUB +#include "mpi_stub.h" +#else #include +#endif #include "util_Table.h" #include "cctk.h" diff --git a/AMSS_NCKU_source/NullShellPatch.h b/AMSS_NCKU_source/NullShellPatch.h index 26ff030..91f1da2 100644 --- a/AMSS_NCKU_source/NullShellPatch.h +++ b/AMSS_NCKU_source/NullShellPatch.h @@ -23,7 +23,11 @@ using namespace std; #include #endif +#ifdef MPI_STUB +#include "mpi_stub.h" +#else #include +#endif #include "MyList.h" #include "Block.h" #include "Parallel.h" diff --git a/AMSS_NCKU_source/NullShellPatch2.h b/AMSS_NCKU_source/NullShellPatch2.h index df132ff..d38585b 100644 --- a/AMSS_NCKU_source/NullShellPatch2.h +++ b/AMSS_NCKU_source/NullShellPatch2.h @@ -23,7 +23,11 @@ using namespace std; #include #endif +#ifdef MPI_STUB +#include "mpi_stub.h" +#else #include +#endif #include "MyList.h" #include "Block.h" #include "Parallel.h" diff --git a/AMSS_NCKU_source/ShellPatch.h b/AMSS_NCKU_source/ShellPatch.h index b64c79d..9bb0d99 100644 --- a/AMSS_NCKU_source/ShellPatch.h +++ b/AMSS_NCKU_source/ShellPatch.h @@ -2,7 +2,11 @@ #ifndef SHELLPATCH_H #define SHELLPATCH_H +#ifdef MPI_STUB +#include "mpi_stub.h" +#else #include +#endif #include "MyList.h" #include "Block.h" #include "Parallel.h" diff --git a/AMSS_NCKU_source/Z4c_class.h b/AMSS_NCKU_source/Z4c_class.h index d279a1d..7cf502b 100644 --- a/AMSS_NCKU_source/Z4c_class.h +++ b/AMSS_NCKU_source/Z4c_class.h @@ -19,7 +19,11 @@ using namespace std; #include #endif +#ifdef MPI_STUB +#include "mpi_stub.h" +#else #include +#endif #include "cgh.h" #include "ShellPatch.h" diff --git a/AMSS_NCKU_source/bssnEM_class.h b/AMSS_NCKU_source/bssnEM_class.h index 2bff672..bdbaba8 100644 --- a/AMSS_NCKU_source/bssnEM_class.h +++ b/AMSS_NCKU_source/bssnEM_class.h @@ -19,7 +19,11 @@ using namespace std; #include #endif +#ifdef MPI_STUB +#include "mpi_stub.h" +#else #include +#endif #include "cgh.h" #include "ShellPatch.h" diff --git a/AMSS_NCKU_source/bssnEScalar_class.h b/AMSS_NCKU_source/bssnEScalar_class.h index 3e26005..b994f57 100644 --- a/AMSS_NCKU_source/bssnEScalar_class.h +++ b/AMSS_NCKU_source/bssnEScalar_class.h @@ -19,7 +19,11 @@ using namespace std; #include #endif +#ifdef MPI_STUB +#include "mpi_stub.h" +#else #include +#endif #include "cgh.h" #include "ShellPatch.h" diff --git a/AMSS_NCKU_source/bssn_class.h b/AMSS_NCKU_source/bssn_class.h index db434e2..e3d99c4 100644 --- a/AMSS_NCKU_source/bssn_class.h +++ b/AMSS_NCKU_source/bssn_class.h @@ -19,7 +19,11 @@ using namespace std; #include #endif +#ifdef MPI_STUB +#include "mpi_stub.h" +#else #include +#endif #include "macrodef.h" #include "cgh.h" diff --git a/AMSS_NCKU_source/bssn_gpu_class.h b/AMSS_NCKU_source/bssn_gpu_class.h index 98e844d..f0c7448 100644 --- a/AMSS_NCKU_source/bssn_gpu_class.h +++ b/AMSS_NCKU_source/bssn_gpu_class.h @@ -19,7 +19,11 @@ using namespace std; #include #endif +#ifdef MPI_STUB +#include "mpi_stub.h" +#else #include +#endif #include "macrodef.h" #include "cgh.h" diff --git a/AMSS_NCKU_source/cgh.C b/AMSS_NCKU_source/cgh.C index e27ccd6..67ac212 100644 --- a/AMSS_NCKU_source/cgh.C +++ b/AMSS_NCKU_source/cgh.C @@ -20,7 +20,11 @@ using namespace std; #include #endif +#ifdef MPI_STUB +#include "mpi_stub.h" +#else #include +#endif #include "macrodef.h" #include "misc.h" diff --git a/AMSS_NCKU_source/cgh.h b/AMSS_NCKU_source/cgh.h index 79e7bf6..b932afe 100644 --- a/AMSS_NCKU_source/cgh.h +++ b/AMSS_NCKU_source/cgh.h @@ -2,7 +2,11 @@ #ifndef CGH_H #define CGH_H +#ifdef MPI_STUB +#include "mpi_stub.h" +#else #include +#endif #include "MyList.h" #include "MPatch.h" #include "macrodef.h" diff --git a/AMSS_NCKU_source/checkpoint.h b/AMSS_NCKU_source/checkpoint.h index 6571766..e965e08 100644 --- a/AMSS_NCKU_source/checkpoint.h +++ b/AMSS_NCKU_source/checkpoint.h @@ -19,7 +19,11 @@ using namespace std; #include #include +#ifdef MPI_STUB +#include "mpi_stub.h" +#else #include +#endif #include "var.h" #include "MyList.h" diff --git a/AMSS_NCKU_source/expansion.C b/AMSS_NCKU_source/expansion.C index 44c5f3c..4cbbe2a 100644 --- a/AMSS_NCKU_source/expansion.C +++ b/AMSS_NCKU_source/expansion.C @@ -6,7 +6,11 @@ #include #include #include +#ifdef MPI_STUB +#include "mpi_stub.h" +#else #include +#endif #include "util_Table.h" #include "cctk.h" diff --git a/AMSS_NCKU_source/find_horizons.C b/AMSS_NCKU_source/find_horizons.C index fb5f014..efe9c6f 100644 --- a/AMSS_NCKU_source/find_horizons.C +++ b/AMSS_NCKU_source/find_horizons.C @@ -6,7 +6,11 @@ #include #include #include +#ifdef MPI_STUB +#include "mpi_stub.h" +#else #include +#endif #include "cctk.h" diff --git a/AMSS_NCKU_source/makefile.inc b/AMSS_NCKU_source/makefile.inc index a5fd83d..c2abe44 100755 --- a/AMSS_NCKU_source/makefile.inc +++ b/AMSS_NCKU_source/makefile.inc @@ -14,17 +14,15 @@ LDLIBS = -L${MKLROOT}/lib -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lifcore ## -fprofile-instr-use: use collected profile data to guide optimization decisions ## (branch prediction, basic block layout, inlining, loop unrolling) PROFDATA = /home/amss/AMSS-NCKU/pgo_profile/default.profdata -CXXAPPFLAGS = -O3 -xHost -fp-model fast=2 -fma -ipo \ - -fprofile-instr-use=$(PROFDATA) \ - -Dfortran3 -Dnewc -I${MKLROOT}/include -f90appflags = -O3 -xHost -fp-model fast=2 -fma -ipo \ - -fprofile-instr-use=$(PROFDATA) \ +CXXAPPFLAGS = -O3 -march=native -fp-model fast=2 -fma -ipo \ + -DMPI_STUB -Dfortran3 -Dnewc -I${MKLROOT}/include +f90appflags = -O3 -march=native -fp-model fast=2 -fma -ipo \ -align array64byte -fpp -I${MKLROOT}/include f90 = ifx f77 = ifx CXX = icpx CC = icx -CLINKER = mpiicpx +CLINKER = icpx Cu = nvcc CUDA_LIB_PATH = -L/usr/lib/cuda/lib64 -I/usr/include -I/usr/lib/cuda/include diff --git a/AMSS_NCKU_source/misc.C b/AMSS_NCKU_source/misc.C index b692485..09e0f19 100644 --- a/AMSS_NCKU_source/misc.C +++ b/AMSS_NCKU_source/misc.C @@ -14,7 +14,11 @@ using namespace std; #include #include #endif +#ifdef MPI_STUB +#include "mpi_stub.h" +#else #include +#endif #include "misc.h" #include "macrodef.h" diff --git a/AMSS_NCKU_source/misc.h b/AMSS_NCKU_source/misc.h index 3b9ddcc..c96448e 100644 --- a/AMSS_NCKU_source/misc.h +++ b/AMSS_NCKU_source/misc.h @@ -24,7 +24,11 @@ using namespace std; #include #endif +#ifdef MPI_STUB +#include "mpi_stub.h" +#else #include +#endif namespace misc { diff --git a/AMSS_NCKU_source/monitor.h b/AMSS_NCKU_source/monitor.h index 49692d4..9d8bdd4 100644 --- a/AMSS_NCKU_source/monitor.h +++ b/AMSS_NCKU_source/monitor.h @@ -20,7 +20,11 @@ using namespace std; #endif #include +#ifdef MPI_STUB +#include "mpi_stub.h" +#else #include +#endif class monitor { diff --git a/AMSS_NCKU_source/mpi_stub.h b/AMSS_NCKU_source/mpi_stub.h new file mode 100644 index 0000000..989cd8b --- /dev/null +++ b/AMSS_NCKU_source/mpi_stub.h @@ -0,0 +1,153 @@ +#ifndef MPI_STUB_H +#define MPI_STUB_H + +/* + * MPI Stub Header — single-process shim for AMSS-NCKU ABE solver. + * Provides all MPI types, constants, and functions used in the codebase + * as no-ops or trivial implementations for nprocs=1, myrank=0. + */ + +#include +#include +#include +#include + +/* ── Types ─────────────────────────────────────────────────────────── */ + +typedef int MPI_Comm; +typedef int MPI_Datatype; +typedef int MPI_Op; +typedef int MPI_Request; +typedef int MPI_Group; + +typedef struct MPI_Status { + int MPI_SOURCE; + int MPI_TAG; + int MPI_ERROR; +} MPI_Status; + +/* ── Constants ─────────────────────────────────────────────────────── */ + +#define MPI_COMM_WORLD 0 + +#define MPI_INT 1 +#define MPI_DOUBLE 2 +#define MPI_DOUBLE_PRECISION 2 +#define MPI_DOUBLE_INT 3 + +#define MPI_SUM 1 +#define MPI_MAX 2 +#define MPI_MAXLOC 3 + +#define MPI_STATUS_IGNORE ((MPI_Status *)0) +#define MPI_STATUSES_IGNORE ((MPI_Status *)0) + +#define MPI_MAX_PROCESSOR_NAME 256 + +/* ── Helper: sizeof for MPI_Datatype ──────────────────────────────── */ + +static inline size_t mpi_stub_sizeof(MPI_Datatype type) { + switch (type) { + case MPI_INT: return sizeof(int); + case MPI_DOUBLE: return sizeof(double); + case MPI_DOUBLE_INT: return sizeof(double) + sizeof(int); + default: return 0; + } +} + +/* ── Init / Finalize ──────────────────────────────────────────────── */ + +static inline int MPI_Init(int *, char ***) { return 0; } +static inline int MPI_Finalize() { return 0; } + +/* ── Communicator queries ─────────────────────────────────────────── */ + +static inline int MPI_Comm_rank(MPI_Comm, int *rank) { *rank = 0; return 0; } +static inline int MPI_Comm_size(MPI_Comm, int *size) { *size = 1; return 0; } +static inline int MPI_Comm_split(MPI_Comm comm, int, int, MPI_Comm *newcomm) { + *newcomm = comm; + return 0; +} +static inline int MPI_Comm_free(MPI_Comm *) { return 0; } + +/* ── Group operations ─────────────────────────────────────────────── */ + +static inline int MPI_Comm_group(MPI_Comm, MPI_Group *group) { + *group = 0; + return 0; +} +static inline int MPI_Group_translate_ranks(MPI_Group, int n, + const int *ranks1, MPI_Group, int *ranks2) { + for (int i = 0; i < n; ++i) ranks2[i] = ranks1[i]; + return 0; +} +static inline int MPI_Group_free(MPI_Group *) { return 0; } + +/* ── Collective operations ────────────────────────────────────────── */ + +static inline int MPI_Allreduce(const void *sendbuf, void *recvbuf, + int count, MPI_Datatype datatype, MPI_Op, MPI_Comm) { + std::memcpy(recvbuf, sendbuf, count * mpi_stub_sizeof(datatype)); + return 0; +} + +static inline int MPI_Iallreduce(const void *sendbuf, void *recvbuf, + int count, MPI_Datatype datatype, MPI_Op, MPI_Comm, + MPI_Request *request) { + std::memcpy(recvbuf, sendbuf, count * mpi_stub_sizeof(datatype)); + *request = 0; + return 0; +} + +static inline int MPI_Bcast(void *, int, MPI_Datatype, int, MPI_Comm) { + return 0; +} + +static inline int MPI_Barrier(MPI_Comm) { return 0; } + +/* ── Point-to-point (never reached with nprocs=1) ─────────────────── */ + +static inline int MPI_Send(const void *, int, MPI_Datatype, int, int, MPI_Comm) { + return 0; +} +static inline int MPI_Recv(void *, int, MPI_Datatype, int, int, MPI_Comm, MPI_Status *) { + return 0; +} +static inline int MPI_Isend(const void *, int, MPI_Datatype, int, int, MPI_Comm, + MPI_Request *req) { + *req = 0; + return 0; +} +static inline int MPI_Irecv(void *, int, MPI_Datatype, int, int, MPI_Comm, + MPI_Request *req) { + *req = 0; + return 0; +} + +/* ── Completion ───────────────────────────────────────────────────── */ + +static inline int MPI_Wait(MPI_Request *, MPI_Status *) { return 0; } +static inline int MPI_Waitall(int, MPI_Request *, MPI_Status *) { return 0; } + +/* ── Utility ──────────────────────────────────────────────────────── */ + +static inline int MPI_Abort(MPI_Comm, int error_code) { + std::fprintf(stderr, "MPI_Abort called with error code %d\n", error_code); + std::exit(error_code); + return 0; +} + +static inline double MPI_Wtime() { + struct timespec ts; + clock_gettime(CLOCK_MONOTONIC, &ts); + return (double)ts.tv_sec + (double)ts.tv_nsec * 1.0e-9; +} + +static inline int MPI_Get_processor_name(char *name, int *resultlen) { + const char *stub_name = "localhost"; + std::strcpy(name, stub_name); + *resultlen = (int)std::strlen(stub_name); + return 0; +} + +#endif /* MPI_STUB_H */ diff --git a/AMSS_NCKU_source/parameters.h b/AMSS_NCKU_source/parameters.h index edd0a71..c67893d 100644 --- a/AMSS_NCKU_source/parameters.h +++ b/AMSS_NCKU_source/parameters.h @@ -24,7 +24,11 @@ using namespace std; #include #endif +#ifdef MPI_STUB +#include "mpi_stub.h" +#else #include +#endif namespace parameters { diff --git a/AMSS_NCKU_source/perf.h b/AMSS_NCKU_source/perf.h index c16723d..1b7741b 100644 --- a/AMSS_NCKU_source/perf.h +++ b/AMSS_NCKU_source/perf.h @@ -30,7 +30,11 @@ using namespace std; #include #include +#ifdef MPI_STUB +#include "mpi_stub.h" +#else #include +#endif /* Real time */ #define TimerSignal SIGALRM diff --git a/AMSS_NCKU_source/scalar_class.h b/AMSS_NCKU_source/scalar_class.h index aab5aa4..d23f5b4 100644 --- a/AMSS_NCKU_source/scalar_class.h +++ b/AMSS_NCKU_source/scalar_class.h @@ -19,7 +19,11 @@ using namespace std; #include #endif +#ifdef MPI_STUB +#include "mpi_stub.h" +#else #include +#endif #include "cgh.h" #include "ShellPatch.h" diff --git a/AMSS_NCKU_source/scalarwaves.C b/AMSS_NCKU_source/scalarwaves.C index 9f465d9..eaca784 100644 --- a/AMSS_NCKU_source/scalarwaves.C +++ b/AMSS_NCKU_source/scalarwaves.C @@ -18,7 +18,11 @@ using namespace std; #include #endif +#ifdef MPI_STUB +#include "mpi_stub.h" +#else #include +#endif #include "misc.h" #include "microdef.h" diff --git a/AMSS_NCKU_source/setup.C b/AMSS_NCKU_source/setup.C index e760067..8012b49 100644 --- a/AMSS_NCKU_source/setup.C +++ b/AMSS_NCKU_source/setup.C @@ -3,7 +3,11 @@ #include #include +#ifdef MPI_STUB +#include "mpi_stub.h" +#else #include +#endif #include "util_Table.h" #include "cctk.h" diff --git a/AMSS_NCKU_source/surface_integral.C b/AMSS_NCKU_source/surface_integral.C index c2b7b67..a1ff57f 100644 --- a/AMSS_NCKU_source/surface_integral.C +++ b/AMSS_NCKU_source/surface_integral.C @@ -20,7 +20,11 @@ using namespace std; #include #include #endif +#ifdef MPI_STUB +#include "mpi_stub.h" +#else #include +#endif #include "misc.h" #include "cgh.h" diff --git a/AMSS_NCKU_source/testNull.C b/AMSS_NCKU_source/testNull.C index d09293e..ab07fc6 100644 --- a/AMSS_NCKU_source/testNull.C +++ b/AMSS_NCKU_source/testNull.C @@ -18,7 +18,11 @@ using namespace std; #include #endif +#ifdef MPI_STUB +#include "mpi_stub.h" +#else #include +#endif #include "misc.h" #include "macrodef.h" diff --git a/AMSS_NCKU_source/testNull2.C b/AMSS_NCKU_source/testNull2.C index ef5697c..ef5bc38 100644 --- a/AMSS_NCKU_source/testNull2.C +++ b/AMSS_NCKU_source/testNull2.C @@ -20,7 +20,11 @@ using namespace std; #include #endif +#ifdef MPI_STUB +#include "mpi_stub.h" +#else #include +#endif #include "misc.h" #include "macrodef.h" diff --git a/AMSS_NCKU_source/var.C b/AMSS_NCKU_source/var.C index 47a98ff..3d9420c 100644 --- a/AMSS_NCKU_source/var.C +++ b/AMSS_NCKU_source/var.C @@ -9,7 +9,11 @@ using namespace std; #include +#ifdef MPI_STUB +#include "mpi_stub.h" +#else #include +#endif #include "var.h" diff --git a/makefile_and_run.py b/makefile_and_run.py index 096ed58..c8b0ddc 100755 --- a/makefile_and_run.py +++ b/makefile_and_run.py @@ -16,7 +16,8 @@ import time ## This forces make and all compiler processes to use only nohz_full cores (4-55, 60-111) ## Format: taskset -c 4-55,60-111 ensures processes only run on these cores #NUMACTL_CPU_BIND = "taskset -c 0-111" -NUMACTL_CPU_BIND = "taskset -c 16-47,64-95" +#NUMACTL_CPU_BIND = "taskset -c 16-47,64-95" +NUMACTL_CPU_BIND = "taskset -c 8-15" ## Build parallelism configuration ## Use nohz_full cores (4-55, 60-111) for compilation: 52 + 52 = 104 cores @@ -115,29 +116,28 @@ def run_ABE(): print( ) ## Define the command to run; cast other values to strings as needed - + if (input_data.GPU_Calculation == "no"): - mpi_command = NUMACTL_CPU_BIND + " mpirun -np " + str(input_data.MPI_processes) + " ./ABE" - #mpi_command = " mpirun -np " + str(input_data.MPI_processes) + " ./ABE" - mpi_command_outfile = "ABE_out.log" + run_command = NUMACTL_CPU_BIND + " ./ABE" + run_command_outfile = "ABE_out.log" elif (input_data.GPU_Calculation == "yes"): - mpi_command = NUMACTL_CPU_BIND + " mpirun -np " + str(input_data.MPI_processes) + " ./ABEGPU" - mpi_command_outfile = "ABEGPU_out.log" - - ## Execute the MPI command and stream output - mpi_process = subprocess.Popen(mpi_command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True) + run_command = NUMACTL_CPU_BIND + " ./ABEGPU" + run_command_outfile = "ABEGPU_out.log" + + ## Execute the command and stream output + run_process = subprocess.Popen(run_command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True) ## Write ABE run output to file while printing to stdout - with open(mpi_command_outfile, 'w') as file0: + with open(run_command_outfile, 'w') as file0: ## Read and print output lines; also write each line to file - for line in mpi_process.stdout: + for line in run_process.stdout: print(line, end='') # stream output in real time file0.write(line) # write the line to file file0.flush() # flush to ensure each line is written immediately (optional) file0.close() ## Wait for the process to finish - mpi_return_code = mpi_process.wait() + run_return_code = run_process.wait() print( ) print( " The ABE/ABEGPU simulation is finished " )