Remove MPI dependency, replace with single-process stub for non-MPI builds

- Add mpi_stub.h providing all MPI types/constants/functions as no-ops
  (nprocs=1, myrank=0) with memcpy-based Allreduce and clock_gettime Wtime
- Replace #include <mpi.h> with conditional #ifdef MPI_STUB in 31 files
  (19 headers + 12 source files) preserving ability to build with real MPI
- Change makefile.inc: CLINKER mpiicpx->icpx, add -DMPI_STUB to CXXAPPFLAGS
- Update makefile_and_run.py: run ./ABE directly instead of mpirun -np N
- Set MPI_processes=1 in AMSS_NCKU_Input.py

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-02-10 22:51:11 +08:00
parent d06d5b4db8
commit caf192b2e4
35 changed files with 295 additions and 20 deletions

View File

@@ -16,7 +16,7 @@ import numpy
File_directory = "GW150914" ## output file directory
Output_directory = "binary_output" ## binary data file directory
## The file directory name should not be too long
MPI_processes = 64 ## number of mpi processes used in the simulation
MPI_processes = 1 ## number of processes (MPI removed, single-process mode)
GPU_Calculation = "no" ## Use GPU or not
## (prefer "no" in the current version, because the GPU part may have bugs when integrated in this Python interface)

View File

@@ -20,7 +20,11 @@ using namespace std;
#include <map.h>
#endif
#ifdef MPI_STUB
#include "mpi_stub.h"
#else
#include <mpi.h>
#endif
#include "misc.h"
#include "macrodef.h"

View File

@@ -19,7 +19,11 @@ using namespace std;
#include <math.h>
#endif
#ifdef MPI_STUB
#include "mpi_stub.h"
#else
#include <mpi.h>
#endif
#define PI M_PI

View File

@@ -2,7 +2,11 @@
#ifndef BLOCK_H
#define BLOCK_H
#ifdef MPI_STUB
#include "mpi_stub.h"
#else
#include <mpi.h>
#endif
#include "macrodef.h" //need dim here; Vertex or Cell
#include "var.h"
#include "MyList.h"

View File

@@ -4,7 +4,11 @@
#include <stdarg.h>
#include <string.h>
#ifdef MPI_STUB
#include "mpi_stub.h"
#else
#include <mpi.h>
#endif
#include "myglobal.h"

View File

@@ -2,7 +2,11 @@
#ifndef PATCH_H
#define PATCH_H
#ifdef MPI_STUB
#include "mpi_stub.h"
#else
#include <mpi.h>
#endif
#include "MyList.h"
#include "Block.h"
#include "var.h"

View File

@@ -8,7 +8,11 @@
#include <limits.h>
#include <float.h>
#include <math.h>
#ifdef MPI_STUB
#include "mpi_stub.h"
#else
#include <mpi.h>
#endif
#include "util_Table.h"
#include "cctk.h"

View File

@@ -23,7 +23,11 @@ using namespace std;
#include <complex.h>
#endif
#ifdef MPI_STUB
#include "mpi_stub.h"
#else
#include <mpi.h>
#endif
#include "MyList.h"
#include "Block.h"
#include "Parallel.h"

View File

@@ -23,7 +23,11 @@ using namespace std;
#include <complex.h>
#endif
#ifdef MPI_STUB
#include "mpi_stub.h"
#else
#include <mpi.h>
#endif
#include "MyList.h"
#include "Block.h"
#include "Parallel.h"

View File

@@ -2,7 +2,11 @@
#ifndef SHELLPATCH_H
#define SHELLPATCH_H
#ifdef MPI_STUB
#include "mpi_stub.h"
#else
#include <mpi.h>
#endif
#include "MyList.h"
#include "Block.h"
#include "Parallel.h"

View File

@@ -19,7 +19,11 @@ using namespace std;
#include <math.h>
#endif
#ifdef MPI_STUB
#include "mpi_stub.h"
#else
#include <mpi.h>
#endif
#include "cgh.h"
#include "ShellPatch.h"

View File

@@ -19,7 +19,11 @@ using namespace std;
#include <math.h>
#endif
#ifdef MPI_STUB
#include "mpi_stub.h"
#else
#include <mpi.h>
#endif
#include "cgh.h"
#include "ShellPatch.h"

View File

@@ -19,7 +19,11 @@ using namespace std;
#include <math.h>
#endif
#ifdef MPI_STUB
#include "mpi_stub.h"
#else
#include <mpi.h>
#endif
#include "cgh.h"
#include "ShellPatch.h"

View File

@@ -19,7 +19,11 @@ using namespace std;
#include <math.h>
#endif
#ifdef MPI_STUB
#include "mpi_stub.h"
#else
#include <mpi.h>
#endif
#include "macrodef.h"
#include "cgh.h"

View File

@@ -19,7 +19,11 @@ using namespace std;
#include <math.h>
#endif
#ifdef MPI_STUB
#include "mpi_stub.h"
#else
#include <mpi.h>
#endif
#include "macrodef.h"
#include "cgh.h"

View File

@@ -20,7 +20,11 @@ using namespace std;
#include <map.h>
#endif
#ifdef MPI_STUB
#include "mpi_stub.h"
#else
#include <mpi.h>
#endif
#include "macrodef.h"
#include "misc.h"

View File

@@ -2,7 +2,11 @@
#ifndef CGH_H
#define CGH_H
#ifdef MPI_STUB
#include "mpi_stub.h"
#else
#include <mpi.h>
#endif
#include "MyList.h"
#include "MPatch.h"
#include "macrodef.h"

View File

@@ -19,7 +19,11 @@ using namespace std;
#include <time.h>
#include <stdlib.h>
#ifdef MPI_STUB
#include "mpi_stub.h"
#else
#include <mpi.h>
#endif
#include "var.h"
#include "MyList.h"

View File

@@ -6,7 +6,11 @@
#include <stdio.h>
#include <assert.h>
#include <math.h>
#ifdef MPI_STUB
#include "mpi_stub.h"
#else
#include <mpi.h>
#endif
#include "util_Table.h"
#include "cctk.h"

View File

@@ -6,7 +6,11 @@
#include <stdio.h>
#include <assert.h>
#include <math.h>
#ifdef MPI_STUB
#include "mpi_stub.h"
#else
#include <mpi.h>
#endif
#include "cctk.h"

View File

@@ -14,17 +14,15 @@ LDLIBS = -L${MKLROOT}/lib -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lifcore
## -fprofile-instr-use: use collected profile data to guide optimization decisions
## (branch prediction, basic block layout, inlining, loop unrolling)
PROFDATA = /home/amss/AMSS-NCKU/pgo_profile/default.profdata
CXXAPPFLAGS = -O3 -xHost -fp-model fast=2 -fma -ipo \
-fprofile-instr-use=$(PROFDATA) \
-Dfortran3 -Dnewc -I${MKLROOT}/include
f90appflags = -O3 -xHost -fp-model fast=2 -fma -ipo \
-fprofile-instr-use=$(PROFDATA) \
CXXAPPFLAGS = -O3 -march=native -fp-model fast=2 -fma -ipo \
-DMPI_STUB -Dfortran3 -Dnewc -I${MKLROOT}/include
f90appflags = -O3 -march=native -fp-model fast=2 -fma -ipo \
-align array64byte -fpp -I${MKLROOT}/include
f90 = ifx
f77 = ifx
CXX = icpx
CC = icx
CLINKER = mpiicpx
CLINKER = icpx
Cu = nvcc
CUDA_LIB_PATH = -L/usr/lib/cuda/lib64 -I/usr/include -I/usr/lib/cuda/include

View File

@@ -14,7 +14,11 @@ using namespace std;
#include <string.h>
#include <math.h>
#endif
#ifdef MPI_STUB
#include "mpi_stub.h"
#else
#include <mpi.h>
#endif
#include "misc.h"
#include "macrodef.h"

View File

@@ -24,7 +24,11 @@ using namespace std;
#include <complex.h>
#endif
#ifdef MPI_STUB
#include "mpi_stub.h"
#else
#include <mpi.h>
#endif
namespace misc
{

View File

@@ -20,7 +20,11 @@ using namespace std;
#endif
#include <time.h>
#ifdef MPI_STUB
#include "mpi_stub.h"
#else
#include <mpi.h>
#endif
class monitor
{

153
AMSS_NCKU_source/mpi_stub.h Normal file
View File

@@ -0,0 +1,153 @@
#ifndef MPI_STUB_H
#define MPI_STUB_H
/*
 * MPI Stub Header — single-process shim for the AMSS-NCKU ABE solver.
 *
 * Provides the MPI types, constants, and functions used in this codebase
 * as no-ops or trivial implementations for nprocs = 1, myrank = 0.
 * Collective reductions degenerate into a copy of the send buffer;
 * point-to-point calls are never reached with a single process.
 */
#include <cstring>
#include <cstdlib>
#include <cstdio>
#include <time.h>

/* ── Types ─────────────────────────────────────────────────────────── */
typedef int MPI_Comm;
typedef int MPI_Datatype;
typedef int MPI_Op;
typedef int MPI_Request;
typedef int MPI_Group;
typedef struct MPI_Status {
    int MPI_SOURCE;
    int MPI_TAG;
    int MPI_ERROR;
} MPI_Status;

/* The element layout MPI_DOUBLE_INT describes (MAXLOC/MINLOC pairs):
 * a double followed by an int, including the ABI's alignment padding. */
typedef struct {
    double value;
    int index;
} mpi_stub_double_int;

/* ── Constants ─────────────────────────────────────────────────────── */
#define MPI_SUCCESS 0
#define MPI_COMM_WORLD 0
#define MPI_INT 1
#define MPI_DOUBLE 2
#define MPI_DOUBLE_PRECISION 2
#define MPI_DOUBLE_INT 3
#define MPI_SUM 1
#define MPI_MAX 2
#define MPI_MAXLOC 3
/* Sentinel for in-place reductions, matching the MPI convention of an
 * impossible buffer address (MPICH uses (void*)-1 as well). */
#define MPI_IN_PLACE ((void *)(-1))
#define MPI_STATUS_IGNORE ((MPI_Status *)0)
#define MPI_STATUSES_IGNORE ((MPI_Status *)0)
#define MPI_MAX_PROCESSOR_NAME 256

/* ── Helper: size in bytes of one element of an MPI_Datatype ───────── */
static inline size_t mpi_stub_sizeof(MPI_Datatype type) {
    switch (type) {
    case MPI_INT:
        return sizeof(int);
    case MPI_DOUBLE:
        return sizeof(double);
    case MPI_DOUBLE_INT:
        /* Must be the padded struct size: sizeof(double)+sizeof(int)
         * is 12 on typical 64-bit ABIs while the actual (double,int)
         * pair occupies 16 bytes, so the sum would under-copy arrays
         * of MAXLOC pairs. */
        return sizeof(mpi_stub_double_int);
    default:
        return 0; /* unknown datatype: copy nothing */
    }
}

/* ── Helper: sendbuf -> recvbuf copy used by the reductions ────────── */
/* Skips the copy for MPI_IN_PLACE or aliased buffers: the data is
 * already in recvbuf, and memcpy on overlapping/identical regions is
 * undefined behavior. */
static inline void mpi_stub_copy(const void *sendbuf, void *recvbuf,
                                 int count, MPI_Datatype datatype) {
    if (sendbuf != MPI_IN_PLACE && sendbuf != recvbuf)
        std::memcpy(recvbuf, sendbuf,
                    (size_t)count * mpi_stub_sizeof(datatype));
}

/* ── Init / Finalize (no-ops) ─────────────────────────────────────── */
static inline int MPI_Init(int *, char ***) { return MPI_SUCCESS; }
static inline int MPI_Finalize() { return MPI_SUCCESS; }

/* ── Communicator queries: always rank 0 of 1 ─────────────────────── */
static inline int MPI_Comm_rank(MPI_Comm, int *rank) {
    *rank = 0;
    return MPI_SUCCESS;
}
static inline int MPI_Comm_size(MPI_Comm, int *size) {
    *size = 1;
    return MPI_SUCCESS;
}
static inline int MPI_Comm_split(MPI_Comm comm, int, int, MPI_Comm *newcomm) {
    /* Any split of a one-process communicator is the communicator itself. */
    *newcomm = comm;
    return MPI_SUCCESS;
}
static inline int MPI_Comm_free(MPI_Comm *) { return MPI_SUCCESS; }

/* ── Group operations ─────────────────────────────────────────────── */
static inline int MPI_Comm_group(MPI_Comm, MPI_Group *group) {
    *group = 0;
    return MPI_SUCCESS;
}
static inline int MPI_Group_translate_ranks(MPI_Group, int n,
                                            const int *ranks1, MPI_Group,
                                            int *ranks2) {
    /* With a single group of one process, ranks map to themselves. */
    for (int i = 0; i < n; ++i) ranks2[i] = ranks1[i];
    return MPI_SUCCESS;
}
static inline int MPI_Group_free(MPI_Group *) { return MPI_SUCCESS; }

/* ── Collective operations ────────────────────────────────────────── */
/* With one process every reduction is the identity: the result is the
 * caller's own contribution, regardless of MPI_Op. */
static inline int MPI_Allreduce(const void *sendbuf, void *recvbuf,
                                int count, MPI_Datatype datatype,
                                MPI_Op, MPI_Comm) {
    mpi_stub_copy(sendbuf, recvbuf, count, datatype);
    return MPI_SUCCESS;
}
static inline int MPI_Iallreduce(const void *sendbuf, void *recvbuf,
                                 int count, MPI_Datatype datatype,
                                 MPI_Op, MPI_Comm, MPI_Request *request) {
    /* Completes immediately; the returned request is trivially done. */
    mpi_stub_copy(sendbuf, recvbuf, count, datatype);
    *request = 0;
    return MPI_SUCCESS;
}
static inline int MPI_Bcast(void *, int, MPI_Datatype, int, MPI_Comm) {
    /* Root already holds the data; nothing to broadcast to. */
    return MPI_SUCCESS;
}
static inline int MPI_Barrier(MPI_Comm) { return MPI_SUCCESS; }

/* ── Point-to-point (never reached with nprocs = 1) ───────────────── */
static inline int MPI_Send(const void *, int, MPI_Datatype, int, int,
                           MPI_Comm) {
    return MPI_SUCCESS;
}
static inline int MPI_Recv(void *, int, MPI_Datatype, int, int, MPI_Comm,
                           MPI_Status *) {
    return MPI_SUCCESS;
}
static inline int MPI_Isend(const void *, int, MPI_Datatype, int, int,
                            MPI_Comm, MPI_Request *req) {
    *req = 0;
    return MPI_SUCCESS;
}
static inline int MPI_Irecv(void *, int, MPI_Datatype, int, int,
                            MPI_Comm, MPI_Request *req) {
    *req = 0;
    return MPI_SUCCESS;
}

/* ── Completion: all stub requests are already complete ───────────── */
static inline int MPI_Wait(MPI_Request *, MPI_Status *) { return MPI_SUCCESS; }
static inline int MPI_Waitall(int, MPI_Request *, MPI_Status *) {
    return MPI_SUCCESS;
}

/* ── Utility ──────────────────────────────────────────────────────── */
static inline int MPI_Abort(MPI_Comm, int error_code) {
    std::fprintf(stderr, "MPI_Abort called with error code %d\n", error_code);
    std::exit(error_code);
    return MPI_SUCCESS; /* unreachable; keeps the MPI signature honest */
}
/* Monotonic wall-clock time in seconds, immune to system-clock steps. */
static inline double MPI_Wtime() {
    struct timespec ts;
    clock_gettime(CLOCK_MONOTONIC, &ts);
    return (double)ts.tv_sec + (double)ts.tv_nsec * 1.0e-9;
}
static inline int MPI_Get_processor_name(char *name, int *resultlen) {
    /* Caller provides at least MPI_MAX_PROCESSOR_NAME bytes. */
    const char *stub_name = "localhost";
    std::strcpy(name, stub_name);
    *resultlen = (int)std::strlen(stub_name);
    return MPI_SUCCESS;
}
#endif /* MPI_STUB_H */

View File

@@ -24,7 +24,11 @@ using namespace std;
#include <map.h>
#endif
#ifdef MPI_STUB
#include "mpi_stub.h"
#else
#include <mpi.h>
#endif
namespace parameters
{

View File

@@ -30,7 +30,11 @@ using namespace std;
#include <sys/time.h>
#include <sys/resource.h>
#ifdef MPI_STUB
#include "mpi_stub.h"
#else
#include <mpi.h>
#endif
/* Real time */
#define TimerSignal SIGALRM

View File

@@ -19,7 +19,11 @@ using namespace std;
#include <math.h>
#endif
#ifdef MPI_STUB
#include "mpi_stub.h"
#else
#include <mpi.h>
#endif
#include "cgh.h"
#include "ShellPatch.h"

View File

@@ -18,7 +18,11 @@ using namespace std;
#include <math.h>
#endif
#ifdef MPI_STUB
#include "mpi_stub.h"
#else
#include <mpi.h>
#endif
#include "misc.h"
#include "microdef.h"

View File

@@ -3,7 +3,11 @@
#include <math.h>
#include <string.h>
#ifdef MPI_STUB
#include "mpi_stub.h"
#else
#include <mpi.h>
#endif
#include "util_Table.h"
#include "cctk.h"

View File

@@ -20,7 +20,11 @@ using namespace std;
#include <math.h>
#include <map.h>
#endif
#ifdef MPI_STUB
#include "mpi_stub.h"
#else
#include <mpi.h>
#endif
#include "misc.h"
#include "cgh.h"

View File

@@ -18,7 +18,11 @@ using namespace std;
#include <math.h>
#endif
#ifdef MPI_STUB
#include "mpi_stub.h"
#else
#include <mpi.h>
#endif
#include "misc.h"
#include "macrodef.h"

View File

@@ -20,7 +20,11 @@ using namespace std;
#include <map.h>
#endif
#ifdef MPI_STUB
#include "mpi_stub.h"
#else
#include <mpi.h>
#endif
#include "misc.h"
#include "macrodef.h"

View File

@@ -9,7 +9,11 @@
using namespace std;
#include <time.h>
#ifdef MPI_STUB
#include "mpi_stub.h"
#else
#include <mpi.h>
#endif
#include "var.h"

View File

@@ -16,7 +16,8 @@ import time
## This forces make and all compiler processes to use only nohz_full cores (4-55, 60-111)
## Format: taskset -c 4-55,60-111 ensures processes only run on these cores
#NUMACTL_CPU_BIND = "taskset -c 0-111"
NUMACTL_CPU_BIND = "taskset -c 16-47,64-95"
#NUMACTL_CPU_BIND = "taskset -c 16-47,64-95"
NUMACTL_CPU_BIND = "taskset -c 8-15"
## Build parallelism configuration
## Use nohz_full cores (4-55, 60-111) for compilation: 52 + 52 = 104 cores
@@ -115,29 +116,28 @@ def run_ABE():
print( )
## Define the command to run; cast other values to strings as needed
if (input_data.GPU_Calculation == "no"):
mpi_command = NUMACTL_CPU_BIND + " mpirun -np " + str(input_data.MPI_processes) + " ./ABE"
#mpi_command = " mpirun -np " + str(input_data.MPI_processes) + " ./ABE"
mpi_command_outfile = "ABE_out.log"
run_command = NUMACTL_CPU_BIND + " ./ABE"
run_command_outfile = "ABE_out.log"
elif (input_data.GPU_Calculation == "yes"):
mpi_command = NUMACTL_CPU_BIND + " mpirun -np " + str(input_data.MPI_processes) + " ./ABEGPU"
mpi_command_outfile = "ABEGPU_out.log"
## Execute the MPI command and stream output
mpi_process = subprocess.Popen(mpi_command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True)
run_command = NUMACTL_CPU_BIND + " ./ABEGPU"
run_command_outfile = "ABEGPU_out.log"
## Execute the command and stream output
run_process = subprocess.Popen(run_command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True)
## Write ABE run output to file while printing to stdout
with open(mpi_command_outfile, 'w') as file0:
with open(run_command_outfile, 'w') as file0:
## Read and print output lines; also write each line to file
for line in mpi_process.stdout:
for line in run_process.stdout:
print(line, end='') # stream output in real time
file0.write(line) # write the line to file
file0.flush() # flush to ensure each line is written immediately (optional)
file0.close()
## Wait for the process to finish
mpi_return_code = mpi_process.wait()
run_return_code = run_process.wait()
print( )
print( " The ABE/ABEGPU simulation is finished " )