Optimize MPI Sync with merged transfers, caching, and async overlap
Phase 1: Merge N+1 transfer() calls into a single transfer() per Sync(PatchList), reducing N+1 MPI_Waitall barriers to 1 via new Sync_merged() that collects all intra-patch and inter-patch grid segment lists into combined per-rank arrays. Phase 2: Cache grid segment lists and reuse grow-only communication buffers across RK4 substeps via SyncCache struct. Caches are per-level and per-variable-list (predictor/corrector), invalidated on regrid. Eliminates redundant build_ghost_gsl/build_owned_gsl0/build_gstl rebuilds and malloc/free cycles between regrids. Phase 3: Split Sync into async Sync_start/Sync_finish to overlap Cartesian ghost zone exchange (MPI_Isend/Irecv) with Shell patch synchronization. Uses MPI tag 2 to avoid conflicts with SH->Synch() which uses transfer() with tag 1. Also updates makefile.inc paths and flags for local build environment. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -81,6 +81,42 @@ namespace Parallel
|
||||
int Symmetry);
|
||||
void Sync(Patch *Pat, MyList<var> *VarList, int Symmetry);
|
||||
void Sync(MyList<Patch> *PatL, MyList<var> *VarList, int Symmetry);
|
||||
void Sync_merged(MyList<Patch> *PatL, MyList<var> *VarList, int Symmetry);
|
||||
|
||||
struct SyncCache {
|
||||
bool valid;
|
||||
int cpusize;
|
||||
MyList<gridseg> **combined_src;
|
||||
MyList<gridseg> **combined_dst;
|
||||
int *send_lengths;
|
||||
int *recv_lengths;
|
||||
double **send_bufs;
|
||||
double **recv_bufs;
|
||||
int *send_buf_caps;
|
||||
int *recv_buf_caps;
|
||||
MPI_Request *reqs;
|
||||
MPI_Status *stats;
|
||||
int max_reqs;
|
||||
SyncCache();
|
||||
void invalidate();
|
||||
void destroy();
|
||||
};
|
||||
|
||||
void Sync_cached(MyList<Patch> *PatL, MyList<var> *VarList, int Symmetry, SyncCache &cache);
|
||||
void transfer_cached(MyList<gridseg> **src, MyList<gridseg> **dst,
|
||||
MyList<var> *VarList1, MyList<var> *VarList2,
|
||||
int Symmetry, SyncCache &cache);
|
||||
|
||||
struct AsyncSyncState {
|
||||
int req_no;
|
||||
bool active;
|
||||
AsyncSyncState() : req_no(0), active(false) {}
|
||||
};
|
||||
|
||||
void Sync_start(MyList<Patch> *PatL, MyList<var> *VarList, int Symmetry,
|
||||
SyncCache &cache, AsyncSyncState &state);
|
||||
void Sync_finish(SyncCache &cache, AsyncSyncState &state,
|
||||
MyList<var> *VarList, int Symmetry);
|
||||
void OutBdLow2Hi(Patch *Patc, Patch *Patf,
|
||||
MyList<var> *VarList1 /* source */, MyList<var> *VarList2 /* target */,
|
||||
int Symmetry);
|
||||
|
||||
Reference in New Issue
Block a user