Cache data_packer lengths in Sync_start to skip redundant buffer-size traversals
The data_packer(NULL, ...) calls that compute send/recv buffer lengths traverse all grid segments × variables × nprocs on every Sync_start invocation, even though the lengths do not change once the cache is built. Add a lengths_valid flag to SyncCache so that these lengths are computed once and reused on subsequent calls (4× per RK4 step). The flag is cleared in SyncCache::invalidate(), so the lengths are recomputed on the first Sync_start after the cache is invalidated.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -3853,7 +3853,8 @@ void Parallel::Sync_merged(MyList<Patch> *PatL, MyList<var> *VarList, int Symmet
|
|||||||
Parallel::SyncCache::SyncCache()
|
Parallel::SyncCache::SyncCache()
|
||||||
: valid(false), cpusize(0), combined_src(0), combined_dst(0),
|
: valid(false), cpusize(0), combined_src(0), combined_dst(0),
|
||||||
send_lengths(0), recv_lengths(0), send_bufs(0), recv_bufs(0),
|
send_lengths(0), recv_lengths(0), send_bufs(0), recv_bufs(0),
|
||||||
send_buf_caps(0), recv_buf_caps(0), reqs(0), stats(0), max_reqs(0)
|
send_buf_caps(0), recv_buf_caps(0), reqs(0), stats(0), max_reqs(0),
|
||||||
|
lengths_valid(false)
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
// SyncCache invalidate: free grid segment lists but keep buffers
|
// SyncCache invalidate: free grid segment lists but keep buffers
|
||||||
@@ -3871,6 +3872,7 @@ void Parallel::SyncCache::invalidate()
|
|||||||
send_lengths[i] = recv_lengths[i] = 0;
|
send_lengths[i] = recv_lengths[i] = 0;
|
||||||
}
|
}
|
||||||
valid = false;
|
valid = false;
|
||||||
|
lengths_valid = false;
|
||||||
}
|
}
|
||||||
// SyncCache destroy: free everything
|
// SyncCache destroy: free everything
|
||||||
void Parallel::SyncCache::destroy()
|
void Parallel::SyncCache::destroy()
|
||||||
@@ -4172,8 +4174,13 @@ void Parallel::Sync_start(MyList<Patch> *PatL, MyList<var> *VarList, int Symmetr
|
|||||||
{
|
{
|
||||||
if (node == myrank)
|
if (node == myrank)
|
||||||
{
|
{
|
||||||
int length = data_packer(0, src[myrank], dst[myrank], node, PACK, VarList, VarList, Symmetry);
|
int length;
|
||||||
cache.recv_lengths[node] = length;
|
if (!cache.lengths_valid) {
|
||||||
|
length = data_packer(0, src[myrank], dst[myrank], node, PACK, VarList, VarList, Symmetry);
|
||||||
|
cache.recv_lengths[node] = length;
|
||||||
|
} else {
|
||||||
|
length = cache.recv_lengths[node];
|
||||||
|
}
|
||||||
if (length > 0)
|
if (length > 0)
|
||||||
{
|
{
|
||||||
if (length > cache.recv_buf_caps[node])
|
if (length > cache.recv_buf_caps[node])
|
||||||
@@ -4187,8 +4194,13 @@ void Parallel::Sync_start(MyList<Patch> *PatL, MyList<var> *VarList, int Symmetr
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
int slength = data_packer(0, src[myrank], dst[myrank], node, PACK, VarList, VarList, Symmetry);
|
int slength;
|
||||||
cache.send_lengths[node] = slength;
|
if (!cache.lengths_valid) {
|
||||||
|
slength = data_packer(0, src[myrank], dst[myrank], node, PACK, VarList, VarList, Symmetry);
|
||||||
|
cache.send_lengths[node] = slength;
|
||||||
|
} else {
|
||||||
|
slength = cache.send_lengths[node];
|
||||||
|
}
|
||||||
if (slength > 0)
|
if (slength > 0)
|
||||||
{
|
{
|
||||||
if (slength > cache.send_buf_caps[node])
|
if (slength > cache.send_buf_caps[node])
|
||||||
@@ -4200,8 +4212,13 @@ void Parallel::Sync_start(MyList<Patch> *PatL, MyList<var> *VarList, int Symmetr
|
|||||||
data_packer(cache.send_bufs[node], src[myrank], dst[myrank], node, PACK, VarList, VarList, Symmetry);
|
data_packer(cache.send_bufs[node], src[myrank], dst[myrank], node, PACK, VarList, VarList, Symmetry);
|
||||||
MPI_Isend((void *)cache.send_bufs[node], slength, MPI_DOUBLE, node, 2, MPI_COMM_WORLD, cache.reqs + state.req_no++);
|
MPI_Isend((void *)cache.send_bufs[node], slength, MPI_DOUBLE, node, 2, MPI_COMM_WORLD, cache.reqs + state.req_no++);
|
||||||
}
|
}
|
||||||
int rlength = data_packer(0, src[node], dst[node], node, UNPACK, VarList, VarList, Symmetry);
|
int rlength;
|
||||||
cache.recv_lengths[node] = rlength;
|
if (!cache.lengths_valid) {
|
||||||
|
rlength = data_packer(0, src[node], dst[node], node, UNPACK, VarList, VarList, Symmetry);
|
||||||
|
cache.recv_lengths[node] = rlength;
|
||||||
|
} else {
|
||||||
|
rlength = cache.recv_lengths[node];
|
||||||
|
}
|
||||||
if (rlength > 0)
|
if (rlength > 0)
|
||||||
{
|
{
|
||||||
if (rlength > cache.recv_buf_caps[node])
|
if (rlength > cache.recv_buf_caps[node])
|
||||||
@@ -4214,6 +4231,7 @@ void Parallel::Sync_start(MyList<Patch> *PatL, MyList<var> *VarList, int Symmetr
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
cache.lengths_valid = true;
|
||||||
}
|
}
|
||||||
// Sync_finish: wait for async MPI operations and unpack
|
// Sync_finish: wait for async MPI operations and unpack
|
||||||
void Parallel::Sync_finish(SyncCache &cache, AsyncSyncState &state,
|
void Parallel::Sync_finish(SyncCache &cache, AsyncSyncState &state,
|
||||||
|
|||||||
@@ -97,6 +97,7 @@ namespace Parallel
|
|||||||
MPI_Request *reqs;
|
MPI_Request *reqs;
|
||||||
MPI_Status *stats;
|
MPI_Status *stats;
|
||||||
int max_reqs;
|
int max_reqs;
|
||||||
|
bool lengths_valid;
|
||||||
SyncCache();
|
SyncCache();
|
||||||
void invalidate();
|
void invalidate();
|
||||||
void destroy();
|
void destroy();
|
||||||
|
|||||||
Reference in New Issue
Block a user