From e09ae438a2434a2b77fbf1d5cecfd537160e2fa2 Mon Sep 17 00:00:00 2001 From: CGH0S7 <776459475@qq.com> Date: Tue, 10 Feb 2026 21:39:22 +0800 Subject: [PATCH] Cache data_packer lengths in Sync_start to skip redundant buffer-size traversals MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The data_packer(NULL, ...) calls that compute send/recv buffer lengths traverse all grid segments × variables × nprocs on every Sync_start invocation, even though lengths never change once the cache is built. Add a lengths_valid flag to SyncCache so these length computations are done once and reused on subsequent calls (4× per RK4 step). Co-Authored-By: Claude Opus 4.6 (1M context) --- AMSS_NCKU_source/Parallel.C | 32 +++++++++++++++++++++++++------- AMSS_NCKU_source/Parallel.h | 1 + 2 files changed, 26 insertions(+), 7 deletions(-) diff --git a/AMSS_NCKU_source/Parallel.C b/AMSS_NCKU_source/Parallel.C index d90cdeb..0cd50a2 100644 --- a/AMSS_NCKU_source/Parallel.C +++ b/AMSS_NCKU_source/Parallel.C @@ -3853,7 +3853,8 @@ void Parallel::Sync_merged(MyList *PatL, MyList *VarList, int Symmet Parallel::SyncCache::SyncCache() : valid(false), cpusize(0), combined_src(0), combined_dst(0), send_lengths(0), recv_lengths(0), send_bufs(0), recv_bufs(0), - send_buf_caps(0), recv_buf_caps(0), reqs(0), stats(0), max_reqs(0) + send_buf_caps(0), recv_buf_caps(0), reqs(0), stats(0), max_reqs(0), + lengths_valid(false) { } // SyncCache invalidate: free grid segment lists but keep buffers @@ -3871,6 +3872,7 @@ void Parallel::SyncCache::invalidate() send_lengths[i] = recv_lengths[i] = 0; } valid = false; + lengths_valid = false; } // SyncCache destroy: free everything void Parallel::SyncCache::destroy() @@ -4172,8 +4174,13 @@ void Parallel::Sync_start(MyList *PatL, MyList *VarList, int Symmetr { if (node == myrank) { - int length = data_packer(0, src[myrank], dst[myrank], node, PACK, VarList, VarList, Symmetry); - cache.recv_lengths[node] = length; + int length; + if (!cache.lengths_valid) { + length = data_packer(0, src[myrank], dst[myrank], node, PACK, VarList, VarList, Symmetry); + cache.recv_lengths[node] = length; + } else { + length = cache.recv_lengths[node]; + } if (length > 0) { if (length > cache.recv_buf_caps[node]) @@ -4187,8 +4194,13 @@ void Parallel::Sync_start(MyList *PatL, MyList *VarList, int Symmetr } else { - int slength = data_packer(0, src[myrank], dst[myrank], node, PACK, VarList, VarList, Symmetry); - cache.send_lengths[node] = slength; + int slength; + if (!cache.lengths_valid) { + slength = data_packer(0, src[myrank], dst[myrank], node, PACK, VarList, VarList, Symmetry); + cache.send_lengths[node] = slength; + } else { + slength = cache.send_lengths[node]; + } if (slength > 0) { if (slength > cache.send_buf_caps[node]) @@ -4200,8 +4212,13 @@ void Parallel::Sync_start(MyList *PatL, MyList *VarList, int Symmetr data_packer(cache.send_bufs[node], src[myrank], dst[myrank], node, PACK, VarList, VarList, Symmetry); MPI_Isend((void *)cache.send_bufs[node], slength, MPI_DOUBLE, node, 2, MPI_COMM_WORLD, cache.reqs + state.req_no++); } - int rlength = data_packer(0, src[node], dst[node], node, UNPACK, VarList, VarList, Symmetry); - cache.recv_lengths[node] = rlength; + int rlength; + if (!cache.lengths_valid) { + rlength = data_packer(0, src[node], dst[node], node, UNPACK, VarList, VarList, Symmetry); + cache.recv_lengths[node] = rlength; + } else { + rlength = cache.recv_lengths[node]; + } if (rlength > 0) { if (rlength > cache.recv_buf_caps[node]) @@ -4214,6 +4231,7 @@ void Parallel::Sync_start(MyList *PatL, MyList *VarList, int Symmetr } } } + cache.lengths_valid = true; } // Sync_finish: wait for async MPI operations and unpack void Parallel::Sync_finish(SyncCache &cache, AsyncSyncState &state, diff --git a/AMSS_NCKU_source/Parallel.h b/AMSS_NCKU_source/Parallel.h index 7935727..6ab22af 100644 --- a/AMSS_NCKU_source/Parallel.h +++ b/AMSS_NCKU_source/Parallel.h @@ -97,6 +97,7 @@ namespace Parallel MPI_Request *reqs; MPI_Status *stats; int max_reqs; + bool lengths_valid; SyncCache(); void invalidate(); void destroy();