diff --git a/AMSS_NCKU_source/Parallel.C b/AMSS_NCKU_source/Parallel.C index d90cdeb..0cd50a2 100644 --- a/AMSS_NCKU_source/Parallel.C +++ b/AMSS_NCKU_source/Parallel.C @@ -3853,7 +3853,8 @@ void Parallel::Sync_merged(MyList *PatL, MyList *VarList, int Symmet Parallel::SyncCache::SyncCache() : valid(false), cpusize(0), combined_src(0), combined_dst(0), send_lengths(0), recv_lengths(0), send_bufs(0), recv_bufs(0), - send_buf_caps(0), recv_buf_caps(0), reqs(0), stats(0), max_reqs(0) + send_buf_caps(0), recv_buf_caps(0), reqs(0), stats(0), max_reqs(0), + lengths_valid(false) { } // SyncCache invalidate: free grid segment lists but keep buffers @@ -3871,6 +3872,7 @@ void Parallel::SyncCache::invalidate() send_lengths[i] = recv_lengths[i] = 0; } valid = false; + lengths_valid = false; } // SyncCache destroy: free everything void Parallel::SyncCache::destroy() @@ -4172,8 +4174,13 @@ void Parallel::Sync_start(MyList *PatL, MyList *VarList, int Symmetr { if (node == myrank) { - int length = data_packer(0, src[myrank], dst[myrank], node, PACK, VarList, VarList, Symmetry); - cache.recv_lengths[node] = length; + int length; + if (!cache.lengths_valid) { + length = data_packer(0, src[myrank], dst[myrank], node, PACK, VarList, VarList, Symmetry); + cache.recv_lengths[node] = length; + } else { + length = cache.recv_lengths[node]; + } if (length > 0) { if (length > cache.recv_buf_caps[node]) @@ -4187,8 +4194,13 @@ void Parallel::Sync_start(MyList *PatL, MyList *VarList, int Symmetr } else { - int slength = data_packer(0, src[myrank], dst[myrank], node, PACK, VarList, VarList, Symmetry); - cache.send_lengths[node] = slength; + int slength; + if (!cache.lengths_valid) { + slength = data_packer(0, src[myrank], dst[myrank], node, PACK, VarList, VarList, Symmetry); + cache.send_lengths[node] = slength; + } else { + slength = cache.send_lengths[node]; + } if (slength > 0) { if (slength > cache.send_buf_caps[node]) @@ -4200,8 +4212,13 @@ void Parallel::Sync_start(MyList *PatL, MyList *VarList, int Symmetr data_packer(cache.send_bufs[node], src[myrank], dst[myrank], node, PACK, VarList, VarList, Symmetry); MPI_Isend((void *)cache.send_bufs[node], slength, MPI_DOUBLE, node, 2, MPI_COMM_WORLD, cache.reqs + state.req_no++); } - int rlength = data_packer(0, src[node], dst[node], node, UNPACK, VarList, VarList, Symmetry); - cache.recv_lengths[node] = rlength; + int rlength; + if (!cache.lengths_valid) { + rlength = data_packer(0, src[node], dst[node], node, UNPACK, VarList, VarList, Symmetry); + cache.recv_lengths[node] = rlength; + } else { + rlength = cache.recv_lengths[node]; + } if (rlength > 0) { if (rlength > cache.recv_buf_caps[node]) @@ -4214,6 +4231,7 @@ void Parallel::Sync_start(MyList *PatL, MyList *VarList, int Symmetr } } } + cache.lengths_valid = true; } // Sync_finish: wait for async MPI operations and unpack void Parallel::Sync_finish(SyncCache &cache, AsyncSyncState &state, diff --git a/AMSS_NCKU_source/Parallel.h b/AMSS_NCKU_source/Parallel.h index 7935727..6ab22af 100644 --- a/AMSS_NCKU_source/Parallel.h +++ b/AMSS_NCKU_source/Parallel.h @@ -97,6 +97,7 @@ namespace Parallel MPI_Request *reqs; MPI_Status *stats; int max_reqs; + bool lengths_valid; SyncCache(); void invalidate(); void destroy();