diff --git a/AMSS_NCKU_source/Parallel.C b/AMSS_NCKU_source/Parallel.C index 4e5e4ec..b87ce6d 100644 --- a/AMSS_NCKU_source/Parallel.C +++ b/AMSS_NCKU_source/Parallel.C @@ -4320,7 +4320,7 @@ Parallel::SyncCache::SyncCache() : valid(false), cpusize(0), combined_src(0), combined_dst(0), send_lengths(0), recv_lengths(0), send_bufs(0), recv_bufs(0), send_buf_caps(0), recv_buf_caps(0), reqs(0), stats(0), max_reqs(0), - lengths_valid(false) + lengths_valid(false), tc_req_node(0), tc_req_is_recv(0), tc_completed(0) { } // SyncCache invalidate: free grid segment lists but keep buffers @@ -4359,11 +4359,15 @@ void Parallel::SyncCache::destroy() if (recv_bufs) delete[] recv_bufs; if (reqs) delete[] reqs; if (stats) delete[] stats; + if (tc_req_node) delete[] tc_req_node; + if (tc_req_is_recv) delete[] tc_req_is_recv; + if (tc_completed) delete[] tc_completed; combined_src = combined_dst = 0; send_lengths = recv_lengths = 0; send_buf_caps = recv_buf_caps = 0; send_bufs = recv_bufs = 0; reqs = 0; stats = 0; + tc_req_node = 0; tc_req_is_recv = 0; tc_completed = 0; cpusize = 0; max_reqs = 0; } // transfer_cached: reuse pre-allocated buffers from SyncCache @@ -4379,9 +4383,9 @@ void Parallel::transfer_cached(MyList **src, MyList **src, MyList 0) data_packer(cache.recv_bufs[myrank], src[myrank], dst[myrank], myrank, UNPACK, VarList1, VarList2, Symmetry); - - delete[] req_node; - delete[] req_is_recv; - delete[] completed; } void Parallel::Sync_cached(MyList *PatL, MyList *VarList, int Symmetry, SyncCache &cache) { @@ -4498,6 +4498,9 @@ void Parallel::Sync_cached(MyList *PatL, MyList *VarList, int Symmet cache.max_reqs = 2 * cpusize; cache.reqs = new MPI_Request[cache.max_reqs]; cache.stats = new MPI_Status[cache.max_reqs]; + cache.tc_req_node = new int[cache.max_reqs]; + cache.tc_req_is_recv = new int[cache.max_reqs]; + cache.tc_completed = new int[cache.max_reqs]; } for (int node = 0; node < cpusize; node++) @@ -4598,6 +4601,9 @@ void Parallel::Sync_start(MyList *PatL, MyList *VarList, int Symmetr cache.max_reqs = 2 * cpusize; cache.reqs = new MPI_Request[cache.max_reqs]; cache.stats = new MPI_Status[cache.max_reqs]; + cache.tc_req_node = new int[cache.max_reqs]; + cache.tc_req_is_recv = new int[cache.max_reqs]; + cache.tc_completed = new int[cache.max_reqs]; } for (int node = 0; node < cpusize; node++) @@ -5856,6 +5862,9 @@ void Parallel::Restrict_cached(MyList *PatcL, MyList *PatfL, cache.max_reqs = 2 * cpusize; cache.reqs = new MPI_Request[cache.max_reqs]; cache.stats = new MPI_Status[cache.max_reqs]; + cache.tc_req_node = new int[cache.max_reqs]; + cache.tc_req_is_recv = new int[cache.max_reqs]; + cache.tc_completed = new int[cache.max_reqs]; } MyList *dst = build_complete_gsl(PatcL); @@ -5902,6 +5911,9 @@ void Parallel::OutBdLow2Hi_cached(MyList *PatcL, MyList *PatfL, cache.max_reqs = 2 * cpusize; cache.reqs = new MPI_Request[cache.max_reqs]; cache.stats = new MPI_Status[cache.max_reqs]; + cache.tc_req_node = new int[cache.max_reqs]; + cache.tc_req_is_recv = new int[cache.max_reqs]; + cache.tc_completed = new int[cache.max_reqs]; } MyList *dst = build_buffer_gsl(PatfL); @@ -5948,6 +5960,9 @@ void Parallel::OutBdLow2Himix_cached(MyList *PatcL, MyList *PatfL, cache.max_reqs = 2 * cpusize; cache.reqs = new MPI_Request[cache.max_reqs]; cache.stats = new MPI_Status[cache.max_reqs]; + cache.tc_req_node = new int[cache.max_reqs]; + cache.tc_req_is_recv = new int[cache.max_reqs]; + cache.tc_completed = new int[cache.max_reqs]; } MyList *dst = build_buffer_gsl(PatfL); diff --git a/AMSS_NCKU_source/Parallel.h b/AMSS_NCKU_source/Parallel.h index 5a72797..0ab975c 100644 --- a/AMSS_NCKU_source/Parallel.h +++ b/AMSS_NCKU_source/Parallel.h @@ -108,6 +108,9 @@ namespace Parallel MPI_Status *stats; int max_reqs; bool lengths_valid; + int *tc_req_node; + int *tc_req_is_recv; + int *tc_completed; SyncCache(); void invalidate(); void destroy();