Unpack intermediate sync stages directly to GPU
This commit is contained in:
@@ -56,6 +56,11 @@ void bssn_class::Step_MainPath_GPU(int lev, int YN)
|
||||
|
||||
const bool BB = fgt(PhysTime, StartTime, dT_lev / 2);
|
||||
(void)BB;
|
||||
#if (MAPBH == 0)
|
||||
const bool need_host_stage_sync = (BH_num > 0 && lev == GH->levels - 1);
|
||||
#else
|
||||
const bool need_host_stage_sync = false;
|
||||
#endif
|
||||
double ndeps = (lev < GH->movls) ? numepsb : numepss;
|
||||
double TRK4 = PhysTime;
|
||||
int iter_count = 0;
|
||||
@@ -372,8 +377,8 @@ void bssn_class::Step_MainPath_GPU(int lev, int YN)
|
||||
|
||||
Parallel::AsyncSyncState async_pre;
|
||||
Parallel::Sync_start(GH->PatL[lev], SynchList_pre, Symmetry, sync_cache_pre[lev], async_pre);
|
||||
Parallel::Sync_finish(sync_cache_pre[lev], async_pre, SynchList_pre, Symmetry);
|
||||
if (!ERROR)
|
||||
Parallel::Sync_finish(sync_cache_pre[lev], async_pre, SynchList_pre, Symmetry, need_host_stage_sync);
|
||||
if (!ERROR && need_host_stage_sync)
|
||||
refresh_stage_device_after_sync(SynchList_pre, sync_cache_pre[lev]);
|
||||
|
||||
MPI_Wait(&err_req_pre, MPI_STATUS_IGNORE);
|
||||
@@ -465,8 +470,9 @@ void bssn_class::Step_MainPath_GPU(int lev, int YN)
|
||||
|
||||
Parallel::AsyncSyncState async_cor;
|
||||
Parallel::Sync_start(GH->PatL[lev], SynchList_cor, Symmetry, sync_cache_cor[lev], async_cor);
|
||||
Parallel::Sync_finish(sync_cache_cor[lev], async_cor, SynchList_cor, Symmetry);
|
||||
if (!ERROR && iter_count < 3)
|
||||
const bool unpack_cor_to_host = (iter_count == 3) || need_host_stage_sync;
|
||||
Parallel::Sync_finish(sync_cache_cor[lev], async_cor, SynchList_cor, Symmetry, unpack_cor_to_host);
|
||||
if (!ERROR && iter_count < 3 && unpack_cor_to_host)
|
||||
refresh_stage_device_after_sync(SynchList_cor, sync_cache_cor[lev]);
|
||||
|
||||
MPI_Wait(&err_req_cor, MPI_STATUS_IGNORE);
|
||||
|
||||
Reference in New Issue
Block a user