Unpack intermediate sync stages directly to GPU

This commit is contained in:
2026-04-09 19:01:12 +08:00
parent 4484635f0d
commit 4463f1d23e
5 changed files with 195 additions and 27 deletions

View File

@@ -56,6 +56,11 @@ void bssn_class::Step_MainPath_GPU(int lev, int YN)
const bool BB = fgt(PhysTime, StartTime, dT_lev / 2);
(void)BB;
#if (MAPBH == 0)
const bool need_host_stage_sync = (BH_num > 0 && lev == GH->levels - 1);
#else
const bool need_host_stage_sync = false;
#endif
double ndeps = (lev < GH->movls) ? numepsb : numepss;
double TRK4 = PhysTime;
int iter_count = 0;
@@ -372,8 +377,8 @@ void bssn_class::Step_MainPath_GPU(int lev, int YN)
Parallel::AsyncSyncState async_pre;
Parallel::Sync_start(GH->PatL[lev], SynchList_pre, Symmetry, sync_cache_pre[lev], async_pre);
Parallel::Sync_finish(sync_cache_pre[lev], async_pre, SynchList_pre, Symmetry);
if (!ERROR)
Parallel::Sync_finish(sync_cache_pre[lev], async_pre, SynchList_pre, Symmetry, need_host_stage_sync);
if (!ERROR && need_host_stage_sync)
refresh_stage_device_after_sync(SynchList_pre, sync_cache_pre[lev]);
MPI_Wait(&err_req_pre, MPI_STATUS_IGNORE);
@@ -465,8 +470,9 @@ void bssn_class::Step_MainPath_GPU(int lev, int YN)
Parallel::AsyncSyncState async_cor;
Parallel::Sync_start(GH->PatL[lev], SynchList_cor, Symmetry, sync_cache_cor[lev], async_cor);
Parallel::Sync_finish(sync_cache_cor[lev], async_cor, SynchList_cor, Symmetry);
if (!ERROR && iter_count < 3)
const bool unpack_cor_to_host = (iter_count == 3) || need_host_stage_sync;
Parallel::Sync_finish(sync_cache_cor[lev], async_cor, SynchList_cor, Symmetry, unpack_cor_to_host);
if (!ERROR && iter_count < 3 && unpack_cor_to_host)
refresh_stage_device_after_sync(SynchList_cor, sync_cache_cor[lev]);
MPI_Wait(&err_req_cor, MPI_STATUS_IGNORE);