Refresh synced GPU regions incrementally

This commit is contained in:
2026-04-09 17:07:31 +08:00
parent 5b00d49070
commit 3b16795e78
3 changed files with 183 additions and 3 deletions

View File

@@ -138,6 +138,111 @@ void bssn_class::Step_MainPath_GPU(int lev, int YN)
}
};
auto ensure_stage_device_var_list =
[&](Block *cg, MyList<var> *var_list) {
const int n = cg->shape[0] * cg->shape[1] * cg->shape[2];
while (var_list)
{
double *host_ptr = cg->fgfs[var_list->data->sgfn];
if (!bssn_gpu_find_device_buffer(host_ptr) &&
bssn_gpu_stage_upload_buffer(host_ptr, n))
{
cerr << "GPU state ensure failure: lev=" << lev
<< " var=" << var_list->data->name
<< " bbox=(" << cg->bbox[0] << ":" << cg->bbox[3] << ","
<< cg->bbox[1] << ":" << cg->bbox[4] << ","
<< cg->bbox[2] << ":" << cg->bbox[5] << ")" << endl;
ERROR = 1;
break;
}
var_list = var_list->next;
}
};
auto refresh_synced_device_regions =
[&](Block *cg, MyList<var> *var_list, Parallel::SyncCache &cache) {
std::vector<Parallel::gridseg *> local_segments;
for (int node = 0; node < cache.cpusize; ++node)
{
MyList<Parallel::gridseg> *seg = cache.combined_dst[node];
while (seg)
{
if (seg->data && seg->data->Bg == cg)
local_segments.push_back(seg->data);
seg = seg->next;
}
}
if (local_segments.empty())
return;
const int n = cg->shape[0] * cg->shape[1] * cg->shape[2];
while (var_list)
{
double *host_ptr = cg->fgfs[var_list->data->sgfn];
if (!bssn_gpu_find_device_buffer(host_ptr))
{
if (bssn_gpu_stage_upload_buffer(host_ptr, n))
{
cerr << "GPU sync refresh upload failure: lev=" << lev
<< " var=" << var_list->data->name
<< " bbox=(" << cg->bbox[0] << ":" << cg->bbox[3] << ","
<< cg->bbox[1] << ":" << cg->bbox[4] << ","
<< cg->bbox[2] << ":" << cg->bbox[5] << ")" << endl;
ERROR = 1;
break;
}
}
else
{
for (size_t i = 0; i < local_segments.size(); ++i)
{
Parallel::gridseg *seg = local_segments[i];
if (bssn_gpu_stage_upload_region(host_ptr,
cg->shape,
cg->bbox,
cg->bbox + dim,
seg->shape,
seg->llb))
{
cerr << "GPU sync region refresh failure: lev=" << lev
<< " var=" << var_list->data->name
<< " bbox=(" << cg->bbox[0] << ":" << cg->bbox[3] << ","
<< cg->bbox[1] << ":" << cg->bbox[4] << ","
<< cg->bbox[2] << ":" << cg->bbox[5] << ")" << endl;
ERROR = 1;
break;
}
}
if (ERROR)
break;
}
var_list = var_list->next;
}
};
auto refresh_stage_device_after_sync =
[&](MyList<var> *var_list, Parallel::SyncCache &cache) {
MyList<Patch> *patch_it = GH->PatL[lev];
while (patch_it)
{
MyList<Block> *block_it = patch_it->data->blb;
while (block_it)
{
Block *cg = block_it->data;
if (myrank == cg->rank)
refresh_synced_device_regions(cg, var_list, cache);
if (block_it == patch_it->data->ble)
break;
block_it = block_it->next;
}
if (ERROR)
break;
patch_it = patch_it->next;
}
};
MyList<Patch> *Pp = GH->PatL[lev];
while (Pp)
{
@@ -182,7 +287,8 @@ void bssn_class::Step_MainPath_GPU(int lev, int YN)
Parallel::AsyncSyncState async_pre;
Parallel::Sync_start(GH->PatL[lev], SynchList_pre, Symmetry, sync_cache_pre[lev], async_pre);
Parallel::Sync_finish(sync_cache_pre[lev], async_pre, SynchList_pre, Symmetry);
bssn_gpu_clear_cached_device_buffers();
if (!ERROR)
refresh_stage_device_after_sync(SynchList_pre, sync_cache_pre[lev]);
MPI_Wait(&err_req_pre, MPI_STATUS_IGNORE);
if (ERROR)
@@ -234,7 +340,7 @@ void bssn_class::Step_MainPath_GPU(int lev, int YN)
Block *cg = BP->data;
if (myrank == cg->rank)
{
stage_upload_var_list(cg, SynchList_pre);
ensure_stage_device_var_list(cg, SynchList_pre);
if (gpu_rhs(CALLED_BY_STEP, myrank, RHS_PARA_CALLED_THEN))
ERROR = 1;
@@ -270,7 +376,8 @@ void bssn_class::Step_MainPath_GPU(int lev, int YN)
Parallel::AsyncSyncState async_cor;
Parallel::Sync_start(GH->PatL[lev], SynchList_cor, Symmetry, sync_cache_cor[lev], async_cor);
Parallel::Sync_finish(sync_cache_cor[lev], async_cor, SynchList_cor, Symmetry);
bssn_gpu_clear_cached_device_buffers();
if (!ERROR && iter_count < 3)
refresh_stage_device_after_sync(SynchList_cor, sync_cache_cor[lev]);
MPI_Wait(&err_req_cor, MPI_STATUS_IGNORE);
if (ERROR)