Reduce staged GPU host-device copies
This commit is contained in:
@@ -83,7 +83,7 @@ void bssn_class::Step_MainPath_GPU(int lev, int YN)
|
||||
cg->fgfs[varlr->data->sgfn],
|
||||
varl0->data->propspeed,
|
||||
varl0->data->SoA,
|
||||
Symmetry, lev, rk_stage))
|
||||
Symmetry, lev, rk_stage, false))
|
||||
{
|
||||
cerr << "GPU rk4/boundary failure: lev=" << lev
|
||||
<< " rk_stage=" << rk_stage
|
||||
@@ -101,6 +101,43 @@ void bssn_class::Step_MainPath_GPU(int lev, int YN)
|
||||
}
|
||||
};
|
||||
|
||||
auto stage_download_var_list =
|
||||
[&](Block *cg, MyList<var> *var_list) {
|
||||
while (var_list)
|
||||
{
|
||||
if (bssn_cuda_download_buffer(cg->shape, cg->fgfs[var_list->data->sgfn]))
|
||||
{
|
||||
cerr << "GPU stage download failure: lev=" << lev
|
||||
<< " var=" << var_list->data->name
|
||||
<< " bbox=(" << cg->bbox[0] << ":" << cg->bbox[3] << ","
|
||||
<< cg->bbox[1] << ":" << cg->bbox[4] << ","
|
||||
<< cg->bbox[2] << ":" << cg->bbox[5] << ")" << endl;
|
||||
ERROR = 1;
|
||||
break;
|
||||
}
|
||||
var_list = var_list->next;
|
||||
}
|
||||
};
|
||||
|
||||
auto stage_upload_var_list =
|
||||
[&](Block *cg, MyList<var> *var_list) {
|
||||
const int n = cg->shape[0] * cg->shape[1] * cg->shape[2];
|
||||
while (var_list)
|
||||
{
|
||||
if (bssn_gpu_stage_upload_buffer(cg->fgfs[var_list->data->sgfn], n))
|
||||
{
|
||||
cerr << "GPU state upload failure: lev=" << lev
|
||||
<< " var=" << var_list->data->name
|
||||
<< " bbox=(" << cg->bbox[0] << ":" << cg->bbox[3] << ","
|
||||
<< cg->bbox[1] << ":" << cg->bbox[4] << ","
|
||||
<< cg->bbox[2] << ":" << cg->bbox[5] << ")" << endl;
|
||||
ERROR = 1;
|
||||
break;
|
||||
}
|
||||
var_list = var_list->next;
|
||||
}
|
||||
};
|
||||
|
||||
MyList<Patch> *Pp = GH->PatL[lev];
|
||||
while (Pp)
|
||||
{
|
||||
@@ -110,12 +147,13 @@ void bssn_class::Step_MainPath_GPU(int lev, int YN)
|
||||
Block *cg = BP->data;
|
||||
if (myrank == cg->rank)
|
||||
{
|
||||
stage_upload_var_list(cg, StateList);
|
||||
if (gpu_rhs(CALLED_BY_STEP, myrank, RHS_PARA_CALLED_FIRST_TIME))
|
||||
ERROR = 1;
|
||||
|
||||
run_stage_on_block(cg, Pp->data, StateList, StateList, SynchList_pre, RHSList, iter_count);
|
||||
|
||||
if (bssn_cuda_lowerbound(cg->shape, cg->fgfs[phi->sgfn], chitiny))
|
||||
if (bssn_cuda_lowerbound(cg->shape, cg->fgfs[phi->sgfn], chitiny, false))
|
||||
{
|
||||
cerr << "GPU lowerbound failure: lev=" << lev
|
||||
<< " rk_stage=" << iter_count
|
||||
@@ -125,6 +163,8 @@ void bssn_class::Step_MainPath_GPU(int lev, int YN)
|
||||
<< cg->bbox[2] << ":" << cg->bbox[5] << ")" << endl;
|
||||
ERROR = 1;
|
||||
}
|
||||
if (!ERROR)
|
||||
stage_download_var_list(cg, SynchList_pre);
|
||||
}
|
||||
if (BP == Pp->data->ble)
|
||||
break;
|
||||
@@ -194,12 +234,13 @@ void bssn_class::Step_MainPath_GPU(int lev, int YN)
|
||||
Block *cg = BP->data;
|
||||
if (myrank == cg->rank)
|
||||
{
|
||||
stage_upload_var_list(cg, SynchList_pre);
|
||||
if (gpu_rhs(CALLED_BY_STEP, myrank, RHS_PARA_CALLED_THEN))
|
||||
ERROR = 1;
|
||||
|
||||
run_stage_on_block(cg, Pp->data, StateList, SynchList_pre, SynchList_cor, RHSList, iter_count);
|
||||
|
||||
if (bssn_cuda_lowerbound(cg->shape, cg->fgfs[phi1->sgfn], chitiny))
|
||||
if (bssn_cuda_lowerbound(cg->shape, cg->fgfs[phi1->sgfn], chitiny, false))
|
||||
{
|
||||
cerr << "GPU lowerbound failure: lev=" << lev
|
||||
<< " rk_stage=" << iter_count
|
||||
@@ -209,6 +250,8 @@ void bssn_class::Step_MainPath_GPU(int lev, int YN)
|
||||
<< cg->bbox[2] << ":" << cg->bbox[5] << ")" << endl;
|
||||
ERROR = 1;
|
||||
}
|
||||
if (!ERROR)
|
||||
stage_download_var_list(cg, SynchList_cor);
|
||||
}
|
||||
|
||||
if (BP == Pp->data->ble)
|
||||
|
||||
Reference in New Issue
Block a user