Batch patch-boundary copies and gate CPU boundary conditions (BC) in GPU substeps

This commit is contained in:
2026-04-13 11:40:06 +08:00
parent 4bdfc90f22
commit c5d1268dd1
2 changed files with 125 additions and 24 deletions

View File

@@ -95,7 +95,7 @@ void bssn_cuda_download_level_state(MyList<Patch> *PatL, MyList<var> *vars, int
while (BP)
{
Block *cg = BP->data;
if (myrank == cg->rank)
if (myrank == cg->rank && bssn_cuda_has_resident_state(cg))
{
double *state_out[BSSN_CUDA_STATE_COUNT];
if (!fill_bssn_cuda_views(cg, vars, state_out))
@@ -3301,18 +3301,21 @@ void bssn_class::Step(int lev, int YN)
cg->fgfs[varlrhs->data->sgfn],
iter_count);
#endif
if (!used_gpu_substep)
{
#ifndef WithShell
if (lev > 0) // fix BD point
if (lev > 0) // fix BD point
#endif
f_sommerfeld_rout(cg->shape, cg->X[0], cg->X[1], cg->X[2],
Pp->data->bbox[0], Pp->data->bbox[1], Pp->data->bbox[2],
Pp->data->bbox[3], Pp->data->bbox[4], Pp->data->bbox[5],
dT_lev,
cg->fgfs[phi0->sgfn],
cg->fgfs[Lap0->sgfn],
cg->fgfs[varl0->data->sgfn], cg->fgfs[varl->data->sgfn],
varl0->data->SoA,
Symmetry, cor);
f_sommerfeld_rout(cg->shape, cg->X[0], cg->X[1], cg->X[2],
Pp->data->bbox[0], Pp->data->bbox[1], Pp->data->bbox[2],
Pp->data->bbox[3], Pp->data->bbox[4], Pp->data->bbox[5],
dT_lev,
cg->fgfs[phi0->sgfn],
cg->fgfs[Lap0->sgfn],
cg->fgfs[varl0->data->sgfn], cg->fgfs[varl->data->sgfn],
varl0->data->SoA,
Symmetry, cor);
}
#if (SommerType == 1)
#warning "shell part still bam type"
@@ -3333,7 +3336,7 @@ void bssn_class::Step(int lev, int YN)
varlrhs = varlrhs->next;
}
}
if (!used_gpu_resident_state)
if (!used_gpu_substep)
f_lowerboundset(cg->shape, cg->fgfs[phi->sgfn], chitiny);
}
if (BP == Pp->data->ble)
@@ -3722,18 +3725,21 @@ void bssn_class::Step(int lev, int YN)
iter_count);
#endif
if (!used_gpu_substep)
{
#ifndef WithShell
if (lev > 0) // fix BD point
if (lev > 0) // fix BD point
#endif
f_sommerfeld_rout(cg->shape, cg->X[0], cg->X[1], cg->X[2],
Pp->data->bbox[0], Pp->data->bbox[1], Pp->data->bbox[2],
Pp->data->bbox[3], Pp->data->bbox[4], Pp->data->bbox[5],
dT_lev,
cg->fgfs[phi0->sgfn],
cg->fgfs[Lap0->sgfn],
cg->fgfs[varl0->data->sgfn], cg->fgfs[varl1->data->sgfn],
varl0->data->SoA,
Symmetry, cor);
f_sommerfeld_rout(cg->shape, cg->X[0], cg->X[1], cg->X[2],
Pp->data->bbox[0], Pp->data->bbox[1], Pp->data->bbox[2],
Pp->data->bbox[3], Pp->data->bbox[4], Pp->data->bbox[5],
dT_lev,
cg->fgfs[phi0->sgfn],
cg->fgfs[Lap0->sgfn],
cg->fgfs[varl0->data->sgfn], cg->fgfs[varl1->data->sgfn],
varl0->data->SoA,
Symmetry, cor);
}
#if (SommerType == 1)
if (lev == 1) // shibata type sommerfeld
@@ -3754,7 +3760,7 @@ void bssn_class::Step(int lev, int YN)
varlrhs = varlrhs->next;
}
}
if (!used_gpu_resident_state)
if (!used_gpu_substep)
f_lowerboundset(cg->shape, cg->fgfs[phi1->sgfn], chitiny);
}
if (BP == Pp->data->ble)
@@ -8636,4 +8642,3 @@ bool bssn_class::check_Stdin_Abort()
}
//================================================================================================