Stabilize EScalar CUDA sync defaults

This commit is contained in:
2026-05-03 00:24:50 +08:00
parent 74ba5feb86
commit 4430d04ee7
6 changed files with 243 additions and 20 deletions

View File

@@ -129,6 +129,17 @@ MyList<var> *clone_var_list_prefix(MyList<var> *src, int count)
return dst;
}
// Tells callers whether the AMSS_ESCALAR_GPU_RK environment switch is on.
//
// Returns true only when the variable is set and its leading decimal integer
// is non-zero; an unset, empty, or non-numeric value yields false.
//
// The environment is consulted once, on the first call. The answer is cached
// for the rest of the process, so later changes to the variable are ignored.
bool escalar_gpu_rk_enabled()
{
    // A const function-local static is initialized exactly once (and, since
    // C++11, safely even if the first calls race), which replaces the manual
    // "-1 means not yet read" sentinel.
    static const bool enabled = []
    {
        const char *env = getenv("AMSS_ESCALAR_GPU_RK");
        // strtol rather than atoi: atoi has undefined behavior when the text
        // does not fit in an int, whereas strtol saturates, so an oversized
        // value is still treated as "non-zero".
        return env != nullptr && strtol(env, nullptr, 10) != 0;
    }();
    return enabled;
}
void clear_var_list(MyList<var> *&list)
{
if (list)
@@ -175,6 +186,7 @@ int run_bssn_escalar_cuda_substep(Block *cg,
int &co,
double &chitiny,
var *Sphi_in, var *Spi_in,
var *Sphi_out, var *Spi_out,
var *Sphi_rhs, var *Spi_rhs,
var *rho, var *Sx, var *Sy, var *Sz,
var *Sxx, var *Sxy, var *Sxz,
@@ -220,6 +232,26 @@ int run_bssn_escalar_cuda_substep(Block *cg,
apply_bam_bc = (lev == 0) ? 1 : 0;
#endif
#endif
if (escalar_gpu_rk_enabled())
{
double scalar_propspeed[2] = {
Sphi_in->propspeed, Spi_in->propspeed
};
double scalar_soa[6] = {
Sphi_in->SoA[0], Sphi_in->SoA[1], Sphi_in->SoA[2],
Spi_in->SoA[0], Spi_in->SoA[1], Spi_in->SoA[2]
};
if (bssn_cuda_escalar_finalize_scalar_fields(cg,
cg->shape, cg->X[0], cg->X[1], cg->X[2],
cg->fgfs[Sphi_out->sgfn],
cg->fgfs[Spi_out->sgfn],
scalar_propspeed,
scalar_soa,
patch->bbox,
dT_lev, iter_count, apply_bam_bc,
Symmetry, lev, ndeps, co))
return 1;
}
int use_zero_matter = 0;
int keep_resident_state = 1;
double **matter_precomputed = nullptr;
@@ -1003,7 +1035,7 @@ void bssnEScalar_class::Step(int lev, int YN)
(run_bssn_escalar_cuda_substep(cg, StateList, SynchList_pre, Pp->data,
dT_lev, TRK4, iter_count, Symmetry, lev,
ndeps, pre, chitiny,
Sphi0, Spi0, Sphi_rhs, Spi_rhs,
Sphi0, Spi0, Sphi, Spi, Sphi_rhs, Spi_rhs,
rho, Sx, Sy, Sz, Sxx, Sxy, Sxz, Syy, Syz, Szz) == 0))
? 0
: 1) ||
@@ -1058,12 +1090,37 @@ void bssnEScalar_class::Step(int lev, int YN)
#if USE_CUDA_BSSN
if (used_gpu_substep)
skip_bssn_cuda_prefix(varl0, varl, varlrhs);
#endif
const bool scalar_gpu_rk_done =
#if USE_CUDA_BSSN
used_gpu_substep && escalar_gpu_rk_enabled();
#else
false;
#endif
while (varl0)
{
#ifndef WithShell
if (lev == 0) // sommerfeld indeed
f_sommerfeld_routbam(cg->shape, cg->X[0], cg->X[1], cg->X[2],
{
if (scalar_gpu_rk_done)
{
#ifndef WithShell
if (lev > 0) // fix BD point
#endif
f_sommerfeld_rout(cg->shape, cg->X[0], cg->X[1], cg->X[2],
Pp->data->bbox[0], Pp->data->bbox[1], Pp->data->bbox[2],
Pp->data->bbox[3], Pp->data->bbox[4], Pp->data->bbox[5],
dT_lev, cg->fgfs[phi0->sgfn],
cg->fgfs[Lap0->sgfn],
cg->fgfs[varl0->data->sgfn], cg->fgfs[varl->data->sgfn],
varl0->data->SoA,
Symmetry, cor);
varl0 = varl0->next;
varl = varl->next;
varlrhs = varlrhs->next;
continue;
}
#ifndef WithShell
if (lev == 0) // sommerfeld indeed
f_sommerfeld_routbam(cg->shape, cg->X[0], cg->X[1], cg->X[2],
Pp->data->bbox[0], Pp->data->bbox[1], Pp->data->bbox[2],
Pp->data->bbox[3], Pp->data->bbox[4], Pp->data->bbox[5],
cg->fgfs[varlrhs->data->sgfn],
@@ -1369,7 +1426,7 @@ void bssnEScalar_class::Step(int lev, int YN)
(run_bssn_escalar_cuda_substep(cg, SynchList_pre, SynchList_cor, Pp->data,
dT_lev, TRK4, iter_count, Symmetry, lev,
ndeps, cor, chitiny,
Sphi, Spi, Sphi_rhs, Spi_rhs,
Sphi, Spi, Sphi1, Spi1, Sphi_rhs, Spi_rhs,
rho, Sx, Sy, Sz, Sxx, Sxy, Sxz, Syy, Syz, Szz) == 0))
? 0
: 1) ||
@@ -1426,12 +1483,38 @@ void bssnEScalar_class::Step(int lev, int YN)
if (used_gpu_substep)
skip_bssn_cuda_prefix(varl0, varl, varl1, varlrhs);
#endif
const bool scalar_gpu_rk_done =
#if USE_CUDA_BSSN
used_gpu_substep && escalar_gpu_rk_enabled();
#else
false;
#endif
while (varl0)
{
#ifndef WithShell
if (lev == 0) // sommerfeld indeed
f_sommerfeld_routbam(cg->shape, cg->X[0], cg->X[1], cg->X[2],
{
if (scalar_gpu_rk_done)
{
#ifndef WithShell
if (lev > 0) // fix BD point
#endif
f_sommerfeld_rout(cg->shape, cg->X[0], cg->X[1], cg->X[2],
Pp->data->bbox[0], Pp->data->bbox[1], Pp->data->bbox[2],
Pp->data->bbox[3], Pp->data->bbox[4], Pp->data->bbox[5],
dT_lev, cg->fgfs[phi0->sgfn],
cg->fgfs[Lap0->sgfn],
cg->fgfs[varl0->data->sgfn], cg->fgfs[varl1->data->sgfn],
varl0->data->SoA,
Symmetry, cor);
varl0 = varl0->next;
varl = varl->next;
varl1 = varl1->next;
varlrhs = varlrhs->next;
continue;
}
#ifndef WithShell
if (lev == 0) // sommerfeld indeed
f_sommerfeld_routbam(cg->shape, cg->X[0], cg->X[1], cg->X[2],
Pp->data->bbox[0], Pp->data->bbox[1], Pp->data->bbox[2],
Pp->data->bbox[3], Pp->data->bbox[4], Pp->data->bbox[5],
cg->fgfs[varl1->data->sgfn],