Stabilize EScalar CUDA sync defaults
This commit is contained in:
@@ -129,6 +129,17 @@ MyList<var> *clone_var_list_prefix(MyList<var> *src, int count)
|
||||
return dst;
|
||||
}
|
||||
|
||||
/**
 * Reports whether the AMSS_ESCALAR_GPU_RK environment switch is turned on.
 *
 * The environment variable is read once, on the first call; the result is
 * cached for the rest of the process, so later changes to the environment
 * have no effect on the returned value.
 *
 * @return true when AMSS_ESCALAR_GPU_RK is set and its leading decimal
 *         integer is non-zero; false when it is unset, empty, non-numeric,
 *         or zero.
 */
bool escalar_gpu_rk_enabled()
{
  // A function-local static is initialised exactly once (and thread-safely
  // since C++11), which replaces the hand-rolled "-1 == not read yet" flag.
  static const bool enabled = []() -> bool
  {
    const char *env = getenv("AMSS_ESCALAR_GPU_RK");
    // strtol rather than atoi: atoi has undefined behaviour when the text
    // does not fit in an int, whereas strtol saturates to LONG_MIN/LONG_MAX.
    return env != nullptr && strtol(env, nullptr, 10) != 0;
  }();
  return enabled;
}
|
||||
|
||||
void clear_var_list(MyList<var> *&list)
|
||||
{
|
||||
if (list)
|
||||
@@ -175,6 +186,7 @@ int run_bssn_escalar_cuda_substep(Block *cg,
|
||||
int &co,
|
||||
double &chitiny,
|
||||
var *Sphi_in, var *Spi_in,
|
||||
var *Sphi_out, var *Spi_out,
|
||||
var *Sphi_rhs, var *Spi_rhs,
|
||||
var *rho, var *Sx, var *Sy, var *Sz,
|
||||
var *Sxx, var *Sxy, var *Sxz,
|
||||
@@ -220,6 +232,26 @@ int run_bssn_escalar_cuda_substep(Block *cg,
|
||||
apply_bam_bc = (lev == 0) ? 1 : 0;
|
||||
#endif
|
||||
#endif
|
||||
if (escalar_gpu_rk_enabled())
|
||||
{
|
||||
double scalar_propspeed[2] = {
|
||||
Sphi_in->propspeed, Spi_in->propspeed
|
||||
};
|
||||
double scalar_soa[6] = {
|
||||
Sphi_in->SoA[0], Sphi_in->SoA[1], Sphi_in->SoA[2],
|
||||
Spi_in->SoA[0], Spi_in->SoA[1], Spi_in->SoA[2]
|
||||
};
|
||||
if (bssn_cuda_escalar_finalize_scalar_fields(cg,
|
||||
cg->shape, cg->X[0], cg->X[1], cg->X[2],
|
||||
cg->fgfs[Sphi_out->sgfn],
|
||||
cg->fgfs[Spi_out->sgfn],
|
||||
scalar_propspeed,
|
||||
scalar_soa,
|
||||
patch->bbox,
|
||||
dT_lev, iter_count, apply_bam_bc,
|
||||
Symmetry, lev, ndeps, co))
|
||||
return 1;
|
||||
}
|
||||
int use_zero_matter = 0;
|
||||
int keep_resident_state = 1;
|
||||
double **matter_precomputed = nullptr;
|
||||
@@ -1003,7 +1035,7 @@ void bssnEScalar_class::Step(int lev, int YN)
|
||||
(run_bssn_escalar_cuda_substep(cg, StateList, SynchList_pre, Pp->data,
|
||||
dT_lev, TRK4, iter_count, Symmetry, lev,
|
||||
ndeps, pre, chitiny,
|
||||
Sphi0, Spi0, Sphi_rhs, Spi_rhs,
|
||||
Sphi0, Spi0, Sphi, Spi, Sphi_rhs, Spi_rhs,
|
||||
rho, Sx, Sy, Sz, Sxx, Sxy, Sxz, Syy, Syz, Szz) == 0))
|
||||
? 0
|
||||
: 1) ||
|
||||
@@ -1058,12 +1090,37 @@ void bssnEScalar_class::Step(int lev, int YN)
|
||||
#if USE_CUDA_BSSN
|
||||
if (used_gpu_substep)
|
||||
skip_bssn_cuda_prefix(varl0, varl, varlrhs);
|
||||
#endif
|
||||
const bool scalar_gpu_rk_done =
|
||||
#if USE_CUDA_BSSN
|
||||
used_gpu_substep && escalar_gpu_rk_enabled();
|
||||
#else
|
||||
false;
|
||||
#endif
|
||||
while (varl0)
|
||||
{
|
||||
#ifndef WithShell
|
||||
if (lev == 0) // sommerfeld indeed
|
||||
f_sommerfeld_routbam(cg->shape, cg->X[0], cg->X[1], cg->X[2],
|
||||
{
|
||||
if (scalar_gpu_rk_done)
|
||||
{
|
||||
#ifndef WithShell
|
||||
if (lev > 0) // fix BD point
|
||||
#endif
|
||||
f_sommerfeld_rout(cg->shape, cg->X[0], cg->X[1], cg->X[2],
|
||||
Pp->data->bbox[0], Pp->data->bbox[1], Pp->data->bbox[2],
|
||||
Pp->data->bbox[3], Pp->data->bbox[4], Pp->data->bbox[5],
|
||||
dT_lev, cg->fgfs[phi0->sgfn],
|
||||
cg->fgfs[Lap0->sgfn],
|
||||
cg->fgfs[varl0->data->sgfn], cg->fgfs[varl->data->sgfn],
|
||||
varl0->data->SoA,
|
||||
Symmetry, cor);
|
||||
|
||||
varl0 = varl0->next;
|
||||
varl = varl->next;
|
||||
varlrhs = varlrhs->next;
|
||||
continue;
|
||||
}
|
||||
#ifndef WithShell
|
||||
if (lev == 0) // sommerfeld indeed
|
||||
f_sommerfeld_routbam(cg->shape, cg->X[0], cg->X[1], cg->X[2],
|
||||
Pp->data->bbox[0], Pp->data->bbox[1], Pp->data->bbox[2],
|
||||
Pp->data->bbox[3], Pp->data->bbox[4], Pp->data->bbox[5],
|
||||
cg->fgfs[varlrhs->data->sgfn],
|
||||
@@ -1369,7 +1426,7 @@ void bssnEScalar_class::Step(int lev, int YN)
|
||||
(run_bssn_escalar_cuda_substep(cg, SynchList_pre, SynchList_cor, Pp->data,
|
||||
dT_lev, TRK4, iter_count, Symmetry, lev,
|
||||
ndeps, cor, chitiny,
|
||||
Sphi, Spi, Sphi_rhs, Spi_rhs,
|
||||
Sphi, Spi, Sphi1, Spi1, Sphi_rhs, Spi_rhs,
|
||||
rho, Sx, Sy, Sz, Sxx, Sxy, Sxz, Syy, Syz, Szz) == 0))
|
||||
? 0
|
||||
: 1) ||
|
||||
@@ -1426,12 +1483,38 @@ void bssnEScalar_class::Step(int lev, int YN)
|
||||
if (used_gpu_substep)
|
||||
skip_bssn_cuda_prefix(varl0, varl, varl1, varlrhs);
|
||||
#endif
|
||||
const bool scalar_gpu_rk_done =
|
||||
#if USE_CUDA_BSSN
|
||||
used_gpu_substep && escalar_gpu_rk_enabled();
|
||||
#else
|
||||
false;
|
||||
#endif
|
||||
|
||||
while (varl0)
|
||||
{
|
||||
#ifndef WithShell
|
||||
if (lev == 0) // sommerfeld indeed
|
||||
f_sommerfeld_routbam(cg->shape, cg->X[0], cg->X[1], cg->X[2],
|
||||
{
|
||||
if (scalar_gpu_rk_done)
|
||||
{
|
||||
#ifndef WithShell
|
||||
if (lev > 0) // fix BD point
|
||||
#endif
|
||||
f_sommerfeld_rout(cg->shape, cg->X[0], cg->X[1], cg->X[2],
|
||||
Pp->data->bbox[0], Pp->data->bbox[1], Pp->data->bbox[2],
|
||||
Pp->data->bbox[3], Pp->data->bbox[4], Pp->data->bbox[5],
|
||||
dT_lev, cg->fgfs[phi0->sgfn],
|
||||
cg->fgfs[Lap0->sgfn],
|
||||
cg->fgfs[varl0->data->sgfn], cg->fgfs[varl1->data->sgfn],
|
||||
varl0->data->SoA,
|
||||
Symmetry, cor);
|
||||
|
||||
varl0 = varl0->next;
|
||||
varl = varl->next;
|
||||
varl1 = varl1->next;
|
||||
varlrhs = varlrhs->next;
|
||||
continue;
|
||||
}
|
||||
#ifndef WithShell
|
||||
if (lev == 0) // sommerfeld indeed
|
||||
f_sommerfeld_routbam(cg->shape, cg->X[0], cg->X[1], cg->X[2],
|
||||
Pp->data->bbox[0], Pp->data->bbox[1], Pp->data->bbox[2],
|
||||
Pp->data->bbox[3], Pp->data->bbox[4], Pp->data->bbox[5],
|
||||
cg->fgfs[varl1->data->sgfn],
|
||||
|
||||
Reference in New Issue
Block a user