Integrate CUDA support into RK4 substep execution

This commit is contained in:
2026-04-12 22:11:44 +08:00
parent 86a683de26
commit 4fa12a2009
4 changed files with 1161 additions and 279 deletions

View File

@@ -21,9 +21,12 @@ using namespace std;
#include "Ansorg.h"
#include "fmisc.h"
#include "Parallel.h"
#include "bssn_class.h"
#include "bssn_rhs.h"
#include "initial_puncture.h"
#include "bssn_class.h"
#include "bssn_rhs.h"
#if USE_CUDA_BSSN
#include "bssn_rhs_cuda.h"
#endif
#include "initial_puncture.h"
#include "enforce_algebra.h"
#include "rungekutta4_rout.h"
#include "sommerfeld_rout.h"
@@ -47,6 +50,35 @@ using namespace std;
#define BSSN_ENABLE_MEM_USAGE_LOG 0
#endif
#if USE_CUDA_BSSN
namespace {
/**
 * Walk a variable list and gather, per entry, the host grid-function pointer
 * of block `cg` plus (optionally) the boundary metadata stored on the variable.
 *
 * For list position i (at most BSSN_CUDA_STATE_COUNT positions are written):
 *   - host_views[i]            = cg->fgfs[entry->sgfn]
 *   - propspeeds[i]            = entry->propspeed      (only if propspeeds != nullptr)
 *   - soa_flat[3*i + 0..2]     = entry->SoA[0..2]      (only if soa_flat != nullptr)
 *
 * @param cg          block whose `fgfs` table is indexed by each variable's `sgfn`
 * @param vars        head of the variable list to walk (may be null)
 * @param host_views  output array with room for BSSN_CUDA_STATE_COUNT pointers
 * @param propspeeds  optional output array, BSSN_CUDA_STATE_COUNT doubles
 * @param soa_flat    optional output array, 3 * BSSN_CUDA_STATE_COUNT doubles
 * @return true only when the list contains exactly BSSN_CUDA_STATE_COUNT
 *         entries; false when it is shorter or longer. On a false return the
 *         output arrays may have been partially filled.
 */
bool fill_bssn_cuda_views(Block *cg, MyList<var> *vars,
                          double **host_views,
                          double *propspeeds = nullptr,
                          double *soa_flat = nullptr)
{
  MyList<var> *node = vars;
  int slot = 0;

  // Stop at the end of the list or once every slot has been filled,
  // whichever comes first, so the output arrays are never overrun.
  for (; node != 0 && slot < BSSN_CUDA_STATE_COUNT; node = node->next, ++slot)
  {
    host_views[slot] = cg->fgfs[node->data->sgfn];

    if (propspeeds != 0)
      propspeeds[slot] = node->data->propspeed;

    if (soa_flat != 0)
    {
      // Three consecutive values per variable, packed contiguously.
      for (int axis = 0; axis < 3; ++axis)
        soa_flat[3 * slot + axis] = node->data->SoA[axis];
    }
  }

  // Success requires that all slots were filled AND no entries are left over.
  const bool all_slots_filled = (slot == BSSN_CUDA_STATE_COUNT);
  const bool list_exhausted = (node == 0);
  return all_slots_filled && list_exhausted;
}
} // namespace
#endif
//================================================================================================
// define bssn_class
@@ -3104,104 +3136,148 @@ void bssn_class::Step(int lev, int YN)
cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn]);
#endif
if (f_compute_rhs_bssn(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2],
cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn],
cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn],
cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn],
cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn],
cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn],
cg->fgfs[Gmx0->sgfn], cg->fgfs[Gmy0->sgfn], cg->fgfs[Gmz0->sgfn],
cg->fgfs[Lap0->sgfn],
cg->fgfs[Sfx0->sgfn], cg->fgfs[Sfy0->sgfn], cg->fgfs[Sfz0->sgfn],
cg->fgfs[dtSfx0->sgfn], cg->fgfs[dtSfy0->sgfn], cg->fgfs[dtSfz0->sgfn],
cg->fgfs[phi_rhs->sgfn], cg->fgfs[trK_rhs->sgfn],
cg->fgfs[gxx_rhs->sgfn], cg->fgfs[gxy_rhs->sgfn], cg->fgfs[gxz_rhs->sgfn],
cg->fgfs[gyy_rhs->sgfn], cg->fgfs[gyz_rhs->sgfn], cg->fgfs[gzz_rhs->sgfn],
cg->fgfs[Axx_rhs->sgfn], cg->fgfs[Axy_rhs->sgfn], cg->fgfs[Axz_rhs->sgfn],
cg->fgfs[Ayy_rhs->sgfn], cg->fgfs[Ayz_rhs->sgfn], cg->fgfs[Azz_rhs->sgfn],
cg->fgfs[Gmx_rhs->sgfn], cg->fgfs[Gmy_rhs->sgfn], cg->fgfs[Gmz_rhs->sgfn],
cg->fgfs[Lap_rhs->sgfn],
cg->fgfs[Sfx_rhs->sgfn], cg->fgfs[Sfy_rhs->sgfn], cg->fgfs[Sfz_rhs->sgfn],
cg->fgfs[dtSfx_rhs->sgfn], cg->fgfs[dtSfy_rhs->sgfn], cg->fgfs[dtSfz_rhs->sgfn],
cg->fgfs[rho->sgfn], cg->fgfs[Sx->sgfn], cg->fgfs[Sy->sgfn], cg->fgfs[Sz->sgfn],
cg->fgfs[Sxx->sgfn], cg->fgfs[Sxy->sgfn], cg->fgfs[Sxz->sgfn],
cg->fgfs[Syy->sgfn], cg->fgfs[Syz->sgfn], cg->fgfs[Szz->sgfn],
cg->fgfs[Gamxxx->sgfn], cg->fgfs[Gamxxy->sgfn], cg->fgfs[Gamxxz->sgfn],
cg->fgfs[Gamxyy->sgfn], cg->fgfs[Gamxyz->sgfn], cg->fgfs[Gamxzz->sgfn],
cg->fgfs[Gamyxx->sgfn], cg->fgfs[Gamyxy->sgfn], cg->fgfs[Gamyxz->sgfn],
cg->fgfs[Gamyyy->sgfn], cg->fgfs[Gamyyz->sgfn], cg->fgfs[Gamyzz->sgfn],
cg->fgfs[Gamzxx->sgfn], cg->fgfs[Gamzxy->sgfn], cg->fgfs[Gamzxz->sgfn],
cg->fgfs[Gamzyy->sgfn], cg->fgfs[Gamzyz->sgfn], cg->fgfs[Gamzzz->sgfn],
cg->fgfs[Rxx->sgfn], cg->fgfs[Rxy->sgfn], cg->fgfs[Rxz->sgfn],
cg->fgfs[Ryy->sgfn], cg->fgfs[Ryz->sgfn], cg->fgfs[Rzz->sgfn],
cg->fgfs[Cons_Ham->sgfn],
cg->fgfs[Cons_Px->sgfn], cg->fgfs[Cons_Py->sgfn], cg->fgfs[Cons_Pz->sgfn],
cg->fgfs[Cons_Gx->sgfn], cg->fgfs[Cons_Gy->sgfn], cg->fgfs[Cons_Gz->sgfn],
Symmetry, lev, ndeps, pre))
{
cout << "find NaN in domain: ("
<< cg->bbox[0] << ":" << cg->bbox[3] << ","
<< cg->bbox[1] << ":" << cg->bbox[4] << ","
<< cg->bbox[2] << ":" << cg->bbox[5] << ")" << endl;
ERROR = 1;
}
// rk4 substep and boundary
{
MyList<var> *varl0 = StateList, *varl = SynchList_pre, *varlrhs = RHSList; // we do not check the correspondence here
while (varl0)
{
#if (SommerType == 0)
#ifndef WithShell
if (lev == 0) // sommerfeld indeed
f_sommerfeld_routbam(cg->shape, cg->X[0], cg->X[1], cg->X[2],
Pp->data->bbox[0], Pp->data->bbox[1], Pp->data->bbox[2],
Pp->data->bbox[3], Pp->data->bbox[4], Pp->data->bbox[5],
cg->fgfs[varlrhs->data->sgfn],
cg->fgfs[varl0->data->sgfn],
varl0->data->propspeed, varl0->data->SoA,
Symmetry);
#endif
#endif
f_rungekutta4_rout(cg->shape, dT_lev,
cg->fgfs[varl0->data->sgfn],
cg->fgfs[varl->data->sgfn],
cg->fgfs[varlrhs->data->sgfn],
iter_count);
#ifndef WithShell
if (lev > 0) // fix BD point
#endif
f_sommerfeld_rout(cg->shape, cg->X[0], cg->X[1], cg->X[2],
Pp->data->bbox[0], Pp->data->bbox[1], Pp->data->bbox[2],
Pp->data->bbox[3], Pp->data->bbox[4], Pp->data->bbox[5],
dT_lev,
cg->fgfs[phi0->sgfn],
cg->fgfs[Lap0->sgfn],
cg->fgfs[varl0->data->sgfn], cg->fgfs[varl->data->sgfn],
varl0->data->SoA,
Symmetry, cor);
#if (SommerType == 1)
#warning "shell part still bam type"
if (lev == 0) // Shibata type sommerfeld
f_sommerfeld_rout(cg->shape, cg->X[0], cg->X[1], cg->X[2],
Pp->data->bbox[0], Pp->data->bbox[1], Pp->data->bbox[2],
Pp->data->bbox[3], Pp->data->bbox[4], Pp->data->bbox[5],
dT_lev,
cg->fgfs[phi0->sgfn],
cg->fgfs[Lap0->sgfn],
cg->fgfs[varl0->data->sgfn], cg->fgfs[varl->data->sgfn],
varl0->data->SoA,
Symmetry, pre);
#endif
varl0 = varl0->next;
varl = varl->next;
varlrhs = varlrhs->next;
}
}
f_lowerboundset(cg->shape, cg->fgfs[phi->sgfn], chitiny);
bool used_gpu_substep = false;
#if USE_CUDA_BSSN
{
double *state_in[BSSN_CUDA_STATE_COUNT];
double *state_out[BSSN_CUDA_STATE_COUNT];
double *matter[BSSN_CUDA_MATTER_COUNT] = {
cg->fgfs[rho->sgfn], cg->fgfs[Sx->sgfn], cg->fgfs[Sy->sgfn], cg->fgfs[Sz->sgfn],
cg->fgfs[Sxx->sgfn], cg->fgfs[Sxy->sgfn], cg->fgfs[Sxz->sgfn],
cg->fgfs[Syy->sgfn], cg->fgfs[Syz->sgfn], cg->fgfs[Szz->sgfn]};
double propspeed[BSSN_CUDA_STATE_COUNT];
double soa_flat[3 * BSSN_CUDA_STATE_COUNT];
if (!fill_bssn_cuda_views(cg, StateList, state_in, propspeed, soa_flat) ||
!fill_bssn_cuda_views(cg, SynchList_pre, state_out))
{
cout << "CUDA BSSN state list mismatch on predictor step" << endl;
MPI_Abort(MPI_COMM_WORLD, 1);
}
int apply_bam_bc = 0;
#if (SommerType == 0)
#ifndef WithShell
apply_bam_bc = (lev == 0) ? 1 : 0;
#endif
#endif
if (bssn_cuda_rk4_substep(cg,
cg->shape, cg->X[0], cg->X[1], cg->X[2],
state_in, state_out, matter,
propspeed, soa_flat, Pp->data->bbox,
dT_lev, TRK4, iter_count, apply_bam_bc,
Symmetry, lev, ndeps, pre))
{
cout << "CUDA predictor substep failed in domain: ("
<< cg->bbox[0] << ":" << cg->bbox[3] << ","
<< cg->bbox[1] << ":" << cg->bbox[4] << ","
<< cg->bbox[2] << ":" << cg->bbox[5] << ")" << endl;
ERROR = 1;
}
used_gpu_substep = true;
}
#endif
if (!used_gpu_substep)
{
if (f_compute_rhs_bssn(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2],
cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn],
cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn],
cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn],
cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn],
cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn],
cg->fgfs[Gmx0->sgfn], cg->fgfs[Gmy0->sgfn], cg->fgfs[Gmz0->sgfn],
cg->fgfs[Lap0->sgfn],
cg->fgfs[Sfx0->sgfn], cg->fgfs[Sfy0->sgfn], cg->fgfs[Sfz0->sgfn],
cg->fgfs[dtSfx0->sgfn], cg->fgfs[dtSfy0->sgfn], cg->fgfs[dtSfz0->sgfn],
cg->fgfs[phi_rhs->sgfn], cg->fgfs[trK_rhs->sgfn],
cg->fgfs[gxx_rhs->sgfn], cg->fgfs[gxy_rhs->sgfn], cg->fgfs[gxz_rhs->sgfn],
cg->fgfs[gyy_rhs->sgfn], cg->fgfs[gyz_rhs->sgfn], cg->fgfs[gzz_rhs->sgfn],
cg->fgfs[Axx_rhs->sgfn], cg->fgfs[Axy_rhs->sgfn], cg->fgfs[Axz_rhs->sgfn],
cg->fgfs[Ayy_rhs->sgfn], cg->fgfs[Ayz_rhs->sgfn], cg->fgfs[Azz_rhs->sgfn],
cg->fgfs[Gmx_rhs->sgfn], cg->fgfs[Gmy_rhs->sgfn], cg->fgfs[Gmz_rhs->sgfn],
cg->fgfs[Lap_rhs->sgfn],
cg->fgfs[Sfx_rhs->sgfn], cg->fgfs[Sfy_rhs->sgfn], cg->fgfs[Sfz_rhs->sgfn],
cg->fgfs[dtSfx_rhs->sgfn], cg->fgfs[dtSfy_rhs->sgfn], cg->fgfs[dtSfz_rhs->sgfn],
cg->fgfs[rho->sgfn], cg->fgfs[Sx->sgfn], cg->fgfs[Sy->sgfn], cg->fgfs[Sz->sgfn],
cg->fgfs[Sxx->sgfn], cg->fgfs[Sxy->sgfn], cg->fgfs[Sxz->sgfn],
cg->fgfs[Syy->sgfn], cg->fgfs[Syz->sgfn], cg->fgfs[Szz->sgfn],
cg->fgfs[Gamxxx->sgfn], cg->fgfs[Gamxxy->sgfn], cg->fgfs[Gamxxz->sgfn],
cg->fgfs[Gamxyy->sgfn], cg->fgfs[Gamxyz->sgfn], cg->fgfs[Gamxzz->sgfn],
cg->fgfs[Gamyxx->sgfn], cg->fgfs[Gamyxy->sgfn], cg->fgfs[Gamyxz->sgfn],
cg->fgfs[Gamyyy->sgfn], cg->fgfs[Gamyyz->sgfn], cg->fgfs[Gamyzz->sgfn],
cg->fgfs[Gamzxx->sgfn], cg->fgfs[Gamzxy->sgfn], cg->fgfs[Gamzxz->sgfn],
cg->fgfs[Gamzyy->sgfn], cg->fgfs[Gamzyz->sgfn], cg->fgfs[Gamzzz->sgfn],
cg->fgfs[Rxx->sgfn], cg->fgfs[Rxy->sgfn], cg->fgfs[Rxz->sgfn],
cg->fgfs[Ryy->sgfn], cg->fgfs[Ryz->sgfn], cg->fgfs[Rzz->sgfn],
cg->fgfs[Cons_Ham->sgfn],
cg->fgfs[Cons_Px->sgfn], cg->fgfs[Cons_Py->sgfn], cg->fgfs[Cons_Pz->sgfn],
cg->fgfs[Cons_Gx->sgfn], cg->fgfs[Cons_Gy->sgfn], cg->fgfs[Cons_Gz->sgfn],
Symmetry, lev, ndeps, pre))
{
cout << "find NaN in domain: ("
<< cg->bbox[0] << ":" << cg->bbox[3] << ","
<< cg->bbox[1] << ":" << cg->bbox[4] << ","
<< cg->bbox[2] << ":" << cg->bbox[5] << ")" << endl;
ERROR = 1;
}
}
// rk4 substep boundary fix
{
MyList<var> *varl0 = StateList, *varl = SynchList_pre, *varlrhs = RHSList; // we do not check the correspondence here
while (varl0)
{
#if !USE_CUDA_BSSN
#if (SommerType == 0)
#ifndef WithShell
if (lev == 0) // sommerfeld indeed
f_sommerfeld_routbam(cg->shape, cg->X[0], cg->X[1], cg->X[2],
Pp->data->bbox[0], Pp->data->bbox[1], Pp->data->bbox[2],
Pp->data->bbox[3], Pp->data->bbox[4], Pp->data->bbox[5],
cg->fgfs[varlrhs->data->sgfn],
cg->fgfs[varl0->data->sgfn],
varl0->data->propspeed, varl0->data->SoA,
Symmetry);
#endif
#endif
f_rungekutta4_rout(cg->shape, dT_lev,
cg->fgfs[varl0->data->sgfn],
cg->fgfs[varl->data->sgfn],
cg->fgfs[varlrhs->data->sgfn],
iter_count);
#endif
#ifndef WithShell
if (lev > 0) // fix BD point
#endif
f_sommerfeld_rout(cg->shape, cg->X[0], cg->X[1], cg->X[2],
Pp->data->bbox[0], Pp->data->bbox[1], Pp->data->bbox[2],
Pp->data->bbox[3], Pp->data->bbox[4], Pp->data->bbox[5],
dT_lev,
cg->fgfs[phi0->sgfn],
cg->fgfs[Lap0->sgfn],
cg->fgfs[varl0->data->sgfn], cg->fgfs[varl->data->sgfn],
varl0->data->SoA,
Symmetry, cor);
#if (SommerType == 1)
#warning "shell part still bam type"
if (lev == 0) // Shibata type sommerfeld
f_sommerfeld_rout(cg->shape, cg->X[0], cg->X[1], cg->X[2],
Pp->data->bbox[0], Pp->data->bbox[1], Pp->data->bbox[2],
Pp->data->bbox[3], Pp->data->bbox[4], Pp->data->bbox[5],
dT_lev,
cg->fgfs[phi0->sgfn],
cg->fgfs[Lap0->sgfn],
cg->fgfs[varl0->data->sgfn], cg->fgfs[varl->data->sgfn],
varl0->data->SoA,
Symmetry, pre);
#endif
varl0 = varl0->next;
varl = varl->next;
varlrhs = varlrhs->next;
}
}
f_lowerboundset(cg->shape, cg->fgfs[phi->sgfn], chitiny);
}
if (BP == Pp->data->ble)
break;
@@ -3469,102 +3545,148 @@ void bssn_class::Step(int lev, int YN)
cg->fgfs[Ayy->sgfn], cg->fgfs[Ayz->sgfn], cg->fgfs[Azz->sgfn]);
#endif
if (f_compute_rhs_bssn(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2],
cg->fgfs[phi->sgfn], cg->fgfs[trK->sgfn],
cg->fgfs[gxx->sgfn], cg->fgfs[gxy->sgfn], cg->fgfs[gxz->sgfn],
cg->fgfs[gyy->sgfn], cg->fgfs[gyz->sgfn], cg->fgfs[gzz->sgfn],
cg->fgfs[Axx->sgfn], cg->fgfs[Axy->sgfn], cg->fgfs[Axz->sgfn],
cg->fgfs[Ayy->sgfn], cg->fgfs[Ayz->sgfn], cg->fgfs[Azz->sgfn],
cg->fgfs[Gmx->sgfn], cg->fgfs[Gmy->sgfn], cg->fgfs[Gmz->sgfn],
cg->fgfs[Lap->sgfn],
cg->fgfs[Sfx->sgfn], cg->fgfs[Sfy->sgfn], cg->fgfs[Sfz->sgfn],
cg->fgfs[dtSfx->sgfn], cg->fgfs[dtSfy->sgfn], cg->fgfs[dtSfz->sgfn],
cg->fgfs[phi1->sgfn], cg->fgfs[trK1->sgfn],
cg->fgfs[gxx1->sgfn], cg->fgfs[gxy1->sgfn], cg->fgfs[gxz1->sgfn],
cg->fgfs[gyy1->sgfn], cg->fgfs[gyz1->sgfn], cg->fgfs[gzz1->sgfn],
cg->fgfs[Axx1->sgfn], cg->fgfs[Axy1->sgfn], cg->fgfs[Axz1->sgfn],
cg->fgfs[Ayy1->sgfn], cg->fgfs[Ayz1->sgfn], cg->fgfs[Azz1->sgfn],
cg->fgfs[Gmx1->sgfn], cg->fgfs[Gmy1->sgfn], cg->fgfs[Gmz1->sgfn],
cg->fgfs[Lap1->sgfn],
cg->fgfs[Sfx1->sgfn], cg->fgfs[Sfy1->sgfn], cg->fgfs[Sfz1->sgfn],
cg->fgfs[dtSfx1->sgfn], cg->fgfs[dtSfy1->sgfn], cg->fgfs[dtSfz1->sgfn],
cg->fgfs[rho->sgfn], cg->fgfs[Sx->sgfn], cg->fgfs[Sy->sgfn], cg->fgfs[Sz->sgfn],
cg->fgfs[Sxx->sgfn], cg->fgfs[Sxy->sgfn], cg->fgfs[Sxz->sgfn],
cg->fgfs[Syy->sgfn], cg->fgfs[Syz->sgfn], cg->fgfs[Szz->sgfn],
cg->fgfs[Gamxxx->sgfn], cg->fgfs[Gamxxy->sgfn], cg->fgfs[Gamxxz->sgfn],
cg->fgfs[Gamxyy->sgfn], cg->fgfs[Gamxyz->sgfn], cg->fgfs[Gamxzz->sgfn],
cg->fgfs[Gamyxx->sgfn], cg->fgfs[Gamyxy->sgfn], cg->fgfs[Gamyxz->sgfn],
cg->fgfs[Gamyyy->sgfn], cg->fgfs[Gamyyz->sgfn], cg->fgfs[Gamyzz->sgfn],
cg->fgfs[Gamzxx->sgfn], cg->fgfs[Gamzxy->sgfn], cg->fgfs[Gamzxz->sgfn],
cg->fgfs[Gamzyy->sgfn], cg->fgfs[Gamzyz->sgfn], cg->fgfs[Gamzzz->sgfn],
cg->fgfs[Rxx->sgfn], cg->fgfs[Rxy->sgfn], cg->fgfs[Rxz->sgfn],
cg->fgfs[Ryy->sgfn], cg->fgfs[Ryz->sgfn], cg->fgfs[Rzz->sgfn],
cg->fgfs[Cons_Ham->sgfn],
cg->fgfs[Cons_Px->sgfn], cg->fgfs[Cons_Py->sgfn], cg->fgfs[Cons_Pz->sgfn],
cg->fgfs[Cons_Gx->sgfn], cg->fgfs[Cons_Gy->sgfn], cg->fgfs[Cons_Gz->sgfn],
Symmetry, lev, ndeps, cor))
{
cout << "find NaN in domain: ("
<< cg->bbox[0] << ":" << cg->bbox[3] << ","
<< cg->bbox[1] << ":" << cg->bbox[4] << ","
<< cg->bbox[2] << ":" << cg->bbox[5] << ")" << endl;
ERROR = 1;
}
// rk4 substep and boundary
{
MyList<var> *varl0 = StateList, *varl = SynchList_pre, *varl1 = SynchList_cor, *varlrhs = RHSList; // we do not check the correspondence here
while (varl0)
{
#if (SommerType == 0)
#ifndef WithShell
if (lev == 0) // sommerfeld indeed
f_sommerfeld_routbam(cg->shape, cg->X[0], cg->X[1], cg->X[2],
Pp->data->bbox[0], Pp->data->bbox[1], Pp->data->bbox[2],
Pp->data->bbox[3], Pp->data->bbox[4], Pp->data->bbox[5],
cg->fgfs[varl1->data->sgfn],
cg->fgfs[varl->data->sgfn], varl0->data->propspeed, varl0->data->SoA,
Symmetry);
#endif
#endif
f_rungekutta4_rout(cg->shape, dT_lev,
cg->fgfs[varl0->data->sgfn],
cg->fgfs[varl1->data->sgfn],
cg->fgfs[varlrhs->data->sgfn],
iter_count);
#ifndef WithShell
if (lev > 0) // fix BD point
#endif
f_sommerfeld_rout(cg->shape, cg->X[0], cg->X[1], cg->X[2],
Pp->data->bbox[0], Pp->data->bbox[1], Pp->data->bbox[2],
Pp->data->bbox[3], Pp->data->bbox[4], Pp->data->bbox[5],
dT_lev,
cg->fgfs[phi0->sgfn],
cg->fgfs[Lap0->sgfn],
cg->fgfs[varl0->data->sgfn], cg->fgfs[varl1->data->sgfn],
varl0->data->SoA,
Symmetry, cor);
#if (SommerType == 1)
if (lev == 1) // shibata type sommerfeld
f_sommerfeld_rout(cg->shape, cg->X[0], cg->X[1], cg->X[2],
Pp->data->bbox[0], Pp->data->bbox[1], Pp->data->bbox[2],
Pp->data->bbox[3], Pp->data->bbox[4], Pp->data->bbox[5],
dT_lev,
cg->fgfs[phi0->sgfn],
cg->fgfs[Lap0->sgfn],
cg->fgfs[varl->data->sgfn], cg->fgfs[varl1->data->sgfn],
varl0->data->SoA,
Symmetry, cor);
#endif
varl0 = varl0->next;
varl = varl->next;
varl1 = varl1->next;
varlrhs = varlrhs->next;
}
}
f_lowerboundset(cg->shape, cg->fgfs[phi1->sgfn], chitiny);
bool used_gpu_substep = false;
#if USE_CUDA_BSSN
{
double *state_in[BSSN_CUDA_STATE_COUNT];
double *state_out[BSSN_CUDA_STATE_COUNT];
double *matter[BSSN_CUDA_MATTER_COUNT] = {
cg->fgfs[rho->sgfn], cg->fgfs[Sx->sgfn], cg->fgfs[Sy->sgfn], cg->fgfs[Sz->sgfn],
cg->fgfs[Sxx->sgfn], cg->fgfs[Sxy->sgfn], cg->fgfs[Sxz->sgfn],
cg->fgfs[Syy->sgfn], cg->fgfs[Syz->sgfn], cg->fgfs[Szz->sgfn]};
double propspeed[BSSN_CUDA_STATE_COUNT];
double soa_flat[3 * BSSN_CUDA_STATE_COUNT];
if (!fill_bssn_cuda_views(cg, SynchList_pre, state_in, propspeed, soa_flat) ||
!fill_bssn_cuda_views(cg, SynchList_cor, state_out))
{
cout << "CUDA BSSN state list mismatch on corrector step" << endl;
MPI_Abort(MPI_COMM_WORLD, 1);
}
int apply_bam_bc = 0;
#if (SommerType == 0)
#ifndef WithShell
apply_bam_bc = (lev == 0) ? 1 : 0;
#endif
#endif
if (bssn_cuda_rk4_substep(cg,
cg->shape, cg->X[0], cg->X[1], cg->X[2],
state_in, state_out, matter,
propspeed, soa_flat, Pp->data->bbox,
dT_lev, TRK4, iter_count, apply_bam_bc,
Symmetry, lev, ndeps, cor))
{
cout << "CUDA corrector substep failed in domain: ("
<< cg->bbox[0] << ":" << cg->bbox[3] << ","
<< cg->bbox[1] << ":" << cg->bbox[4] << ","
<< cg->bbox[2] << ":" << cg->bbox[5] << ")" << endl;
ERROR = 1;
}
used_gpu_substep = true;
}
#endif
if (!used_gpu_substep)
{
if (f_compute_rhs_bssn(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2],
cg->fgfs[phi->sgfn], cg->fgfs[trK->sgfn],
cg->fgfs[gxx->sgfn], cg->fgfs[gxy->sgfn], cg->fgfs[gxz->sgfn],
cg->fgfs[gyy->sgfn], cg->fgfs[gyz->sgfn], cg->fgfs[gzz->sgfn],
cg->fgfs[Axx->sgfn], cg->fgfs[Axy->sgfn], cg->fgfs[Axz->sgfn],
cg->fgfs[Ayy->sgfn], cg->fgfs[Ayz->sgfn], cg->fgfs[Azz->sgfn],
cg->fgfs[Gmx->sgfn], cg->fgfs[Gmy->sgfn], cg->fgfs[Gmz->sgfn],
cg->fgfs[Lap->sgfn],
cg->fgfs[Sfx->sgfn], cg->fgfs[Sfy->sgfn], cg->fgfs[Sfz->sgfn],
cg->fgfs[dtSfx->sgfn], cg->fgfs[dtSfy->sgfn], cg->fgfs[dtSfz->sgfn],
cg->fgfs[phi1->sgfn], cg->fgfs[trK1->sgfn],
cg->fgfs[gxx1->sgfn], cg->fgfs[gxy1->sgfn], cg->fgfs[gxz1->sgfn],
cg->fgfs[gyy1->sgfn], cg->fgfs[gyz1->sgfn], cg->fgfs[gzz1->sgfn],
cg->fgfs[Axx1->sgfn], cg->fgfs[Axy1->sgfn], cg->fgfs[Axz1->sgfn],
cg->fgfs[Ayy1->sgfn], cg->fgfs[Ayz1->sgfn], cg->fgfs[Azz1->sgfn],
cg->fgfs[Gmx1->sgfn], cg->fgfs[Gmy1->sgfn], cg->fgfs[Gmz1->sgfn],
cg->fgfs[Lap1->sgfn],
cg->fgfs[Sfx1->sgfn], cg->fgfs[Sfy1->sgfn], cg->fgfs[Sfz1->sgfn],
cg->fgfs[dtSfx1->sgfn], cg->fgfs[dtSfy1->sgfn], cg->fgfs[dtSfz1->sgfn],
cg->fgfs[rho->sgfn], cg->fgfs[Sx->sgfn], cg->fgfs[Sy->sgfn], cg->fgfs[Sz->sgfn],
cg->fgfs[Sxx->sgfn], cg->fgfs[Sxy->sgfn], cg->fgfs[Sxz->sgfn],
cg->fgfs[Syy->sgfn], cg->fgfs[Syz->sgfn], cg->fgfs[Szz->sgfn],
cg->fgfs[Gamxxx->sgfn], cg->fgfs[Gamxxy->sgfn], cg->fgfs[Gamxxz->sgfn],
cg->fgfs[Gamxyy->sgfn], cg->fgfs[Gamxyz->sgfn], cg->fgfs[Gamxzz->sgfn],
cg->fgfs[Gamyxx->sgfn], cg->fgfs[Gamyxy->sgfn], cg->fgfs[Gamyxz->sgfn],
cg->fgfs[Gamyyy->sgfn], cg->fgfs[Gamyyz->sgfn], cg->fgfs[Gamyzz->sgfn],
cg->fgfs[Gamzxx->sgfn], cg->fgfs[Gamzxy->sgfn], cg->fgfs[Gamzxz->sgfn],
cg->fgfs[Gamzyy->sgfn], cg->fgfs[Gamzyz->sgfn], cg->fgfs[Gamzzz->sgfn],
cg->fgfs[Rxx->sgfn], cg->fgfs[Rxy->sgfn], cg->fgfs[Rxz->sgfn],
cg->fgfs[Ryy->sgfn], cg->fgfs[Ryz->sgfn], cg->fgfs[Rzz->sgfn],
cg->fgfs[Cons_Ham->sgfn],
cg->fgfs[Cons_Px->sgfn], cg->fgfs[Cons_Py->sgfn], cg->fgfs[Cons_Pz->sgfn],
cg->fgfs[Cons_Gx->sgfn], cg->fgfs[Cons_Gy->sgfn], cg->fgfs[Cons_Gz->sgfn],
Symmetry, lev, ndeps, cor))
{
cout << "find NaN in domain: ("
<< cg->bbox[0] << ":" << cg->bbox[3] << ","
<< cg->bbox[1] << ":" << cg->bbox[4] << ","
<< cg->bbox[2] << ":" << cg->bbox[5] << ")" << endl;
ERROR = 1;
}
}
// rk4 substep boundary fix
{
MyList<var> *varl0 = StateList, *varl = SynchList_pre, *varl1 = SynchList_cor, *varlrhs = RHSList;
// we do not check the correspondence here
while (varl0)
{
#if !USE_CUDA_BSSN
#if (SommerType == 0)
#ifndef WithShell
if (lev == 0) // sommerfeld indeed
f_sommerfeld_routbam(cg->shape, cg->X[0], cg->X[1], cg->X[2],
Pp->data->bbox[0], Pp->data->bbox[1], Pp->data->bbox[2],
Pp->data->bbox[3], Pp->data->bbox[4], Pp->data->bbox[5],
cg->fgfs[varl1->data->sgfn],
cg->fgfs[varl->data->sgfn],
varl0->data->propspeed, varl0->data->SoA,
Symmetry);
#endif
#endif
f_rungekutta4_rout(cg->shape, dT_lev,
cg->fgfs[varl0->data->sgfn],
cg->fgfs[varl1->data->sgfn],
cg->fgfs[varlrhs->data->sgfn],
iter_count);
#endif
#ifndef WithShell
if (lev > 0) // fix BD point
#endif
f_sommerfeld_rout(cg->shape, cg->X[0], cg->X[1], cg->X[2],
Pp->data->bbox[0], Pp->data->bbox[1], Pp->data->bbox[2],
Pp->data->bbox[3], Pp->data->bbox[4], Pp->data->bbox[5],
dT_lev,
cg->fgfs[phi0->sgfn],
cg->fgfs[Lap0->sgfn],
cg->fgfs[varl0->data->sgfn], cg->fgfs[varl1->data->sgfn],
varl0->data->SoA,
Symmetry, cor);
#if (SommerType == 1)
if (lev == 1) // shibata type sommerfeld
f_sommerfeld_rout(cg->shape, cg->X[0], cg->X[1], cg->X[2],
Pp->data->bbox[0], Pp->data->bbox[1], Pp->data->bbox[2],
Pp->data->bbox[3], Pp->data->bbox[4], Pp->data->bbox[5],
dT_lev,
cg->fgfs[phi0->sgfn],
cg->fgfs[Lap0->sgfn],
cg->fgfs[varl->data->sgfn], cg->fgfs[varl1->data->sgfn],
varl0->data->SoA,
Symmetry, cor);
#endif
varl0 = varl0->next;
varl = varl->next;
varl1 = varl1->next;
varlrhs = varlrhs->next;
}
}
f_lowerboundset(cg->shape, cg->fgfs[phi1->sgfn], chitiny);
}
if (BP == Pp->data->ble)
break;