Integrate CUDA support into RK4 substep execution
This commit is contained in:
@@ -21,9 +21,12 @@ using namespace std;
|
||||
#include "Ansorg.h"
|
||||
#include "fmisc.h"
|
||||
#include "Parallel.h"
|
||||
#include "bssn_class.h"
|
||||
#include "bssn_rhs.h"
|
||||
#include "initial_puncture.h"
|
||||
#include "bssn_class.h"
|
||||
#include "bssn_rhs.h"
|
||||
#if USE_CUDA_BSSN
|
||||
#include "bssn_rhs_cuda.h"
|
||||
#endif
|
||||
#include "initial_puncture.h"
|
||||
#include "enforce_algebra.h"
|
||||
#include "rungekutta4_rout.h"
|
||||
#include "sommerfeld_rout.h"
|
||||
@@ -47,6 +50,35 @@ using namespace std;
|
||||
#define BSSN_ENABLE_MEM_USAGE_LOG 0
|
||||
#endif
|
||||
|
||||
#if USE_CUDA_BSSN
|
||||
namespace {
|
||||
|
||||
// Gather the host-side grid-function pointers (and, optionally, per-variable
// boundary metadata) for up to BSSN_CUDA_STATE_COUNT entries of a variable list.
//
// For the i-th list entry (in list order):
//   host_views[i]             <- cg->fgfs[entry->sgfn]
//   propspeeds[i]             <- entry->propspeed        (only if propspeeds != nullptr)
//   soa_flat[3*i .. 3*i + 2]  <- entry->SoA[0..2]        (only if soa_flat != nullptr)
//
// The caller must provide room for BSSN_CUDA_STATE_COUNT elements in
// host_views / propspeeds and 3 * BSSN_CUDA_STATE_COUNT elements in soa_flat.
//
// Returns true only when the list holds exactly BSSN_CUDA_STATE_COUNT entries.
// A shorter list leaves the trailing output slots untouched; a longer list is
// cut off after BSSN_CUDA_STATE_COUNT entries. Both cases return false.
bool fill_bssn_cuda_views(Block *cg, MyList<var> *vars,
                          double **host_views,
                          double *propspeeds = nullptr,
                          double *soa_flat = nullptr)
{
  int count = 0;
  MyList<var> *node = vars;

  for (; node != nullptr && count < BSSN_CUDA_STATE_COUNT; node = node->next, ++count)
  {
    const auto &entry = node->data;

    host_views[count] = cg->fgfs[entry->sgfn];

    if (propspeeds != nullptr)
      propspeeds[count] = entry->propspeed;

    if (soa_flat != nullptr)
    {
      // Three SoA components per variable, stored contiguously.
      for (int k = 0; k < 3; ++k)
        soa_flat[3 * count + k] = entry->SoA[k];
    }
  }

  // Success requires that every slot was filled and no entries were left over.
  return count == BSSN_CUDA_STATE_COUNT && node == nullptr;
}
|
||||
|
||||
} // namespace
|
||||
#endif
|
||||
|
||||
//================================================================================================
|
||||
|
||||
// define bssn_class
|
||||
@@ -3104,104 +3136,148 @@ void bssn_class::Step(int lev, int YN)
|
||||
cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn]);
|
||||
#endif
|
||||
|
||||
if (f_compute_rhs_bssn(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2],
|
||||
cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn],
|
||||
cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn],
|
||||
cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn],
|
||||
cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn],
|
||||
cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn],
|
||||
cg->fgfs[Gmx0->sgfn], cg->fgfs[Gmy0->sgfn], cg->fgfs[Gmz0->sgfn],
|
||||
cg->fgfs[Lap0->sgfn],
|
||||
cg->fgfs[Sfx0->sgfn], cg->fgfs[Sfy0->sgfn], cg->fgfs[Sfz0->sgfn],
|
||||
cg->fgfs[dtSfx0->sgfn], cg->fgfs[dtSfy0->sgfn], cg->fgfs[dtSfz0->sgfn],
|
||||
cg->fgfs[phi_rhs->sgfn], cg->fgfs[trK_rhs->sgfn],
|
||||
cg->fgfs[gxx_rhs->sgfn], cg->fgfs[gxy_rhs->sgfn], cg->fgfs[gxz_rhs->sgfn],
|
||||
cg->fgfs[gyy_rhs->sgfn], cg->fgfs[gyz_rhs->sgfn], cg->fgfs[gzz_rhs->sgfn],
|
||||
cg->fgfs[Axx_rhs->sgfn], cg->fgfs[Axy_rhs->sgfn], cg->fgfs[Axz_rhs->sgfn],
|
||||
cg->fgfs[Ayy_rhs->sgfn], cg->fgfs[Ayz_rhs->sgfn], cg->fgfs[Azz_rhs->sgfn],
|
||||
cg->fgfs[Gmx_rhs->sgfn], cg->fgfs[Gmy_rhs->sgfn], cg->fgfs[Gmz_rhs->sgfn],
|
||||
cg->fgfs[Lap_rhs->sgfn],
|
||||
cg->fgfs[Sfx_rhs->sgfn], cg->fgfs[Sfy_rhs->sgfn], cg->fgfs[Sfz_rhs->sgfn],
|
||||
cg->fgfs[dtSfx_rhs->sgfn], cg->fgfs[dtSfy_rhs->sgfn], cg->fgfs[dtSfz_rhs->sgfn],
|
||||
cg->fgfs[rho->sgfn], cg->fgfs[Sx->sgfn], cg->fgfs[Sy->sgfn], cg->fgfs[Sz->sgfn],
|
||||
cg->fgfs[Sxx->sgfn], cg->fgfs[Sxy->sgfn], cg->fgfs[Sxz->sgfn],
|
||||
cg->fgfs[Syy->sgfn], cg->fgfs[Syz->sgfn], cg->fgfs[Szz->sgfn],
|
||||
cg->fgfs[Gamxxx->sgfn], cg->fgfs[Gamxxy->sgfn], cg->fgfs[Gamxxz->sgfn],
|
||||
cg->fgfs[Gamxyy->sgfn], cg->fgfs[Gamxyz->sgfn], cg->fgfs[Gamxzz->sgfn],
|
||||
cg->fgfs[Gamyxx->sgfn], cg->fgfs[Gamyxy->sgfn], cg->fgfs[Gamyxz->sgfn],
|
||||
cg->fgfs[Gamyyy->sgfn], cg->fgfs[Gamyyz->sgfn], cg->fgfs[Gamyzz->sgfn],
|
||||
cg->fgfs[Gamzxx->sgfn], cg->fgfs[Gamzxy->sgfn], cg->fgfs[Gamzxz->sgfn],
|
||||
cg->fgfs[Gamzyy->sgfn], cg->fgfs[Gamzyz->sgfn], cg->fgfs[Gamzzz->sgfn],
|
||||
cg->fgfs[Rxx->sgfn], cg->fgfs[Rxy->sgfn], cg->fgfs[Rxz->sgfn],
|
||||
cg->fgfs[Ryy->sgfn], cg->fgfs[Ryz->sgfn], cg->fgfs[Rzz->sgfn],
|
||||
cg->fgfs[Cons_Ham->sgfn],
|
||||
cg->fgfs[Cons_Px->sgfn], cg->fgfs[Cons_Py->sgfn], cg->fgfs[Cons_Pz->sgfn],
|
||||
cg->fgfs[Cons_Gx->sgfn], cg->fgfs[Cons_Gy->sgfn], cg->fgfs[Cons_Gz->sgfn],
|
||||
Symmetry, lev, ndeps, pre))
|
||||
{
|
||||
cout << "find NaN in domain: ("
|
||||
<< cg->bbox[0] << ":" << cg->bbox[3] << ","
|
||||
<< cg->bbox[1] << ":" << cg->bbox[4] << ","
|
||||
<< cg->bbox[2] << ":" << cg->bbox[5] << ")" << endl;
|
||||
ERROR = 1;
|
||||
}
|
||||
|
||||
// rk4 substep and boundary
|
||||
{
|
||||
MyList<var> *varl0 = StateList, *varl = SynchList_pre, *varlrhs = RHSList; // we do not check the correspondence here
|
||||
while (varl0)
|
||||
{
|
||||
#if (SommerType == 0)
|
||||
#ifndef WithShell
|
||||
if (lev == 0) // sommerfeld indeed
|
||||
f_sommerfeld_routbam(cg->shape, cg->X[0], cg->X[1], cg->X[2],
|
||||
Pp->data->bbox[0], Pp->data->bbox[1], Pp->data->bbox[2],
|
||||
Pp->data->bbox[3], Pp->data->bbox[4], Pp->data->bbox[5],
|
||||
cg->fgfs[varlrhs->data->sgfn],
|
||||
cg->fgfs[varl0->data->sgfn],
|
||||
varl0->data->propspeed, varl0->data->SoA,
|
||||
Symmetry);
|
||||
|
||||
#endif
|
||||
#endif
|
||||
f_rungekutta4_rout(cg->shape, dT_lev,
|
||||
cg->fgfs[varl0->data->sgfn],
|
||||
cg->fgfs[varl->data->sgfn],
|
||||
cg->fgfs[varlrhs->data->sgfn],
|
||||
iter_count);
|
||||
#ifndef WithShell
|
||||
if (lev > 0) // fix BD point
|
||||
#endif
|
||||
f_sommerfeld_rout(cg->shape, cg->X[0], cg->X[1], cg->X[2],
|
||||
Pp->data->bbox[0], Pp->data->bbox[1], Pp->data->bbox[2],
|
||||
Pp->data->bbox[3], Pp->data->bbox[4], Pp->data->bbox[5],
|
||||
dT_lev,
|
||||
cg->fgfs[phi0->sgfn],
|
||||
cg->fgfs[Lap0->sgfn],
|
||||
cg->fgfs[varl0->data->sgfn], cg->fgfs[varl->data->sgfn],
|
||||
varl0->data->SoA,
|
||||
Symmetry, cor);
|
||||
|
||||
#if (SommerType == 1)
|
||||
#warning "shell part still bam type"
|
||||
if (lev == 0) // Shibata type sommerfeld
|
||||
f_sommerfeld_rout(cg->shape, cg->X[0], cg->X[1], cg->X[2],
|
||||
Pp->data->bbox[0], Pp->data->bbox[1], Pp->data->bbox[2],
|
||||
Pp->data->bbox[3], Pp->data->bbox[4], Pp->data->bbox[5],
|
||||
dT_lev,
|
||||
cg->fgfs[phi0->sgfn],
|
||||
cg->fgfs[Lap0->sgfn],
|
||||
cg->fgfs[varl0->data->sgfn], cg->fgfs[varl->data->sgfn],
|
||||
varl0->data->SoA,
|
||||
Symmetry, pre);
|
||||
#endif
|
||||
|
||||
varl0 = varl0->next;
|
||||
varl = varl->next;
|
||||
varlrhs = varlrhs->next;
|
||||
}
|
||||
}
|
||||
f_lowerboundset(cg->shape, cg->fgfs[phi->sgfn], chitiny);
|
||||
bool used_gpu_substep = false;
|
||||
#if USE_CUDA_BSSN
|
||||
{
|
||||
double *state_in[BSSN_CUDA_STATE_COUNT];
|
||||
double *state_out[BSSN_CUDA_STATE_COUNT];
|
||||
double *matter[BSSN_CUDA_MATTER_COUNT] = {
|
||||
cg->fgfs[rho->sgfn], cg->fgfs[Sx->sgfn], cg->fgfs[Sy->sgfn], cg->fgfs[Sz->sgfn],
|
||||
cg->fgfs[Sxx->sgfn], cg->fgfs[Sxy->sgfn], cg->fgfs[Sxz->sgfn],
|
||||
cg->fgfs[Syy->sgfn], cg->fgfs[Syz->sgfn], cg->fgfs[Szz->sgfn]};
|
||||
double propspeed[BSSN_CUDA_STATE_COUNT];
|
||||
double soa_flat[3 * BSSN_CUDA_STATE_COUNT];
|
||||
if (!fill_bssn_cuda_views(cg, StateList, state_in, propspeed, soa_flat) ||
|
||||
!fill_bssn_cuda_views(cg, SynchList_pre, state_out))
|
||||
{
|
||||
cout << "CUDA BSSN state list mismatch on predictor step" << endl;
|
||||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||||
}
|
||||
int apply_bam_bc = 0;
|
||||
#if (SommerType == 0)
|
||||
#ifndef WithShell
|
||||
apply_bam_bc = (lev == 0) ? 1 : 0;
|
||||
#endif
|
||||
#endif
|
||||
if (bssn_cuda_rk4_substep(cg,
|
||||
cg->shape, cg->X[0], cg->X[1], cg->X[2],
|
||||
state_in, state_out, matter,
|
||||
propspeed, soa_flat, Pp->data->bbox,
|
||||
dT_lev, TRK4, iter_count, apply_bam_bc,
|
||||
Symmetry, lev, ndeps, pre))
|
||||
{
|
||||
cout << "CUDA predictor substep failed in domain: ("
|
||||
<< cg->bbox[0] << ":" << cg->bbox[3] << ","
|
||||
<< cg->bbox[1] << ":" << cg->bbox[4] << ","
|
||||
<< cg->bbox[2] << ":" << cg->bbox[5] << ")" << endl;
|
||||
ERROR = 1;
|
||||
}
|
||||
used_gpu_substep = true;
|
||||
}
|
||||
#endif
|
||||
if (!used_gpu_substep)
|
||||
{
|
||||
if (f_compute_rhs_bssn(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2],
|
||||
cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn],
|
||||
cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn],
|
||||
cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn],
|
||||
cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn],
|
||||
cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn],
|
||||
cg->fgfs[Gmx0->sgfn], cg->fgfs[Gmy0->sgfn], cg->fgfs[Gmz0->sgfn],
|
||||
cg->fgfs[Lap0->sgfn],
|
||||
cg->fgfs[Sfx0->sgfn], cg->fgfs[Sfy0->sgfn], cg->fgfs[Sfz0->sgfn],
|
||||
cg->fgfs[dtSfx0->sgfn], cg->fgfs[dtSfy0->sgfn], cg->fgfs[dtSfz0->sgfn],
|
||||
cg->fgfs[phi_rhs->sgfn], cg->fgfs[trK_rhs->sgfn],
|
||||
cg->fgfs[gxx_rhs->sgfn], cg->fgfs[gxy_rhs->sgfn], cg->fgfs[gxz_rhs->sgfn],
|
||||
cg->fgfs[gyy_rhs->sgfn], cg->fgfs[gyz_rhs->sgfn], cg->fgfs[gzz_rhs->sgfn],
|
||||
cg->fgfs[Axx_rhs->sgfn], cg->fgfs[Axy_rhs->sgfn], cg->fgfs[Axz_rhs->sgfn],
|
||||
cg->fgfs[Ayy_rhs->sgfn], cg->fgfs[Ayz_rhs->sgfn], cg->fgfs[Azz_rhs->sgfn],
|
||||
cg->fgfs[Gmx_rhs->sgfn], cg->fgfs[Gmy_rhs->sgfn], cg->fgfs[Gmz_rhs->sgfn],
|
||||
cg->fgfs[Lap_rhs->sgfn],
|
||||
cg->fgfs[Sfx_rhs->sgfn], cg->fgfs[Sfy_rhs->sgfn], cg->fgfs[Sfz_rhs->sgfn],
|
||||
cg->fgfs[dtSfx_rhs->sgfn], cg->fgfs[dtSfy_rhs->sgfn], cg->fgfs[dtSfz_rhs->sgfn],
|
||||
cg->fgfs[rho->sgfn], cg->fgfs[Sx->sgfn], cg->fgfs[Sy->sgfn], cg->fgfs[Sz->sgfn],
|
||||
cg->fgfs[Sxx->sgfn], cg->fgfs[Sxy->sgfn], cg->fgfs[Sxz->sgfn],
|
||||
cg->fgfs[Syy->sgfn], cg->fgfs[Syz->sgfn], cg->fgfs[Szz->sgfn],
|
||||
cg->fgfs[Gamxxx->sgfn], cg->fgfs[Gamxxy->sgfn], cg->fgfs[Gamxxz->sgfn],
|
||||
cg->fgfs[Gamxyy->sgfn], cg->fgfs[Gamxyz->sgfn], cg->fgfs[Gamxzz->sgfn],
|
||||
cg->fgfs[Gamyxx->sgfn], cg->fgfs[Gamyxy->sgfn], cg->fgfs[Gamyxz->sgfn],
|
||||
cg->fgfs[Gamyyy->sgfn], cg->fgfs[Gamyyz->sgfn], cg->fgfs[Gamyzz->sgfn],
|
||||
cg->fgfs[Gamzxx->sgfn], cg->fgfs[Gamzxy->sgfn], cg->fgfs[Gamzxz->sgfn],
|
||||
cg->fgfs[Gamzyy->sgfn], cg->fgfs[Gamzyz->sgfn], cg->fgfs[Gamzzz->sgfn],
|
||||
cg->fgfs[Rxx->sgfn], cg->fgfs[Rxy->sgfn], cg->fgfs[Rxz->sgfn],
|
||||
cg->fgfs[Ryy->sgfn], cg->fgfs[Ryz->sgfn], cg->fgfs[Rzz->sgfn],
|
||||
cg->fgfs[Cons_Ham->sgfn],
|
||||
cg->fgfs[Cons_Px->sgfn], cg->fgfs[Cons_Py->sgfn], cg->fgfs[Cons_Pz->sgfn],
|
||||
cg->fgfs[Cons_Gx->sgfn], cg->fgfs[Cons_Gy->sgfn], cg->fgfs[Cons_Gz->sgfn],
|
||||
Symmetry, lev, ndeps, pre))
|
||||
{
|
||||
cout << "find NaN in domain: ("
|
||||
<< cg->bbox[0] << ":" << cg->bbox[3] << ","
|
||||
<< cg->bbox[1] << ":" << cg->bbox[4] << ","
|
||||
<< cg->bbox[2] << ":" << cg->bbox[5] << ")" << endl;
|
||||
ERROR = 1;
|
||||
}
|
||||
}
|
||||
|
||||
// rk4 substep boundary fix
|
||||
{
|
||||
MyList<var> *varl0 = StateList, *varl = SynchList_pre, *varlrhs = RHSList; // we do not check the correspondence here
|
||||
while (varl0)
|
||||
{
|
||||
#if !USE_CUDA_BSSN
|
||||
#if (SommerType == 0)
|
||||
#ifndef WithShell
|
||||
if (lev == 0) // sommerfeld indeed
|
||||
f_sommerfeld_routbam(cg->shape, cg->X[0], cg->X[1], cg->X[2],
|
||||
Pp->data->bbox[0], Pp->data->bbox[1], Pp->data->bbox[2],
|
||||
Pp->data->bbox[3], Pp->data->bbox[4], Pp->data->bbox[5],
|
||||
cg->fgfs[varlrhs->data->sgfn],
|
||||
cg->fgfs[varl0->data->sgfn],
|
||||
varl0->data->propspeed, varl0->data->SoA,
|
||||
Symmetry);
|
||||
|
||||
#endif
|
||||
#endif
|
||||
f_rungekutta4_rout(cg->shape, dT_lev,
|
||||
cg->fgfs[varl0->data->sgfn],
|
||||
cg->fgfs[varl->data->sgfn],
|
||||
cg->fgfs[varlrhs->data->sgfn],
|
||||
iter_count);
|
||||
#endif
|
||||
#ifndef WithShell
|
||||
if (lev > 0) // fix BD point
|
||||
#endif
|
||||
f_sommerfeld_rout(cg->shape, cg->X[0], cg->X[1], cg->X[2],
|
||||
Pp->data->bbox[0], Pp->data->bbox[1], Pp->data->bbox[2],
|
||||
Pp->data->bbox[3], Pp->data->bbox[4], Pp->data->bbox[5],
|
||||
dT_lev,
|
||||
cg->fgfs[phi0->sgfn],
|
||||
cg->fgfs[Lap0->sgfn],
|
||||
cg->fgfs[varl0->data->sgfn], cg->fgfs[varl->data->sgfn],
|
||||
varl0->data->SoA,
|
||||
Symmetry, cor);
|
||||
|
||||
#if (SommerType == 1)
|
||||
#warning "shell part still bam type"
|
||||
if (lev == 0) // Shibata type sommerfeld
|
||||
f_sommerfeld_rout(cg->shape, cg->X[0], cg->X[1], cg->X[2],
|
||||
Pp->data->bbox[0], Pp->data->bbox[1], Pp->data->bbox[2],
|
||||
Pp->data->bbox[3], Pp->data->bbox[4], Pp->data->bbox[5],
|
||||
dT_lev,
|
||||
cg->fgfs[phi0->sgfn],
|
||||
cg->fgfs[Lap0->sgfn],
|
||||
cg->fgfs[varl0->data->sgfn], cg->fgfs[varl->data->sgfn],
|
||||
varl0->data->SoA,
|
||||
Symmetry, pre);
|
||||
#endif
|
||||
|
||||
varl0 = varl0->next;
|
||||
varl = varl->next;
|
||||
varlrhs = varlrhs->next;
|
||||
}
|
||||
}
|
||||
f_lowerboundset(cg->shape, cg->fgfs[phi->sgfn], chitiny);
|
||||
}
|
||||
if (BP == Pp->data->ble)
|
||||
break;
|
||||
@@ -3469,102 +3545,148 @@ void bssn_class::Step(int lev, int YN)
|
||||
cg->fgfs[Ayy->sgfn], cg->fgfs[Ayz->sgfn], cg->fgfs[Azz->sgfn]);
|
||||
#endif
|
||||
|
||||
if (f_compute_rhs_bssn(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2],
|
||||
cg->fgfs[phi->sgfn], cg->fgfs[trK->sgfn],
|
||||
cg->fgfs[gxx->sgfn], cg->fgfs[gxy->sgfn], cg->fgfs[gxz->sgfn],
|
||||
cg->fgfs[gyy->sgfn], cg->fgfs[gyz->sgfn], cg->fgfs[gzz->sgfn],
|
||||
cg->fgfs[Axx->sgfn], cg->fgfs[Axy->sgfn], cg->fgfs[Axz->sgfn],
|
||||
cg->fgfs[Ayy->sgfn], cg->fgfs[Ayz->sgfn], cg->fgfs[Azz->sgfn],
|
||||
cg->fgfs[Gmx->sgfn], cg->fgfs[Gmy->sgfn], cg->fgfs[Gmz->sgfn],
|
||||
cg->fgfs[Lap->sgfn],
|
||||
cg->fgfs[Sfx->sgfn], cg->fgfs[Sfy->sgfn], cg->fgfs[Sfz->sgfn],
|
||||
cg->fgfs[dtSfx->sgfn], cg->fgfs[dtSfy->sgfn], cg->fgfs[dtSfz->sgfn],
|
||||
cg->fgfs[phi1->sgfn], cg->fgfs[trK1->sgfn],
|
||||
cg->fgfs[gxx1->sgfn], cg->fgfs[gxy1->sgfn], cg->fgfs[gxz1->sgfn],
|
||||
cg->fgfs[gyy1->sgfn], cg->fgfs[gyz1->sgfn], cg->fgfs[gzz1->sgfn],
|
||||
cg->fgfs[Axx1->sgfn], cg->fgfs[Axy1->sgfn], cg->fgfs[Axz1->sgfn],
|
||||
cg->fgfs[Ayy1->sgfn], cg->fgfs[Ayz1->sgfn], cg->fgfs[Azz1->sgfn],
|
||||
cg->fgfs[Gmx1->sgfn], cg->fgfs[Gmy1->sgfn], cg->fgfs[Gmz1->sgfn],
|
||||
cg->fgfs[Lap1->sgfn],
|
||||
cg->fgfs[Sfx1->sgfn], cg->fgfs[Sfy1->sgfn], cg->fgfs[Sfz1->sgfn],
|
||||
cg->fgfs[dtSfx1->sgfn], cg->fgfs[dtSfy1->sgfn], cg->fgfs[dtSfz1->sgfn],
|
||||
cg->fgfs[rho->sgfn], cg->fgfs[Sx->sgfn], cg->fgfs[Sy->sgfn], cg->fgfs[Sz->sgfn],
|
||||
cg->fgfs[Sxx->sgfn], cg->fgfs[Sxy->sgfn], cg->fgfs[Sxz->sgfn],
|
||||
cg->fgfs[Syy->sgfn], cg->fgfs[Syz->sgfn], cg->fgfs[Szz->sgfn],
|
||||
cg->fgfs[Gamxxx->sgfn], cg->fgfs[Gamxxy->sgfn], cg->fgfs[Gamxxz->sgfn],
|
||||
cg->fgfs[Gamxyy->sgfn], cg->fgfs[Gamxyz->sgfn], cg->fgfs[Gamxzz->sgfn],
|
||||
cg->fgfs[Gamyxx->sgfn], cg->fgfs[Gamyxy->sgfn], cg->fgfs[Gamyxz->sgfn],
|
||||
cg->fgfs[Gamyyy->sgfn], cg->fgfs[Gamyyz->sgfn], cg->fgfs[Gamyzz->sgfn],
|
||||
cg->fgfs[Gamzxx->sgfn], cg->fgfs[Gamzxy->sgfn], cg->fgfs[Gamzxz->sgfn],
|
||||
cg->fgfs[Gamzyy->sgfn], cg->fgfs[Gamzyz->sgfn], cg->fgfs[Gamzzz->sgfn],
|
||||
cg->fgfs[Rxx->sgfn], cg->fgfs[Rxy->sgfn], cg->fgfs[Rxz->sgfn],
|
||||
cg->fgfs[Ryy->sgfn], cg->fgfs[Ryz->sgfn], cg->fgfs[Rzz->sgfn],
|
||||
cg->fgfs[Cons_Ham->sgfn],
|
||||
cg->fgfs[Cons_Px->sgfn], cg->fgfs[Cons_Py->sgfn], cg->fgfs[Cons_Pz->sgfn],
|
||||
cg->fgfs[Cons_Gx->sgfn], cg->fgfs[Cons_Gy->sgfn], cg->fgfs[Cons_Gz->sgfn],
|
||||
Symmetry, lev, ndeps, cor))
|
||||
{
|
||||
cout << "find NaN in domain: ("
|
||||
<< cg->bbox[0] << ":" << cg->bbox[3] << ","
|
||||
<< cg->bbox[1] << ":" << cg->bbox[4] << ","
|
||||
<< cg->bbox[2] << ":" << cg->bbox[5] << ")" << endl;
|
||||
ERROR = 1;
|
||||
}
|
||||
// rk4 substep and boundary
|
||||
{
|
||||
MyList<var> *varl0 = StateList, *varl = SynchList_pre, *varl1 = SynchList_cor, *varlrhs = RHSList; // we do not check the correspondence here
|
||||
while (varl0)
|
||||
{
|
||||
#if (SommerType == 0)
|
||||
#ifndef WithShell
|
||||
if (lev == 0) // sommerfeld indeed
|
||||
f_sommerfeld_routbam(cg->shape, cg->X[0], cg->X[1], cg->X[2],
|
||||
Pp->data->bbox[0], Pp->data->bbox[1], Pp->data->bbox[2],
|
||||
Pp->data->bbox[3], Pp->data->bbox[4], Pp->data->bbox[5],
|
||||
cg->fgfs[varl1->data->sgfn],
|
||||
cg->fgfs[varl->data->sgfn], varl0->data->propspeed, varl0->data->SoA,
|
||||
Symmetry);
|
||||
#endif
|
||||
#endif
|
||||
f_rungekutta4_rout(cg->shape, dT_lev,
|
||||
cg->fgfs[varl0->data->sgfn],
|
||||
cg->fgfs[varl1->data->sgfn],
|
||||
cg->fgfs[varlrhs->data->sgfn],
|
||||
iter_count);
|
||||
|
||||
#ifndef WithShell
|
||||
if (lev > 0) // fix BD point
|
||||
#endif
|
||||
f_sommerfeld_rout(cg->shape, cg->X[0], cg->X[1], cg->X[2],
|
||||
Pp->data->bbox[0], Pp->data->bbox[1], Pp->data->bbox[2],
|
||||
Pp->data->bbox[3], Pp->data->bbox[4], Pp->data->bbox[5],
|
||||
dT_lev,
|
||||
cg->fgfs[phi0->sgfn],
|
||||
cg->fgfs[Lap0->sgfn],
|
||||
cg->fgfs[varl0->data->sgfn], cg->fgfs[varl1->data->sgfn],
|
||||
varl0->data->SoA,
|
||||
Symmetry, cor);
|
||||
|
||||
#if (SommerType == 1)
|
||||
if (lev == 1) // shibata type sommerfeld
|
||||
f_sommerfeld_rout(cg->shape, cg->X[0], cg->X[1], cg->X[2],
|
||||
Pp->data->bbox[0], Pp->data->bbox[1], Pp->data->bbox[2],
|
||||
Pp->data->bbox[3], Pp->data->bbox[4], Pp->data->bbox[5],
|
||||
dT_lev,
|
||||
cg->fgfs[phi0->sgfn],
|
||||
cg->fgfs[Lap0->sgfn],
|
||||
cg->fgfs[varl->data->sgfn], cg->fgfs[varl1->data->sgfn],
|
||||
varl0->data->SoA,
|
||||
Symmetry, cor);
|
||||
#endif
|
||||
|
||||
varl0 = varl0->next;
|
||||
varl = varl->next;
|
||||
varl1 = varl1->next;
|
||||
varlrhs = varlrhs->next;
|
||||
}
|
||||
}
|
||||
f_lowerboundset(cg->shape, cg->fgfs[phi1->sgfn], chitiny);
|
||||
bool used_gpu_substep = false;
|
||||
#if USE_CUDA_BSSN
|
||||
{
|
||||
double *state_in[BSSN_CUDA_STATE_COUNT];
|
||||
double *state_out[BSSN_CUDA_STATE_COUNT];
|
||||
double *matter[BSSN_CUDA_MATTER_COUNT] = {
|
||||
cg->fgfs[rho->sgfn], cg->fgfs[Sx->sgfn], cg->fgfs[Sy->sgfn], cg->fgfs[Sz->sgfn],
|
||||
cg->fgfs[Sxx->sgfn], cg->fgfs[Sxy->sgfn], cg->fgfs[Sxz->sgfn],
|
||||
cg->fgfs[Syy->sgfn], cg->fgfs[Syz->sgfn], cg->fgfs[Szz->sgfn]};
|
||||
double propspeed[BSSN_CUDA_STATE_COUNT];
|
||||
double soa_flat[3 * BSSN_CUDA_STATE_COUNT];
|
||||
if (!fill_bssn_cuda_views(cg, SynchList_pre, state_in, propspeed, soa_flat) ||
|
||||
!fill_bssn_cuda_views(cg, SynchList_cor, state_out))
|
||||
{
|
||||
cout << "CUDA BSSN state list mismatch on corrector step" << endl;
|
||||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||||
}
|
||||
int apply_bam_bc = 0;
|
||||
#if (SommerType == 0)
|
||||
#ifndef WithShell
|
||||
apply_bam_bc = (lev == 0) ? 1 : 0;
|
||||
#endif
|
||||
#endif
|
||||
if (bssn_cuda_rk4_substep(cg,
|
||||
cg->shape, cg->X[0], cg->X[1], cg->X[2],
|
||||
state_in, state_out, matter,
|
||||
propspeed, soa_flat, Pp->data->bbox,
|
||||
dT_lev, TRK4, iter_count, apply_bam_bc,
|
||||
Symmetry, lev, ndeps, cor))
|
||||
{
|
||||
cout << "CUDA corrector substep failed in domain: ("
|
||||
<< cg->bbox[0] << ":" << cg->bbox[3] << ","
|
||||
<< cg->bbox[1] << ":" << cg->bbox[4] << ","
|
||||
<< cg->bbox[2] << ":" << cg->bbox[5] << ")" << endl;
|
||||
ERROR = 1;
|
||||
}
|
||||
used_gpu_substep = true;
|
||||
}
|
||||
#endif
|
||||
if (!used_gpu_substep)
|
||||
{
|
||||
if (f_compute_rhs_bssn(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2],
|
||||
cg->fgfs[phi->sgfn], cg->fgfs[trK->sgfn],
|
||||
cg->fgfs[gxx->sgfn], cg->fgfs[gxy->sgfn], cg->fgfs[gxz->sgfn],
|
||||
cg->fgfs[gyy->sgfn], cg->fgfs[gyz->sgfn], cg->fgfs[gzz->sgfn],
|
||||
cg->fgfs[Axx->sgfn], cg->fgfs[Axy->sgfn], cg->fgfs[Axz->sgfn],
|
||||
cg->fgfs[Ayy->sgfn], cg->fgfs[Ayz->sgfn], cg->fgfs[Azz->sgfn],
|
||||
cg->fgfs[Gmx->sgfn], cg->fgfs[Gmy->sgfn], cg->fgfs[Gmz->sgfn],
|
||||
cg->fgfs[Lap->sgfn],
|
||||
cg->fgfs[Sfx->sgfn], cg->fgfs[Sfy->sgfn], cg->fgfs[Sfz->sgfn],
|
||||
cg->fgfs[dtSfx->sgfn], cg->fgfs[dtSfy->sgfn], cg->fgfs[dtSfz->sgfn],
|
||||
cg->fgfs[phi1->sgfn], cg->fgfs[trK1->sgfn],
|
||||
cg->fgfs[gxx1->sgfn], cg->fgfs[gxy1->sgfn], cg->fgfs[gxz1->sgfn],
|
||||
cg->fgfs[gyy1->sgfn], cg->fgfs[gyz1->sgfn], cg->fgfs[gzz1->sgfn],
|
||||
cg->fgfs[Axx1->sgfn], cg->fgfs[Axy1->sgfn], cg->fgfs[Axz1->sgfn],
|
||||
cg->fgfs[Ayy1->sgfn], cg->fgfs[Ayz1->sgfn], cg->fgfs[Azz1->sgfn],
|
||||
cg->fgfs[Gmx1->sgfn], cg->fgfs[Gmy1->sgfn], cg->fgfs[Gmz1->sgfn],
|
||||
cg->fgfs[Lap1->sgfn],
|
||||
cg->fgfs[Sfx1->sgfn], cg->fgfs[Sfy1->sgfn], cg->fgfs[Sfz1->sgfn],
|
||||
cg->fgfs[dtSfx1->sgfn], cg->fgfs[dtSfy1->sgfn], cg->fgfs[dtSfz1->sgfn],
|
||||
cg->fgfs[rho->sgfn], cg->fgfs[Sx->sgfn], cg->fgfs[Sy->sgfn], cg->fgfs[Sz->sgfn],
|
||||
cg->fgfs[Sxx->sgfn], cg->fgfs[Sxy->sgfn], cg->fgfs[Sxz->sgfn],
|
||||
cg->fgfs[Syy->sgfn], cg->fgfs[Syz->sgfn], cg->fgfs[Szz->sgfn],
|
||||
cg->fgfs[Gamxxx->sgfn], cg->fgfs[Gamxxy->sgfn], cg->fgfs[Gamxxz->sgfn],
|
||||
cg->fgfs[Gamxyy->sgfn], cg->fgfs[Gamxyz->sgfn], cg->fgfs[Gamxzz->sgfn],
|
||||
cg->fgfs[Gamyxx->sgfn], cg->fgfs[Gamyxy->sgfn], cg->fgfs[Gamyxz->sgfn],
|
||||
cg->fgfs[Gamyyy->sgfn], cg->fgfs[Gamyyz->sgfn], cg->fgfs[Gamyzz->sgfn],
|
||||
cg->fgfs[Gamzxx->sgfn], cg->fgfs[Gamzxy->sgfn], cg->fgfs[Gamzxz->sgfn],
|
||||
cg->fgfs[Gamzyy->sgfn], cg->fgfs[Gamzyz->sgfn], cg->fgfs[Gamzzz->sgfn],
|
||||
cg->fgfs[Rxx->sgfn], cg->fgfs[Rxy->sgfn], cg->fgfs[Rxz->sgfn],
|
||||
cg->fgfs[Ryy->sgfn], cg->fgfs[Ryz->sgfn], cg->fgfs[Rzz->sgfn],
|
||||
cg->fgfs[Cons_Ham->sgfn],
|
||||
cg->fgfs[Cons_Px->sgfn], cg->fgfs[Cons_Py->sgfn], cg->fgfs[Cons_Pz->sgfn],
|
||||
cg->fgfs[Cons_Gx->sgfn], cg->fgfs[Cons_Gy->sgfn], cg->fgfs[Cons_Gz->sgfn],
|
||||
Symmetry, lev, ndeps, cor))
|
||||
{
|
||||
cout << "find NaN in domain: ("
|
||||
<< cg->bbox[0] << ":" << cg->bbox[3] << ","
|
||||
<< cg->bbox[1] << ":" << cg->bbox[4] << ","
|
||||
<< cg->bbox[2] << ":" << cg->bbox[5] << ")" << endl;
|
||||
ERROR = 1;
|
||||
}
|
||||
}
|
||||
// rk4 substep boundary fix
|
||||
{
|
||||
MyList<var> *varl0 = StateList, *varl = SynchList_pre, *varl1 = SynchList_cor, *varlrhs = RHSList;
|
||||
// we do not check the correspondence here
|
||||
while (varl0)
|
||||
{
|
||||
#if !USE_CUDA_BSSN
|
||||
#if (SommerType == 0)
|
||||
#ifndef WithShell
|
||||
if (lev == 0) // sommerfeld indeed
|
||||
f_sommerfeld_routbam(cg->shape, cg->X[0], cg->X[1], cg->X[2],
|
||||
Pp->data->bbox[0], Pp->data->bbox[1], Pp->data->bbox[2],
|
||||
Pp->data->bbox[3], Pp->data->bbox[4], Pp->data->bbox[5],
|
||||
cg->fgfs[varl1->data->sgfn],
|
||||
cg->fgfs[varl->data->sgfn],
|
||||
varl0->data->propspeed, varl0->data->SoA,
|
||||
Symmetry);
|
||||
#endif
|
||||
#endif
|
||||
f_rungekutta4_rout(cg->shape, dT_lev,
|
||||
cg->fgfs[varl0->data->sgfn],
|
||||
cg->fgfs[varl1->data->sgfn],
|
||||
cg->fgfs[varlrhs->data->sgfn],
|
||||
iter_count);
|
||||
#endif
|
||||
|
||||
#ifndef WithShell
|
||||
if (lev > 0) // fix BD point
|
||||
#endif
|
||||
f_sommerfeld_rout(cg->shape, cg->X[0], cg->X[1], cg->X[2],
|
||||
Pp->data->bbox[0], Pp->data->bbox[1], Pp->data->bbox[2],
|
||||
Pp->data->bbox[3], Pp->data->bbox[4], Pp->data->bbox[5],
|
||||
dT_lev,
|
||||
cg->fgfs[phi0->sgfn],
|
||||
cg->fgfs[Lap0->sgfn],
|
||||
cg->fgfs[varl0->data->sgfn], cg->fgfs[varl1->data->sgfn],
|
||||
varl0->data->SoA,
|
||||
Symmetry, cor);
|
||||
|
||||
#if (SommerType == 1)
|
||||
if (lev == 1) // shibata type sommerfeld
|
||||
f_sommerfeld_rout(cg->shape, cg->X[0], cg->X[1], cg->X[2],
|
||||
Pp->data->bbox[0], Pp->data->bbox[1], Pp->data->bbox[2],
|
||||
Pp->data->bbox[3], Pp->data->bbox[4], Pp->data->bbox[5],
|
||||
dT_lev,
|
||||
cg->fgfs[phi0->sgfn],
|
||||
cg->fgfs[Lap0->sgfn],
|
||||
cg->fgfs[varl->data->sgfn], cg->fgfs[varl1->data->sgfn],
|
||||
varl0->data->SoA,
|
||||
Symmetry, cor);
|
||||
#endif
|
||||
|
||||
varl0 = varl0->next;
|
||||
varl = varl->next;
|
||||
varl1 = varl1->next;
|
||||
varlrhs = varlrhs->next;
|
||||
}
|
||||
}
|
||||
f_lowerboundset(cg->shape, cg->fgfs[phi1->sgfn], chitiny);
|
||||
}
|
||||
if (BP == Pp->data->ble)
|
||||
break;
|
||||
|
||||
Reference in New Issue
Block a user