Optimize BSSN EScalar GPU path baseline
This commit is contained in:
@@ -15,10 +15,13 @@ using namespace std;
|
||||
#include "misc.h"
|
||||
#include "Ansorg.h"
|
||||
#include "fmisc.h"
|
||||
#include "Parallel.h"
|
||||
#include "bssnEM_class.h"
|
||||
#include "bssn_rhs.h"
|
||||
#include "empart.h"
|
||||
#include "Parallel.h"
|
||||
#include "bssnEM_class.h"
|
||||
#include "bssn_rhs.h"
|
||||
#if USE_CUDA_BSSN
|
||||
#include "bssn_rhs_cuda.h"
|
||||
#endif
|
||||
#include "empart.h"
|
||||
#include "initial_puncture.h"
|
||||
#include "initial_maxwell.h"
|
||||
#include "enforce_algebra.h"
|
||||
@@ -32,11 +35,111 @@ using namespace std;
|
||||
#ifdef With_AHF
|
||||
#include "derivatives.h"
|
||||
#include "myglobal.h"
|
||||
#endif
|
||||
|
||||
//================================================================================================
|
||||
|
||||
// Define bssnEM_class
|
||||
#endif
|
||||
|
||||
//================================================================================================
|
||||
|
||||
#if USE_CUDA_BSSN
|
||||
namespace {
|
||||
|
||||
bool fill_bssn_cuda_views_prefix(Block *cg, MyList<var> *vars,
|
||||
double **host_views,
|
||||
double *propspeeds = nullptr,
|
||||
double *soa_flat = nullptr)
|
||||
{
|
||||
int idx = 0;
|
||||
while (vars && idx < BSSN_CUDA_STATE_COUNT)
|
||||
{
|
||||
host_views[idx] = cg->fgfs[vars->data->sgfn];
|
||||
if (propspeeds)
|
||||
propspeeds[idx] = vars->data->propspeed;
|
||||
if (soa_flat)
|
||||
{
|
||||
soa_flat[3 * idx + 0] = vars->data->SoA[0];
|
||||
soa_flat[3 * idx + 1] = vars->data->SoA[1];
|
||||
soa_flat[3 * idx + 2] = vars->data->SoA[2];
|
||||
}
|
||||
vars = vars->next;
|
||||
++idx;
|
||||
}
|
||||
return idx == BSSN_CUDA_STATE_COUNT;
|
||||
}
|
||||
|
||||
void skip_bssn_cuda_prefix(MyList<var> *&a, MyList<var> *&b, MyList<var> *&c)
|
||||
{
|
||||
for (int i = 0; i < BSSN_CUDA_STATE_COUNT && a && b && c; ++i)
|
||||
{
|
||||
a = a->next;
|
||||
b = b->next;
|
||||
c = c->next;
|
||||
}
|
||||
}
|
||||
|
||||
void skip_bssn_cuda_prefix(MyList<var> *&a, MyList<var> *&b,
|
||||
MyList<var> *&c, MyList<var> *&d)
|
||||
{
|
||||
for (int i = 0; i < BSSN_CUDA_STATE_COUNT && a && b && c && d; ++i)
|
||||
{
|
||||
a = a->next;
|
||||
b = b->next;
|
||||
c = c->next;
|
||||
d = d->next;
|
||||
}
|
||||
}
|
||||
|
||||
int run_bssn_em_cuda_substep(Block *cg,
|
||||
MyList<var> *state_in_list,
|
||||
MyList<var> *state_out_list,
|
||||
Patch *patch,
|
||||
double &dT_lev,
|
||||
double &TRK4,
|
||||
int &iter_count,
|
||||
int &Symmetry,
|
||||
int lev,
|
||||
double &ndeps,
|
||||
int &co,
|
||||
double &chitiny,
|
||||
var *rho, var *Sx, var *Sy, var *Sz,
|
||||
var *Sxx, var *Sxy, var *Sxz,
|
||||
var *Syy, var *Syz, var *Szz)
|
||||
{
|
||||
double *state_in[BSSN_CUDA_STATE_COUNT];
|
||||
double *state_out[BSSN_CUDA_STATE_COUNT];
|
||||
double *matter[BSSN_CUDA_MATTER_COUNT] = {
|
||||
cg->fgfs[rho->sgfn], cg->fgfs[Sx->sgfn], cg->fgfs[Sy->sgfn], cg->fgfs[Sz->sgfn],
|
||||
cg->fgfs[Sxx->sgfn], cg->fgfs[Sxy->sgfn], cg->fgfs[Sxz->sgfn],
|
||||
cg->fgfs[Syy->sgfn], cg->fgfs[Syz->sgfn], cg->fgfs[Szz->sgfn]};
|
||||
double propspeed[BSSN_CUDA_STATE_COUNT];
|
||||
double soa_flat[3 * BSSN_CUDA_STATE_COUNT];
|
||||
if (!fill_bssn_cuda_views_prefix(cg, state_in_list, state_in, propspeed, soa_flat) ||
|
||||
!fill_bssn_cuda_views_prefix(cg, state_out_list, state_out))
|
||||
return 1;
|
||||
|
||||
int apply_bam_bc = 0;
|
||||
#if (SommerType == 0)
|
||||
#ifndef WithShell
|
||||
apply_bam_bc = (lev == 0) ? 1 : 0;
|
||||
#endif
|
||||
#endif
|
||||
int use_zero_matter = 0;
|
||||
int keep_resident_state = 0;
|
||||
int apply_enforce_ga = 0;
|
||||
return bssn_cuda_rk4_substep(cg,
|
||||
cg->shape, cg->X[0], cg->X[1], cg->X[2],
|
||||
state_in, state_out, matter,
|
||||
propspeed, soa_flat, patch->bbox,
|
||||
dT_lev, TRK4, iter_count, apply_bam_bc,
|
||||
Symmetry, lev, ndeps, co,
|
||||
use_zero_matter,
|
||||
keep_resident_state, apply_enforce_ga, chitiny);
|
||||
}
|
||||
|
||||
}
|
||||
#endif
|
||||
|
||||
//================================================================================================
|
||||
|
||||
// Define bssnEM_class
|
||||
|
||||
// It inherits some members and methods from the parent class bssn_class and modifies others.
|
||||
// The modified members and methods are defined below (and in the header bssnEM_class.h).
|
||||
@@ -853,10 +956,11 @@ void bssnEM_class::Step(int lev, int YN)
|
||||
cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn],
|
||||
cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn],
|
||||
cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn]);
|
||||
#endif
|
||||
|
||||
if (
|
||||
f_compute_rhs_empart(cg->shape, cg->X[0], cg->X[1], cg->X[2],
|
||||
#endif
|
||||
|
||||
bool used_gpu_substep = false;
|
||||
if (
|
||||
f_compute_rhs_empart(cg->shape, cg->X[0], cg->X[1], cg->X[2],
|
||||
cg->fgfs[phi0->sgfn],
|
||||
cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn],
|
||||
cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn],
|
||||
@@ -873,11 +977,20 @@ void bssnEM_class::Step(int lev, int YN)
|
||||
cg->fgfs[Kpsi_rhs->sgfn], cg->fgfs[Kphi_rhs->sgfn],
|
||||
cg->fgfs[rho->sgfn],
|
||||
cg->fgfs[Sx->sgfn], cg->fgfs[Sy->sgfn], cg->fgfs[Sz->sgfn],
|
||||
cg->fgfs[Sxx->sgfn], cg->fgfs[Sxy->sgfn], cg->fgfs[Sxz->sgfn],
|
||||
cg->fgfs[Syy->sgfn], cg->fgfs[Syz->sgfn], cg->fgfs[Szz->sgfn],
|
||||
Symmetry, lev, ndeps) ||
|
||||
f_compute_rhs_bssn(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2],
|
||||
cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn],
|
||||
cg->fgfs[Sxx->sgfn], cg->fgfs[Sxy->sgfn], cg->fgfs[Sxz->sgfn],
|
||||
cg->fgfs[Syy->sgfn], cg->fgfs[Syz->sgfn], cg->fgfs[Szz->sgfn],
|
||||
Symmetry, lev, ndeps) ||
|
||||
#if USE_CUDA_BSSN
|
||||
((used_gpu_substep =
|
||||
(run_bssn_em_cuda_substep(cg, StateList, SynchList_pre, Pp->data,
|
||||
dT_lev, TRK4, iter_count, Symmetry, lev,
|
||||
ndeps, pre, chitiny,
|
||||
rho, Sx, Sy, Sz, Sxx, Sxy, Sxz, Syy, Syz, Szz) == 0))
|
||||
? 0
|
||||
: 1) ||
|
||||
#endif
|
||||
(!used_gpu_substep && f_compute_rhs_bssn(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2],
|
||||
cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn],
|
||||
cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn],
|
||||
cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn],
|
||||
cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn],
|
||||
@@ -906,10 +1019,10 @@ void bssnEM_class::Step(int lev, int YN)
|
||||
cg->fgfs[Gamzyy->sgfn], cg->fgfs[Gamzyz->sgfn], cg->fgfs[Gamzzz->sgfn],
|
||||
cg->fgfs[Rxx->sgfn], cg->fgfs[Rxy->sgfn], cg->fgfs[Rxz->sgfn],
|
||||
cg->fgfs[Ryy->sgfn], cg->fgfs[Ryz->sgfn], cg->fgfs[Rzz->sgfn],
|
||||
cg->fgfs[Cons_Ham->sgfn],
|
||||
cg->fgfs[Cons_Px->sgfn], cg->fgfs[Cons_Py->sgfn], cg->fgfs[Cons_Pz->sgfn],
|
||||
cg->fgfs[Cons_Gx->sgfn], cg->fgfs[Cons_Gy->sgfn], cg->fgfs[Cons_Gz->sgfn],
|
||||
Symmetry, lev, ndeps, pre))
|
||||
cg->fgfs[Cons_Ham->sgfn],
|
||||
cg->fgfs[Cons_Px->sgfn], cg->fgfs[Cons_Py->sgfn], cg->fgfs[Cons_Pz->sgfn],
|
||||
cg->fgfs[Cons_Gx->sgfn], cg->fgfs[Cons_Gy->sgfn], cg->fgfs[Cons_Gz->sgfn],
|
||||
Symmetry, lev, ndeps, pre)))
|
||||
{
|
||||
cout << "find NaN in domain: ("
|
||||
<< cg->bbox[0] << ":" << cg->bbox[3] << ","
|
||||
@@ -919,11 +1032,15 @@ void bssnEM_class::Step(int lev, int YN)
|
||||
}
|
||||
|
||||
// rk4 substep and boundary
|
||||
{
|
||||
MyList<var> *varl0 = StateList, *varl = SynchList_pre, *varlrhs = RHSList;
|
||||
// we do not check the correspondence here
|
||||
|
||||
while (varl0)
|
||||
{
|
||||
MyList<var> *varl0 = StateList, *varl = SynchList_pre, *varlrhs = RHSList;
|
||||
// we do not check the correspondence here
|
||||
#if USE_CUDA_BSSN
|
||||
if (used_gpu_substep)
|
||||
skip_bssn_cuda_prefix(varl0, varl, varlrhs);
|
||||
#endif
|
||||
|
||||
while (varl0)
|
||||
{
|
||||
#ifndef WithShell
|
||||
if (lev == 0) // sommerfeld indeed
|
||||
@@ -1309,10 +1426,11 @@ void bssnEM_class::Step(int lev, int YN)
|
||||
cg->fgfs[gyy->sgfn], cg->fgfs[gyz->sgfn], cg->fgfs[gzz->sgfn],
|
||||
cg->fgfs[Axx->sgfn], cg->fgfs[Axy->sgfn], cg->fgfs[Axz->sgfn],
|
||||
cg->fgfs[Ayy->sgfn], cg->fgfs[Ayz->sgfn], cg->fgfs[Azz->sgfn]);
|
||||
#endif
|
||||
|
||||
if (
|
||||
f_compute_rhs_empart(cg->shape, cg->X[0], cg->X[1], cg->X[2],
|
||||
#endif
|
||||
|
||||
bool used_gpu_substep = false;
|
||||
if (
|
||||
f_compute_rhs_empart(cg->shape, cg->X[0], cg->X[1], cg->X[2],
|
||||
cg->fgfs[phi->sgfn],
|
||||
cg->fgfs[gxx->sgfn], cg->fgfs[gxy->sgfn], cg->fgfs[gxz->sgfn],
|
||||
cg->fgfs[gyy->sgfn], cg->fgfs[gyz->sgfn], cg->fgfs[gzz->sgfn],
|
||||
@@ -1329,11 +1447,20 @@ void bssnEM_class::Step(int lev, int YN)
|
||||
cg->fgfs[Kpsi1->sgfn], cg->fgfs[Kphi1->sgfn],
|
||||
cg->fgfs[rho->sgfn],
|
||||
cg->fgfs[Sx->sgfn], cg->fgfs[Sy->sgfn], cg->fgfs[Sz->sgfn],
|
||||
cg->fgfs[Sxx->sgfn], cg->fgfs[Sxy->sgfn], cg->fgfs[Sxz->sgfn],
|
||||
cg->fgfs[Syy->sgfn], cg->fgfs[Syz->sgfn], cg->fgfs[Szz->sgfn],
|
||||
Symmetry, lev, ndeps) ||
|
||||
f_compute_rhs_bssn(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2],
|
||||
cg->fgfs[phi->sgfn], cg->fgfs[trK->sgfn],
|
||||
cg->fgfs[Sxx->sgfn], cg->fgfs[Sxy->sgfn], cg->fgfs[Sxz->sgfn],
|
||||
cg->fgfs[Syy->sgfn], cg->fgfs[Syz->sgfn], cg->fgfs[Szz->sgfn],
|
||||
Symmetry, lev, ndeps) ||
|
||||
#if USE_CUDA_BSSN
|
||||
((used_gpu_substep =
|
||||
(run_bssn_em_cuda_substep(cg, SynchList_pre, SynchList_cor, Pp->data,
|
||||
dT_lev, TRK4, iter_count, Symmetry, lev,
|
||||
ndeps, cor, chitiny,
|
||||
rho, Sx, Sy, Sz, Sxx, Sxy, Sxz, Syy, Syz, Szz) == 0))
|
||||
? 0
|
||||
: 1) ||
|
||||
#endif
|
||||
(!used_gpu_substep && f_compute_rhs_bssn(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2],
|
||||
cg->fgfs[phi->sgfn], cg->fgfs[trK->sgfn],
|
||||
cg->fgfs[gxx->sgfn], cg->fgfs[gxy->sgfn], cg->fgfs[gxz->sgfn],
|
||||
cg->fgfs[gyy->sgfn], cg->fgfs[gyz->sgfn], cg->fgfs[gzz->sgfn],
|
||||
cg->fgfs[Axx->sgfn], cg->fgfs[Axy->sgfn], cg->fgfs[Axz->sgfn],
|
||||
@@ -1361,10 +1488,10 @@ void bssnEM_class::Step(int lev, int YN)
|
||||
cg->fgfs[Gamzyy->sgfn], cg->fgfs[Gamzyz->sgfn], cg->fgfs[Gamzzz->sgfn],
|
||||
cg->fgfs[Rxx->sgfn], cg->fgfs[Rxy->sgfn], cg->fgfs[Rxz->sgfn],
|
||||
cg->fgfs[Ryy->sgfn], cg->fgfs[Ryz->sgfn], cg->fgfs[Rzz->sgfn],
|
||||
cg->fgfs[Cons_Ham->sgfn],
|
||||
cg->fgfs[Cons_Px->sgfn], cg->fgfs[Cons_Py->sgfn], cg->fgfs[Cons_Pz->sgfn],
|
||||
cg->fgfs[Cons_Gx->sgfn], cg->fgfs[Cons_Gy->sgfn], cg->fgfs[Cons_Gz->sgfn],
|
||||
Symmetry, lev, ndeps, cor))
|
||||
cg->fgfs[Cons_Ham->sgfn],
|
||||
cg->fgfs[Cons_Px->sgfn], cg->fgfs[Cons_Py->sgfn], cg->fgfs[Cons_Pz->sgfn],
|
||||
cg->fgfs[Cons_Gx->sgfn], cg->fgfs[Cons_Gy->sgfn], cg->fgfs[Cons_Gz->sgfn],
|
||||
Symmetry, lev, ndeps, cor)))
|
||||
{
|
||||
cout << "find NaN in domain: ("
|
||||
<< cg->bbox[0] << ":" << cg->bbox[3] << ","
|
||||
@@ -1373,11 +1500,15 @@ void bssnEM_class::Step(int lev, int YN)
|
||||
ERROR = 1;
|
||||
}
|
||||
// rk4 substep and boundary
|
||||
{
|
||||
MyList<var> *varl0 = StateList, *varl = SynchList_pre, *varl1 = SynchList_cor, *varlrhs = RHSList;
|
||||
// we do not check the correspondence here
|
||||
|
||||
while (varl0)
|
||||
{
|
||||
MyList<var> *varl0 = StateList, *varl = SynchList_pre, *varl1 = SynchList_cor, *varlrhs = RHSList;
|
||||
// we do not check the correspondence here
|
||||
#if USE_CUDA_BSSN
|
||||
if (used_gpu_substep)
|
||||
skip_bssn_cuda_prefix(varl0, varl, varl1, varlrhs);
|
||||
#endif
|
||||
|
||||
while (varl0)
|
||||
{
|
||||
#ifndef WithShell
|
||||
if (lev == 0) // sommerfeld indeed
|
||||
|
||||
Reference in New Issue
Block a user