Refine GPU runtime controls and input checker
This commit is contained in:
@@ -2792,12 +2792,13 @@ void kern_escalar_sources(
|
||||
double * __restrict__ Sxz,
|
||||
double * __restrict__ Syy,
|
||||
double * __restrict__ Syz,
|
||||
double * __restrict__ Szz)
|
||||
double * __restrict__ Szz,
|
||||
double escalar_a2)
|
||||
{
|
||||
constexpr double PI_V = 3.141592653589793238462643383279502884;
|
||||
constexpr double TWO = 2.0;
|
||||
constexpr double HALF = 0.5;
|
||||
constexpr double A2 = 3.0;
|
||||
const double A2 = escalar_a2;
|
||||
|
||||
for (int i = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
i < d_gp.all;
|
||||
@@ -2852,7 +2853,7 @@ void kern_escalar_sources(
|
||||
}
|
||||
}
|
||||
|
||||
static void gpu_escalar_sources(int all)
|
||||
static void gpu_escalar_sources(int all, double escalar_a2)
|
||||
{
|
||||
#define D(s) g_buf.slot[s]
|
||||
gpu_fderivs(D(S_Sphi), D(S_Sphi_x), D(S_Sphi_y), D(S_Sphi_z), 1.0, 1.0, 1.0, all);
|
||||
@@ -2872,7 +2873,8 @@ static void gpu_escalar_sources(int all)
|
||||
D(S_Sphi_yy), D(S_Sphi_yz), D(S_Sphi_zz),
|
||||
D(S_Sphi_rhs), D(S_Spi_rhs),
|
||||
D(S_rho), D(S_Sx), D(S_Sy), D(S_Sz),
|
||||
D(S_Sxx), D(S_Sxy), D(S_Sxz), D(S_Syy), D(S_Syz), D(S_Szz));
|
||||
D(S_Sxx), D(S_Sxy), D(S_Sxz), D(S_Syy), D(S_Syz), D(S_Szz),
|
||||
escalar_a2);
|
||||
#undef D
|
||||
}
|
||||
|
||||
@@ -6571,7 +6573,8 @@ static int active_or_keyed_bank(StepContext &ctx,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void launch_rhs_pipeline(int all, double eps, int co, bool compute_escalar = false)
|
||||
static void launch_rhs_pipeline(int all, double eps, int co, bool compute_escalar = false,
|
||||
double escalar_a2 = 3.0)
|
||||
{
|
||||
const double SYM = 1.0;
|
||||
const double ANTI = -1.0;
|
||||
@@ -6652,7 +6655,7 @@ static void launch_rhs_pipeline(int all, double eps, int co, bool compute_escala
|
||||
D(S_gupyy), D(S_gupyz), D(S_gupzz));
|
||||
|
||||
if (compute_escalar) {
|
||||
gpu_escalar_sources(all);
|
||||
gpu_escalar_sources(all, escalar_a2);
|
||||
gpu_fderivs(D(S_trK), D(S_trK_x), D(S_trK_y), D(S_trK_z), SYM, SYM, SYM, all);
|
||||
}
|
||||
|
||||
@@ -7127,9 +7130,8 @@ int bssn_escalar_cuda_rk4_substep(void *block_tag,
|
||||
#ifdef fortran3
|
||||
set_escalar_parameter_(escalar_a2, escalar_phi0, escalar_r0, escalar_sigma0, escalar_l2);
|
||||
#endif
|
||||
if (fabs(escalar_a2 - 3.0) > 1.0e-12 && g_dispatch.my_rank == 0) {
|
||||
fprintf(stderr, "CUDA BSSN-EScalar currently supports FR a2=3 for EScalar_CC=2/3; got %.17g\n",
|
||||
escalar_a2);
|
||||
if (fabs(escalar_a2) <= 1.0e-300 && g_dispatch.my_rank == 0) {
|
||||
fprintf(stderr, "CUDA BSSN-EScalar requires nonzero FR a2; got %.17g\n", escalar_a2);
|
||||
return 1;
|
||||
}
|
||||
|
||||
@@ -7187,7 +7189,7 @@ int bssn_escalar_cuda_rk4_substep(void *block_tag,
|
||||
}
|
||||
}
|
||||
|
||||
launch_rhs_pipeline((int)all, eps, co, true);
|
||||
launch_rhs_pipeline((int)all, eps, co, true, escalar_a2);
|
||||
|
||||
if (apply_bam_bc) {
|
||||
for (int i = 0; i < BSSN_ESCALAR_STATE_COUNT; ++i) {
|
||||
@@ -7250,7 +7252,7 @@ int bssn_escalar_cuda_compute_constraints(int *ex, double *X, double *Y, double
|
||||
const size_t bytes = all * sizeof(double);
|
||||
setup_grid_params(ex, X, Y, Z, Symmetry, eps, 0);
|
||||
upload_escalar_state_inputs(state_host_in, all);
|
||||
launch_rhs_pipeline((int)all, eps, 0, true);
|
||||
launch_rhs_pipeline((int)all, eps, 0, true, escalar_a2);
|
||||
|
||||
#define D(s) g_buf.slot[s]
|
||||
kern_escalar_constraint_fr<<<grid(all), BLK>>>(
|
||||
|
||||
Reference in New Issue
Block a user