Optimize 6th-order CUDA AMR stencils
This commit is contained in:
@@ -234,7 +234,12 @@ bool cuda_cell_gw3_restrict_params(const Parallel::gridseg *src,
|
||||
const Parallel::gridseg *dst,
|
||||
int first_fine[3])
|
||||
{
|
||||
-#if USE_CUDA_BSSN && defined(Cell) && (ghost_width == 3)
|
||||
+#if USE_CUDA_BSSN && defined(Cell) && ((ghost_width == 3) || (ghost_width == 4))
|
||||
+#if ghost_width == 4
|
||||
+const int stencil_hi = 4;
|
||||
+#else
|
||||
+const int stencil_hi = 3;
|
||||
+#endif
|
||||
if (!src || !dst || !src->Bg || !dst->Bg)
|
||||
return false;
|
||||
for (int d = 0; d < dim; ++d)
|
||||
@@ -260,7 +265,7 @@ bool cuda_cell_gw3_restrict_params(const Parallel::gridseg *src,
|
||||
first_fine[d] = 2 * lbc - lbf - 1;
|
||||
if (first_fine[d] < 0)
|
||||
return false;
|
||||
-if (first_fine[d] + 2 * (dst->shape[d] - 1) + 3 >= src->Bg->shape[d])
|
||||
+if (first_fine[d] + 2 * (dst->shape[d] - 1) + stencil_hi >= src->Bg->shape[d])
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
@@ -275,7 +280,12 @@ bool cuda_cell_gw3_prolong_params(const Parallel::gridseg *src,
|
||||
int first_fine_ii[3],
|
||||
int coarse_lb[3])
|
||||
{
|
||||
-#if USE_CUDA_BSSN && defined(Cell) && (ghost_width == 3)
|
||||
+#if USE_CUDA_BSSN && defined(Cell) && ((ghost_width == 3) || (ghost_width == 4))
|
||||
+#if ghost_width == 4
|
||||
+const int stencil_hi = 4;
|
||||
+#else
|
||||
+const int stencil_hi = 3;
|
||||
+#endif
|
||||
if (!src || !dst || !src->Bg || !dst->Bg)
|
||||
return false;
|
||||
for (int d = 0; d < dim; ++d)
|
||||
@@ -305,7 +315,7 @@ bool cuda_cell_gw3_prolong_params(const Parallel::gridseg *src,
|
||||
const int last_coarse = last_fine_ii / 2 - coarse_lb[d];
|
||||
if (first_coarse < -1)
|
||||
return false;
|
||||
-if (last_coarse + 3 >= src->Bg->shape[d])
|
||||
+if (last_coarse + stencil_hi >= src->Bg->shape[d])
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
|
||||
Reference in New Issue
Block a user