Optimize 6th-order CUDA AMR stencils

This commit is contained in:
2026-05-07 19:22:37 +08:00
parent 9ff2f065be
commit 0076b3ca18
2 changed files with 90 additions and 26 deletions

View File

@@ -234,7 +234,12 @@ bool cuda_cell_gw3_restrict_params(const Parallel::gridseg *src,
const Parallel::gridseg *dst,
int first_fine[3])
{
-#if USE_CUDA_BSSN && defined(Cell) && (ghost_width == 3)
+#if USE_CUDA_BSSN && defined(Cell) && ((ghost_width == 3) || (ghost_width == 4))
+#if ghost_width == 4
+const int stencil_hi = 4;
+#else
+const int stencil_hi = 3;
+#endif
if (!src || !dst || !src->Bg || !dst->Bg)
return false;
for (int d = 0; d < dim; ++d)
@@ -260,7 +265,7 @@ bool cuda_cell_gw3_restrict_params(const Parallel::gridseg *src,
first_fine[d] = 2 * lbc - lbf - 1;
if (first_fine[d] < 0)
return false;
-if (first_fine[d] + 2 * (dst->shape[d] - 1) + 3 >= src->Bg->shape[d])
+if (first_fine[d] + 2 * (dst->shape[d] - 1) + stencil_hi >= src->Bg->shape[d])
return false;
}
return true;
@@ -275,7 +280,12 @@ bool cuda_cell_gw3_prolong_params(const Parallel::gridseg *src,
int first_fine_ii[3],
int coarse_lb[3])
{
-#if USE_CUDA_BSSN && defined(Cell) && (ghost_width == 3)
+#if USE_CUDA_BSSN && defined(Cell) && ((ghost_width == 3) || (ghost_width == 4))
+#if ghost_width == 4
+const int stencil_hi = 4;
+#else
+const int stencil_hi = 3;
+#endif
if (!src || !dst || !src->Bg || !dst->Bg)
return false;
for (int d = 0; d < dim; ++d)
@@ -305,7 +315,7 @@ bool cuda_cell_gw3_prolong_params(const Parallel::gridseg *src,
const int last_coarse = last_fine_ii / 2 - coarse_lb[d];
if (first_coarse < -1)
return false;
-if (last_coarse + 3 >= src->Bg->shape[d])
+if (last_coarse + stencil_hi >= src->Bg->shape[d])
return false;
}
return true;