Optimize 6th-order CUDA AMR stencils

2026-05-07 19:22:37 +08:00
parent 9ff2f065be
commit 0076b3ca18
2 changed files with 90 additions and 26 deletions
--- a/AMSS_NCKU_source/Parallel.C
+++ b/AMSS_NCKU_source/Parallel.C
@@ -234,7 +234,12 @@ bool cuda_cell_gw3_restrict_params(const Parallel::gridseg *src,
                                   const Parallel::gridseg *dst,
                                   int first_fine[3])
 {
-#if USE_CUDA_BSSN && defined(Cell) && (ghost_width == 3)
+#if USE_CUDA_BSSN && defined(Cell) && ((ghost_width == 3) || (ghost_width == 4))
+#if ghost_width == 4
+  const int stencil_hi = 4;
+#else
+  const int stencil_hi = 3;
+#endif
  if (!src || !dst || !src->Bg || !dst->Bg)
    return false;
  for (int d = 0; d < dim; ++d)
@@ -260,7 +265,7 @@ bool cuda_cell_gw3_restrict_params(const Parallel::gridseg *src,
    first_fine[d] = 2 * lbc - lbf - 1;
    if (first_fine[d] < 0)
      return false;
-    if (first_fine[d] + 2 * (dst->shape[d] - 1) + 3 >= src->Bg->shape[d])
+    if (first_fine[d] + 2 * (dst->shape[d] - 1) + stencil_hi >= src->Bg->shape[d])
      return false;
  }
  return true;
@@ -275,7 +280,12 @@ bool cuda_cell_gw3_prolong_params(const Parallel::gridseg *src,
                                  int first_fine_ii[3],
                                  int coarse_lb[3])
 {
-#if USE_CUDA_BSSN && defined(Cell) && (ghost_width == 3)
+#if USE_CUDA_BSSN && defined(Cell) && ((ghost_width == 3) || (ghost_width == 4))
+#if ghost_width == 4
+  const int stencil_hi = 4;
+#else
+  const int stencil_hi = 3;
+#endif
  if (!src || !dst || !src->Bg || !dst->Bg)
    return false;
  for (int d = 0; d < dim; ++d)
@@ -305,7 +315,7 @@ bool cuda_cell_gw3_prolong_params(const Parallel::gridseg *src,
    const int last_coarse = last_fine_ii / 2 - coarse_lb[d];
    if (first_coarse < -1)
      return false;
-    if (last_coarse + 3 >= src->Bg->shape[d])
+    if (last_coarse + stencil_hi >= src->Bg->shape[d])
      return false;
  }
  return true;