diff --git a/AMSS_NCKU_source/fdderivs_c.C b/AMSS_NCKU_source/fdderivs_c.C index 5e2f298..4ae31d4 100644 --- a/AMSS_NCKU_source/fdderivs_c.C +++ b/AMSS_NCKU_source/fdderivs_c.C @@ -141,12 +141,26 @@ void fdderivs(const int ex[3], const int j4_hi = ex2 - 3; const int k4_hi = ex3 - 3; + /* + * Strategy A: + * Avoid redundant work in overlap of 2nd/4th-order regions. + * Only compute 2nd-order on shell points that are NOT overwritten by + * the 4th-order pass. + */ + const int has4 = (i4_lo <= i4_hi && j4_lo <= j4_hi && k4_lo <= k4_hi); + if (i2_lo <= i2_hi && j2_lo <= j2_hi && k2_lo <= k2_hi) { for (int k0 = k2_lo; k0 <= k2_hi; ++k0) { const int kF = k0 + 1; for (int j0 = j2_lo; j0 <= j2_hi; ++j0) { const int jF = j0 + 1; for (int i0 = i2_lo; i0 <= i2_hi; ++i0) { + if (has4 && + i0 >= i4_lo && i0 <= i4_hi && + j0 >= j4_lo && j0 <= j4_hi && + k0 >= k4_lo && k0 <= k4_hi) { + continue; + } const int iF = i0 + 1; const size_t p = idx_ex(i0, j0, k0, ex); @@ -193,7 +207,7 @@ void fdderivs(const int ex[3], } } - if (i4_lo <= i4_hi && j4_lo <= j4_hi && k4_lo <= k4_hi) { + if (has4) { for (int k0 = k4_lo; k0 <= k4_hi; ++k0) { const int kF = k0 + 1; for (int j0 = j4_lo; j0 <= j4_hi; ++j0) {