Optimize fdderivs: skip redundant 2nd-order work in 4th-order overlap

This commit is contained in:
2026-03-02 03:21:21 +08:00
parent f70e90f694
commit e11363e06e

View File

@@ -141,12 +141,26 @@ void fdderivs(const int ex[3],
const int j4_hi = ex2 - 3; const int j4_hi = ex2 - 3;
const int k4_hi = ex3 - 3; const int k4_hi = ex3 - 3;
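/*
* Strategy A:
* Avoid redundant work where the 2nd-order and 4th-order regions overlap.
* When the 4th-order index box is non-empty (has4), the 2nd-order loop
* skips every point with i0 in [i4_lo, i4_hi], j0 in [j4_lo, j4_hi] and
* k0 in [k4_lo, k4_hi], so 2nd-order values are only computed on the
* shell points that are NOT overwritten by the 4th-order pass.
*/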
const int has4 = (i4_lo <= i4_hi && j4_lo <= j4_hi && k4_lo <= k4_hi);
if (i2_lo <= i2_hi && j2_lo <= j2_hi && k2_lo <= k2_hi) { if (i2_lo <= i2_hi && j2_lo <= j2_hi && k2_lo <= k2_hi) {
for (int k0 = k2_lo; k0 <= k2_hi; ++k0) { for (int k0 = k2_lo; k0 <= k2_hi; ++k0) {
const int kF = k0 + 1; const int kF = k0 + 1;
for (int j0 = j2_lo; j0 <= j2_hi; ++j0) { for (int j0 = j2_lo; j0 <= j2_hi; ++j0) {
const int jF = j0 + 1; const int jF = j0 + 1;
for (int i0 = i2_lo; i0 <= i2_hi; ++i0) { for (int i0 = i2_lo; i0 <= i2_hi; ++i0) {
if (has4 &&
i0 >= i4_lo && i0 <= i4_hi &&
j0 >= j4_lo && j0 <= j4_hi &&
k0 >= k4_lo && k0 <= k4_hi) {
continue;
}
const int iF = i0 + 1; const int iF = i0 + 1;
const size_t p = idx_ex(i0, j0, k0, ex); const size_t p = idx_ex(i0, j0, k0, ex);
@@ -193,7 +207,7 @@ void fdderivs(const int ex[3],
} }
} }
if (i4_lo <= i4_hi && j4_lo <= j4_hi && k4_lo <= k4_hi) { if (has4) {
for (int k0 = k4_lo; k0 <= k4_hi; ++k0) { for (int k0 = k4_lo; k0 <= k4_hi; ++k0) {
const int kF = k0 + 1; const int kF = k0 + 1;
for (int j0 = j4_lo; j0 <= j4_hi; ++j0) { for (int j0 = j4_lo; j0 <= j4_hi; ++j0) {