Optimize fdderivs: skip redundant 2nd-order work in 4th-order overlap
This commit is contained in:
@@ -141,12 +141,26 @@ void fdderivs(const int ex[3],
|
|||||||
const int j4_hi = ex2 - 3;
|
const int j4_hi = ex2 - 3;
|
||||||
const int k4_hi = ex3 - 3;
|
const int k4_hi = ex3 - 3;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Strategy A:
|
||||||
|
* Avoid redundant work in overlap of 2nd/4th-order regions.
|
||||||
|
* Only compute 2nd-order on shell points that are NOT overwritten by
|
||||||
|
* the 4th-order pass.
|
||||||
|
*/
|
||||||
|
const int has4 = (i4_lo <= i4_hi && j4_lo <= j4_hi && k4_lo <= k4_hi);
|
||||||
|
|
||||||
if (i2_lo <= i2_hi && j2_lo <= j2_hi && k2_lo <= k2_hi) {
|
if (i2_lo <= i2_hi && j2_lo <= j2_hi && k2_lo <= k2_hi) {
|
||||||
for (int k0 = k2_lo; k0 <= k2_hi; ++k0) {
|
for (int k0 = k2_lo; k0 <= k2_hi; ++k0) {
|
||||||
const int kF = k0 + 1;
|
const int kF = k0 + 1;
|
||||||
for (int j0 = j2_lo; j0 <= j2_hi; ++j0) {
|
for (int j0 = j2_lo; j0 <= j2_hi; ++j0) {
|
||||||
const int jF = j0 + 1;
|
const int jF = j0 + 1;
|
||||||
for (int i0 = i2_lo; i0 <= i2_hi; ++i0) {
|
for (int i0 = i2_lo; i0 <= i2_hi; ++i0) {
|
||||||
|
if (has4 &&
|
||||||
|
i0 >= i4_lo && i0 <= i4_hi &&
|
||||||
|
j0 >= j4_lo && j0 <= j4_hi &&
|
||||||
|
k0 >= k4_lo && k0 <= k4_hi) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
const int iF = i0 + 1;
|
const int iF = i0 + 1;
|
||||||
const size_t p = idx_ex(i0, j0, k0, ex);
|
const size_t p = idx_ex(i0, j0, k0, ex);
|
||||||
|
|
||||||
@@ -193,7 +207,7 @@ void fdderivs(const int ex[3],
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (i4_lo <= i4_hi && j4_lo <= j4_hi && k4_lo <= k4_hi) {
|
if (has4) {
|
||||||
for (int k0 = k4_lo; k0 <= k4_hi; ++k0) {
|
for (int k0 = k4_lo; k0 <= k4_hi; ++k0) {
|
||||||
const int kF = k0 + 1;
|
const int kF = k0 + 1;
|
||||||
for (int j0 = j4_lo; j0 <= j4_hi; ++j0) {
|
for (int j0 = j4_lo; j0 <= j4_hi; ++j0) {
|
||||||
|
|||||||
Reference in New Issue
Block a user