Add full FD order support (2nd/4th/6th/8th) to C derivative kernels via ghost_width dispatch

Wrap each C kernel in #if (ghost_width == N) blocks matching Fortran stencil
coefficients from diff_new.f90, kodiss.f90, and lopsidediff.f90. Add fast-path
indexing for ord=1,4,5 in share_func.h.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
2026-05-14 02:27:21 +08:00
parent 5d8dfaf679
commit c6ecc11d7e
6 changed files with 2612 additions and 877 deletions

View File

@@ -1,14 +1,13 @@
#include "macrodef.h"
#include "tool.h"
/*
* 你需要提供 symmetry_bd 的 C 版本(或 Fortran 绑到 C 的接口)。
* Fortran: call symmetry_bd(3,ex,f,fh,SoA)
* C 版 lopsided — upwind (lopsided) advection derivatives.
*
* 约定:
* nghost = 3
* ex[3] = {ex1,ex2,ex3}
* f = 原始网格 (ex1*ex2*ex3)
* fh = 扩展网格 ((ex1+3)*(ex2+3)*(ex3+3)),对应 Fortran 的 (-2:ex1, ...)
* SoA[3] = 输入参数
* Adds advection terms to f_rhs for all three spatial directions.
* Uses sign-biased (one-sided) stencils with centered fallbacks.
*
* For lopsided, symmetry_bd ord = ghost_width (same as kodiss).
*/
void lopsided(const int ex[3],
const double *X, const double *Y, const double *Z,
@@ -16,240 +15,577 @@ void lopsided(const int ex[3],
const double *Sfx, const double *Sfy, const double *Sfz,
int Symmetry, const double SoA[3])
{
const double ZEO = 0.0, ONE = 1.0, F3 = 3.0;
const double TWO = 2.0, F6 = 6.0, F18 = 18.0;
const double F12 = 12.0, F10 = 10.0, EIT = 8.0;
const int NO_SYMM = 0, EQ_SYMM = 1, OCTANT = 2;
(void)OCTANT; // 这里和 Fortran 一样只是定义了不用也没关系
const double ZEO = 0.0, ONE = 1.0;
const double TWO = 2.0, F6 = 6.0, EIT = 8.0;
const double F3 = 3.0, F4 = 4.0, F5 = 5.0, F10 = 10.0, F12 = 12.0, F18 = 18.0;
const double F9 = 9.0, F45 = 45.0, F60 = 60.0;
const double F2 = 2.0, F15 = 15.0, F24 = 24.0, F30 = 30.0, F35 = 35.0;
const double F50 = 50.0, F77 = 77.0, F80 = 80.0, F100 = 100.0, F150 = 150.0;
const double F32 = 32.0, F168 = 168.0, F672 = 672.0, F840 = 840.0;
const double F140=140.0, F378=378.0, F420=420.0, F1050=1050.0;
const int NO_SYMM = 0, EQ_SYMM = 1;
const int ex1 = ex[0], ex2 = ex[1], ex3 = ex[2];
// 对应 Fortran: dX = X(2)-X(1) Fortran 1-based
// C: X[1]-X[0]
const double dX = X[1] - X[0];
const double dY = Y[1] - Y[0];
const double dZ = Z[1] - Z[0];
const double d12dx = ONE / F12 / dX;
const double d12dy = ONE / F12 / dY;
const double d12dz = ONE / F12 / dZ;
#if (ghost_width == 2)
/* ---- 2nd-order lopsided --------------------------------------------- */
{
const int ord = 2;
int iminF = 1, jminF = 1, kminF = 1;
if (Symmetry > NO_SYMM && fabs(Z[0]) < dZ) kminF = -1;
if (Symmetry > EQ_SYMM && fabs(X[0]) < dX) iminF = -1;
if (Symmetry > EQ_SYMM && fabs(Y[0]) < dY) jminF = -1;
// Fortran 里算了 d2dx/d2dy/d2dz 但本 subroutine 里没用到(保持一致也算出来)
const double d2dx = ONE / TWO / dX;
const double d2dy = ONE / TWO / dY;
const double d2dz = ONE / TWO / dZ;
(void)d2dx; (void)d2dy; (void)d2dz;
const size_t nx = (size_t)ex1 + ord;
const size_t ny = (size_t)ex2 + ord;
const size_t nz = (size_t)ex3 + ord;
const size_t fh_size = nx * ny * nz;
// Fortran:
// imax = ex(1); jmax = ex(2); kmax = ex(3)
const int imaxF = ex1;
const int jmaxF = ex2;
const int kmaxF = ex3;
double *fh = (double*)malloc(fh_size * sizeof(double));
if (!fh) return;
symmetry_bd(ord, ex, f, fh, SoA);
// Fortran:
// imin=jmin=kmin=1; 若满足对称条件则设为 -2
int iminF = 1, jminF = 1, kminF = 1;
if (Symmetry > NO_SYMM && fabs(Z[0]) < dZ) kminF = -2;
if (Symmetry > EQ_SYMM && fabs(X[0]) < dX) iminF = -2;
if (Symmetry > EQ_SYMM && fabs(Y[0]) < dY) jminF = -2;
const double d2dx = ONE / TWO / dX;
const double d2dy = ONE / TWO / dY;
const double d2dz = ONE / TWO / dZ;
// 分配 fh大小 (ex1+3)*(ex2+3)*(ex3+3)
const size_t nx = (size_t)ex1 + 3;
const size_t ny = (size_t)ex2 + 3;
const size_t nz = (size_t)ex3 + 3;
const size_t fh_size = nx * ny * nz;
const int imaxF = ex1, jmaxF = ex2, kmaxF = ex3;
double *fh = (double*)malloc(fh_size * sizeof(double));
if (!fh) return; // 内存不足:直接返回(你也可以改成 abort/报错)
for (int k0 = 0; k0 <= ex3 - 2; ++k0) {
const int kF = k0 + 1;
for (int j0 = 0; j0 <= ex2 - 2; ++j0) {
const int jF = j0 + 1;
for (int i0 = 0; i0 <= ex1 - 2; ++i0) {
const int iF = i0 + 1;
const size_t p = idx_ex(i0, j0, k0, ex);
// Fortran: call symmetry_bd(3,ex,f,fh,SoA)
symmetry_bd(3, ex, f, fh, SoA);
/*
* Fortran 主循环:
* do k=1,ex(3)-1
* do j=1,ex(2)-1
* do i=1,ex(1)-1
*
* 转成 C 0-based
* k0 = 0..ex3-2, j0 = 0..ex2-2, i0 = 0..ex1-2
*
* 并且 Fortran 里的 i/j/k 在 fh 访问时,仍然是 Fortran 索引值:
* iF=i0+1, jF=j0+1, kF=k0+1
*/
for (int k0 = 0; k0 <= ex3 - 2; ++k0) {
const int kF = k0 + 1;
for (int j0 = 0; j0 <= ex2 - 2; ++j0) {
const int jF = j0 + 1;
for (int i0 = 0; i0 <= ex1 - 2; ++i0) {
const int iF = i0 + 1;
const size_t p = idx_ex(i0, j0, k0, ex);
// ---------------- x direction ----------------
const double sfx = Sfx[p];
if (sfx > ZEO) {
// Fortran: if(i+3 <= imax)
// iF+3 <= ex1 <=> i0+4 <= ex1 <=> i0 <= ex1-4
if (i0 <= ex1 - 4) {
f_rhs[p] += sfx * d12dx *
(-F3 * fh[idx_fh_F(iF - 1, jF, kF, ex)]
-F10 * fh[idx_fh_F(iF , jF, kF, ex)]
+F18 * fh[idx_fh_F(iF + 1, jF, kF, ex)]
-F6 * fh[idx_fh_F(iF + 2, jF, kF, ex)]
+ fh[idx_fh_F(iF + 3, jF, kF, ex)]);
/* x-direction */
const double sfx = Sfx[p];
if (sfx > ZEO) {
if (i0 <= ex1 - 3) // i+2 <= imax
f_rhs[p] += sfx * d2dx * (
-F3*fh[idx_fh_F_ord2(iF, jF, kF, ex)] +
F4*fh[idx_fh_F_ord2(iF+1, jF, kF, ex)] -
fh[idx_fh_F_ord2(iF+2, jF, kF, ex)]);
else if (i0 <= ex1 - 2) // i+1 <= imax
f_rhs[p] += sfx * d2dx * (
-fh[idx_fh_F_ord2(iF, jF, kF, ex)] +
fh[idx_fh_F_ord2(iF+1, jF, kF, ex)]);
} else if (sfx < ZEO) {
if ((i0 - 1) >= iminF) // i-2 >= imin
f_rhs[p] -= sfx * d2dx * (
-F3*fh[idx_fh_F_ord2(iF, jF, kF, ex)] +
F4*fh[idx_fh_F_ord2(iF-1, jF, kF, ex)] -
fh[idx_fh_F_ord2(iF-2, jF, kF, ex)]);
else if (i0 >= iminF) // i-1 >= imin
f_rhs[p] -= sfx * d2dx * (
-fh[idx_fh_F_ord2(iF, jF, kF, ex)] +
fh[idx_fh_F_ord2(iF-1, jF, kF, ex)]);
}
// elseif(i+2 <= imax) <=> i0 <= ex1-3
else if (i0 <= ex1 - 3) {
f_rhs[p] += sfx * d12dx *
( fh[idx_fh_F(iF - 2, jF, kF, ex)]
-EIT * fh[idx_fh_F(iF - 1, jF, kF, ex)]
+EIT * fh[idx_fh_F(iF + 1, jF, kF, ex)]
- fh[idx_fh_F(iF + 2, jF, kF, ex)]);
}
// elseif(i+1 <= imax) <=> i0 <= ex1-2循环里总成立
else if (i0 <= ex1 - 2) {
f_rhs[p] -= sfx * d12dx *
(-F3 * fh[idx_fh_F(iF + 1, jF, kF, ex)]
-F10 * fh[idx_fh_F(iF , jF, kF, ex)]
+F18 * fh[idx_fh_F(iF - 1, jF, kF, ex)]
-F6 * fh[idx_fh_F(iF - 2, jF, kF, ex)]
+ fh[idx_fh_F(iF - 3, jF, kF, ex)]);
}
} else if (sfx < ZEO) {
// Fortran: if(i-3 >= imin)
// (iF-3) >= iminF <=> (i0-2) >= iminF
if ((i0 - 2) >= iminF) {
f_rhs[p] -= sfx * d12dx *
(-F3 * fh[idx_fh_F(iF + 1, jF, kF, ex)]
-F10 * fh[idx_fh_F(iF , jF, kF, ex)]
+F18 * fh[idx_fh_F(iF - 1, jF, kF, ex)]
-F6 * fh[idx_fh_F(iF - 2, jF, kF, ex)]
+ fh[idx_fh_F(iF - 3, jF, kF, ex)]);
}
// elseif(i-2 >= imin) <=> (i0-1) >= iminF
else if ((i0 - 1) >= iminF) {
f_rhs[p] += sfx * d12dx *
( fh[idx_fh_F(iF - 2, jF, kF, ex)]
-EIT * fh[idx_fh_F(iF - 1, jF, kF, ex)]
+EIT * fh[idx_fh_F(iF + 1, jF, kF, ex)]
- fh[idx_fh_F(iF + 2, jF, kF, ex)]);
}
// elseif(i-1 >= imin) <=> i0 >= iminF
else if (i0 >= iminF) {
f_rhs[p] += sfx * d12dx *
(-F3 * fh[idx_fh_F(iF - 1, jF, kF, ex)]
-F10 * fh[idx_fh_F(iF , jF, kF, ex)]
+F18 * fh[idx_fh_F(iF + 1, jF, kF, ex)]
-F6 * fh[idx_fh_F(iF + 2, jF, kF, ex)]
+ fh[idx_fh_F(iF + 3, jF, kF, ex)]);
}
}
// ---------------- y direction ----------------
const double sfy = Sfy[p];
if (sfy > ZEO) {
// jF+3 <= ex2 <=> j0+4 <= ex2 <=> j0 <= ex2-4
if (j0 <= ex2 - 4) {
f_rhs[p] += sfy * d12dy *
(-F3 * fh[idx_fh_F(iF, jF - 1, kF, ex)]
-F10 * fh[idx_fh_F(iF, jF , kF, ex)]
+F18 * fh[idx_fh_F(iF, jF + 1, kF, ex)]
-F6 * fh[idx_fh_F(iF, jF + 2, kF, ex)]
+ fh[idx_fh_F(iF, jF + 3, kF, ex)]);
} else if (j0 <= ex2 - 3) {
f_rhs[p] += sfy * d12dy *
( fh[idx_fh_F(iF, jF - 2, kF, ex)]
-EIT * fh[idx_fh_F(iF, jF - 1, kF, ex)]
+EIT * fh[idx_fh_F(iF, jF + 1, kF, ex)]
- fh[idx_fh_F(iF, jF + 2, kF, ex)]);
} else if (j0 <= ex2 - 2) {
f_rhs[p] -= sfy * d12dy *
(-F3 * fh[idx_fh_F(iF, jF + 1, kF, ex)]
-F10 * fh[idx_fh_F(iF, jF , kF, ex)]
+F18 * fh[idx_fh_F(iF, jF - 1, kF, ex)]
-F6 * fh[idx_fh_F(iF, jF - 2, kF, ex)]
+ fh[idx_fh_F(iF, jF - 3, kF, ex)]);
/* y-direction */
const double sfy = Sfy[p];
if (sfy > ZEO) {
if (j0 <= ex2-3)
f_rhs[p] += sfy * d2dy * (
-F3*fh[idx_fh_F_ord2(iF, jF, kF, ex)] +
F4*fh[idx_fh_F_ord2(iF, jF+1, kF, ex)] -
fh[idx_fh_F_ord2(iF, jF+2, kF, ex)]);
else if (j0 <= ex2-2)
f_rhs[p] += sfy * d2dy * (
-fh[idx_fh_F_ord2(iF, jF, kF, ex)] +
fh[idx_fh_F_ord2(iF, jF+1, kF, ex)]);
} else if (sfy < ZEO) {
if ((j0-1) >= jminF)
f_rhs[p] -= sfy * d2dy * (
-F3*fh[idx_fh_F_ord2(iF, jF, kF, ex)] +
F4*fh[idx_fh_F_ord2(iF, jF-1, kF, ex)] -
fh[idx_fh_F_ord2(iF, jF-2, kF, ex)]);
else if (j0 >= jminF)
f_rhs[p] -= sfy * d2dy * (
-fh[idx_fh_F_ord2(iF, jF, kF, ex)] +
fh[idx_fh_F_ord2(iF, jF-1, kF, ex)]);
}
} else if (sfy < ZEO) {
if ((j0 - 2) >= jminF) {
f_rhs[p] -= sfy * d12dy *
(-F3 * fh[idx_fh_F(iF, jF + 1, kF, ex)]
-F10 * fh[idx_fh_F(iF, jF , kF, ex)]
+F18 * fh[idx_fh_F(iF, jF - 1, kF, ex)]
-F6 * fh[idx_fh_F(iF, jF - 2, kF, ex)]
+ fh[idx_fh_F(iF, jF - 3, kF, ex)]);
} else if ((j0 - 1) >= jminF) {
f_rhs[p] += sfy * d12dy *
( fh[idx_fh_F(iF, jF - 2, kF, ex)]
-EIT * fh[idx_fh_F(iF, jF - 1, kF, ex)]
+EIT * fh[idx_fh_F(iF, jF + 1, kF, ex)]
- fh[idx_fh_F(iF, jF + 2, kF, ex)]);
} else if (j0 >= jminF) {
f_rhs[p] += sfy * d12dy *
(-F3 * fh[idx_fh_F(iF, jF - 1, kF, ex)]
-F10 * fh[idx_fh_F(iF, jF , kF, ex)]
+F18 * fh[idx_fh_F(iF, jF + 1, kF, ex)]
-F6 * fh[idx_fh_F(iF, jF + 2, kF, ex)]
+ fh[idx_fh_F(iF, jF + 3, kF, ex)]);
}
}
// ---------------- z direction ----------------
const double sfz = Sfz[p];
if (sfz > ZEO) {
if (k0 <= ex3 - 4) {
f_rhs[p] += sfz * d12dz *
(-F3 * fh[idx_fh_F(iF, jF, kF - 1, ex)]
-F10 * fh[idx_fh_F(iF, jF, kF , ex)]
+F18 * fh[idx_fh_F(iF, jF, kF + 1, ex)]
-F6 * fh[idx_fh_F(iF, jF, kF + 2, ex)]
+ fh[idx_fh_F(iF, jF, kF + 3, ex)]);
} else if (k0 <= ex3 - 3) {
f_rhs[p] += sfz * d12dz *
( fh[idx_fh_F(iF, jF, kF - 2, ex)]
-EIT * fh[idx_fh_F(iF, jF, kF - 1, ex)]
+EIT * fh[idx_fh_F(iF, jF, kF + 1, ex)]
- fh[idx_fh_F(iF, jF, kF + 2, ex)]);
} else if (k0 <= ex3 - 2) {
f_rhs[p] -= sfz * d12dz *
(-F3 * fh[idx_fh_F(iF, jF, kF + 1, ex)]
-F10 * fh[idx_fh_F(iF, jF, kF , ex)]
+F18 * fh[idx_fh_F(iF, jF, kF - 1, ex)]
-F6 * fh[idx_fh_F(iF, jF, kF - 2, ex)]
+ fh[idx_fh_F(iF, jF, kF - 3, ex)]);
}
} else if (sfz < ZEO) {
if ((k0 - 2) >= kminF) {
f_rhs[p] -= sfz * d12dz *
(-F3 * fh[idx_fh_F(iF, jF, kF + 1, ex)]
-F10 * fh[idx_fh_F(iF, jF, kF , ex)]
+F18 * fh[idx_fh_F(iF, jF, kF - 1, ex)]
-F6 * fh[idx_fh_F(iF, jF, kF - 2, ex)]
+ fh[idx_fh_F(iF, jF, kF - 3, ex)]);
} else if ((k0 - 1) >= kminF) {
f_rhs[p] += sfz * d12dz *
( fh[idx_fh_F(iF, jF, kF - 2, ex)]
-EIT * fh[idx_fh_F(iF, jF, kF - 1, ex)]
+EIT * fh[idx_fh_F(iF, jF, kF + 1, ex)]
- fh[idx_fh_F(iF, jF, kF + 2, ex)]);
} else if (k0 >= kminF) {
f_rhs[p] += sfz * d12dz *
(-F3 * fh[idx_fh_F(iF, jF, kF - 1, ex)]
-F10 * fh[idx_fh_F(iF, jF, kF , ex)]
+F18 * fh[idx_fh_F(iF, jF, kF + 1, ex)]
-F6 * fh[idx_fh_F(iF, jF, kF + 2, ex)]
+ fh[idx_fh_F(iF, jF, kF + 3, ex)]);
/* z-direction */
const double sfz = Sfz[p];
if (sfz > ZEO) {
if (k0 <= ex3-3)
f_rhs[p] += sfz * d2dz * (
-F3*fh[idx_fh_F_ord2(iF, jF, kF, ex)] +
F4*fh[idx_fh_F_ord2(iF, jF, kF+1, ex)] -
fh[idx_fh_F_ord2(iF, jF, kF+2, ex)]);
else if (k0 <= ex3-2)
f_rhs[p] += sfz * d2dz * (
-fh[idx_fh_F_ord2(iF, jF, kF, ex)] +
fh[idx_fh_F_ord2(iF, jF, kF+1, ex)]);
} else if (sfz < ZEO) {
if ((k0-1) >= kminF)
f_rhs[p] -= sfz * d2dz * (
-F3*fh[idx_fh_F_ord2(iF, jF, kF, ex)] +
F4*fh[idx_fh_F_ord2(iF, jF, kF-1, ex)] -
fh[idx_fh_F_ord2(iF, jF, kF-2, ex)]);
else if (k0 >= kminF)
f_rhs[p] -= sfz * d2dz * (
-fh[idx_fh_F_ord2(iF, jF, kF, ex)] +
fh[idx_fh_F_ord2(iF, jF, kF-1, ex)]);
}
}
}
}
free(fh);
return;
}
free(fh);
#elif (ghost_width == 3)
/* ---- 4th-order lopsided (original code) ---------------------------- */
{
const int ord = 3;
int iminF = 1, jminF = 1, kminF = 1;
if (Symmetry > NO_SYMM && fabs(Z[0]) < dZ) kminF = -2;
if (Symmetry > EQ_SYMM && fabs(X[0]) < dX) iminF = -2;
if (Symmetry > EQ_SYMM && fabs(Y[0]) < dY) jminF = -2;
const size_t nx = (size_t)ex1 + ord;
const size_t ny = (size_t)ex2 + ord;
const size_t nz = (size_t)ex3 + ord;
const size_t fh_size = nx * ny * nz;
double *fh = (double*)malloc(fh_size * sizeof(double));
if (!fh) return;
symmetry_bd(ord, ex, f, fh, SoA);
const double d12dx = ONE / F12 / dX;
const double d12dy = ONE / F12 / dY;
const double d12dz = ONE / F12 / dZ;
const int imaxF = ex1, jmaxF = ex2, kmaxF = ex3;
for (int k0 = 0; k0 <= ex3 - 2; ++k0) {
const int kF = k0 + 2;
for (int j0 = 0; j0 <= ex2 - 2; ++j0) {
const int jF = j0 + 2;
for (int i0 = 0; i0 <= ex1 - 2; ++i0) {
const int iF = i0 + 2;
const size_t p = idx_ex(i0, j0, k0, ex);
const double sfx = Sfx[p];
if (sfx > ZEO) {
if (i0 <= ex1 - 4) // i+3 <= imax
f_rhs[p] += sfx * d12dx * (
-F3 *fh[idx_fh_F(iF-1, jF, kF, ex)]
-F10*fh[idx_fh_F(iF, jF, kF, ex)]
+F18*fh[idx_fh_F(iF+1, jF, kF, ex)]
-F6 *fh[idx_fh_F(iF+2, jF, kF, ex)]
+ fh[idx_fh_F(iF+3, jF, kF, ex)]);
else if (i0 <= ex1 - 3) // i+2 <= imax
f_rhs[p] += sfx * d12dx * (
fh[idx_fh_F(iF-2, jF, kF, ex)]
-EIT*fh[idx_fh_F(iF-1, jF, kF, ex)]
+EIT*fh[idx_fh_F(iF+1, jF, kF, ex)]
- fh[idx_fh_F(iF+2, jF, kF, ex)]);
else if (i0 <= ex1 - 2) // i+1 <= imax → mirrored
f_rhs[p] -= sfx * d12dx * (
-F3 *fh[idx_fh_F(iF+1, jF, kF, ex)]
-F10*fh[idx_fh_F(iF, jF, kF, ex)]
+F18*fh[idx_fh_F(iF-1, jF, kF, ex)]
-F6 *fh[idx_fh_F(iF-2, jF, kF, ex)]
+ fh[idx_fh_F(iF-3, jF, kF, ex)]);
} else if (sfx < ZEO) {
if ((i0 - 2) >= iminF) // i-3 >= imin
f_rhs[p] -= sfx * d12dx * (
-F3 *fh[idx_fh_F(iF+1, jF, kF, ex)]
-F10*fh[idx_fh_F(iF, jF, kF, ex)]
+F18*fh[idx_fh_F(iF-1, jF, kF, ex)]
-F6 *fh[idx_fh_F(iF-2, jF, kF, ex)]
+ fh[idx_fh_F(iF-3, jF, kF, ex)]);
else if ((i0 - 1) >= iminF) // i-2 >= imin
f_rhs[p] += sfx * d12dx * (
fh[idx_fh_F(iF-2, jF, kF, ex)]
-EIT*fh[idx_fh_F(iF-1, jF, kF, ex)]
+EIT*fh[idx_fh_F(iF+1, jF, kF, ex)]
- fh[idx_fh_F(iF+2, jF, kF, ex)]);
else if (i0 >= iminF) // i-1 >= imin → mirrored
f_rhs[p] += sfx * d12dx * (
-F3 *fh[idx_fh_F(iF-1, jF, kF, ex)]
-F10*fh[idx_fh_F(iF, jF, kF, ex)]
+F18*fh[idx_fh_F(iF+1, jF, kF, ex)]
-F6 *fh[idx_fh_F(iF+2, jF, kF, ex)]
+ fh[idx_fh_F(iF+3, jF, kF, ex)]);
}
const double sfy = Sfy[p];
if (sfy > ZEO) {
if (j0 <= ex2-4)
f_rhs[p] += sfy * d12dy * (
-F3*fh[idx_fh_F(iF,jF-1,kF,ex)]-F10*fh[idx_fh_F(iF,jF,kF,ex)]
+F18*fh[idx_fh_F(iF,jF+1,kF,ex)]-F6*fh[idx_fh_F(iF,jF+2,kF,ex)]
+fh[idx_fh_F(iF,jF+3,kF,ex)]);
else if (j0 <= ex2-3)
f_rhs[p] += sfy * d12dy * (fh[idx_fh_F(iF,jF-2,kF,ex)]-EIT*fh[idx_fh_F(iF,jF-1,kF,ex)]+EIT*fh[idx_fh_F(iF,jF+1,kF,ex)]-fh[idx_fh_F(iF,jF+2,kF,ex)]);
else if (j0 <= ex2-2)
f_rhs[p] -= sfy * d12dy * (
-F3*fh[idx_fh_F(iF,jF+1,kF,ex)]-F10*fh[idx_fh_F(iF,jF,kF,ex)]
+F18*fh[idx_fh_F(iF,jF-1,kF,ex)]-F6*fh[idx_fh_F(iF,jF-2,kF,ex)]
+fh[idx_fh_F(iF,jF-3,kF,ex)]);
} else if (sfy < ZEO) {
if ((j0-2) >= jminF)
f_rhs[p] -= sfy * d12dy * (
-F3*fh[idx_fh_F(iF,jF+1,kF,ex)]-F10*fh[idx_fh_F(iF,jF,kF,ex)]
+F18*fh[idx_fh_F(iF,jF-1,kF,ex)]-F6*fh[idx_fh_F(iF,jF-2,kF,ex)]
+fh[idx_fh_F(iF,jF-3,kF,ex)]);
else if ((j0-1) >= jminF)
f_rhs[p] += sfy * d12dy * (fh[idx_fh_F(iF,jF-2,kF,ex)]-EIT*fh[idx_fh_F(iF,jF-1,kF,ex)]+EIT*fh[idx_fh_F(iF,jF+1,kF,ex)]-fh[idx_fh_F(iF,jF+2,kF,ex)]);
else if (j0 >= jminF)
f_rhs[p] += sfy * d12dy * (
-F3*fh[idx_fh_F(iF,jF-1,kF,ex)]-F10*fh[idx_fh_F(iF,jF,kF,ex)]
+F18*fh[idx_fh_F(iF,jF+1,kF,ex)]-F6*fh[idx_fh_F(iF,jF+2,kF,ex)]
+fh[idx_fh_F(iF,jF+3,kF,ex)]);
}
const double sfz = Sfz[p];
if (sfz > ZEO) {
if (k0 <= ex3-4)
f_rhs[p] += sfz * d12dz * (
-F3*fh[idx_fh_F(iF,jF,kF-1,ex)]-F10*fh[idx_fh_F(iF,jF,kF,ex)]
+F18*fh[idx_fh_F(iF,jF,kF+1,ex)]-F6*fh[idx_fh_F(iF,jF,kF+2,ex)]
+fh[idx_fh_F(iF,jF,kF+3,ex)]);
else if (k0 <= ex3-3)
f_rhs[p] += sfz * d12dz * (fh[idx_fh_F(iF,jF,kF-2,ex)]-EIT*fh[idx_fh_F(iF,jF,kF-1,ex)]+EIT*fh[idx_fh_F(iF,jF,kF+1,ex)]-fh[idx_fh_F(iF,jF,kF+2,ex)]);
else if (k0 <= ex3-2)
f_rhs[p] -= sfz * d12dz * (
-F3*fh[idx_fh_F(iF,jF,kF+1,ex)]-F10*fh[idx_fh_F(iF,jF,kF,ex)]
+F18*fh[idx_fh_F(iF,jF,kF-1,ex)]-F6*fh[idx_fh_F(iF,jF,kF-2,ex)]
+fh[idx_fh_F(iF,jF,kF-3,ex)]);
} else if (sfz < ZEO) {
if ((k0-2) >= kminF)
f_rhs[p] -= sfz * d12dz * (
-F3*fh[idx_fh_F(iF,jF,kF+1,ex)]-F10*fh[idx_fh_F(iF,jF,kF,ex)]
+F18*fh[idx_fh_F(iF,jF,kF-1,ex)]-F6*fh[idx_fh_F(iF,jF,kF-2,ex)]
+fh[idx_fh_F(iF,jF,kF-3,ex)]);
else if ((k0-1) >= kminF)
f_rhs[p] += sfz * d12dz * (fh[idx_fh_F(iF,jF,kF-2,ex)]-EIT*fh[idx_fh_F(iF,jF,kF-1,ex)]+EIT*fh[idx_fh_F(iF,jF,kF+1,ex)]-fh[idx_fh_F(iF,jF,kF+2,ex)]);
else if (k0 >= kminF)
f_rhs[p] += sfz * d12dz * (
-F3*fh[idx_fh_F(iF,jF,kF-1,ex)]-F10*fh[idx_fh_F(iF,jF,kF,ex)]
+F18*fh[idx_fh_F(iF,jF,kF+1,ex)]-F6*fh[idx_fh_F(iF,jF,kF+2,ex)]
+fh[idx_fh_F(iF,jF,kF+3,ex)]);
}
}
}
}
free(fh);
return;
}
#elif (ghost_width == 4)
/* ---- 6th-order lopsided --------------------------------------------- */
{
const int ord = 4;
int iminF = 1, jminF = 1, kminF = 1;
if (Symmetry > NO_SYMM && fabs(Z[0]) < dZ) kminF = -3;
if (Symmetry > EQ_SYMM && fabs(X[0]) < dX) iminF = -3;
if (Symmetry > EQ_SYMM && fabs(Y[0]) < dY) jminF = -3;
const size_t nx = (size_t)ex1 + ord;
const size_t ny = (size_t)ex2 + ord;
const size_t nz = (size_t)ex3 + ord;
const size_t fh_size = nx * ny * nz;
double *fh = (double*)malloc(fh_size * sizeof(double));
if (!fh) return;
symmetry_bd(ord, ex, f, fh, SoA);
const double d60dx = ONE / F60 / dX;
const double d60dy = ONE / F60 / dY;
const double d60dz = ONE / F60 / dZ;
const double d12dx = ONE / F12 / dX;
const double d12dy = ONE / F12 / dY;
const double d12dz = ONE / F12 / dZ;
const double d2dx = ONE / TWO / dX;
const double d2dy = ONE / TWO / dY;
const double d2dz = ONE / TWO / dZ;
const int imaxF = ex1, jmaxF = ex2, kmaxF = ex3;
for (int k0 = 0; k0 <= ex3 - 2; ++k0) {
const int kF = k0 + 3;
for (int j0 = 0; j0 <= ex2 - 2; ++j0) {
const int jF = j0 + 3;
for (int i0 = 0; i0 <= ex1 - 2; ++i0) {
const int iF = i0 + 3;
const size_t p = idx_ex(i0, j0, k0, ex);
/* ---- x-direction ---- */
const double sfx = Sfx[p];
if (sfx > ZEO) {
/* Primary biased: 2*f(i-2)-24*f(i-1)-35*f(i)+80*f(i+1)-30*f(i+2)+8*f(i+3)-f(i+4) */
if (i0 <= ex1-5 && (i0-1)>=iminF) // i+4<=imax && i-2>=imin
f_rhs[p] += sfx * d60dx * (
+F2*fh[idx_fh_F_ord4(iF-2,jF,kF,ex)]-F24*fh[idx_fh_F_ord4(iF-1,jF,kF,ex)]
-F35*fh[idx_fh_F_ord4(iF,jF,kF,ex)]+F80*fh[idx_fh_F_ord4(iF+1,jF,kF,ex)]
-F30*fh[idx_fh_F_ord4(iF+2,jF,kF,ex)]+EIT*fh[idx_fh_F_ord4(iF+3,jF,kF,ex)]
-fh[idx_fh_F_ord4(iF+4,jF,kF,ex)]);
/* Boundary-adapted: -10*f(i-1)-77*f(i)+150*f(i+1)-100*f(i+2)+50*f(i+3)-15*f(i+4)+2*f(i+5) */
else if (i0 <= ex1-6 && i0 >= iminF) // i+5<=imax && i-1>=imin
f_rhs[p] += sfx * d60dx * (
-F10*fh[idx_fh_F_ord4(iF-1,jF,kF,ex)]-F77*fh[idx_fh_F_ord4(iF,jF,kF,ex)]
+F150*fh[idx_fh_F_ord4(iF+1,jF,kF,ex)]-F100*fh[idx_fh_F_ord4(iF+2,jF,kF,ex)]
+F50*fh[idx_fh_F_ord4(iF+3,jF,kF,ex)]-F15*fh[idx_fh_F_ord4(iF+4,jF,kF,ex)]
+F2*fh[idx_fh_F_ord4(iF+5,jF,kF,ex)]);
/* Centered fallbacks */
else if (i0 <= ex1-4 && (i0-2)>=iminF) // 6th: i+3<=imax && i-3>=imin
f_rhs[p] += sfx * d60dx * (
-fh[idx_fh_F_ord4(iF-3,jF,kF,ex)]+F9*fh[idx_fh_F_ord4(iF-2,jF,kF,ex)]
-F45*fh[idx_fh_F_ord4(iF-1,jF,kF,ex)]+F45*fh[idx_fh_F_ord4(iF+1,jF,kF,ex)]
-F9*fh[idx_fh_F_ord4(iF+2,jF,kF,ex)]+fh[idx_fh_F_ord4(iF+3,jF,kF,ex)]);
else if (i0 <= ex1-3 && (i0-1)>=iminF) // 4th
f_rhs[p] += sfx * d12dx * (
fh[idx_fh_F_ord4(iF-2,jF,kF,ex)]-EIT*fh[idx_fh_F_ord4(iF-1,jF,kF,ex)]
+EIT*fh[idx_fh_F_ord4(iF+1,jF,kF,ex)]-fh[idx_fh_F_ord4(iF+2,jF,kF,ex)]);
else if (i0 <= ex1-2 && i0>=iminF) // 2nd
f_rhs[p] += sfx * d2dx * (
-fh[idx_fh_F_ord4(iF-1,jF,kF,ex)]+fh[idx_fh_F_ord4(iF+1,jF,kF,ex)]);
} else if (sfx < ZEO) {
if ((i0-4)>=iminF && i0<=ex1-2) // i-4>=imin && i+2<=imax
f_rhs[p] -= sfx * d60dx * (
+F2*fh[idx_fh_F_ord4(iF+2,jF,kF,ex)]-F24*fh[idx_fh_F_ord4(iF+1,jF,kF,ex)]
-F35*fh[idx_fh_F_ord4(iF,jF,kF,ex)]+F80*fh[idx_fh_F_ord4(iF-1,jF,kF,ex)]
-F30*fh[idx_fh_F_ord4(iF-2,jF,kF,ex)]+EIT*fh[idx_fh_F_ord4(iF-3,jF,kF,ex)]
-fh[idx_fh_F_ord4(iF-4,jF,kF,ex)]);
else if ((i0-5)>=iminF && i0<=ex1-2) // i-5>=imin && i+1<=imax
f_rhs[p] -= sfx * d60dx * (
-F10*fh[idx_fh_F_ord4(iF+1,jF,kF,ex)]-F77*fh[idx_fh_F_ord4(iF,jF,kF,ex)]
+F150*fh[idx_fh_F_ord4(iF-1,jF,kF,ex)]-F100*fh[idx_fh_F_ord4(iF-2,jF,kF,ex)]
+F50*fh[idx_fh_F_ord4(iF-3,jF,kF,ex)]-F15*fh[idx_fh_F_ord4(iF-4,jF,kF,ex)]
+F2*fh[idx_fh_F_ord4(iF-5,jF,kF,ex)]);
else if ((i0-3)>=iminF && i0<=ex1-2) // 6th centered
f_rhs[p] -= sfx * d60dx * (
-fh[idx_fh_F_ord4(iF-3,jF,kF,ex)]+F9*fh[idx_fh_F_ord4(iF-2,jF,kF,ex)]
-F45*fh[idx_fh_F_ord4(iF-1,jF,kF,ex)]+F45*fh[idx_fh_F_ord4(iF+1,jF,kF,ex)]
-F9*fh[idx_fh_F_ord4(iF+2,jF,kF,ex)]+fh[idx_fh_F_ord4(iF+3,jF,kF,ex)]);
else if ((i0-2)>=iminF && i0<=ex1-2) // 4th
f_rhs[p] -= sfx * d12dx * (
fh[idx_fh_F_ord4(iF-2,jF,kF,ex)]-EIT*fh[idx_fh_F_ord4(iF-1,jF,kF,ex)]
+EIT*fh[idx_fh_F_ord4(iF+1,jF,kF,ex)]-fh[idx_fh_F_ord4(iF+2,jF,kF,ex)]);
else if ((i0-1)>=iminF && i0<=ex1-2) // 2nd
f_rhs[p] -= sfx * d2dx * (
-fh[idx_fh_F_ord4(iF-1,jF,kF,ex)]+fh[idx_fh_F_ord4(iF+1,jF,kF,ex)]);
}
/* ---- y-direction ---- */
const double sfy = Sfy[p];
if (sfy > ZEO) {
if (j0<=ex2-5 && (j0-1)>=jminF)
f_rhs[p] += sfy * d60dy*(F2*fh[idx_fh_F_ord4(iF,jF-2,kF,ex)]-F24*fh[idx_fh_F_ord4(iF,jF-1,kF,ex)]-F35*fh[idx_fh_F_ord4(iF,jF,kF,ex)]+F80*fh[idx_fh_F_ord4(iF,jF+1,kF,ex)]-F30*fh[idx_fh_F_ord4(iF,jF+2,kF,ex)]+EIT*fh[idx_fh_F_ord4(iF,jF+3,kF,ex)]-fh[idx_fh_F_ord4(iF,jF+4,kF,ex)]);
else if (j0<=ex2-6 && j0>=jminF)
f_rhs[p] += sfy * d60dy*(-F10*fh[idx_fh_F_ord4(iF,jF-1,kF,ex)]-F77*fh[idx_fh_F_ord4(iF,jF,kF,ex)]+F150*fh[idx_fh_F_ord4(iF,jF+1,kF,ex)]-F100*fh[idx_fh_F_ord4(iF,jF+2,kF,ex)]+F50*fh[idx_fh_F_ord4(iF,jF+3,kF,ex)]-F15*fh[idx_fh_F_ord4(iF,jF+4,kF,ex)]+F2*fh[idx_fh_F_ord4(iF,jF+5,kF,ex)]);
else if (j0<=ex2-4 && (j0-2)>=jminF)
f_rhs[p] += sfy * d60dy*(-fh[idx_fh_F_ord4(iF,jF-3,kF,ex)]+F9*fh[idx_fh_F_ord4(iF,jF-2,kF,ex)]-F45*fh[idx_fh_F_ord4(iF,jF-1,kF,ex)]+F45*fh[idx_fh_F_ord4(iF,jF+1,kF,ex)]-F9*fh[idx_fh_F_ord4(iF,jF+2,kF,ex)]+fh[idx_fh_F_ord4(iF,jF+3,kF,ex)]);
else if (j0<=ex2-3 && (j0-1)>=jminF)
f_rhs[p] += sfy * d12dy*(fh[idx_fh_F_ord4(iF,jF-2,kF,ex)]-EIT*fh[idx_fh_F_ord4(iF,jF-1,kF,ex)]+EIT*fh[idx_fh_F_ord4(iF,jF+1,kF,ex)]-fh[idx_fh_F_ord4(iF,jF+2,kF,ex)]);
else if (j0<=ex2-2 && j0>=jminF)
f_rhs[p] += sfy * d2dy*(-fh[idx_fh_F_ord4(iF,jF-1,kF,ex)]+fh[idx_fh_F_ord4(iF,jF+1,kF,ex)]);
} else if (sfy < ZEO) {
if ((j0-4)>=jminF && j0<=ex2-2)
f_rhs[p] -= sfy * d60dy*(F2*fh[idx_fh_F_ord4(iF,jF+2,kF,ex)]-F24*fh[idx_fh_F_ord4(iF,jF+1,kF,ex)]-F35*fh[idx_fh_F_ord4(iF,jF,kF,ex)]+F80*fh[idx_fh_F_ord4(iF,jF-1,kF,ex)]-F30*fh[idx_fh_F_ord4(iF,jF-2,kF,ex)]+EIT*fh[idx_fh_F_ord4(iF,jF-3,kF,ex)]-fh[idx_fh_F_ord4(iF,jF-4,kF,ex)]);
else if ((j0-5)>=jminF && j0<=ex2-2)
f_rhs[p] -= sfy * d60dy*(-F10*fh[idx_fh_F_ord4(iF,jF+1,kF,ex)]-F77*fh[idx_fh_F_ord4(iF,jF,kF,ex)]+F150*fh[idx_fh_F_ord4(iF,jF-1,kF,ex)]-F100*fh[idx_fh_F_ord4(iF,jF-2,kF,ex)]+F50*fh[idx_fh_F_ord4(iF,jF-3,kF,ex)]-F15*fh[idx_fh_F_ord4(iF,jF-4,kF,ex)]+F2*fh[idx_fh_F_ord4(iF,jF-5,kF,ex)]);
else if ((j0-3)>=jminF && j0<=ex2-2)
f_rhs[p] -= sfy * d60dy*(-fh[idx_fh_F_ord4(iF,jF-3,kF,ex)]+F9*fh[idx_fh_F_ord4(iF,jF-2,kF,ex)]-F45*fh[idx_fh_F_ord4(iF,jF-1,kF,ex)]+F45*fh[idx_fh_F_ord4(iF,jF+1,kF,ex)]-F9*fh[idx_fh_F_ord4(iF,jF+2,kF,ex)]+fh[idx_fh_F_ord4(iF,jF+3,kF,ex)]);
else if ((j0-2)>=jminF && j0<=ex2-2)
f_rhs[p] -= sfy * d12dy*(fh[idx_fh_F_ord4(iF,jF-2,kF,ex)]-EIT*fh[idx_fh_F_ord4(iF,jF-1,kF,ex)]+EIT*fh[idx_fh_F_ord4(iF,jF+1,kF,ex)]-fh[idx_fh_F_ord4(iF,jF+2,kF,ex)]);
else if ((j0-1)>=jminF && j0<=ex2-2)
f_rhs[p] -= sfy * d2dy*(-fh[idx_fh_F_ord4(iF,jF-1,kF,ex)]+fh[idx_fh_F_ord4(iF,jF+1,kF,ex)]);
}
/* ---- z-direction ---- */
const double sfz = Sfz[p];
if (sfz > ZEO) {
if (k0<=ex3-5 && (k0-1)>=kminF)
f_rhs[p] += sfz * d60dz*(F2*fh[idx_fh_F_ord4(iF,jF,kF-2,ex)]-F24*fh[idx_fh_F_ord4(iF,jF,kF-1,ex)]-F35*fh[idx_fh_F_ord4(iF,jF,kF,ex)]+F80*fh[idx_fh_F_ord4(iF,jF,kF+1,ex)]-F30*fh[idx_fh_F_ord4(iF,jF,kF+2,ex)]+EIT*fh[idx_fh_F_ord4(iF,jF,kF+3,ex)]-fh[idx_fh_F_ord4(iF,jF,kF+4,ex)]);
else if (k0<=ex3-6 && k0>=kminF)
f_rhs[p] += sfz * d60dz*(-F10*fh[idx_fh_F_ord4(iF,jF,kF-1,ex)]-F77*fh[idx_fh_F_ord4(iF,jF,kF,ex)]+F150*fh[idx_fh_F_ord4(iF,jF,kF+1,ex)]-F100*fh[idx_fh_F_ord4(iF,jF,kF+2,ex)]+F50*fh[idx_fh_F_ord4(iF,jF,kF+3,ex)]-F15*fh[idx_fh_F_ord4(iF,jF,kF+4,ex)]+F2*fh[idx_fh_F_ord4(iF,jF,kF+5,ex)]);
else if (k0<=ex3-4 && (k0-2)>=kminF)
f_rhs[p] += sfz * d60dz*(-fh[idx_fh_F_ord4(iF,jF,kF-3,ex)]+F9*fh[idx_fh_F_ord4(iF,jF,kF-2,ex)]-F45*fh[idx_fh_F_ord4(iF,jF,kF-1,ex)]+F45*fh[idx_fh_F_ord4(iF,jF,kF+1,ex)]-F9*fh[idx_fh_F_ord4(iF,jF,kF+2,ex)]+fh[idx_fh_F_ord4(iF,jF,kF+3,ex)]);
else if (k0<=ex3-3 && (k0-1)>=kminF)
f_rhs[p] += sfz * d12dz*(fh[idx_fh_F_ord4(iF,jF,kF-2,ex)]-EIT*fh[idx_fh_F_ord4(iF,jF,kF-1,ex)]+EIT*fh[idx_fh_F_ord4(iF,jF,kF+1,ex)]-fh[idx_fh_F_ord4(iF,jF,kF+2,ex)]);
else if (k0<=ex3-2 && k0>=kminF)
f_rhs[p] += sfz * d2dz*(-fh[idx_fh_F_ord4(iF,jF,kF-1,ex)]+fh[idx_fh_F_ord4(iF,jF,kF+1,ex)]);
} else if (sfz < ZEO) {
if ((k0-4)>=kminF && k0<=ex3-2)
f_rhs[p] -= sfz * d60dz*(F2*fh[idx_fh_F_ord4(iF,jF,kF+2,ex)]-F24*fh[idx_fh_F_ord4(iF,jF,kF+1,ex)]-F35*fh[idx_fh_F_ord4(iF,jF,kF,ex)]+F80*fh[idx_fh_F_ord4(iF,jF,kF-1,ex)]-F30*fh[idx_fh_F_ord4(iF,jF,kF-2,ex)]+EIT*fh[idx_fh_F_ord4(iF,jF,kF-3,ex)]-fh[idx_fh_F_ord4(iF,jF,kF-4,ex)]);
else if ((k0-5)>=kminF && k0<=ex3-2)
f_rhs[p] -= sfz * d60dz*(-F10*fh[idx_fh_F_ord4(iF,jF,kF+1,ex)]-F77*fh[idx_fh_F_ord4(iF,jF,kF,ex)]+F150*fh[idx_fh_F_ord4(iF,jF,kF-1,ex)]-F100*fh[idx_fh_F_ord4(iF,jF,kF-2,ex)]+F50*fh[idx_fh_F_ord4(iF,jF,kF-3,ex)]-F15*fh[idx_fh_F_ord4(iF,jF,kF-4,ex)]+F2*fh[idx_fh_F_ord4(iF,jF,kF-5,ex)]);
else if ((k0-3)>=kminF && k0<=ex3-2)
f_rhs[p] -= sfz * d60dz*(-fh[idx_fh_F_ord4(iF,jF,kF-3,ex)]+F9*fh[idx_fh_F_ord4(iF,jF,kF-2,ex)]-F45*fh[idx_fh_F_ord4(iF,jF,kF-1,ex)]+F45*fh[idx_fh_F_ord4(iF,jF,kF+1,ex)]-F9*fh[idx_fh_F_ord4(iF,jF,kF+2,ex)]+fh[idx_fh_F_ord4(iF,jF,kF+3,ex)]);
else if ((k0-2)>=kminF && k0<=ex3-2)
f_rhs[p] -= sfz * d12dz*(fh[idx_fh_F_ord4(iF,jF,kF-2,ex)]-EIT*fh[idx_fh_F_ord4(iF,jF,kF-1,ex)]+EIT*fh[idx_fh_F_ord4(iF,jF,kF+1,ex)]-fh[idx_fh_F_ord4(iF,jF,kF+2,ex)]);
else if ((k0-1)>=kminF && k0<=ex3-2)
f_rhs[p] -= sfz * d2dz*(-fh[idx_fh_F_ord4(iF,jF,kF-1,ex)]+fh[idx_fh_F_ord4(iF,jF,kF+1,ex)]);
}
}
}
}
free(fh);
return;
}
#elif (ghost_width == 5)
/* ---- 8th-order lopsided --------------------------------------------- */
{
const int ord = 5;
int iminF = 1, jminF = 1, kminF = 1;
if (Symmetry > NO_SYMM && fabs(Z[0]) < dZ) kminF = -4;
if (Symmetry > EQ_SYMM && fabs(X[0]) < dX) iminF = -4;
if (Symmetry > EQ_SYMM && fabs(Y[0]) < dY) jminF = -4;
const size_t nx = (size_t)ex1 + ord;
const size_t ny = (size_t)ex2 + ord;
const size_t nz = (size_t)ex3 + ord;
const size_t fh_size = nx * ny * nz;
double *fh = (double*)malloc(fh_size * sizeof(double));
if (!fh) return;
symmetry_bd(ord, ex, f, fh, SoA);
const double d840dx = ONE / F840 / dX;
const double d840dy = ONE / F840 / dY;
const double d840dz = ONE / F840 / dZ;
const double d60dx = ONE / F60 / dX;
const double d60dy = ONE / F60 / dY;
const double d60dz = ONE / F60 / dZ;
const double d12dx = ONE / F12 / dX;
const double d12dy = ONE / F12 / dY;
const double d12dz = ONE / F12 / dZ;
const double d2dx = ONE / TWO / dX;
const double d2dy = ONE / TWO / dY;
const double d2dz = ONE / TWO / dZ;
const int imaxF = ex1, jmaxF = ex2, kmaxF = ex3;
for (int k0 = 0; k0 <= ex3 - 2; ++k0) {
const int kF = k0 + 4;
for (int j0 = 0; j0 <= ex2 - 2; ++j0) {
const int jF = j0 + 4;
for (int i0 = 0; i0 <= ex1 - 2; ++i0) {
const int iF = i0 + 4;
const size_t p = idx_ex(i0, j0, k0, ex);
const double sfx = Sfx[p];
if (sfx > ZEO) {
/* 8th biased: -5*f(i-3)+60*f(i-2)-420*f(i-1)-378*f(i)+1050*f(i+1)-420*f(i+2)+140*f(i+3)-30*f(i+4)+3*f(i+5) */
if (i0 <= ex1-6 && (i0-2)>=iminF) // i+5<=imax && i-3>=imin
f_rhs[p] += sfx * d840dx * (
-F5*fh[idx_fh_F_ord5(iF-3,jF,kF,ex)]+F60*fh[idx_fh_F_ord5(iF-2,jF,kF,ex)]
-F420*fh[idx_fh_F_ord5(iF-1,jF,kF,ex)]-F378*fh[idx_fh_F_ord5(iF,jF,kF,ex)]
+F1050*fh[idx_fh_F_ord5(iF+1,jF,kF,ex)]-F420*fh[idx_fh_F_ord5(iF+2,jF,kF,ex)]
+F140*fh[idx_fh_F_ord5(iF+3,jF,kF,ex)]-F30*fh[idx_fh_F_ord5(iF+4,jF,kF,ex)]
+F3*fh[idx_fh_F_ord5(iF+5,jF,kF,ex)]);
/* 8th centered: +3*f(i-4)-32*f(i-3)+168*f(i-2)-672*f(i-1)+672*f(i+1)-168*f(i+2)+32*f(i+3)-3*f(i+4) */
else if (i0 <= ex1-5 && (i0-3)>=iminF)
f_rhs[p] += sfx * d840dx * (
+F3*fh[idx_fh_F_ord5(iF-4,jF,kF,ex)]-F32*fh[idx_fh_F_ord5(iF-3,jF,kF,ex)]
+F168*fh[idx_fh_F_ord5(iF-2,jF,kF,ex)]-F672*fh[idx_fh_F_ord5(iF-1,jF,kF,ex)]
+F672*fh[idx_fh_F_ord5(iF+1,jF,kF,ex)]-F168*fh[idx_fh_F_ord5(iF+2,jF,kF,ex)]
+F32*fh[idx_fh_F_ord5(iF+3,jF,kF,ex)]-F3*fh[idx_fh_F_ord5(iF+4,jF,kF,ex)]);
else if (i0 <= ex1-4 && (i0-2)>=iminF) // 6th centered
f_rhs[p] += sfx * d60dx * (
-fh[idx_fh_F_ord5(iF-3,jF,kF,ex)]+F9*fh[idx_fh_F_ord5(iF-2,jF,kF,ex)]
-F45*fh[idx_fh_F_ord5(iF-1,jF,kF,ex)]+F45*fh[idx_fh_F_ord5(iF+1,jF,kF,ex)]
-F9*fh[idx_fh_F_ord5(iF+2,jF,kF,ex)]+fh[idx_fh_F_ord5(iF+3,jF,kF,ex)]);
else if (i0 <= ex1-3 && (i0-1)>=iminF) // 4th centered
f_rhs[p] += sfx * d12dx * (
fh[idx_fh_F_ord5(iF-2,jF,kF,ex)]-EIT*fh[idx_fh_F_ord5(iF-1,jF,kF,ex)]
+EIT*fh[idx_fh_F_ord5(iF+1,jF,kF,ex)]-fh[idx_fh_F_ord5(iF+2,jF,kF,ex)]);
else if (i0 <= ex1-2 && i0>=iminF) // 2nd centered
f_rhs[p] += sfx * d2dx * (
-fh[idx_fh_F_ord5(iF-1,jF,kF,ex)]+fh[idx_fh_F_ord5(iF+1,jF,kF,ex)]);
} else if (sfx < ZEO) {
if ((i0-5)>=iminF && i0<=ex1-2) // i-5>=imin && i+3<=imax
f_rhs[p] -= sfx * d840dx * (
-F5*fh[idx_fh_F_ord5(iF+3,jF,kF,ex)]+F60*fh[idx_fh_F_ord5(iF+2,jF,kF,ex)]
-F420*fh[idx_fh_F_ord5(iF+1,jF,kF,ex)]-F378*fh[idx_fh_F_ord5(iF,jF,kF,ex)]
+F1050*fh[idx_fh_F_ord5(iF-1,jF,kF,ex)]-F420*fh[idx_fh_F_ord5(iF-2,jF,kF,ex)]
+F140*fh[idx_fh_F_ord5(iF-3,jF,kF,ex)]-F30*fh[idx_fh_F_ord5(iF-4,jF,kF,ex)]
+F3*fh[idx_fh_F_ord5(iF-5,jF,kF,ex)]);
else if ((i0-4)>=iminF && i0<=ex1-2) // 8th centered
f_rhs[p] -= sfx * d840dx * (
+F3*fh[idx_fh_F_ord5(iF-4,jF,kF,ex)]-F32*fh[idx_fh_F_ord5(iF-3,jF,kF,ex)]
+F168*fh[idx_fh_F_ord5(iF-2,jF,kF,ex)]-F672*fh[idx_fh_F_ord5(iF-1,jF,kF,ex)]
+F672*fh[idx_fh_F_ord5(iF+1,jF,kF,ex)]-F168*fh[idx_fh_F_ord5(iF+2,jF,kF,ex)]
+F32*fh[idx_fh_F_ord5(iF+3,jF,kF,ex)]-F3*fh[idx_fh_F_ord5(iF+4,jF,kF,ex)]);
else if ((i0-3)>=iminF && i0<=ex1-2) // 6th centered
f_rhs[p] -= sfx * d60dx * (
-fh[idx_fh_F_ord5(iF-3,jF,kF,ex)]+F9*fh[idx_fh_F_ord5(iF-2,jF,kF,ex)]
-F45*fh[idx_fh_F_ord5(iF-1,jF,kF,ex)]+F45*fh[idx_fh_F_ord5(iF+1,jF,kF,ex)]
-F9*fh[idx_fh_F_ord5(iF+2,jF,kF,ex)]+fh[idx_fh_F_ord5(iF+3,jF,kF,ex)]);
else if ((i0-2)>=iminF && i0<=ex1-2) // 4th centered
f_rhs[p] -= sfx * d12dx * (
fh[idx_fh_F_ord5(iF-2,jF,kF,ex)]-EIT*fh[idx_fh_F_ord5(iF-1,jF,kF,ex)]
+EIT*fh[idx_fh_F_ord5(iF+1,jF,kF,ex)]-fh[idx_fh_F_ord5(iF+2,jF,kF,ex)]);
else if ((i0-1)>=iminF && i0<=ex1-2) // 2nd centered
f_rhs[p] -= sfx * d2dx * (
-fh[idx_fh_F_ord5(iF-1,jF,kF,ex)]+fh[idx_fh_F_ord5(iF+1,jF,kF,ex)]);
}
const double sfy = Sfy[p];
if (sfy > ZEO) {
if (j0<=ex2-6 && (j0-2)>=jminF)
f_rhs[p] += sfy * d840dy*(-F5*fh[idx_fh_F_ord5(iF,jF-3,kF,ex)]+F60*fh[idx_fh_F_ord5(iF,jF-2,kF,ex)]-F420*fh[idx_fh_F_ord5(iF,jF-1,kF,ex)]-F378*fh[idx_fh_F_ord5(iF,jF,kF,ex)]+F1050*fh[idx_fh_F_ord5(iF,jF+1,kF,ex)]-F420*fh[idx_fh_F_ord5(iF,jF+2,kF,ex)]+F140*fh[idx_fh_F_ord5(iF,jF+3,kF,ex)]-F30*fh[idx_fh_F_ord5(iF,jF+4,kF,ex)]+F3*fh[idx_fh_F_ord5(iF,jF+5,kF,ex)]);
else if (j0<=ex2-5 && (j0-3)>=jminF)
f_rhs[p] += sfy * d840dy*(+F3*fh[idx_fh_F_ord5(iF,jF-4,kF,ex)]-F32*fh[idx_fh_F_ord5(iF,jF-3,kF,ex)]+F168*fh[idx_fh_F_ord5(iF,jF-2,kF,ex)]-F672*fh[idx_fh_F_ord5(iF,jF-1,kF,ex)]+F672*fh[idx_fh_F_ord5(iF,jF+1,kF,ex)]-F168*fh[idx_fh_F_ord5(iF,jF+2,kF,ex)]+F32*fh[idx_fh_F_ord5(iF,jF+3,kF,ex)]-F3*fh[idx_fh_F_ord5(iF,jF+4,kF,ex)]);
else if (j0<=ex2-4 && (j0-2)>=jminF)
f_rhs[p] += sfy * d60dy*(-fh[idx_fh_F_ord5(iF,jF-3,kF,ex)]+F9*fh[idx_fh_F_ord5(iF,jF-2,kF,ex)]-F45*fh[idx_fh_F_ord5(iF,jF-1,kF,ex)]+F45*fh[idx_fh_F_ord5(iF,jF+1,kF,ex)]-F9*fh[idx_fh_F_ord5(iF,jF+2,kF,ex)]+fh[idx_fh_F_ord5(iF,jF+3,kF,ex)]);
else if (j0<=ex2-3 && (j0-1)>=jminF)
f_rhs[p] += sfy * d12dy*(fh[idx_fh_F_ord5(iF,jF-2,kF,ex)]-EIT*fh[idx_fh_F_ord5(iF,jF-1,kF,ex)]+EIT*fh[idx_fh_F_ord5(iF,jF+1,kF,ex)]-fh[idx_fh_F_ord5(iF,jF+2,kF,ex)]);
else if (j0<=ex2-2 && j0>=jminF)
f_rhs[p] += sfy * d2dy*(-fh[idx_fh_F_ord5(iF,jF-1,kF,ex)]+fh[idx_fh_F_ord5(iF,jF+1,kF,ex)]);
} else if (sfy < ZEO) {
if ((j0-5)>=jminF && j0<=ex2-2)
f_rhs[p] -= sfy * d840dy*(-F5*fh[idx_fh_F_ord5(iF,jF+3,kF,ex)]+F60*fh[idx_fh_F_ord5(iF,jF+2,kF,ex)]-F420*fh[idx_fh_F_ord5(iF,jF+1,kF,ex)]-F378*fh[idx_fh_F_ord5(iF,jF,kF,ex)]+F1050*fh[idx_fh_F_ord5(iF,jF-1,kF,ex)]-F420*fh[idx_fh_F_ord5(iF,jF-2,kF,ex)]+F140*fh[idx_fh_F_ord5(iF,jF-3,kF,ex)]-F30*fh[idx_fh_F_ord5(iF,jF-4,kF,ex)]+F3*fh[idx_fh_F_ord5(iF,jF-5,kF,ex)]);
else if ((j0-4)>=jminF && j0<=ex2-2)
f_rhs[p] -= sfy * d840dy*(+F3*fh[idx_fh_F_ord5(iF,jF-4,kF,ex)]-F32*fh[idx_fh_F_ord5(iF,jF-3,kF,ex)]+F168*fh[idx_fh_F_ord5(iF,jF-2,kF,ex)]-F672*fh[idx_fh_F_ord5(iF,jF-1,kF,ex)]+F672*fh[idx_fh_F_ord5(iF,jF+1,kF,ex)]-F168*fh[idx_fh_F_ord5(iF,jF+2,kF,ex)]+F32*fh[idx_fh_F_ord5(iF,jF+3,kF,ex)]-F3*fh[idx_fh_F_ord5(iF,jF+4,kF,ex)]);
else if ((j0-3)>=jminF && j0<=ex2-2)
f_rhs[p] -= sfy * d60dy*(-fh[idx_fh_F_ord5(iF,jF-3,kF,ex)]+F9*fh[idx_fh_F_ord5(iF,jF-2,kF,ex)]-F45*fh[idx_fh_F_ord5(iF,jF-1,kF,ex)]+F45*fh[idx_fh_F_ord5(iF,jF+1,kF,ex)]-F9*fh[idx_fh_F_ord5(iF,jF+2,kF,ex)]+fh[idx_fh_F_ord5(iF,jF+3,kF,ex)]);
else if ((j0-2)>=jminF && j0<=ex2-2)
f_rhs[p] -= sfy * d12dy*(fh[idx_fh_F_ord5(iF,jF-2,kF,ex)]-EIT*fh[idx_fh_F_ord5(iF,jF-1,kF,ex)]+EIT*fh[idx_fh_F_ord5(iF,jF+1,kF,ex)]-fh[idx_fh_F_ord5(iF,jF+2,kF,ex)]);
else if ((j0-1)>=jminF && j0<=ex2-2)
f_rhs[p] -= sfy * d2dy*(-fh[idx_fh_F_ord5(iF,jF-1,kF,ex)]+fh[idx_fh_F_ord5(iF,jF+1,kF,ex)]);
}
const double sfz = Sfz[p];
if (sfz > ZEO) {
if (k0<=ex3-6 && (k0-2)>=kminF)
f_rhs[p] += sfz * d840dz*(-F5*fh[idx_fh_F_ord5(iF,jF,kF-3,ex)]+F60*fh[idx_fh_F_ord5(iF,jF,kF-2,ex)]-F420*fh[idx_fh_F_ord5(iF,jF,kF-1,ex)]-F378*fh[idx_fh_F_ord5(iF,jF,kF,ex)]+F1050*fh[idx_fh_F_ord5(iF,jF,kF+1,ex)]-F420*fh[idx_fh_F_ord5(iF,jF,kF+2,ex)]+F140*fh[idx_fh_F_ord5(iF,jF,kF+3,ex)]-F30*fh[idx_fh_F_ord5(iF,jF,kF+4,ex)]+F3*fh[idx_fh_F_ord5(iF,jF,kF+5,ex)]);
else if (k0<=ex3-5 && (k0-3)>=kminF)
f_rhs[p] += sfz * d840dz*(+F3*fh[idx_fh_F_ord5(iF,jF,kF-4,ex)]-F32*fh[idx_fh_F_ord5(iF,jF,kF-3,ex)]+F168*fh[idx_fh_F_ord5(iF,jF,kF-2,ex)]-F672*fh[idx_fh_F_ord5(iF,jF,kF-1,ex)]+F672*fh[idx_fh_F_ord5(iF,jF,kF+1,ex)]-F168*fh[idx_fh_F_ord5(iF,jF,kF+2,ex)]+F32*fh[idx_fh_F_ord5(iF,jF,kF+3,ex)]-F3*fh[idx_fh_F_ord5(iF,jF,kF+4,ex)]);
else if (k0<=ex3-4 && (k0-2)>=kminF)
f_rhs[p] += sfz * d60dz*(-fh[idx_fh_F_ord5(iF,jF,kF-3,ex)]+F9*fh[idx_fh_F_ord5(iF,jF,kF-2,ex)]-F45*fh[idx_fh_F_ord5(iF,jF,kF-1,ex)]+F45*fh[idx_fh_F_ord5(iF,jF,kF+1,ex)]-F9*fh[idx_fh_F_ord5(iF,jF,kF+2,ex)]+fh[idx_fh_F_ord5(iF,jF,kF+3,ex)]);
else if (k0<=ex3-3 && (k0-1)>=kminF)
f_rhs[p] += sfz * d12dz*(fh[idx_fh_F_ord5(iF,jF,kF-2,ex)]-EIT*fh[idx_fh_F_ord5(iF,jF,kF-1,ex)]+EIT*fh[idx_fh_F_ord5(iF,jF,kF+1,ex)]-fh[idx_fh_F_ord5(iF,jF,kF+2,ex)]);
else if (k0<=ex3-2 && k0>=kminF)
f_rhs[p] += sfz * d2dz*(-fh[idx_fh_F_ord5(iF,jF,kF-1,ex)]+fh[idx_fh_F_ord5(iF,jF,kF+1,ex)]);
} else if (sfz < ZEO) {
if ((k0-5)>=kminF && k0<=ex3-2)
f_rhs[p] -= sfz * d840dz*(-F5*fh[idx_fh_F_ord5(iF,jF,kF+3,ex)]+F60*fh[idx_fh_F_ord5(iF,jF,kF+2,ex)]-F420*fh[idx_fh_F_ord5(iF,jF,kF+1,ex)]-F378*fh[idx_fh_F_ord5(iF,jF,kF,ex)]+F1050*fh[idx_fh_F_ord5(iF,jF,kF-1,ex)]-F420*fh[idx_fh_F_ord5(iF,jF,kF-2,ex)]+F140*fh[idx_fh_F_ord5(iF,jF,kF-3,ex)]-F30*fh[idx_fh_F_ord5(iF,jF,kF-4,ex)]+F3*fh[idx_fh_F_ord5(iF,jF,kF-5,ex)]);
else if ((k0-4)>=kminF && k0<=ex3-2)
f_rhs[p] -= sfz * d840dz*(+F3*fh[idx_fh_F_ord5(iF,jF,kF-4,ex)]-F32*fh[idx_fh_F_ord5(iF,jF,kF-3,ex)]+F168*fh[idx_fh_F_ord5(iF,jF,kF-2,ex)]-F672*fh[idx_fh_F_ord5(iF,jF,kF-1,ex)]+F672*fh[idx_fh_F_ord5(iF,jF,kF+1,ex)]-F168*fh[idx_fh_F_ord5(iF,jF,kF+2,ex)]+F32*fh[idx_fh_F_ord5(iF,jF,kF+3,ex)]-F3*fh[idx_fh_F_ord5(iF,jF,kF+4,ex)]);
else if ((k0-3)>=kminF && k0<=ex3-2)
f_rhs[p] -= sfz * d60dz*(-fh[idx_fh_F_ord5(iF,jF,kF-3,ex)]+F9*fh[idx_fh_F_ord5(iF,jF,kF-2,ex)]-F45*fh[idx_fh_F_ord5(iF,jF,kF-1,ex)]+F45*fh[idx_fh_F_ord5(iF,jF,kF+1,ex)]-F9*fh[idx_fh_F_ord5(iF,jF,kF+2,ex)]+fh[idx_fh_F_ord5(iF,jF,kF+3,ex)]);
else if ((k0-2)>=kminF && k0<=ex3-2)
f_rhs[p] -= sfz * d12dz*(fh[idx_fh_F_ord5(iF,jF,kF-2,ex)]-EIT*fh[idx_fh_F_ord5(iF,jF,kF-1,ex)]+EIT*fh[idx_fh_F_ord5(iF,jF,kF+1,ex)]-fh[idx_fh_F_ord5(iF,jF,kF+2,ex)]);
else if ((k0-1)>=kminF && k0<=ex3-2)
f_rhs[p] -= sfz * d2dz*(-fh[idx_fh_F_ord5(iF,jF,kF-1,ex)]+fh[idx_fh_F_ord5(iF,jF,kF+1,ex)]);
}
}
}
}
free(fh);
return;
}
#else
#error "lopsided_c.C: unsupported ghost_width (must be 2, 3, 4, or 5)"
#endif
}