Add full FD order support (2nd/4th/6th/8th) to C derivative kernels via ghost_width dispatch
Wrap each C kernel in #if (ghost_width == N) blocks matching Fortran stencil coefficients from diff_new.f90, kodiss.f90, and lopsidediff.f90. Add fast-path indexing for ord=1,4,5 in share_func.h. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -46,6 +46,45 @@ static inline size_t idx_fh_F(int iF, int jF, int kF, const int ex[3]) {
|
||||
return (size_t)ii + (size_t)jj * (size_t)nx + (size_t)kk * (size_t)nx * (size_t)ny;
|
||||
}
|
||||
|
||||
/*
 * Flat offset into fh when it mirrors the Fortran array
 *   fh(0:ex1, 0:ex2, 0:ex3)
 * i.e. ord=1 and therefore shift=0.
 * iF/jF/kF are the Fortran indices (0..ex) and are used unshifted.
 * The first index varies fastest; ex[2] does not enter the formula.
 */
static inline size_t idx_fh_F_ord1(int iF, int jF, int kF, const int ex[3]) {
    const size_t row_len = (size_t)(ex[0] + 1);              /* ex1 + ord */
    const size_t plane_len = row_len * (size_t)(ex[1] + 1);  /* (ex1 + ord) * (ex2 + ord) */

    return (size_t)iF + (size_t)jF * row_len + (size_t)kF * plane_len;
}
|
||||
|
||||
/*
 * Flat offset into fh when it mirrors the Fortran array
 *   fh(-3:ex1, -3:ex2, -3:ex3)
 * i.e. ord=4 and therefore shift=3.
 * Each Fortran index is moved up by the shift so that -3 maps to 0,
 * then the three shifted indices are combined with the first index
 * varying fastest. ex[2] does not enter the formula.
 */
static inline size_t idx_fh_F_ord4(int iF, int jF, int kF, const int ex[3]) {
    enum { ORD = 4, SHIFT = ORD - 1 };

    const size_t row_len = (size_t)(ex[0] + ORD);              /* ex1 + ord */
    const size_t plane_len = row_len * (size_t)(ex[1] + ORD);  /* (ex1 + ord) * (ex2 + ord) */

    const size_t i0 = (size_t)(iF + SHIFT);  /* 0..ex1+3 */
    const size_t j0 = (size_t)(jF + SHIFT);  /* 0..ex2+3 */
    const size_t k0 = (size_t)(kF + SHIFT);  /* 0..ex3+3 */

    return i0 + j0 * row_len + k0 * plane_len;
}
|
||||
|
||||
/*
 * Flat offset into fh when it mirrors the Fortran array
 *   fh(-4:ex1, -4:ex2, -4:ex3)
 * i.e. ord=5 and therefore shift=4.
 * Each Fortran index is moved up by the shift so that -4 maps to 0,
 * then the three shifted indices are combined with the first index
 * varying fastest. ex[2] does not enter the formula.
 */
static inline size_t idx_fh_F_ord5(int iF, int jF, int kF, const int ex[3]) {
    enum { ORD = 5, SHIFT = ORD - 1 };

    const size_t row_len = (size_t)(ex[0] + ORD);              /* ex1 + ord */
    const size_t plane_len = row_len * (size_t)(ex[1] + ORD);  /* (ex1 + ord) * (ex2 + ord) */

    const size_t i0 = (size_t)(iF + SHIFT);  /* 0..ex1+4 */
    const size_t j0 = (size_t)(jF + SHIFT);  /* 0..ex2+4 */
    const size_t k0 = (size_t)(kF + SHIFT);  /* 0..ex3+4 */

    return i0 + j0 * row_len + k0 * plane_len;
}
|
||||
|
||||
/*
|
||||
* func: (1..extc1, 1..extc2, 1..extc3) 1-based in Fortran
|
||||
* funcc: (-ord+1..extc1, -ord+1..extc2, -ord+1..extc3) in Fortran
|
||||
@@ -231,7 +270,10 @@ static inline void symmetry_bd(int ord,
|
||||
{
|
||||
if (ord <= 0) return;
|
||||
|
||||
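/* Fast paths for ord=1..4, each passing literal (ord, ord-1) arguments; ord=2 (derivs) and ord=3 (lopsided/KO) were the original pair. Any other ord falls through to the generic call at the end. */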
|
||||
if (ord == 1) {
|
||||
symmetry_bd_impl(1, 0, extc, func, funcc, SoA);
|
||||
return;
|
||||
}
|
||||
if (ord == 2) {
|
||||
symmetry_bd_impl(2, 1, extc, func, funcc, SoA);
|
||||
return;
|
||||
@@ -240,6 +282,10 @@ static inline void symmetry_bd(int ord,
|
||||
symmetry_bd_impl(3, 2, extc, func, funcc, SoA);
|
||||
return;
|
||||
}
|
||||
if (ord == 4) {
|
||||
symmetry_bd_impl(4, 3, extc, func, funcc, SoA);
|
||||
return;
|
||||
}
|
||||
|
||||
symmetry_bd_impl(ord, ord - 1, extc, func, funcc, SoA);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user