#include "tool.h" void fdderivs(const int ex[3], const double *f, double *fxx, double *fxy, double *fxz, double *fyy, double *fyz, double *fzz, const double *X, const double *Y, const double *Z, double SYM1, double SYM2, double SYM3, int Symmetry, int onoff) { (void)onoff; const int NO_SYMM = 0, EQ_SYMM = 1; const double ZEO = 0.0, ONE = 1.0, TWO = 2.0; const double F1o4 = 2.5e-1; // 1/4 const double F8 = 8.0; const double F16 = 16.0; const double F30 = 30.0; const double F1o12 = ONE / 12.0; const double F1o144 = ONE / 144.0; const int ex1 = ex[0], ex2 = ex[1], ex3 = ex[2]; const double dX = X[1] - X[0]; const double dY = Y[1] - Y[0]; const double dZ = Z[1] - Z[0]; const int imaxF = ex1; const int jmaxF = ex2; const int kmaxF = ex3; int iminF = 1, jminF = 1, kminF = 1; if (Symmetry > NO_SYMM && fabs(Z[0]) < dZ) kminF = -1; if (Symmetry > EQ_SYMM && fabs(X[0]) < dX) iminF = -1; if (Symmetry > EQ_SYMM && fabs(Y[0]) < dY) jminF = -1; const double SoA[3] = { SYM1, SYM2, SYM3 }; /* fh: (ex1+2)*(ex2+2)*(ex3+2) because ord=2 */ const size_t nx = (size_t)ex1 + 2; const size_t ny = (size_t)ex2 + 2; const size_t nz = (size_t)ex3 + 2; const size_t fh_size = nx * ny * nz; static double *fh = NULL; static size_t cap = 0; if (fh_size > cap) { free(fh); fh = (double*)aligned_alloc(64, fh_size * sizeof(double)); cap = fh_size; } // double *fh = (double*)malloc(fh_size * sizeof(double)); if (!fh) return; symmetry_bd(2, ex, f, fh, SoA); /* 系数:按 Fortran 原式 */ const double Sdxdx = ONE / (dX * dX); const double Sdydy = ONE / (dY * dY); const double Sdzdz = ONE / (dZ * dZ); const double Fdxdx = F1o12 / (dX * dX); const double Fdydy = F1o12 / (dY * dY); const double Fdzdz = F1o12 / (dZ * dZ); const double Sdxdy = F1o4 / (dX * dY); const double Sdxdz = F1o4 / (dX * dZ); const double Sdydz = F1o4 / (dY * dZ); const double Fdxdy = F1o144 / (dX * dY); const double Fdxdz = F1o144 / (dX * dZ); const double Fdydz = F1o144 / (dY * dZ); /* 只清零不被主循环覆盖的边界面 */ { /* 高边界:k0=ex3-1 */ for (int j0 = 0; j0 < ex2; ++j0) for (int i0 = 0; i0 < ex1; ++i0) { const size_t p = idx_ex(i0, j0, ex3 - 1, ex); fxx[p]=ZEO; fyy[p]=ZEO; fzz[p]=ZEO; fxy[p]=ZEO; fxz[p]=ZEO; fyz[p]=ZEO; } /* 高边界:j0=ex2-1 */ for (int k0 = 0; k0 < ex3 - 1; ++k0) for (int i0 = 0; i0 < ex1; ++i0) { const size_t p = idx_ex(i0, ex2 - 1, k0, ex); fxx[p]=ZEO; fyy[p]=ZEO; fzz[p]=ZEO; fxy[p]=ZEO; fxz[p]=ZEO; fyz[p]=ZEO; } /* 高边界:i0=ex1-1 */ for (int k0 = 0; k0 < ex3 - 1; ++k0) for (int j0 = 0; j0 < ex2 - 1; ++j0) { const size_t p = idx_ex(ex1 - 1, j0, k0, ex); fxx[p]=ZEO; fyy[p]=ZEO; fzz[p]=ZEO; fxy[p]=ZEO; fxz[p]=ZEO; fyz[p]=ZEO; } /* 低边界:当二阶模板也不可用时,对应 i0/j0/k0=0 面 */ if (kminF == 1) { for (int j0 = 0; j0 < ex2; ++j0) for (int i0 = 0; i0 < ex1; ++i0) { const size_t p = idx_ex(i0, j0, 0, ex); fxx[p]=ZEO; fyy[p]=ZEO; fzz[p]=ZEO; fxy[p]=ZEO; fxz[p]=ZEO; fyz[p]=ZEO; } } if (jminF == 1) { for (int k0 = 0; k0 < ex3; ++k0) for (int i0 = 0; i0 < ex1; ++i0) { const size_t p = idx_ex(i0, 0, k0, ex); fxx[p]=ZEO; fyy[p]=ZEO; fzz[p]=ZEO; fxy[p]=ZEO; fxz[p]=ZEO; fyz[p]=ZEO; } } if (iminF == 1) { for (int k0 = 0; k0 < ex3; ++k0) for (int j0 = 0; j0 < ex2; ++j0) { const size_t p = idx_ex(0, j0, k0, ex); fxx[p]=ZEO; fyy[p]=ZEO; fzz[p]=ZEO; fxy[p]=ZEO; fxz[p]=ZEO; fyz[p]=ZEO; } } } /* * 两段式: * 1) 二阶可用区域先计算二阶模板 * 2) 高阶可用区域再覆盖四阶模板 */ const int i2_lo = (iminF > 0) ? iminF : 0; const int j2_lo = (jminF > 0) ? jminF : 0; const int k2_lo = (kminF > 0) ? kminF : 0; const int i2_hi = ex1 - 2; const int j2_hi = ex2 - 2; const int k2_hi = ex3 - 2; const int i4_lo = (iminF + 1 > 0) ? (iminF + 1) : 0; const int j4_lo = (jminF + 1 > 0) ? (jminF + 1) : 0; const int k4_lo = (kminF + 1 > 0) ? (kminF + 1) : 0; const int i4_hi = ex1 - 3; const int j4_hi = ex2 - 3; const int k4_hi = ex3 - 3; if (i2_lo <= i2_hi && j2_lo <= j2_hi && k2_lo <= k2_hi) { for (int k0 = k2_lo; k0 <= k2_hi; ++k0) { const int kF = k0 + 1; for (int j0 = j2_lo; j0 <= j2_hi; ++j0) { const int jF = j0 + 1; for (int i0 = i2_lo; i0 <= i2_hi; ++i0) { const int iF = i0 + 1; const size_t p = idx_ex(i0, j0, k0, ex); fxx[p] = Sdxdx * ( fh[idx_fh_F_ord2(iF - 1, jF, kF, ex)] - TWO * fh[idx_fh_F_ord2(iF, jF, kF, ex)] + fh[idx_fh_F_ord2(iF + 1, jF, kF, ex)] ); fyy[p] = Sdydy * ( fh[idx_fh_F_ord2(iF, jF - 1, kF, ex)] - TWO * fh[idx_fh_F_ord2(iF, jF, kF, ex)] + fh[idx_fh_F_ord2(iF, jF + 1, kF, ex)] ); fzz[p] = Sdzdz * ( fh[idx_fh_F_ord2(iF, jF, kF - 1, ex)] - TWO * fh[idx_fh_F_ord2(iF, jF, kF, ex)] + fh[idx_fh_F_ord2(iF, jF, kF + 1, ex)] ); fxy[p] = Sdxdy * ( fh[idx_fh_F_ord2(iF - 1, jF - 1, kF, ex)] - fh[idx_fh_F_ord2(iF + 1, jF - 1, kF, ex)] - fh[idx_fh_F_ord2(iF - 1, jF + 1, kF, ex)] + fh[idx_fh_F_ord2(iF + 1, jF + 1, kF, ex)] ); fxz[p] = Sdxdz * ( fh[idx_fh_F_ord2(iF - 1, jF, kF - 1, ex)] - fh[idx_fh_F_ord2(iF + 1, jF, kF - 1, ex)] - fh[idx_fh_F_ord2(iF - 1, jF, kF + 1, ex)] + fh[idx_fh_F_ord2(iF + 1, jF, kF + 1, ex)] ); fyz[p] = Sdydz * ( fh[idx_fh_F_ord2(iF, jF - 1, kF - 1, ex)] - fh[idx_fh_F_ord2(iF, jF + 1, kF - 1, ex)] - fh[idx_fh_F_ord2(iF, jF - 1, kF + 1, ex)] + fh[idx_fh_F_ord2(iF, jF + 1, kF + 1, ex)] ); } } } } if (i4_lo <= i4_hi && j4_lo <= j4_hi && k4_lo <= k4_hi) { for (int k0 = k4_lo; k0 <= k4_hi; ++k0) { const int kF = k0 + 1; for (int j0 = j4_lo; j0 <= j4_hi; ++j0) { const int jF = j0 + 1; for (int i0 = i4_lo; i0 <= i4_hi; ++i0) { const int iF = i0 + 1; const size_t p = idx_ex(i0, j0, k0, ex); fxx[p] = Fdxdx * ( -fh[idx_fh_F_ord2(iF - 2, jF, kF, ex)] + F16 * fh[idx_fh_F_ord2(iF - 1, jF, kF, ex)] - F30 * fh[idx_fh_F_ord2(iF, jF, kF, ex)] - fh[idx_fh_F_ord2(iF + 2, jF, kF, ex)] + F16 * fh[idx_fh_F_ord2(iF + 1, jF, kF, ex)] ); fyy[p] = Fdydy * ( -fh[idx_fh_F_ord2(iF, jF - 2, kF, ex)] + F16 * fh[idx_fh_F_ord2(iF, jF - 1, kF, ex)] - F30 * fh[idx_fh_F_ord2(iF, jF, kF, ex)] - fh[idx_fh_F_ord2(iF, jF + 2, kF, ex)] + F16 * fh[idx_fh_F_ord2(iF, jF + 1, kF, ex)] ); fzz[p] = Fdzdz * ( -fh[idx_fh_F_ord2(iF, jF, kF - 2, ex)] + F16 * fh[idx_fh_F_ord2(iF, jF, kF - 1, ex)] - F30 * fh[idx_fh_F_ord2(iF, jF, kF, ex)] - fh[idx_fh_F_ord2(iF, jF, kF + 2, ex)] + F16 * fh[idx_fh_F_ord2(iF, jF, kF + 1, ex)] ); { const double t_jm2 = ( fh[idx_fh_F_ord2(iF - 2, jF - 2, kF, ex)] -F8*fh[idx_fh_F_ord2(iF - 1, jF - 2, kF, ex)] +F8*fh[idx_fh_F_ord2(iF + 1, jF - 2, kF, ex)] - fh[idx_fh_F_ord2(iF + 2, jF - 2, kF, ex)] ); const double t_jm1 = ( fh[idx_fh_F_ord2(iF - 2, jF - 1, kF, ex)] -F8*fh[idx_fh_F_ord2(iF - 1, jF - 1, kF, ex)] +F8*fh[idx_fh_F_ord2(iF + 1, jF - 1, kF, ex)] - fh[idx_fh_F_ord2(iF + 2, jF - 1, kF, ex)] ); const double t_jp1 = ( fh[idx_fh_F_ord2(iF - 2, jF + 1, kF, ex)] -F8*fh[idx_fh_F_ord2(iF - 1, jF + 1, kF, ex)] +F8*fh[idx_fh_F_ord2(iF + 1, jF + 1, kF, ex)] - fh[idx_fh_F_ord2(iF + 2, jF + 1, kF, ex)] ); const double t_jp2 = ( fh[idx_fh_F_ord2(iF - 2, jF + 2, kF, ex)] -F8*fh[idx_fh_F_ord2(iF - 1, jF + 2, kF, ex)] +F8*fh[idx_fh_F_ord2(iF + 1, jF + 2, kF, ex)] - fh[idx_fh_F_ord2(iF + 2, jF + 2, kF, ex)] ); fxy[p] = Fdxdy * ( t_jm2 - F8 * t_jm1 + F8 * t_jp1 - t_jp2 ); } { const double t_km2 = ( fh[idx_fh_F_ord2(iF - 2, jF, kF - 2, ex)] -F8*fh[idx_fh_F_ord2(iF - 1, jF, kF - 2, ex)] +F8*fh[idx_fh_F_ord2(iF + 1, jF, kF - 2, ex)] - fh[idx_fh_F_ord2(iF + 2, jF, kF - 2, ex)] ); const double t_km1 = ( fh[idx_fh_F_ord2(iF - 2, jF, kF - 1, ex)] -F8*fh[idx_fh_F_ord2(iF - 1, jF, kF - 1, ex)] +F8*fh[idx_fh_F_ord2(iF + 1, jF, kF - 1, ex)] - fh[idx_fh_F_ord2(iF + 2, jF, kF - 1, ex)] ); const double t_kp1 = ( fh[idx_fh_F_ord2(iF - 2, jF, kF + 1, ex)] -F8*fh[idx_fh_F_ord2(iF - 1, jF, kF + 1, ex)] +F8*fh[idx_fh_F_ord2(iF + 1, jF, kF + 1, ex)] - fh[idx_fh_F_ord2(iF + 2, jF, kF + 1, ex)] ); const double t_kp2 = ( fh[idx_fh_F_ord2(iF - 2, jF, kF + 2, ex)] -F8*fh[idx_fh_F_ord2(iF - 1, jF, kF + 2, ex)] +F8*fh[idx_fh_F_ord2(iF + 1, jF, kF + 2, ex)] - fh[idx_fh_F_ord2(iF + 2, jF, kF + 2, ex)] ); fxz[p] = Fdxdz * ( t_km2 - F8 * t_km1 + F8 * t_kp1 - t_kp2 ); } { const double t_km2 = ( fh[idx_fh_F_ord2(iF, jF - 2, kF - 2, ex)] -F8*fh[idx_fh_F_ord2(iF, jF - 1, kF - 2, ex)] +F8*fh[idx_fh_F_ord2(iF, jF + 1, kF - 2, ex)] - fh[idx_fh_F_ord2(iF, jF + 2, kF - 2, ex)] ); const double t_km1 = ( fh[idx_fh_F_ord2(iF, jF - 2, kF - 1, ex)] -F8*fh[idx_fh_F_ord2(iF, jF - 1, kF - 1, ex)] +F8*fh[idx_fh_F_ord2(iF, jF + 1, kF - 1, ex)] - fh[idx_fh_F_ord2(iF, jF + 2, kF - 1, ex)] ); const double t_kp1 = ( fh[idx_fh_F_ord2(iF, jF - 2, kF + 1, ex)] -F8*fh[idx_fh_F_ord2(iF, jF - 1, kF + 1, ex)] +F8*fh[idx_fh_F_ord2(iF, jF + 1, kF + 1, ex)] - fh[idx_fh_F_ord2(iF, jF + 2, kF + 1, ex)] ); const double t_kp2 = ( fh[idx_fh_F_ord2(iF, jF - 2, kF + 2, ex)] -F8*fh[idx_fh_F_ord2(iF, jF - 1, kF + 2, ex)] +F8*fh[idx_fh_F_ord2(iF, jF + 1, kF + 2, ex)] - fh[idx_fh_F_ord2(iF, jF + 2, kF + 2, ex)] ); fyz[p] = Fdydz * ( t_km2 - F8 * t_km1 + F8 * t_kp1 - t_kp2 ); } } } } } // free(fh); }