[TEST]UPSTREAM: Pick some source changes from 48080d0a97
* Sync new folder structure
This commit is contained in:
76
AMSS_NCKU_source/Derivative/derivatives.h
Normal file
76
AMSS_NCKU_source/Derivative/derivatives.h
Normal file
@@ -0,0 +1,76 @@
|
||||
|
||||
#ifndef DERIVATIVES
|
||||
#define DERIVATIVES
|
||||
|
||||
#ifdef fortran1
|
||||
#define f_fderivs fderivs
|
||||
#define f_fderivs_sh fderivs_sh
|
||||
#define f_fderivs_shc fderivs_shc
|
||||
#define f_fdderivs_shc fdderivs_shc
|
||||
#define f_fdderivs fdderivs
|
||||
#endif
|
||||
#ifdef fortran2
|
||||
#define f_fderivs FDERIVS
|
||||
#define f_fderivs_sh FDERIVS_SH
|
||||
#define f_fderivs_shc FDERIVS_SHC
|
||||
#define f_fdderivs_shc FDDERIVS_SHC
|
||||
#define f_fdderivs FDDERIVS
|
||||
#endif
|
||||
#ifdef fortran3
|
||||
#define f_fderivs fderivs_
|
||||
#define f_fderivs_sh fderivs_sh_
|
||||
#define f_fderivs_shc fderivs_shc_
|
||||
#define f_fdderivs_shc fdderivs_shc_
|
||||
#define f_fdderivs fdderivs_
|
||||
#endif
|
||||
|
||||
extern "C"
|
||||
{
|
||||
void f_fderivs(int *, double *,
|
||||
double *, double *, double *,
|
||||
double *, double *, double *,
|
||||
double &, double &, double &, int &, int &);
|
||||
}
|
||||
|
||||
extern "C"
|
||||
{
|
||||
void f_fderivs_sh(int *, double *,
|
||||
double *, double *, double *,
|
||||
double *, double *, double *,
|
||||
double &, double &, double &, int &, int &, int &);
|
||||
}
|
||||
|
||||
extern "C"
|
||||
{
|
||||
void f_fderivs_shc(int *, double *,
|
||||
double *, double *, double *,
|
||||
double *, double *, double *,
|
||||
double &, double &, double &, int &, int &, int &,
|
||||
double *, double *, double *,
|
||||
double *, double *, double *,
|
||||
double *, double *, double *);
|
||||
}
|
||||
|
||||
extern "C"
|
||||
{
|
||||
void f_fdderivs_shc(int *, double *,
|
||||
double *, double *, double *, double *, double *, double *,
|
||||
double *, double *, double *,
|
||||
double &, double &, double &, int &, int &, int &,
|
||||
double *, double *, double *,
|
||||
double *, double *, double *,
|
||||
double *, double *, double *,
|
||||
double *, double *, double *, double *, double *, double *,
|
||||
double *, double *, double *, double *, double *, double *,
|
||||
double *, double *, double *, double *, double *, double *);
|
||||
}
|
||||
|
||||
extern "C"
|
||||
{
|
||||
void f_fdderivs(int *, double *,
|
||||
double *, double *, double *, double *, double *, double *,
|
||||
double *, double *, double *,
|
||||
double &, double &, double &, int &, int &);
|
||||
}
|
||||
|
||||
#endif /* DERIVATIVES */
|
||||
4307
AMSS_NCKU_source/Derivative/diff_new.f90
Normal file
4307
AMSS_NCKU_source/Derivative/diff_new.f90
Normal file
File diff suppressed because it is too large
Load Diff
4777
AMSS_NCKU_source/Derivative/diff_new_sh.f90
Normal file
4777
AMSS_NCKU_source/Derivative/diff_new_sh.f90
Normal file
File diff suppressed because it is too large
Load Diff
4997
AMSS_NCKU_source/Derivative/diff_newwb.f90
Normal file
4997
AMSS_NCKU_source/Derivative/diff_newwb.f90
Normal file
File diff suppressed because it is too large
Load Diff
332
AMSS_NCKU_source/Derivative/fdderivs_c.C
Normal file
332
AMSS_NCKU_source/Derivative/fdderivs_c.C
Normal file
@@ -0,0 +1,332 @@
|
||||
#include "tool.h"
|
||||
void fdderivs(const int ex[3],
|
||||
const double *f,
|
||||
double *fxx, double *fxy, double *fxz,
|
||||
double *fyy, double *fyz, double *fzz,
|
||||
const double *X, const double *Y, const double *Z,
|
||||
double SYM1, double SYM2, double SYM3,
|
||||
int Symmetry, int onoff)
|
||||
{
|
||||
(void)onoff;
|
||||
|
||||
const int NO_SYMM = 0, EQ_SYMM = 1;
|
||||
const double ZEO = 0.0, ONE = 1.0, TWO = 2.0;
|
||||
const double F1o4 = 2.5e-1; // 1/4
|
||||
const double F8 = 8.0;
|
||||
const double F16 = 16.0;
|
||||
const double F30 = 30.0;
|
||||
const double F1o12 = ONE / 12.0;
|
||||
const double F1o144 = ONE / 144.0;
|
||||
|
||||
const int ex1 = ex[0], ex2 = ex[1], ex3 = ex[2];
|
||||
|
||||
const double dX = X[1] - X[0];
|
||||
const double dY = Y[1] - Y[0];
|
||||
const double dZ = Z[1] - Z[0];
|
||||
|
||||
const int imaxF = ex1;
|
||||
const int jmaxF = ex2;
|
||||
const int kmaxF = ex3;
|
||||
|
||||
int iminF = 1, jminF = 1, kminF = 1;
|
||||
if (Symmetry > NO_SYMM && fabs(Z[0]) < dZ) kminF = -1;
|
||||
if (Symmetry > EQ_SYMM && fabs(X[0]) < dX) iminF = -1;
|
||||
if (Symmetry > EQ_SYMM && fabs(Y[0]) < dY) jminF = -1;
|
||||
|
||||
const double SoA[3] = { SYM1, SYM2, SYM3 };
|
||||
|
||||
/* fh: (ex1+2)*(ex2+2)*(ex3+2) because ord=2 */
|
||||
const size_t nx = (size_t)ex1 + 2;
|
||||
const size_t ny = (size_t)ex2 + 2;
|
||||
const size_t nz = (size_t)ex3 + 2;
|
||||
const size_t fh_size = nx * ny * nz;
|
||||
|
||||
static double *fh = NULL;
|
||||
static size_t cap = 0;
|
||||
|
||||
if (fh_size > cap) {
|
||||
free(fh);
|
||||
fh = (double*)aligned_alloc(64, fh_size * sizeof(double));
|
||||
cap = fh_size;
|
||||
}
|
||||
// double *fh = (double*)malloc(fh_size * sizeof(double));
|
||||
if (!fh) return;
|
||||
|
||||
symmetry_bd(2, ex, f, fh, SoA);
|
||||
|
||||
/* 系数:按 Fortran 原式 */
|
||||
const double Sdxdx = ONE / (dX * dX);
|
||||
const double Sdydy = ONE / (dY * dY);
|
||||
const double Sdzdz = ONE / (dZ * dZ);
|
||||
|
||||
const double Fdxdx = F1o12 / (dX * dX);
|
||||
const double Fdydy = F1o12 / (dY * dY);
|
||||
const double Fdzdz = F1o12 / (dZ * dZ);
|
||||
|
||||
const double Sdxdy = F1o4 / (dX * dY);
|
||||
const double Sdxdz = F1o4 / (dX * dZ);
|
||||
const double Sdydz = F1o4 / (dY * dZ);
|
||||
|
||||
const double Fdxdy = F1o144 / (dX * dY);
|
||||
const double Fdxdz = F1o144 / (dX * dZ);
|
||||
const double Fdydz = F1o144 / (dY * dZ);
|
||||
|
||||
/* 只清零不被主循环覆盖的边界面 */
|
||||
{
|
||||
/* 高边界:k0=ex3-1 */
|
||||
for (int j0 = 0; j0 < ex2; ++j0)
|
||||
for (int i0 = 0; i0 < ex1; ++i0) {
|
||||
const size_t p = idx_ex(i0, j0, ex3 - 1, ex);
|
||||
fxx[p]=ZEO; fyy[p]=ZEO; fzz[p]=ZEO;
|
||||
fxy[p]=ZEO; fxz[p]=ZEO; fyz[p]=ZEO;
|
||||
}
|
||||
/* 高边界:j0=ex2-1 */
|
||||
for (int k0 = 0; k0 < ex3 - 1; ++k0)
|
||||
for (int i0 = 0; i0 < ex1; ++i0) {
|
||||
const size_t p = idx_ex(i0, ex2 - 1, k0, ex);
|
||||
fxx[p]=ZEO; fyy[p]=ZEO; fzz[p]=ZEO;
|
||||
fxy[p]=ZEO; fxz[p]=ZEO; fyz[p]=ZEO;
|
||||
}
|
||||
/* 高边界:i0=ex1-1 */
|
||||
for (int k0 = 0; k0 < ex3 - 1; ++k0)
|
||||
for (int j0 = 0; j0 < ex2 - 1; ++j0) {
|
||||
const size_t p = idx_ex(ex1 - 1, j0, k0, ex);
|
||||
fxx[p]=ZEO; fyy[p]=ZEO; fzz[p]=ZEO;
|
||||
fxy[p]=ZEO; fxz[p]=ZEO; fyz[p]=ZEO;
|
||||
}
|
||||
|
||||
/* 低边界:当二阶模板也不可用时,对应 i0/j0/k0=0 面 */
|
||||
if (kminF == 1) {
|
||||
for (int j0 = 0; j0 < ex2; ++j0)
|
||||
for (int i0 = 0; i0 < ex1; ++i0) {
|
||||
const size_t p = idx_ex(i0, j0, 0, ex);
|
||||
fxx[p]=ZEO; fyy[p]=ZEO; fzz[p]=ZEO;
|
||||
fxy[p]=ZEO; fxz[p]=ZEO; fyz[p]=ZEO;
|
||||
}
|
||||
}
|
||||
if (jminF == 1) {
|
||||
for (int k0 = 0; k0 < ex3; ++k0)
|
||||
for (int i0 = 0; i0 < ex1; ++i0) {
|
||||
const size_t p = idx_ex(i0, 0, k0, ex);
|
||||
fxx[p]=ZEO; fyy[p]=ZEO; fzz[p]=ZEO;
|
||||
fxy[p]=ZEO; fxz[p]=ZEO; fyz[p]=ZEO;
|
||||
}
|
||||
}
|
||||
if (iminF == 1) {
|
||||
for (int k0 = 0; k0 < ex3; ++k0)
|
||||
for (int j0 = 0; j0 < ex2; ++j0) {
|
||||
const size_t p = idx_ex(0, j0, k0, ex);
|
||||
fxx[p]=ZEO; fyy[p]=ZEO; fzz[p]=ZEO;
|
||||
fxy[p]=ZEO; fxz[p]=ZEO; fyz[p]=ZEO;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* 两段式:
|
||||
* 1) 二阶可用区域先计算二阶模板
|
||||
* 2) 高阶可用区域再覆盖四阶模板
|
||||
*/
|
||||
const int i2_lo = (iminF > 0) ? iminF : 0;
|
||||
const int j2_lo = (jminF > 0) ? jminF : 0;
|
||||
const int k2_lo = (kminF > 0) ? kminF : 0;
|
||||
const int i2_hi = ex1 - 2;
|
||||
const int j2_hi = ex2 - 2;
|
||||
const int k2_hi = ex3 - 2;
|
||||
|
||||
const int i4_lo = (iminF + 1 > 0) ? (iminF + 1) : 0;
|
||||
const int j4_lo = (jminF + 1 > 0) ? (jminF + 1) : 0;
|
||||
const int k4_lo = (kminF + 1 > 0) ? (kminF + 1) : 0;
|
||||
const int i4_hi = ex1 - 3;
|
||||
const int j4_hi = ex2 - 3;
|
||||
const int k4_hi = ex3 - 3;
|
||||
|
||||
/*
|
||||
* Strategy A:
|
||||
* Avoid redundant work in overlap of 2nd/4th-order regions.
|
||||
* Only compute 2nd-order on shell points that are NOT overwritten by
|
||||
* the 4th-order pass.
|
||||
*/
|
||||
const int has4 = (i4_lo <= i4_hi && j4_lo <= j4_hi && k4_lo <= k4_hi);
|
||||
|
||||
if (i2_lo <= i2_hi && j2_lo <= j2_hi && k2_lo <= k2_hi) {
|
||||
for (int k0 = k2_lo; k0 <= k2_hi; ++k0) {
|
||||
const int kF = k0 + 1;
|
||||
for (int j0 = j2_lo; j0 <= j2_hi; ++j0) {
|
||||
const int jF = j0 + 1;
|
||||
for (int i0 = i2_lo; i0 <= i2_hi; ++i0) {
|
||||
if (has4 &&
|
||||
i0 >= i4_lo && i0 <= i4_hi &&
|
||||
j0 >= j4_lo && j0 <= j4_hi &&
|
||||
k0 >= k4_lo && k0 <= k4_hi) {
|
||||
continue;
|
||||
}
|
||||
const int iF = i0 + 1;
|
||||
const size_t p = idx_ex(i0, j0, k0, ex);
|
||||
|
||||
fxx[p] = Sdxdx * (
|
||||
fh[idx_fh_F_ord2(iF - 1, jF, kF, ex)] -
|
||||
TWO * fh[idx_fh_F_ord2(iF, jF, kF, ex)] +
|
||||
fh[idx_fh_F_ord2(iF + 1, jF, kF, ex)]
|
||||
);
|
||||
|
||||
fyy[p] = Sdydy * (
|
||||
fh[idx_fh_F_ord2(iF, jF - 1, kF, ex)] -
|
||||
TWO * fh[idx_fh_F_ord2(iF, jF, kF, ex)] +
|
||||
fh[idx_fh_F_ord2(iF, jF + 1, kF, ex)]
|
||||
);
|
||||
|
||||
fzz[p] = Sdzdz * (
|
||||
fh[idx_fh_F_ord2(iF, jF, kF - 1, ex)] -
|
||||
TWO * fh[idx_fh_F_ord2(iF, jF, kF, ex)] +
|
||||
fh[idx_fh_F_ord2(iF, jF, kF + 1, ex)]
|
||||
);
|
||||
|
||||
fxy[p] = Sdxdy * (
|
||||
fh[idx_fh_F_ord2(iF - 1, jF - 1, kF, ex)] -
|
||||
fh[idx_fh_F_ord2(iF + 1, jF - 1, kF, ex)] -
|
||||
fh[idx_fh_F_ord2(iF - 1, jF + 1, kF, ex)] +
|
||||
fh[idx_fh_F_ord2(iF + 1, jF + 1, kF, ex)]
|
||||
);
|
||||
|
||||
fxz[p] = Sdxdz * (
|
||||
fh[idx_fh_F_ord2(iF - 1, jF, kF - 1, ex)] -
|
||||
fh[idx_fh_F_ord2(iF + 1, jF, kF - 1, ex)] -
|
||||
fh[idx_fh_F_ord2(iF - 1, jF, kF + 1, ex)] +
|
||||
fh[idx_fh_F_ord2(iF + 1, jF, kF + 1, ex)]
|
||||
);
|
||||
|
||||
fyz[p] = Sdydz * (
|
||||
fh[idx_fh_F_ord2(iF, jF - 1, kF - 1, ex)] -
|
||||
fh[idx_fh_F_ord2(iF, jF + 1, kF - 1, ex)] -
|
||||
fh[idx_fh_F_ord2(iF, jF - 1, kF + 1, ex)] +
|
||||
fh[idx_fh_F_ord2(iF, jF + 1, kF + 1, ex)]
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (has4) {
|
||||
for (int k0 = k4_lo; k0 <= k4_hi; ++k0) {
|
||||
const int kF = k0 + 1;
|
||||
for (int j0 = j4_lo; j0 <= j4_hi; ++j0) {
|
||||
const int jF = j0 + 1;
|
||||
for (int i0 = i4_lo; i0 <= i4_hi; ++i0) {
|
||||
const int iF = i0 + 1;
|
||||
const size_t p = idx_ex(i0, j0, k0, ex);
|
||||
|
||||
fxx[p] = Fdxdx * (
|
||||
-fh[idx_fh_F_ord2(iF - 2, jF, kF, ex)] +
|
||||
F16 * fh[idx_fh_F_ord2(iF - 1, jF, kF, ex)] -
|
||||
F30 * fh[idx_fh_F_ord2(iF, jF, kF, ex)] -
|
||||
fh[idx_fh_F_ord2(iF + 2, jF, kF, ex)] +
|
||||
F16 * fh[idx_fh_F_ord2(iF + 1, jF, kF, ex)]
|
||||
);
|
||||
|
||||
fyy[p] = Fdydy * (
|
||||
-fh[idx_fh_F_ord2(iF, jF - 2, kF, ex)] +
|
||||
F16 * fh[idx_fh_F_ord2(iF, jF - 1, kF, ex)] -
|
||||
F30 * fh[idx_fh_F_ord2(iF, jF, kF, ex)] -
|
||||
fh[idx_fh_F_ord2(iF, jF + 2, kF, ex)] +
|
||||
F16 * fh[idx_fh_F_ord2(iF, jF + 1, kF, ex)]
|
||||
);
|
||||
|
||||
fzz[p] = Fdzdz * (
|
||||
-fh[idx_fh_F_ord2(iF, jF, kF - 2, ex)] +
|
||||
F16 * fh[idx_fh_F_ord2(iF, jF, kF - 1, ex)] -
|
||||
F30 * fh[idx_fh_F_ord2(iF, jF, kF, ex)] -
|
||||
fh[idx_fh_F_ord2(iF, jF, kF + 2, ex)] +
|
||||
F16 * fh[idx_fh_F_ord2(iF, jF, kF + 1, ex)]
|
||||
);
|
||||
|
||||
{
|
||||
const double t_jm2 =
|
||||
( fh[idx_fh_F_ord2(iF - 2, jF - 2, kF, ex)]
|
||||
-F8*fh[idx_fh_F_ord2(iF - 1, jF - 2, kF, ex)]
|
||||
+F8*fh[idx_fh_F_ord2(iF + 1, jF - 2, kF, ex)]
|
||||
- fh[idx_fh_F_ord2(iF + 2, jF - 2, kF, ex)] );
|
||||
|
||||
const double t_jm1 =
|
||||
( fh[idx_fh_F_ord2(iF - 2, jF - 1, kF, ex)]
|
||||
-F8*fh[idx_fh_F_ord2(iF - 1, jF - 1, kF, ex)]
|
||||
+F8*fh[idx_fh_F_ord2(iF + 1, jF - 1, kF, ex)]
|
||||
- fh[idx_fh_F_ord2(iF + 2, jF - 1, kF, ex)] );
|
||||
|
||||
const double t_jp1 =
|
||||
( fh[idx_fh_F_ord2(iF - 2, jF + 1, kF, ex)]
|
||||
-F8*fh[idx_fh_F_ord2(iF - 1, jF + 1, kF, ex)]
|
||||
+F8*fh[idx_fh_F_ord2(iF + 1, jF + 1, kF, ex)]
|
||||
- fh[idx_fh_F_ord2(iF + 2, jF + 1, kF, ex)] );
|
||||
|
||||
const double t_jp2 =
|
||||
( fh[idx_fh_F_ord2(iF - 2, jF + 2, kF, ex)]
|
||||
-F8*fh[idx_fh_F_ord2(iF - 1, jF + 2, kF, ex)]
|
||||
+F8*fh[idx_fh_F_ord2(iF + 1, jF + 2, kF, ex)]
|
||||
- fh[idx_fh_F_ord2(iF + 2, jF + 2, kF, ex)] );
|
||||
|
||||
fxy[p] = Fdxdy * ( t_jm2 - F8 * t_jm1 + F8 * t_jp1 - t_jp2 );
|
||||
}
|
||||
|
||||
{
|
||||
const double t_km2 =
|
||||
( fh[idx_fh_F_ord2(iF - 2, jF, kF - 2, ex)]
|
||||
-F8*fh[idx_fh_F_ord2(iF - 1, jF, kF - 2, ex)]
|
||||
+F8*fh[idx_fh_F_ord2(iF + 1, jF, kF - 2, ex)]
|
||||
- fh[idx_fh_F_ord2(iF + 2, jF, kF - 2, ex)] );
|
||||
|
||||
const double t_km1 =
|
||||
( fh[idx_fh_F_ord2(iF - 2, jF, kF - 1, ex)]
|
||||
-F8*fh[idx_fh_F_ord2(iF - 1, jF, kF - 1, ex)]
|
||||
+F8*fh[idx_fh_F_ord2(iF + 1, jF, kF - 1, ex)]
|
||||
- fh[idx_fh_F_ord2(iF + 2, jF, kF - 1, ex)] );
|
||||
|
||||
const double t_kp1 =
|
||||
( fh[idx_fh_F_ord2(iF - 2, jF, kF + 1, ex)]
|
||||
-F8*fh[idx_fh_F_ord2(iF - 1, jF, kF + 1, ex)]
|
||||
+F8*fh[idx_fh_F_ord2(iF + 1, jF, kF + 1, ex)]
|
||||
- fh[idx_fh_F_ord2(iF + 2, jF, kF + 1, ex)] );
|
||||
|
||||
const double t_kp2 =
|
||||
( fh[idx_fh_F_ord2(iF - 2, jF, kF + 2, ex)]
|
||||
-F8*fh[idx_fh_F_ord2(iF - 1, jF, kF + 2, ex)]
|
||||
+F8*fh[idx_fh_F_ord2(iF + 1, jF, kF + 2, ex)]
|
||||
- fh[idx_fh_F_ord2(iF + 2, jF, kF + 2, ex)] );
|
||||
|
||||
fxz[p] = Fdxdz * ( t_km2 - F8 * t_km1 + F8 * t_kp1 - t_kp2 );
|
||||
}
|
||||
|
||||
{
|
||||
const double t_km2 =
|
||||
( fh[idx_fh_F_ord2(iF, jF - 2, kF - 2, ex)]
|
||||
-F8*fh[idx_fh_F_ord2(iF, jF - 1, kF - 2, ex)]
|
||||
+F8*fh[idx_fh_F_ord2(iF, jF + 1, kF - 2, ex)]
|
||||
- fh[idx_fh_F_ord2(iF, jF + 2, kF - 2, ex)] );
|
||||
|
||||
const double t_km1 =
|
||||
( fh[idx_fh_F_ord2(iF, jF - 2, kF - 1, ex)]
|
||||
-F8*fh[idx_fh_F_ord2(iF, jF - 1, kF - 1, ex)]
|
||||
+F8*fh[idx_fh_F_ord2(iF, jF + 1, kF - 1, ex)]
|
||||
- fh[idx_fh_F_ord2(iF, jF + 2, kF - 1, ex)] );
|
||||
|
||||
const double t_kp1 =
|
||||
( fh[idx_fh_F_ord2(iF, jF - 2, kF + 1, ex)]
|
||||
-F8*fh[idx_fh_F_ord2(iF, jF - 1, kF + 1, ex)]
|
||||
+F8*fh[idx_fh_F_ord2(iF, jF + 1, kF + 1, ex)]
|
||||
- fh[idx_fh_F_ord2(iF, jF + 2, kF + 1, ex)] );
|
||||
|
||||
const double t_kp2 =
|
||||
( fh[idx_fh_F_ord2(iF, jF - 2, kF + 2, ex)]
|
||||
-F8*fh[idx_fh_F_ord2(iF, jF - 1, kF + 2, ex)]
|
||||
+F8*fh[idx_fh_F_ord2(iF, jF + 1, kF + 2, ex)]
|
||||
- fh[idx_fh_F_ord2(iF, jF + 2, kF + 2, ex)] );
|
||||
|
||||
fyz[p] = Fdydz * ( t_km2 - F8 * t_km1 + F8 * t_kp1 - t_kp2 );
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// free(fh);
|
||||
}
|
||||
167
AMSS_NCKU_source/Derivative/fderivs_c.C
Normal file
167
AMSS_NCKU_source/Derivative/fderivs_c.C
Normal file
@@ -0,0 +1,167 @@
|
||||
#include "tool.h"
|
||||
|
||||
/*
|
||||
* C 版 fderivs
|
||||
*
|
||||
* Fortran:
|
||||
* subroutine fderivs(ex,f,fx,fy,fz,X,Y,Z,SYM1,SYM2,SYM3,symmetry,onoff)
|
||||
*
|
||||
* 约定:
|
||||
* f, fx, fy, fz: ex1*ex2*ex3,按 idx_ex 布局
|
||||
* X: ex1, Y: ex2, Z: ex3
|
||||
*/
|
||||
void fderivs(const int ex[3],
|
||||
const double *f,
|
||||
double *fx, double *fy, double *fz,
|
||||
const double *X, const double *Y, const double *Z,
|
||||
double SYM1, double SYM2, double SYM3,
|
||||
int Symmetry, int onoff)
|
||||
{
|
||||
(void)onoff; // Fortran 里没用到
|
||||
|
||||
const double ZEO = 0.0, ONE = 1.0;
|
||||
const double TWO = 2.0, EIT = 8.0;
|
||||
const double F12 = 12.0;
|
||||
|
||||
const int NO_SYMM = 0, EQ_SYMM = 1; // OCTANT=2 在本子程序里不直接用
|
||||
|
||||
const int ex1 = ex[0], ex2 = ex[1], ex3 = ex[2];
|
||||
|
||||
// dX = X(2)-X(1) -> C: X[1]-X[0]
|
||||
const double dX = X[1] - X[0];
|
||||
const double dY = Y[1] - Y[0];
|
||||
const double dZ = Z[1] - Z[0];
|
||||
|
||||
// Fortran 1-based bounds
|
||||
const int imaxF = ex1;
|
||||
const int jmaxF = ex2;
|
||||
const int kmaxF = ex3;
|
||||
|
||||
int iminF = 1, jminF = 1, kminF = 1;
|
||||
if (Symmetry > NO_SYMM && fabs(Z[0]) < dZ) kminF = -1;
|
||||
if (Symmetry > EQ_SYMM && fabs(X[0]) < dX) iminF = -1;
|
||||
if (Symmetry > EQ_SYMM && fabs(Y[0]) < dY) jminF = -1;
|
||||
|
||||
// SoA(1:3) = SYM1,SYM2,SYM3
|
||||
const double SoA[3] = { SYM1, SYM2, SYM3 };
|
||||
|
||||
// fh: (ex1+2)*(ex2+2)*(ex3+2) because ord=2
|
||||
const size_t nx = (size_t)ex1 + 2;
|
||||
const size_t ny = (size_t)ex2 + 2;
|
||||
const size_t nz = (size_t)ex3 + 2;
|
||||
const size_t fh_size = nx * ny * nz;
|
||||
static double *fh = NULL;
|
||||
static size_t cap = 0;
|
||||
|
||||
if (fh_size > cap) {
|
||||
free(fh);
|
||||
fh = (double*)aligned_alloc(64, fh_size * sizeof(double));
|
||||
cap = fh_size;
|
||||
}
|
||||
// double *fh = (double*)malloc(fh_size * sizeof(double));
|
||||
if (!fh) return;
|
||||
|
||||
// call symmetry_bd(2,ex,f,fh,SoA)
|
||||
symmetry_bd(2, ex, f, fh, SoA);
|
||||
|
||||
const double d12dx = ONE / F12 / dX;
|
||||
const double d12dy = ONE / F12 / dY;
|
||||
const double d12dz = ONE / F12 / dZ;
|
||||
|
||||
const double d2dx = ONE / TWO / dX;
|
||||
const double d2dy = ONE / TWO / dY;
|
||||
const double d2dz = ONE / TWO / dZ;
|
||||
|
||||
// fx = fy = fz = 0
|
||||
const size_t all = (size_t)ex1 * (size_t)ex2 * (size_t)ex3;
|
||||
for (size_t p = 0; p < all; ++p) {
|
||||
fx[p] = ZEO;
|
||||
fy[p] = ZEO;
|
||||
fz[p] = ZEO;
|
||||
}
|
||||
|
||||
/*
|
||||
* 两段式:
|
||||
* 1) 先在二阶可用区域计算二阶模板
|
||||
* 2) 再在高阶可用区域覆盖为四阶模板
|
||||
*
|
||||
* 与原 if/elseif 逻辑等价,但减少逐点分支判断。
|
||||
*/
|
||||
const int i2_lo = (iminF > 0) ? iminF : 0;
|
||||
const int j2_lo = (jminF > 0) ? jminF : 0;
|
||||
const int k2_lo = (kminF > 0) ? kminF : 0;
|
||||
const int i2_hi = ex1 - 2;
|
||||
const int j2_hi = ex2 - 2;
|
||||
const int k2_hi = ex3 - 2;
|
||||
|
||||
const int i4_lo = (iminF + 1 > 0) ? (iminF + 1) : 0;
|
||||
const int j4_lo = (jminF + 1 > 0) ? (jminF + 1) : 0;
|
||||
const int k4_lo = (kminF + 1 > 0) ? (kminF + 1) : 0;
|
||||
const int i4_hi = ex1 - 3;
|
||||
const int j4_hi = ex2 - 3;
|
||||
const int k4_hi = ex3 - 3;
|
||||
|
||||
if (i2_lo <= i2_hi && j2_lo <= j2_hi && k2_lo <= k2_hi) {
|
||||
for (int k0 = k2_lo; k0 <= k2_hi; ++k0) {
|
||||
const int kF = k0 + 1;
|
||||
for (int j0 = j2_lo; j0 <= j2_hi; ++j0) {
|
||||
const int jF = j0 + 1;
|
||||
for (int i0 = i2_lo; i0 <= i2_hi; ++i0) {
|
||||
const int iF = i0 + 1;
|
||||
const size_t p = idx_ex(i0, j0, k0, ex);
|
||||
|
||||
fx[p] = d2dx * (
|
||||
-fh[idx_fh_F_ord2(iF - 1, jF, kF, ex)] +
|
||||
fh[idx_fh_F_ord2(iF + 1, jF, kF, ex)]
|
||||
);
|
||||
|
||||
fy[p] = d2dy * (
|
||||
-fh[idx_fh_F_ord2(iF, jF - 1, kF, ex)] +
|
||||
fh[idx_fh_F_ord2(iF, jF + 1, kF, ex)]
|
||||
);
|
||||
|
||||
fz[p] = d2dz * (
|
||||
-fh[idx_fh_F_ord2(iF, jF, kF - 1, ex)] +
|
||||
fh[idx_fh_F_ord2(iF, jF, kF + 1, ex)]
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (i4_lo <= i4_hi && j4_lo <= j4_hi && k4_lo <= k4_hi) {
|
||||
for (int k0 = k4_lo; k0 <= k4_hi; ++k0) {
|
||||
const int kF = k0 + 1;
|
||||
for (int j0 = j4_lo; j0 <= j4_hi; ++j0) {
|
||||
const int jF = j0 + 1;
|
||||
for (int i0 = i4_lo; i0 <= i4_hi; ++i0) {
|
||||
const int iF = i0 + 1;
|
||||
const size_t p = idx_ex(i0, j0, k0, ex);
|
||||
|
||||
fx[p] = d12dx * (
|
||||
fh[idx_fh_F_ord2(iF - 2, jF, kF, ex)] -
|
||||
EIT * fh[idx_fh_F_ord2(iF - 1, jF, kF, ex)] +
|
||||
EIT * fh[idx_fh_F_ord2(iF + 1, jF, kF, ex)] -
|
||||
fh[idx_fh_F_ord2(iF + 2, jF, kF, ex)]
|
||||
);
|
||||
|
||||
fy[p] = d12dy * (
|
||||
fh[idx_fh_F_ord2(iF, jF - 2, kF, ex)] -
|
||||
EIT * fh[idx_fh_F_ord2(iF, jF - 1, kF, ex)] +
|
||||
EIT * fh[idx_fh_F_ord2(iF, jF + 1, kF, ex)] -
|
||||
fh[idx_fh_F_ord2(iF, jF + 2, kF, ex)]
|
||||
);
|
||||
|
||||
fz[p] = d12dz * (
|
||||
fh[idx_fh_F_ord2(iF, jF, kF - 2, ex)] -
|
||||
EIT * fh[idx_fh_F_ord2(iF, jF, kF - 1, ex)] +
|
||||
EIT * fh[idx_fh_F_ord2(iF, jF, kF + 1, ex)] -
|
||||
fh[idx_fh_F_ord2(iF, jF, kF + 2, ex)]
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// free(fh);
|
||||
}
|
||||
5287
AMSS_NCKU_source/Derivative/point_diff_new_sh.f90
Normal file
5287
AMSS_NCKU_source/Derivative/point_diff_new_sh.f90
Normal file
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user