895 lines
49 KiB
C
895 lines
49 KiB
C
#include "macrodef.h"
|
|
#include "tool.h"
|
|
|
|
/*
|
|
* C 版 fdderivs — second derivatives d2f/dx2, d2f/dxdy, d2f/dxdz, d2f/dy2, d2f/dydz, d2f/dz2.
|
|
*
|
|
* Finite difference order selected at compile time via ghost_width macro.
|
|
* Multi-pass skip strategy: lowest-order computes widest region while skipping
|
|
* the union of higher-order regions, then each higher pass overwrites its interior.
|
|
*/
|
|
void fdderivs(const int ex[3],
|
|
const double *f,
|
|
double *fxx, double *fxy, double *fxz,
|
|
double *fyy, double *fyz, double *fzz,
|
|
const double *X, const double *Y, const double *Z,
|
|
double SYM1, double SYM2, double SYM3,
|
|
int Symmetry, int onoff)
|
|
{
|
|
(void)onoff;
|
|
|
|
const int NO_SYMM = 0, EQ_SYMM = 1;
|
|
const double ZEO = 0.0, ONE = 1.0, TWO = 2.0;
|
|
const double F1o4 = 2.5e-1;
|
|
const double F8 = 8.0;
|
|
const double F16 = 16.0;
|
|
const double F30 = 30.0;
|
|
const double F1o12 = ONE / 12.0;
|
|
const double F1o144 = ONE / 144.0;
|
|
const double F9 = 9.0, F45 = 45.0, F60 = 60.0;
|
|
const double F27 = 27.0, F270 = 270.0, F490 = 490.0;
|
|
const double F1o180 = ONE / 180.0;
|
|
const double F1o3600 = ONE / 3600.0;
|
|
const double F32 = 32.0, F128 = 128.0, F168 = 168.0, F672 = 672.0;
|
|
const double F840 = 840.0, F1008 = 1008.0, F8064 = 8064.0, F14350 = 14350.0;
|
|
const double F1o5040 = ONE / 5040.0;
|
|
const double F1o705600 = ONE / 705600.0;
|
|
|
|
const int ex1 = ex[0], ex2 = ex[1], ex3 = ex[2];
|
|
const double dX = X[1] - X[0];
|
|
const double dY = Y[1] - Y[0];
|
|
const double dZ = Z[1] - Z[0];
|
|
|
|
const int imaxF = ex1, jmaxF = ex2, kmaxF = ex3;
|
|
|
|
#if (ghost_width == 2)
|
|
/* ---- 2nd-order ------------------------------------------------------ */
|
|
{
|
|
const int ord = 1;
|
|
|
|
int iminF = 1, jminF = 1, kminF = 1;
|
|
if (Symmetry > NO_SYMM && fabs(Z[0]) < dZ) kminF = 0;
|
|
if (Symmetry > EQ_SYMM && fabs(X[0]) < dX) iminF = 0;
|
|
if (Symmetry > EQ_SYMM && fabs(Y[0]) < dY) jminF = 0;
|
|
|
|
const double SoA[3] = { SYM1, SYM2, SYM3 };
|
|
|
|
const size_t nx = (size_t)ex1 + ord;
|
|
const size_t ny = (size_t)ex2 + ord;
|
|
const size_t nz = (size_t)ex3 + ord;
|
|
const size_t fh_size = nx * ny * nz;
|
|
|
|
static double *fh_buf = NULL;
|
|
static size_t cap = 0;
|
|
if (fh_size > cap) {
|
|
free(fh_buf);
|
|
fh_buf = (double*)aligned_alloc(64, fh_size * sizeof(double));
|
|
cap = fh_size;
|
|
}
|
|
double *fh = fh_buf;
|
|
if (!fh) return;
|
|
|
|
symmetry_bd(ord, ex, f, fh, SoA);
|
|
|
|
const double Sdxdx = ONE / (dX * dX);
|
|
const double Sdydy = ONE / (dY * dY);
|
|
const double Sdzdz = ONE / (dZ * dZ);
|
|
const double Sdxdy = F1o4 / (dX * dY);
|
|
const double Sdxdz = F1o4 / (dX * dZ);
|
|
const double Sdydz = F1o4 / (dY * dZ);
|
|
|
|
const size_t all = (size_t)ex1 * (size_t)ex2 * (size_t)ex3;
|
|
for (size_t p = 0; p < all; ++p) {
|
|
fxx[p]=ZEO; fyy[p]=ZEO; fzz[p]=ZEO;
|
|
fxy[p]=ZEO; fxz[p]=ZEO; fyz[p]=ZEO;
|
|
}
|
|
|
|
const int i2_lo = (iminF > 0) ? iminF : 0;
|
|
const int j2_lo = (jminF > 0) ? jminF : 0;
|
|
const int k2_lo = (kminF > 0) ? kminF : 0;
|
|
const int i2_hi = ex1 - 2;
|
|
const int j2_hi = ex2 - 2;
|
|
const int k2_hi = ex3 - 2;
|
|
|
|
if (i2_lo <= i2_hi && j2_lo <= j2_hi && k2_lo <= k2_hi) {
|
|
for (int k0 = k2_lo; k0 <= k2_hi; ++k0) {
|
|
const int kF = k0 + 1;
|
|
for (int j0 = j2_lo; j0 <= j2_hi; ++j0) {
|
|
const int jF = j0 + 1;
|
|
for (int i0 = i2_lo; i0 <= i2_hi; ++i0) {
|
|
const int iF = i0 + 1;
|
|
const size_t p = idx_ex(i0, j0, k0, ex);
|
|
|
|
fxx[p] = Sdxdx * (
|
|
fh[idx_fh_F_ord1(iF - 1, jF, kF, ex)] -
|
|
TWO * fh[idx_fh_F_ord1(iF, jF, kF, ex)] +
|
|
fh[idx_fh_F_ord1(iF + 1, jF, kF, ex)]);
|
|
|
|
fyy[p] = Sdydy * (
|
|
fh[idx_fh_F_ord1(iF, jF - 1, kF, ex)] -
|
|
TWO * fh[idx_fh_F_ord1(iF, jF, kF, ex)] +
|
|
fh[idx_fh_F_ord1(iF, jF + 1, kF, ex)]);
|
|
|
|
fzz[p] = Sdzdz * (
|
|
fh[idx_fh_F_ord1(iF, jF, kF - 1, ex)] -
|
|
TWO * fh[idx_fh_F_ord1(iF, jF, kF, ex)] +
|
|
fh[idx_fh_F_ord1(iF, jF, kF + 1, ex)]);
|
|
|
|
fxy[p] = Sdxdy * (
|
|
fh[idx_fh_F_ord1(iF - 1, jF - 1, kF, ex)] -
|
|
fh[idx_fh_F_ord1(iF + 1, jF - 1, kF, ex)] -
|
|
fh[idx_fh_F_ord1(iF - 1, jF + 1, kF, ex)] +
|
|
fh[idx_fh_F_ord1(iF + 1, jF + 1, kF, ex)]);
|
|
|
|
fxz[p] = Sdxdz * (
|
|
fh[idx_fh_F_ord1(iF - 1, jF, kF - 1, ex)] -
|
|
fh[idx_fh_F_ord1(iF + 1, jF, kF - 1, ex)] -
|
|
fh[idx_fh_F_ord1(iF - 1, jF, kF + 1, ex)] +
|
|
fh[idx_fh_F_ord1(iF + 1, jF, kF + 1, ex)]);
|
|
|
|
fyz[p] = Sdydz * (
|
|
fh[idx_fh_F_ord1(iF, jF - 1, kF - 1, ex)] -
|
|
fh[idx_fh_F_ord1(iF, jF + 1, kF - 1, ex)] -
|
|
fh[idx_fh_F_ord1(iF, jF - 1, kF + 1, ex)] +
|
|
fh[idx_fh_F_ord1(iF, jF + 1, kF + 1, ex)]);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
return;
|
|
}
|
|
#elif (ghost_width == 3)
|
|
/* ---- 4th-order (original code) ------------------------------------ */
|
|
{
|
|
const int ord = 2;
|
|
|
|
int iminF = 1, jminF = 1, kminF = 1;
|
|
if (Symmetry > NO_SYMM && fabs(Z[0]) < dZ) kminF = -1;
|
|
if (Symmetry > EQ_SYMM && fabs(X[0]) < dX) iminF = -1;
|
|
if (Symmetry > EQ_SYMM && fabs(Y[0]) < dY) jminF = -1;
|
|
|
|
const double SoA[3] = { SYM1, SYM2, SYM3 };
|
|
|
|
const size_t nx = (size_t)ex1 + ord;
|
|
const size_t ny = (size_t)ex2 + ord;
|
|
const size_t nz = (size_t)ex3 + ord;
|
|
const size_t fh_size = nx * ny * nz;
|
|
|
|
static double *fh_buf = NULL;
|
|
static size_t cap = 0;
|
|
if (fh_size > cap) {
|
|
free(fh_buf);
|
|
fh_buf = (double*)aligned_alloc(64, fh_size * sizeof(double));
|
|
cap = fh_size;
|
|
}
|
|
double *fh = fh_buf;
|
|
if (!fh) return;
|
|
|
|
symmetry_bd(ord, ex, f, fh, SoA);
|
|
|
|
const double Sdxdx = ONE / (dX * dX);
|
|
const double Sdydy = ONE / (dY * dY);
|
|
const double Sdzdz = ONE / (dZ * dZ);
|
|
const double Fdxdx = F1o12 / (dX * dX);
|
|
const double Fdydy = F1o12 / (dY * dY);
|
|
const double Fdzdz = F1o12 / (dZ * dZ);
|
|
const double Sdxdy = F1o4 / (dX * dY);
|
|
const double Sdxdz = F1o4 / (dX * dZ);
|
|
const double Sdydz = F1o4 / (dY * dZ);
|
|
const double Fdxdy = F1o144 / (dX * dY);
|
|
const double Fdxdz = F1o144 / (dX * dZ);
|
|
const double Fdydz = F1o144 / (dY * dZ);
|
|
|
|
/* zero high-boundary faces (points the loops below won't cover) */
|
|
for (int j0 = 0; j0 < ex2; ++j0)
|
|
for (int i0 = 0; i0 < ex1; ++i0) {
|
|
const size_t p = idx_ex(i0, j0, ex3 - 1, ex);
|
|
fxx[p]=ZEO; fyy[p]=ZEO; fzz[p]=ZEO;
|
|
fxy[p]=ZEO; fxz[p]=ZEO; fyz[p]=ZEO;
|
|
}
|
|
for (int k0 = 0; k0 < ex3 - 1; ++k0)
|
|
for (int i0 = 0; i0 < ex1; ++i0) {
|
|
const size_t p = idx_ex(i0, ex2 - 1, k0, ex);
|
|
fxx[p]=ZEO; fyy[p]=ZEO; fzz[p]=ZEO;
|
|
fxy[p]=ZEO; fxz[p]=ZEO; fyz[p]=ZEO;
|
|
}
|
|
for (int k0 = 0; k0 < ex3 - 1; ++k0)
|
|
for (int j0 = 0; j0 < ex2 - 1; ++j0) {
|
|
const size_t p = idx_ex(ex1 - 1, j0, k0, ex);
|
|
fxx[p]=ZEO; fyy[p]=ZEO; fzz[p]=ZEO;
|
|
fxy[p]=ZEO; fxz[p]=ZEO; fyz[p]=ZEO;
|
|
}
|
|
|
|
if (kminF == 1) {
|
|
for (int j0 = 0; j0 < ex2; ++j0)
|
|
for (int i0 = 0; i0 < ex1; ++i0) {
|
|
const size_t p = idx_ex(i0, j0, 0, ex);
|
|
fxx[p]=ZEO; fyy[p]=ZEO; fzz[p]=ZEO;
|
|
fxy[p]=ZEO; fxz[p]=ZEO; fyz[p]=ZEO;
|
|
}
|
|
}
|
|
if (jminF == 1) {
|
|
for (int k0 = 0; k0 < ex3; ++k0)
|
|
for (int i0 = 0; i0 < ex1; ++i0) {
|
|
const size_t p = idx_ex(i0, 0, k0, ex);
|
|
fxx[p]=ZEO; fyy[p]=ZEO; fzz[p]=ZEO;
|
|
fxy[p]=ZEO; fxz[p]=ZEO; fyz[p]=ZEO;
|
|
}
|
|
}
|
|
if (iminF == 1) {
|
|
for (int k0 = 0; k0 < ex3; ++k0)
|
|
for (int j0 = 0; j0 < ex2; ++j0) {
|
|
const size_t p = idx_ex(0, j0, k0, ex);
|
|
fxx[p]=ZEO; fyy[p]=ZEO; fzz[p]=ZEO;
|
|
fxy[p]=ZEO; fxz[p]=ZEO; fyz[p]=ZEO;
|
|
}
|
|
}
|
|
|
|
const int i2_lo = (iminF > 0) ? iminF : 0;
|
|
const int j2_lo = (jminF > 0) ? jminF : 0;
|
|
const int k2_lo = (kminF > 0) ? kminF : 0;
|
|
const int i2_hi = ex1 - 2;
|
|
const int j2_hi = ex2 - 2;
|
|
const int k2_hi = ex3 - 2;
|
|
|
|
const int i4_lo = (iminF + 1 > 0) ? (iminF + 1) : 0;
|
|
const int j4_lo = (jminF + 1 > 0) ? (jminF + 1) : 0;
|
|
const int k4_lo = (kminF + 1 > 0) ? (kminF + 1) : 0;
|
|
const int i4_hi = ex1 - 3;
|
|
const int j4_hi = ex2 - 3;
|
|
const int k4_hi = ex3 - 3;
|
|
|
|
const int has4 = (i4_lo <= i4_hi && j4_lo <= j4_hi && k4_lo <= k4_hi);
|
|
|
|
if (i2_lo <= i2_hi && j2_lo <= j2_hi && k2_lo <= k2_hi) {
|
|
for (int k0 = k2_lo; k0 <= k2_hi; ++k0) {
|
|
const int kF = k0 + 1;
|
|
for (int j0 = j2_lo; j0 <= j2_hi; ++j0) {
|
|
const int jF = j0 + 1;
|
|
for (int i0 = i2_lo; i0 <= i2_hi; ++i0) {
|
|
if (has4 &&
|
|
i0 >= i4_lo && i0 <= i4_hi &&
|
|
j0 >= j4_lo && j0 <= j4_hi &&
|
|
k0 >= k4_lo && k0 <= k4_hi) {
|
|
continue;
|
|
}
|
|
const int iF = i0 + 1;
|
|
const size_t p = idx_ex(i0, j0, k0, ex);
|
|
|
|
fxx[p] = Sdxdx * (
|
|
fh[idx_fh_F_ord2(iF - 1, jF, kF, ex)] -
|
|
TWO * fh[idx_fh_F_ord2(iF, jF, kF, ex)] +
|
|
fh[idx_fh_F_ord2(iF + 1, jF, kF, ex)]);
|
|
|
|
fyy[p] = Sdydy * (
|
|
fh[idx_fh_F_ord2(iF, jF - 1, kF, ex)] -
|
|
TWO * fh[idx_fh_F_ord2(iF, jF, kF, ex)] +
|
|
fh[idx_fh_F_ord2(iF, jF + 1, kF, ex)]);
|
|
|
|
fzz[p] = Sdzdz * (
|
|
fh[idx_fh_F_ord2(iF, jF, kF - 1, ex)] -
|
|
TWO * fh[idx_fh_F_ord2(iF, jF, kF, ex)] +
|
|
fh[idx_fh_F_ord2(iF, jF, kF + 1, ex)]);
|
|
|
|
fxy[p] = Sdxdy * (
|
|
fh[idx_fh_F_ord2(iF - 1, jF - 1, kF, ex)] -
|
|
fh[idx_fh_F_ord2(iF + 1, jF - 1, kF, ex)] -
|
|
fh[idx_fh_F_ord2(iF - 1, jF + 1, kF, ex)] +
|
|
fh[idx_fh_F_ord2(iF + 1, jF + 1, kF, ex)]);
|
|
|
|
fxz[p] = Sdxdz * (
|
|
fh[idx_fh_F_ord2(iF - 1, jF, kF - 1, ex)] -
|
|
fh[idx_fh_F_ord2(iF + 1, jF, kF - 1, ex)] -
|
|
fh[idx_fh_F_ord2(iF - 1, jF, kF + 1, ex)] +
|
|
fh[idx_fh_F_ord2(iF + 1, jF, kF + 1, ex)]);
|
|
|
|
fyz[p] = Sdydz * (
|
|
fh[idx_fh_F_ord2(iF, jF - 1, kF - 1, ex)] -
|
|
fh[idx_fh_F_ord2(iF, jF + 1, kF - 1, ex)] -
|
|
fh[idx_fh_F_ord2(iF, jF - 1, kF + 1, ex)] +
|
|
fh[idx_fh_F_ord2(iF, jF + 1, kF + 1, ex)]);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
if (has4) {
|
|
for (int k0 = k4_lo; k0 <= k4_hi; ++k0) {
|
|
const int kF = k0 + 1;
|
|
for (int j0 = j4_lo; j0 <= j4_hi; ++j0) {
|
|
const int jF = j0 + 1;
|
|
for (int i0 = i4_lo; i0 <= i4_hi; ++i0) {
|
|
const int iF = i0 + 1;
|
|
const size_t p = idx_ex(i0, j0, k0, ex);
|
|
|
|
fxx[p] = Fdxdx * (
|
|
-fh[idx_fh_F_ord2(iF - 2, jF, kF, ex)] +
|
|
F16 * fh[idx_fh_F_ord2(iF - 1, jF, kF, ex)] -
|
|
F30 * fh[idx_fh_F_ord2(iF, jF, kF, ex)] -
|
|
fh[idx_fh_F_ord2(iF + 2, jF, kF, ex)] +
|
|
F16 * fh[idx_fh_F_ord2(iF + 1, jF, kF, ex)]);
|
|
|
|
fyy[p] = Fdydy * (
|
|
-fh[idx_fh_F_ord2(iF, jF - 2, kF, ex)] +
|
|
F16 * fh[idx_fh_F_ord2(iF, jF - 1, kF, ex)] -
|
|
F30 * fh[idx_fh_F_ord2(iF, jF, kF, ex)] -
|
|
fh[idx_fh_F_ord2(iF, jF + 2, kF, ex)] +
|
|
F16 * fh[idx_fh_F_ord2(iF, jF + 1, kF, ex)]);
|
|
|
|
fzz[p] = Fdzdz * (
|
|
-fh[idx_fh_F_ord2(iF, jF, kF - 2, ex)] +
|
|
F16 * fh[idx_fh_F_ord2(iF, jF, kF - 1, ex)] -
|
|
F30 * fh[idx_fh_F_ord2(iF, jF, kF, ex)] -
|
|
fh[idx_fh_F_ord2(iF, jF, kF + 2, ex)] +
|
|
F16 * fh[idx_fh_F_ord2(iF, jF, kF + 1, ex)]);
|
|
|
|
/* fxy: 5x5 outer product */
|
|
{
|
|
const double t_jm2 = (
|
|
fh[idx_fh_F_ord2(iF - 2, jF - 2, kF, ex)]
|
|
-F8*fh[idx_fh_F_ord2(iF - 1, jF - 2, kF, ex)]
|
|
+F8*fh[idx_fh_F_ord2(iF + 1, jF - 2, kF, ex)]
|
|
- fh[idx_fh_F_ord2(iF + 2, jF - 2, kF, ex)]);
|
|
|
|
const double t_jm1 = (
|
|
fh[idx_fh_F_ord2(iF - 2, jF - 1, kF, ex)]
|
|
-F8*fh[idx_fh_F_ord2(iF - 1, jF - 1, kF, ex)]
|
|
+F8*fh[idx_fh_F_ord2(iF + 1, jF - 1, kF, ex)]
|
|
- fh[idx_fh_F_ord2(iF + 2, jF - 1, kF, ex)]);
|
|
|
|
const double t_jp1 = (
|
|
fh[idx_fh_F_ord2(iF - 2, jF + 1, kF, ex)]
|
|
-F8*fh[idx_fh_F_ord2(iF - 1, jF + 1, kF, ex)]
|
|
+F8*fh[idx_fh_F_ord2(iF + 1, jF + 1, kF, ex)]
|
|
- fh[idx_fh_F_ord2(iF + 2, jF + 1, kF, ex)]);
|
|
|
|
const double t_jp2 = (
|
|
fh[idx_fh_F_ord2(iF - 2, jF + 2, kF, ex)]
|
|
-F8*fh[idx_fh_F_ord2(iF - 1, jF + 2, kF, ex)]
|
|
+F8*fh[idx_fh_F_ord2(iF + 1, jF + 2, kF, ex)]
|
|
- fh[idx_fh_F_ord2(iF + 2, jF + 2, kF, ex)]);
|
|
|
|
fxy[p] = Fdxdy * ( t_jm2 - F8 * t_jm1 + F8 * t_jp1 - t_jp2 );
|
|
}
|
|
|
|
/* fxz: 5x5 outer product */
|
|
{
|
|
const double t_km2 = (
|
|
fh[idx_fh_F_ord2(iF - 2, jF, kF - 2, ex)]
|
|
-F8*fh[idx_fh_F_ord2(iF - 1, jF, kF - 2, ex)]
|
|
+F8*fh[idx_fh_F_ord2(iF + 1, jF, kF - 2, ex)]
|
|
- fh[idx_fh_F_ord2(iF + 2, jF, kF - 2, ex)]);
|
|
|
|
const double t_km1 = (
|
|
fh[idx_fh_F_ord2(iF - 2, jF, kF - 1, ex)]
|
|
-F8*fh[idx_fh_F_ord2(iF - 1, jF, kF - 1, ex)]
|
|
+F8*fh[idx_fh_F_ord2(iF + 1, jF, kF - 1, ex)]
|
|
- fh[idx_fh_F_ord2(iF + 2, jF, kF - 1, ex)]);
|
|
|
|
const double t_kp1 = (
|
|
fh[idx_fh_F_ord2(iF - 2, jF, kF + 1, ex)]
|
|
-F8*fh[idx_fh_F_ord2(iF - 1, jF, kF + 1, ex)]
|
|
+F8*fh[idx_fh_F_ord2(iF + 1, jF, kF + 1, ex)]
|
|
- fh[idx_fh_F_ord2(iF + 2, jF, kF + 1, ex)]);
|
|
|
|
const double t_kp2 = (
|
|
fh[idx_fh_F_ord2(iF - 2, jF, kF + 2, ex)]
|
|
-F8*fh[idx_fh_F_ord2(iF - 1, jF, kF + 2, ex)]
|
|
+F8*fh[idx_fh_F_ord2(iF + 1, jF, kF + 2, ex)]
|
|
- fh[idx_fh_F_ord2(iF + 2, jF, kF + 2, ex)]);
|
|
|
|
fxz[p] = Fdxdz * ( t_km2 - F8 * t_km1 + F8 * t_kp1 - t_kp2 );
|
|
}
|
|
|
|
/* fyz: 5x5 outer product */
|
|
{
|
|
const double t_km2 = (
|
|
fh[idx_fh_F_ord2(iF, jF - 2, kF - 2, ex)]
|
|
-F8*fh[idx_fh_F_ord2(iF, jF - 1, kF - 2, ex)]
|
|
+F8*fh[idx_fh_F_ord2(iF, jF + 1, kF - 2, ex)]
|
|
- fh[idx_fh_F_ord2(iF, jF + 2, kF - 2, ex)]);
|
|
|
|
const double t_km1 = (
|
|
fh[idx_fh_F_ord2(iF, jF - 2, kF - 1, ex)]
|
|
-F8*fh[idx_fh_F_ord2(iF, jF - 1, kF - 1, ex)]
|
|
+F8*fh[idx_fh_F_ord2(iF, jF + 1, kF - 1, ex)]
|
|
- fh[idx_fh_F_ord2(iF, jF + 2, kF - 1, ex)]);
|
|
|
|
const double t_kp1 = (
|
|
fh[idx_fh_F_ord2(iF, jF - 2, kF + 1, ex)]
|
|
-F8*fh[idx_fh_F_ord2(iF, jF - 1, kF + 1, ex)]
|
|
+F8*fh[idx_fh_F_ord2(iF, jF + 1, kF + 1, ex)]
|
|
- fh[idx_fh_F_ord2(iF, jF + 2, kF + 1, ex)]);
|
|
|
|
const double t_kp2 = (
|
|
fh[idx_fh_F_ord2(iF, jF - 2, kF + 2, ex)]
|
|
-F8*fh[idx_fh_F_ord2(iF, jF - 1, kF + 2, ex)]
|
|
+F8*fh[idx_fh_F_ord2(iF, jF + 1, kF + 2, ex)]
|
|
- fh[idx_fh_F_ord2(iF, jF + 2, kF + 2, ex)]);
|
|
|
|
fyz[p] = Fdydz * ( t_km2 - F8 * t_km1 + F8 * t_kp1 - t_kp2 );
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
return;
|
|
}
|
|
#elif (ghost_width == 4)
|
|
/* ---- 6th-order ----------------------------------------------------- */
|
|
{
|
|
const int ord = 3;
|
|
|
|
int iminF = 1, jminF = 1, kminF = 1;
|
|
if (Symmetry > NO_SYMM && fabs(Z[0]) < dZ) kminF = -2;
|
|
if (Symmetry > EQ_SYMM && fabs(X[0]) < dX) iminF = -2;
|
|
if (Symmetry > EQ_SYMM && fabs(Y[0]) < dY) jminF = -2;
|
|
|
|
const double SoA[3] = { SYM1, SYM2, SYM3 };
|
|
|
|
const size_t nx = (size_t)ex1 + ord;
|
|
const size_t ny = (size_t)ex2 + ord;
|
|
const size_t nz = (size_t)ex3 + ord;
|
|
const size_t fh_size = nx * ny * nz;
|
|
static double *fh_buf = NULL;
|
|
static size_t cap = 0;
|
|
if (fh_size > cap) {
|
|
free(fh_buf);
|
|
fh_buf = (double*)aligned_alloc(64, fh_size * sizeof(double));
|
|
cap = fh_size;
|
|
}
|
|
double *fh = fh_buf;
|
|
if (!fh) return;
|
|
|
|
symmetry_bd(ord, ex, f, fh, SoA);
|
|
|
|
/* Denominators */
|
|
const double Sdxdx = ONE / (dX * dX); // 2nd
|
|
const double Sdydy = ONE / (dY * dY);
|
|
const double Sdzdz = ONE / (dZ * dZ);
|
|
const double Fdxdx = F1o12 / (dX * dX); // 4th
|
|
const double Fdydy = F1o12 / (dY * dY);
|
|
const double Fdzdz = F1o12 / (dZ * dZ);
|
|
const double Xdxdx = F1o180 / (dX * dX); // 6th
|
|
const double Xdydy = F1o180 / (dY * dY);
|
|
const double Xdzdz = F1o180 / (dZ * dZ);
|
|
const double Sdxdy = F1o4 / (dX * dY);
|
|
const double Sdxdz = F1o4 / (dX * dZ);
|
|
const double Sdydz = F1o4 / (dY * dZ);
|
|
const double Fdxdy = F1o144 / (dX * dY);
|
|
const double Fdxdz = F1o144 / (dX * dZ);
|
|
const double Fdydz = F1o144 / (dY * dZ);
|
|
const double Xdxdy = F1o3600 / (dX * dY);
|
|
const double Xdxdz = F1o3600 / (dX * dZ);
|
|
const double Xdydz = F1o3600 / (dY * dZ);
|
|
|
|
/* zero everything first */
|
|
const size_t all = (size_t)ex1 * (size_t)ex2 * (size_t)ex3;
|
|
for (size_t p = 0; p < all; ++p) {
|
|
fxx[p]=ZEO; fyy[p]=ZEO; fzz[p]=ZEO;
|
|
fxy[p]=ZEO; fxz[p]=ZEO; fyz[p]=ZEO;
|
|
}
|
|
|
|
/* loop bounds for each pass (from widest to narrowest) */
|
|
const int i2_lo = (iminF > 0) ? iminF : 0;
|
|
const int j2_lo = (jminF > 0) ? jminF : 0;
|
|
const int k2_lo = (kminF > 0) ? kminF : 0;
|
|
const int i2_hi = ex1 - 2, j2_hi = ex2 - 2, k2_hi = ex3 - 2;
|
|
|
|
const int i4_lo = (iminF + 1 > 0) ? (iminF + 1) : 0;
|
|
const int j4_lo = (jminF + 1 > 0) ? (jminF + 1) : 0;
|
|
const int k4_lo = (kminF + 1 > 0) ? (kminF + 1) : 0;
|
|
const int i4_hi = ex1 - 3, j4_hi = ex2 - 3, k4_hi = ex3 - 3;
|
|
|
|
const int i6_lo = (iminF + 2 > 0) ? (iminF + 2) : 0;
|
|
const int j6_lo = (jminF + 2 > 0) ? (jminF + 2) : 0;
|
|
const int k6_lo = (kminF + 2 > 0) ? (kminF + 2) : 0;
|
|
const int i6_hi = ex1 - 4, j6_hi = ex2 - 4, k6_hi = ex3 - 4;
|
|
|
|
const int has4 = (i4_lo <= i4_hi && j4_lo <= j4_hi && k4_lo <= k4_hi);
|
|
const int has6 = (i6_lo <= i6_hi && j6_lo <= j6_hi && k6_lo <= k6_hi);
|
|
|
|
/* 2nd-order: skip 4th+6th overlap */
|
|
if (i2_lo <= i2_hi && j2_lo <= j2_hi && k2_lo <= k2_hi) {
|
|
for (int k0 = k2_lo; k0 <= k2_hi; ++k0) {
|
|
const int kF = k0 + 1;
|
|
for (int j0 = j2_lo; j0 <= j2_hi; ++j0) {
|
|
const int jF = j0 + 1;
|
|
for (int i0 = i2_lo; i0 <= i2_hi; ++i0) {
|
|
_Bool in4 = has4 && i0>=i4_lo && i0<=i4_hi && j0>=j4_lo && j0<=j4_hi && k0>=k4_lo && k0<=k4_hi;
|
|
if (in4) continue;
|
|
const int iF = i0 + 1;
|
|
const size_t p = idx_ex(i0, j0, k0, ex);
|
|
|
|
fxx[p] = Sdxdx * (fh[idx_fh_F(iF - 1, jF, kF, ex)] - TWO*fh[idx_fh_F(iF,jF,kF,ex)] + fh[idx_fh_F(iF + 1, jF, kF, ex)]);
|
|
fyy[p] = Sdydy * (fh[idx_fh_F(iF, jF - 1, kF, ex)] - TWO*fh[idx_fh_F(iF,jF,kF,ex)] + fh[idx_fh_F(iF, jF + 1, kF, ex)]);
|
|
fzz[p] = Sdzdz * (fh[idx_fh_F(iF, jF, kF - 1, ex)] - TWO*fh[idx_fh_F(iF,jF,kF,ex)] + fh[idx_fh_F(iF, jF, kF + 1, ex)]);
|
|
|
|
fxy[p] = Sdxdy * (fh[idx_fh_F(iF - 1, jF - 1, kF, ex)] - fh[idx_fh_F(iF + 1, jF - 1, kF, ex)] - fh[idx_fh_F(iF - 1, jF + 1, kF, ex)] + fh[idx_fh_F(iF + 1, jF + 1, kF, ex)]);
|
|
fxz[p] = Sdxdz * (fh[idx_fh_F(iF - 1, jF, kF - 1, ex)] - fh[idx_fh_F(iF + 1, jF, kF - 1, ex)] - fh[idx_fh_F(iF - 1, jF, kF + 1, ex)] + fh[idx_fh_F(iF + 1, jF, kF + 1, ex)]);
|
|
fyz[p] = Sdydz * (fh[idx_fh_F(iF, jF - 1, kF - 1, ex)] - fh[idx_fh_F(iF, jF + 1, kF - 1, ex)] - fh[idx_fh_F(iF, jF - 1, kF + 1, ex)] + fh[idx_fh_F(iF, jF + 1, kF + 1, ex)]);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
/* 4th-order: skip 6th overlap */
|
|
if (has4) {
|
|
for (int k0 = k4_lo; k0 <= k4_hi; ++k0) {
|
|
const int kF = k0 + 1;
|
|
for (int j0 = j4_lo; j0 <= j4_hi; ++j0) {
|
|
const int jF = j0 + 1;
|
|
for (int i0 = i4_lo; i0 <= i4_hi; ++i0) {
|
|
if (has6 && i0>=i6_lo && i0<=i6_hi && j0>=j6_lo && j0<=j6_hi && k0>=k6_lo && k0<=k6_hi) continue;
|
|
const int iF = i0 + 1;
|
|
const size_t p = idx_ex(i0, j0, k0, ex);
|
|
|
|
fxx[p] = Fdxdx * (-fh[idx_fh_F(iF - 2, jF, kF, ex)] + F16*fh[idx_fh_F(iF-1,jF,kF,ex)] - F30*fh[idx_fh_F(iF,jF,kF,ex)] - fh[idx_fh_F(iF+2,jF,kF,ex)] + F16*fh[idx_fh_F(iF+1,jF,kF,ex)]);
|
|
fyy[p] = Fdydy * (-fh[idx_fh_F(iF, jF - 2, kF, ex)] + F16*fh[idx_fh_F(iF,jF-1,kF,ex)] - F30*fh[idx_fh_F(iF,jF,kF,ex)] - fh[idx_fh_F(iF,jF+2,kF,ex)] + F16*fh[idx_fh_F(iF,jF+1,kF,ex)]);
|
|
fzz[p] = Fdzdz * (-fh[idx_fh_F(iF, jF, kF - 2, ex)] + F16*fh[idx_fh_F(iF,jF,kF-1,ex)] - F30*fh[idx_fh_F(iF,jF,kF,ex)] - fh[idx_fh_F(iF,jF,kF+2,ex)] + F16*fh[idx_fh_F(iF,jF,kF+1,ex)]);
|
|
|
|
{
|
|
const double t_jm2 = (fh[idx_fh_F(iF-2,jF-2,kF,ex)]-F8*fh[idx_fh_F(iF-1,jF-2,kF,ex)]+F8*fh[idx_fh_F(iF+1,jF-2,kF,ex)]-fh[idx_fh_F(iF+2,jF-2,kF,ex)]);
|
|
const double t_jm1 = (fh[idx_fh_F(iF-2,jF-1,kF,ex)]-F8*fh[idx_fh_F(iF-1,jF-1,kF,ex)]+F8*fh[idx_fh_F(iF+1,jF-1,kF,ex)]-fh[idx_fh_F(iF+2,jF-1,kF,ex)]);
|
|
const double t_jp1 = (fh[idx_fh_F(iF-2,jF+1,kF,ex)]-F8*fh[idx_fh_F(iF-1,jF+1,kF,ex)]+F8*fh[idx_fh_F(iF+1,jF+1,kF,ex)]-fh[idx_fh_F(iF+2,jF+1,kF,ex)]);
|
|
const double t_jp2 = (fh[idx_fh_F(iF-2,jF+2,kF,ex)]-F8*fh[idx_fh_F(iF-1,jF+2,kF,ex)]+F8*fh[idx_fh_F(iF+1,jF+2,kF,ex)]-fh[idx_fh_F(iF+2,jF+2,kF,ex)]);
|
|
fxy[p] = Fdxdy * (t_jm2 - F8*t_jm1 + F8*t_jp1 - t_jp2);
|
|
}
|
|
{
|
|
const double t_km2 = (fh[idx_fh_F(iF-2,jF,kF-2,ex)]-F8*fh[idx_fh_F(iF-1,jF,kF-2,ex)]+F8*fh[idx_fh_F(iF+1,jF,kF-2,ex)]-fh[idx_fh_F(iF+2,jF,kF-2,ex)]);
|
|
const double t_km1 = (fh[idx_fh_F(iF-2,jF,kF-1,ex)]-F8*fh[idx_fh_F(iF-1,jF,kF-1,ex)]+F8*fh[idx_fh_F(iF+1,jF,kF-1,ex)]-fh[idx_fh_F(iF+2,jF,kF-1,ex)]);
|
|
const double t_kp1 = (fh[idx_fh_F(iF-2,jF,kF+1,ex)]-F8*fh[idx_fh_F(iF-1,jF,kF+1,ex)]+F8*fh[idx_fh_F(iF+1,jF,kF+1,ex)]-fh[idx_fh_F(iF+2,jF,kF+1,ex)]);
|
|
const double t_kp2 = (fh[idx_fh_F(iF-2,jF,kF+2,ex)]-F8*fh[idx_fh_F(iF-1,jF,kF+2,ex)]+F8*fh[idx_fh_F(iF+1,jF,kF+2,ex)]-fh[idx_fh_F(iF+2,jF,kF+2,ex)]);
|
|
fxz[p] = Fdxdz * (t_km2 - F8*t_km1 + F8*t_kp1 - t_kp2);
|
|
}
|
|
{
|
|
const double t_km2 = (fh[idx_fh_F(iF,jF-2,kF-2,ex)]-F8*fh[idx_fh_F(iF,jF-1,kF-2,ex)]+F8*fh[idx_fh_F(iF,jF+1,kF-2,ex)]-fh[idx_fh_F(iF,jF+2,kF-2,ex)]);
|
|
const double t_km1 = (fh[idx_fh_F(iF,jF-2,kF-1,ex)]-F8*fh[idx_fh_F(iF,jF-1,kF-1,ex)]+F8*fh[idx_fh_F(iF,jF+1,kF-1,ex)]-fh[idx_fh_F(iF,jF+2,kF-1,ex)]);
|
|
const double t_kp1 = (fh[idx_fh_F(iF,jF-2,kF+1,ex)]-F8*fh[idx_fh_F(iF,jF-1,kF+1,ex)]+F8*fh[idx_fh_F(iF,jF+1,kF+1,ex)]-fh[idx_fh_F(iF,jF+2,kF+1,ex)]);
|
|
const double t_kp2 = (fh[idx_fh_F(iF,jF-2,kF+2,ex)]-F8*fh[idx_fh_F(iF,jF-1,kF+2,ex)]+F8*fh[idx_fh_F(iF,jF+1,kF+2,ex)]-fh[idx_fh_F(iF,jF+2,kF+2,ex)]);
|
|
fyz[p] = Fdydz * (t_km2 - F8*t_km1 + F8*t_kp1 - t_kp2);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
/* 6th-order: interior only */
|
|
if (has6) {
|
|
for (int k0 = k6_lo; k0 <= k6_hi; ++k0) {
|
|
const int kF = k0 + 1;
|
|
for (int j0 = j6_lo; j0 <= j6_hi; ++j0) {
|
|
const int jF = j0 + 1;
|
|
for (int i0 = i6_lo; i0 <= i6_hi; ++i0) {
|
|
const int iF = i0 + 1;
|
|
const size_t p = idx_ex(i0, j0, k0, ex);
|
|
|
|
/* Diagonal: [+2,-27,+270,-490,+270,-27,+2] / (180*dx^2) */
|
|
fxx[p] = Xdxdx * (
|
|
TWO * fh[idx_fh_F(iF - 3, jF, kF, ex)] -
|
|
F27 * fh[idx_fh_F(iF - 2, jF, kF, ex)] +
|
|
F270 * fh[idx_fh_F(iF - 1, jF, kF, ex)] -
|
|
F490 * fh[idx_fh_F(iF, jF, kF, ex)] +
|
|
F270 * fh[idx_fh_F(iF + 1, jF, kF, ex)] -
|
|
F27 * fh[idx_fh_F(iF + 2, jF, kF, ex)] +
|
|
TWO * fh[idx_fh_F(iF + 3, jF, kF, ex)]);
|
|
|
|
fyy[p] = Xdydy * (
|
|
TWO * fh[idx_fh_F(iF, jF - 3, kF, ex)] -
|
|
F27 * fh[idx_fh_F(iF, jF - 2, kF, ex)] +
|
|
F270 * fh[idx_fh_F(iF, jF - 1, kF, ex)] -
|
|
F490 * fh[idx_fh_F(iF, jF, kF, ex)] +
|
|
F270 * fh[idx_fh_F(iF, jF + 1, kF, ex)] -
|
|
F27 * fh[idx_fh_F(iF, jF + 2, kF, ex)] +
|
|
TWO * fh[idx_fh_F(iF, jF + 3, kF, ex)]);
|
|
|
|
fzz[p] = Xdzdz * (
|
|
TWO * fh[idx_fh_F(iF, jF, kF - 3, ex)] -
|
|
F27 * fh[idx_fh_F(iF, jF, kF - 2, ex)] +
|
|
F270 * fh[idx_fh_F(iF, jF, kF - 1, ex)] -
|
|
F490 * fh[idx_fh_F(iF, jF, kF, ex)] +
|
|
F270 * fh[idx_fh_F(iF, jF, kF + 1, ex)] -
|
|
F27 * fh[idx_fh_F(iF, jF, kF + 2, ex)] +
|
|
TWO * fh[idx_fh_F(iF, jF, kF + 3, ex)]);
|
|
|
|
/* Mixed: 7x7 outer product. Compute 1D x-stencil at each y/z offset,
|
|
then combine using 1D y/z weights [-1,+9,-45,0,+45,-9,+1] / (3600*dx*dy) */
|
|
{
|
|
// x-stencil: -f(i-3)+9f(i-2)-45f(i-1)+45f(i+1)-9f(i+2)+f(i+3)
|
|
// Helper macro would help but explicit is safer
|
|
#define XSTEN6(JF, KF_DUMMY) \
|
|
(-fh[idx_fh_F(iF-3,JF,KF_DUMMY,ex)] + F9*fh[idx_fh_F(iF-2,JF,KF_DUMMY,ex)] - F45*fh[idx_fh_F(iF-1,JF,KF_DUMMY,ex)] + F45*fh[idx_fh_F(iF+1,JF,KF_DUMMY,ex)] - F9*fh[idx_fh_F(iF+2,JF,KF_DUMMY,ex)] + fh[idx_fh_F(iF+3,JF,KF_DUMMY,ex)])
|
|
|
|
fxy[p] = Xdxdy * (
|
|
-XSTEN6(jF-3, kF) + F9*XSTEN6(jF-2, kF) - F45*XSTEN6(jF-1, kF) + F45*XSTEN6(jF+1, kF) - F9*XSTEN6(jF+2, kF) + XSTEN6(jF+3, kF));
|
|
|
|
fxz[p] = Xdxdz * (
|
|
-XSTEN6(jF, kF-3) + F9*XSTEN6(jF, kF-2) - F45*XSTEN6(jF, kF-1) + F45*XSTEN6(jF, kF+1) - F9*XSTEN6(jF, kF+2) + XSTEN6(jF, kF+3));
|
|
|
|
#undef XSTEN6
|
|
}
|
|
|
|
/* fyz: apply 1D y-stencil at each z offset */
|
|
{
|
|
#define YSTEN6(JF, KF_DUMMY) \
|
|
(-fh[idx_fh_F(iF,JF-3,KF_DUMMY,ex)] + F9*fh[idx_fh_F(iF,JF-2,KF_DUMMY,ex)] - F45*fh[idx_fh_F(iF,JF-1,KF_DUMMY,ex)] + F45*fh[idx_fh_F(iF,JF+1,KF_DUMMY,ex)] - F9*fh[idx_fh_F(iF,JF+2,KF_DUMMY,ex)] + fh[idx_fh_F(iF,JF+3,KF_DUMMY,ex)])
|
|
|
|
fyz[p] = Xdydz * (
|
|
-YSTEN6(jF, kF-3) + F9*YSTEN6(jF, kF-2) - F45*YSTEN6(jF, kF-1) + F45*YSTEN6(jF, kF+1) - F9*YSTEN6(jF, kF+2) + YSTEN6(jF, kF+3));
|
|
|
|
#undef YSTEN6
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
return;
|
|
}
|
|
#elif (ghost_width == 5)
|
|
/* ---- 8th-order ----------------------------------------------------- */
|
|
{
|
|
const int ord = 4;
|
|
|
|
int iminF = 1, jminF = 1, kminF = 1;
|
|
if (Symmetry > NO_SYMM && fabs(Z[0]) < dZ) kminF = -3;
|
|
if (Symmetry > EQ_SYMM && fabs(X[0]) < dX) iminF = -3;
|
|
if (Symmetry > EQ_SYMM && fabs(Y[0]) < dY) jminF = -3;
|
|
|
|
const double SoA[3] = { SYM1, SYM2, SYM3 };
|
|
|
|
const size_t nx = (size_t)ex1 + ord;
|
|
const size_t ny = (size_t)ex2 + ord;
|
|
const size_t nz = (size_t)ex3 + ord;
|
|
const size_t fh_size = nx * ny * nz;
|
|
static double *fh_buf = NULL;
|
|
static size_t cap = 0;
|
|
if (fh_size > cap) {
|
|
free(fh_buf);
|
|
fh_buf = (double*)aligned_alloc(64, fh_size * sizeof(double));
|
|
cap = fh_size;
|
|
}
|
|
double *fh = fh_buf;
|
|
if (!fh) return;
|
|
|
|
symmetry_bd(ord, ex, f, fh, SoA);
|
|
|
|
const double Sdxdx = ONE / (dX * dX);
|
|
const double Sdydy = ONE / (dY * dY);
|
|
const double Sdzdz = ONE / (dZ * dZ);
|
|
const double Fdxdx = F1o12 / (dX * dX);
|
|
const double Fdydy = F1o12 / (dY * dY);
|
|
const double Fdzdz = F1o12 / (dZ * dZ);
|
|
const double Xdxdx = F1o180 / (dX * dX);
|
|
const double Xdydy = F1o180 / (dY * dY);
|
|
const double Xdzdz = F1o180 / (dZ * dZ);
|
|
const double Edxdx = F1o5040 / (dX * dX);
|
|
const double Edydy = F1o5040 / (dY * dY);
|
|
const double Edzdz = F1o5040 / (dZ * dZ);
|
|
const double Sdxdy = F1o4 / (dX * dY);
|
|
const double Sdxdz = F1o4 / (dX * dZ);
|
|
const double Sdydz = F1o4 / (dY * dZ);
|
|
const double Fdxdy = F1o144 / (dX * dY);
|
|
const double Fdxdz = F1o144 / (dX * dZ);
|
|
const double Fdydz = F1o144 / (dY * dZ);
|
|
const double Xdxdy = F1o3600 / (dX * dY);
|
|
const double Xdxdz = F1o3600 / (dX * dZ);
|
|
const double Xdydz = F1o3600 / (dY * dZ);
|
|
const double Edxdy = F1o705600 / (dX * dY);
|
|
const double Edxdz = F1o705600 / (dX * dZ);
|
|
const double Edydz = F1o705600 / (dY * dZ);
|
|
|
|
const size_t all = (size_t)ex1 * (size_t)ex2 * (size_t)ex3;
|
|
for (size_t p = 0; p < all; ++p) {
|
|
fxx[p]=ZEO; fyy[p]=ZEO; fzz[p]=ZEO;
|
|
fxy[p]=ZEO; fxz[p]=ZEO; fyz[p]=ZEO;
|
|
}
|
|
|
|
/* Loop bounds for each pass */
|
|
const int i2_lo = (iminF > 0) ? iminF : 0;
|
|
const int j2_lo = (jminF > 0) ? jminF : 0;
|
|
const int k2_lo = (kminF > 0) ? kminF : 0;
|
|
const int i2_hi = ex1 - 2, j2_hi = ex2 - 2, k2_hi = ex3 - 2;
|
|
|
|
const int i4_lo = (iminF + 1 > 0) ? (iminF + 1) : 0;
|
|
const int j4_lo = (jminF + 1 > 0) ? (jminF + 1) : 0;
|
|
const int k4_lo = (kminF + 1 > 0) ? (kminF + 1) : 0;
|
|
const int i4_hi = ex1 - 3, j4_hi = ex2 - 3, k4_hi = ex3 - 3;
|
|
|
|
const int i6_lo = (iminF + 2 > 0) ? (iminF + 2) : 0;
|
|
const int j6_lo = (jminF + 2 > 0) ? (jminF + 2) : 0;
|
|
const int k6_lo = (kminF + 2 > 0) ? (kminF + 2) : 0;
|
|
const int i6_hi = ex1 - 4, j6_hi = ex2 - 4, k6_hi = ex3 - 4;
|
|
|
|
const int i8_lo = (iminF + 3 > 0) ? (iminF + 3) : 0;
|
|
const int j8_lo = (jminF + 3 > 0) ? (jminF + 3) : 0;
|
|
const int k8_lo = (kminF + 3 > 0) ? (kminF + 3) : 0;
|
|
const int i8_hi = ex1 - 5, j8_hi = ex2 - 5, k8_hi = ex3 - 5;
|
|
|
|
const int has4 = (i4_lo <= i4_hi && j4_lo <= j4_hi && k4_lo <= k4_hi);
|
|
const int has6 = (i6_lo <= i6_hi && j6_lo <= j6_hi && k6_lo <= k6_hi);
|
|
const int has8 = (i8_lo <= i8_hi && j8_lo <= j8_hi && k8_lo <= k8_hi);
|
|
|
|
/* 2nd-order: skip 4th+6th+8th overlap */
|
|
if (i2_lo <= i2_hi && j2_lo <= j2_hi && k2_lo <= k2_hi) {
|
|
for (int k0 = k2_lo; k0 <= k2_hi; ++k0) {
|
|
const int kF = k0 + 1;
|
|
for (int j0 = j2_lo; j0 <= j2_hi; ++j0) {
|
|
const int jF = j0 + 1;
|
|
for (int i0 = i2_lo; i0 <= i2_hi; ++i0) {
|
|
_Bool in4 = has4 && i0>=i4_lo && i0<=i4_hi && j0>=j4_lo && j0<=j4_hi && k0>=k4_lo && k0<=k4_hi;
|
|
if (in4) continue;
|
|
const int iF = i0 + 1;
|
|
const size_t p = idx_ex(i0, j0, k0, ex);
|
|
|
|
fxx[p] = Sdxdx * (fh[idx_fh_F_ord4(iF-1,jF,kF,ex)] - TWO*fh[idx_fh_F_ord4(iF,jF,kF,ex)] + fh[idx_fh_F_ord4(iF+1,jF,kF,ex)]);
|
|
fyy[p] = Sdydy * (fh[idx_fh_F_ord4(iF,jF-1,kF,ex)] - TWO*fh[idx_fh_F_ord4(iF,jF,kF,ex)] + fh[idx_fh_F_ord4(iF,jF+1,kF,ex)]);
|
|
fzz[p] = Sdzdz * (fh[idx_fh_F_ord4(iF,jF,kF-1,ex)] - TWO*fh[idx_fh_F_ord4(iF,jF,kF,ex)] + fh[idx_fh_F_ord4(iF,jF,kF+1,ex)]);
|
|
|
|
fxy[p] = Sdxdy * (fh[idx_fh_F_ord4(iF-1,jF-1,kF,ex)] - fh[idx_fh_F_ord4(iF+1,jF-1,kF,ex)] - fh[idx_fh_F_ord4(iF-1,jF+1,kF,ex)] + fh[idx_fh_F_ord4(iF+1,jF+1,kF,ex)]);
|
|
fxz[p] = Sdxdz * (fh[idx_fh_F_ord4(iF-1,jF,kF-1,ex)] - fh[idx_fh_F_ord4(iF+1,jF,kF-1,ex)] - fh[idx_fh_F_ord4(iF-1,jF,kF+1,ex)] + fh[idx_fh_F_ord4(iF+1,jF,kF+1,ex)]);
|
|
fyz[p] = Sdydz * (fh[idx_fh_F_ord4(iF,jF-1,kF-1,ex)] - fh[idx_fh_F_ord4(iF,jF+1,kF-1,ex)] - fh[idx_fh_F_ord4(iF,jF-1,kF+1,ex)] + fh[idx_fh_F_ord4(iF,jF+1,kF+1,ex)]);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
/* 4th-order: skip 6th+8th overlap */
|
|
if (has4) {
|
|
for (int k0 = k4_lo; k0 <= k4_hi; ++k0) {
|
|
const int kF = k0 + 1;
|
|
for (int j0 = j4_lo; j0 <= j4_hi; ++j0) {
|
|
const int jF = j0 + 1;
|
|
for (int i0 = i4_lo; i0 <= i4_hi; ++i0) {
|
|
_Bool in6 = has6 && i0>=i6_lo && i0<=i6_hi && j0>=j6_lo && j0<=j6_hi && k0>=k6_lo && k0<=k6_hi;
|
|
if (in6) continue;
|
|
const int iF = i0 + 1;
|
|
const size_t p = idx_ex(i0, j0, k0, ex);
|
|
|
|
fxx[p] = Fdxdx * (-fh[idx_fh_F_ord4(iF-2,jF,kF,ex)] + F16*fh[idx_fh_F_ord4(iF-1,jF,kF,ex)] - F30*fh[idx_fh_F_ord4(iF,jF,kF,ex)] - fh[idx_fh_F_ord4(iF+2,jF,kF,ex)] + F16*fh[idx_fh_F_ord4(iF+1,jF,kF,ex)]);
|
|
fyy[p] = Fdydy * (-fh[idx_fh_F_ord4(iF,jF-2,kF,ex)] + F16*fh[idx_fh_F_ord4(iF,jF-1,kF,ex)] - F30*fh[idx_fh_F_ord4(iF,jF,kF,ex)] - fh[idx_fh_F_ord4(iF,jF+2,kF,ex)] + F16*fh[idx_fh_F_ord4(iF,jF+1,kF,ex)]);
|
|
fzz[p] = Fdzdz * (-fh[idx_fh_F_ord4(iF,jF,kF-2,ex)] + F16*fh[idx_fh_F_ord4(iF,jF,kF-1,ex)] - F30*fh[idx_fh_F_ord4(iF,jF,kF,ex)] - fh[idx_fh_F_ord4(iF,jF,kF+2,ex)] + F16*fh[idx_fh_F_ord4(iF,jF,kF+1,ex)]);
|
|
|
|
{
|
|
const double t_jm2 = (fh[idx_fh_F_ord4(iF-2,jF-2,kF,ex)]-F8*fh[idx_fh_F_ord4(iF-1,jF-2,kF,ex)]+F8*fh[idx_fh_F_ord4(iF+1,jF-2,kF,ex)]-fh[idx_fh_F_ord4(iF+2,jF-2,kF,ex)]);
|
|
const double t_jm1 = (fh[idx_fh_F_ord4(iF-2,jF-1,kF,ex)]-F8*fh[idx_fh_F_ord4(iF-1,jF-1,kF,ex)]+F8*fh[idx_fh_F_ord4(iF+1,jF-1,kF,ex)]-fh[idx_fh_F_ord4(iF+2,jF-1,kF,ex)]);
|
|
const double t_jp1 = (fh[idx_fh_F_ord4(iF-2,jF+1,kF,ex)]-F8*fh[idx_fh_F_ord4(iF-1,jF+1,kF,ex)]+F8*fh[idx_fh_F_ord4(iF+1,jF+1,kF,ex)]-fh[idx_fh_F_ord4(iF+2,jF+1,kF,ex)]);
|
|
const double t_jp2 = (fh[idx_fh_F_ord4(iF-2,jF+2,kF,ex)]-F8*fh[idx_fh_F_ord4(iF-1,jF+2,kF,ex)]+F8*fh[idx_fh_F_ord4(iF+1,jF+2,kF,ex)]-fh[idx_fh_F_ord4(iF+2,jF+2,kF,ex)]);
|
|
fxy[p] = Fdxdy * (t_jm2 - F8*t_jm1 + F8*t_jp1 - t_jp2);
|
|
}
|
|
{
|
|
const double t_km2 = (fh[idx_fh_F_ord4(iF-2,jF,kF-2,ex)]-F8*fh[idx_fh_F_ord4(iF-1,jF,kF-2,ex)]+F8*fh[idx_fh_F_ord4(iF+1,jF,kF-2,ex)]-fh[idx_fh_F_ord4(iF+2,jF,kF-2,ex)]);
|
|
const double t_km1 = (fh[idx_fh_F_ord4(iF-2,jF,kF-1,ex)]-F8*fh[idx_fh_F_ord4(iF-1,jF,kF-1,ex)]+F8*fh[idx_fh_F_ord4(iF+1,jF,kF-1,ex)]-fh[idx_fh_F_ord4(iF+2,jF,kF-1,ex)]);
|
|
const double t_kp1 = (fh[idx_fh_F_ord4(iF-2,jF,kF+1,ex)]-F8*fh[idx_fh_F_ord4(iF-1,jF,kF+1,ex)]+F8*fh[idx_fh_F_ord4(iF+1,jF,kF+1,ex)]-fh[idx_fh_F_ord4(iF+2,jF,kF+1,ex)]);
|
|
const double t_kp2 = (fh[idx_fh_F_ord4(iF-2,jF,kF+2,ex)]-F8*fh[idx_fh_F_ord4(iF-1,jF,kF+2,ex)]+F8*fh[idx_fh_F_ord4(iF+1,jF,kF+2,ex)]-fh[idx_fh_F_ord4(iF+2,jF,kF+2,ex)]);
|
|
fxz[p] = Fdxdz * (t_km2 - F8*t_km1 + F8*t_kp1 - t_kp2);
|
|
}
|
|
{
|
|
const double t_km2 = (fh[idx_fh_F_ord4(iF,jF-2,kF-2,ex)]-F8*fh[idx_fh_F_ord4(iF,jF-1,kF-2,ex)]+F8*fh[idx_fh_F_ord4(iF,jF+1,kF-2,ex)]-fh[idx_fh_F_ord4(iF,jF+2,kF-2,ex)]);
|
|
const double t_km1 = (fh[idx_fh_F_ord4(iF,jF-2,kF-1,ex)]-F8*fh[idx_fh_F_ord4(iF,jF-1,kF-1,ex)]+F8*fh[idx_fh_F_ord4(iF,jF+1,kF-1,ex)]-fh[idx_fh_F_ord4(iF,jF+2,kF-1,ex)]);
|
|
const double t_kp1 = (fh[idx_fh_F_ord4(iF,jF-2,kF+1,ex)]-F8*fh[idx_fh_F_ord4(iF,jF-1,kF+1,ex)]+F8*fh[idx_fh_F_ord4(iF,jF+1,kF+1,ex)]-fh[idx_fh_F_ord4(iF,jF+2,kF+1,ex)]);
|
|
const double t_kp2 = (fh[idx_fh_F_ord4(iF,jF-2,kF+2,ex)]-F8*fh[idx_fh_F_ord4(iF,jF-1,kF+2,ex)]+F8*fh[idx_fh_F_ord4(iF,jF+1,kF+2,ex)]-fh[idx_fh_F_ord4(iF,jF+2,kF+2,ex)]);
|
|
fyz[p] = Fdydz * (t_km2 - F8*t_km1 + F8*t_kp1 - t_kp2);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
/* 6th-order: skip 8th overlap */
|
|
if (has6) {
|
|
for (int k0 = k6_lo; k0 <= k6_hi; ++k0) {
|
|
const int kF = k0 + 1;
|
|
for (int j0 = j6_lo; j0 <= j6_hi; ++j0) {
|
|
const int jF = j0 + 1;
|
|
for (int i0 = i6_lo; i0 <= i6_hi; ++i0) {
|
|
if (has8 && i0>=i8_lo && i0<=i8_hi && j0>=j8_lo && j0<=j8_hi && k0>=k8_lo && k0<=k8_hi) continue;
|
|
const int iF = i0 + 1;
|
|
const size_t p = idx_ex(i0, j0, k0, ex);
|
|
|
|
fxx[p] = Xdxdx * (
|
|
TWO * fh[idx_fh_F_ord4(iF-3,jF,kF,ex)] - F27*fh[idx_fh_F_ord4(iF-2,jF,kF,ex)] + F270*fh[idx_fh_F_ord4(iF-1,jF,kF,ex)] - F490*fh[idx_fh_F_ord4(iF,jF,kF,ex)] + F270*fh[idx_fh_F_ord4(iF+1,jF,kF,ex)] - F27*fh[idx_fh_F_ord4(iF+2,jF,kF,ex)] + TWO*fh[idx_fh_F_ord4(iF+3,jF,kF,ex)]);
|
|
fyy[p] = Xdydy * (
|
|
TWO * fh[idx_fh_F_ord4(iF,jF-3,kF,ex)] - F27*fh[idx_fh_F_ord4(iF,jF-2,kF,ex)] + F270*fh[idx_fh_F_ord4(iF,jF-1,kF,ex)] - F490*fh[idx_fh_F_ord4(iF,jF,kF,ex)] + F270*fh[idx_fh_F_ord4(iF,jF+1,kF,ex)] - F27*fh[idx_fh_F_ord4(iF,jF+2,kF,ex)] + TWO*fh[idx_fh_F_ord4(iF,jF+3,kF,ex)]);
|
|
fzz[p] = Xdzdz * (
|
|
TWO * fh[idx_fh_F_ord4(iF,jF,kF-3,ex)] - F27*fh[idx_fh_F_ord4(iF,jF,kF-2,ex)] + F270*fh[idx_fh_F_ord4(iF,jF,kF-1,ex)] - F490*fh[idx_fh_F_ord4(iF,jF,kF,ex)] + F270*fh[idx_fh_F_ord4(iF,jF,kF+1,ex)] - F27*fh[idx_fh_F_ord4(iF,jF,kF+2,ex)] + TWO*fh[idx_fh_F_ord4(iF,jF,kF+3,ex)]);
|
|
|
|
{
|
|
#define XSTEN6_8(JF, KF_DUMMY) \
|
|
(-fh[idx_fh_F_ord4(iF-3,JF,KF_DUMMY,ex)] + F9*fh[idx_fh_F_ord4(iF-2,JF,KF_DUMMY,ex)] - F45*fh[idx_fh_F_ord4(iF-1,JF,KF_DUMMY,ex)] + F45*fh[idx_fh_F_ord4(iF+1,JF,KF_DUMMY,ex)] - F9*fh[idx_fh_F_ord4(iF+2,JF,KF_DUMMY,ex)] + fh[idx_fh_F_ord4(iF+3,JF,KF_DUMMY,ex)])
|
|
fxy[p] = Xdxdy * (
|
|
-XSTEN6_8(jF-3,kF) + F9*XSTEN6_8(jF-2,kF) - F45*XSTEN6_8(jF-1,kF) + F45*XSTEN6_8(jF+1,kF) - F9*XSTEN6_8(jF+2,kF) + XSTEN6_8(jF+3,kF));
|
|
fxz[p] = Xdxdz * (
|
|
-XSTEN6_8(jF,kF-3) + F9*XSTEN6_8(jF,kF-2) - F45*XSTEN6_8(jF,kF-1) + F45*XSTEN6_8(jF,kF+1) - F9*XSTEN6_8(jF,kF+2) + XSTEN6_8(jF,kF+3));
|
|
#undef XSTEN6_8
|
|
}
|
|
{
|
|
#define YSTEN6_8(JF, KF_DUMMY) \
|
|
(-fh[idx_fh_F_ord4(iF,JF-3,KF_DUMMY,ex)] + F9*fh[idx_fh_F_ord4(iF,JF-2,KF_DUMMY,ex)] - F45*fh[idx_fh_F_ord4(iF,JF-1,KF_DUMMY,ex)] + F45*fh[idx_fh_F_ord4(iF,JF+1,KF_DUMMY,ex)] - F9*fh[idx_fh_F_ord4(iF,JF+2,KF_DUMMY,ex)] + fh[idx_fh_F_ord4(iF,JF+3,KF_DUMMY,ex)])
|
|
fyz[p] = Xdydz * (
|
|
-YSTEN6_8(jF,kF-3) + F9*YSTEN6_8(jF,kF-2) - F45*YSTEN6_8(jF,kF-1) + F45*YSTEN6_8(jF,kF+1) - F9*YSTEN6_8(jF,kF+2) + YSTEN6_8(jF,kF+3));
|
|
#undef YSTEN6_8
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
/* 8th-order: interior only */
|
|
if (has8) {
|
|
for (int k0 = k8_lo; k0 <= k8_hi; ++k0) {
|
|
const int kF = k0 + 1;
|
|
for (int j0 = j8_lo; j0 <= j8_hi; ++j0) {
|
|
const int jF = j0 + 1;
|
|
for (int i0 = i8_lo; i0 <= i8_hi; ++i0) {
|
|
const int iF = i0 + 1;
|
|
const size_t p = idx_ex(i0, j0, k0, ex);
|
|
|
|
/* Diagonal: [-9,+128,-1008,+8064,-14350,+8064,-1008,+128,-9] / (5040*dx^2) */
|
|
fxx[p] = Edxdx * (
|
|
-(double)9 * fh[idx_fh_F_ord4(iF - 4, jF, kF, ex)] +
|
|
F128 * fh[idx_fh_F_ord4(iF - 3, jF, kF, ex)] -
|
|
F1008 * fh[idx_fh_F_ord4(iF - 2, jF, kF, ex)] +
|
|
F8064 * fh[idx_fh_F_ord4(iF - 1, jF, kF, ex)] -
|
|
F14350* fh[idx_fh_F_ord4(iF, jF, kF, ex)] +
|
|
F8064 * fh[idx_fh_F_ord4(iF + 1, jF, kF, ex)] -
|
|
F1008 * fh[idx_fh_F_ord4(iF + 2, jF, kF, ex)] +
|
|
F128 * fh[idx_fh_F_ord4(iF + 3, jF, kF, ex)] -
|
|
(double)9 * fh[idx_fh_F_ord4(iF + 4, jF, kF, ex)]);
|
|
|
|
fyy[p] = Edydy * (
|
|
-(double)9 * fh[idx_fh_F_ord4(iF, jF - 4, kF, ex)] +
|
|
F128 * fh[idx_fh_F_ord4(iF, jF - 3, kF, ex)] -
|
|
F1008 * fh[idx_fh_F_ord4(iF, jF - 2, kF, ex)] +
|
|
F8064 * fh[idx_fh_F_ord4(iF, jF - 1, kF, ex)] -
|
|
F14350* fh[idx_fh_F_ord4(iF, jF, kF, ex)] +
|
|
F8064 * fh[idx_fh_F_ord4(iF, jF + 1, kF, ex)] -
|
|
F1008 * fh[idx_fh_F_ord4(iF, jF + 2, kF, ex)] +
|
|
F128 * fh[idx_fh_F_ord4(iF, jF + 3, kF, ex)] -
|
|
(double)9 * fh[idx_fh_F_ord4(iF, jF + 4, kF, ex)]);
|
|
|
|
fzz[p] = Edzdz * (
|
|
-(double)9 * fh[idx_fh_F_ord4(iF, jF, kF - 4, ex)] +
|
|
F128 * fh[idx_fh_F_ord4(iF, jF, kF - 3, ex)] -
|
|
F1008 * fh[idx_fh_F_ord4(iF, jF, kF - 2, ex)] +
|
|
F8064 * fh[idx_fh_F_ord4(iF, jF, kF - 1, ex)] -
|
|
F14350* fh[idx_fh_F_ord4(iF, jF, kF, ex)] +
|
|
F8064 * fh[idx_fh_F_ord4(iF, jF, kF + 1, ex)] -
|
|
F1008 * fh[idx_fh_F_ord4(iF, jF, kF + 2, ex)] +
|
|
F128 * fh[idx_fh_F_ord4(iF, jF, kF + 3, ex)] -
|
|
(double)9 * fh[idx_fh_F_ord4(iF, jF, kF + 4, ex)]);
|
|
|
|
/* Mixed: 9x9 outer product.
|
|
x-stencil: +3*f(i-4)-32*f(i-3)+168*f(i-2)-672*f(i-1)+672*f(i+1)-168*f(i+2)+32*f(i+3)-3*f(i+4)
|
|
y/z weights: same [+3,-32,+168,-672,+672,-168,+32,-3] / 705600 */
|
|
{
|
|
#define XSTEN8(JF, KF_DUMMY) \
|
|
(+(double)3*fh[idx_fh_F_ord4(iF-4,JF,KF_DUMMY,ex)] - F32*fh[idx_fh_F_ord4(iF-3,JF,KF_DUMMY,ex)] + F168*fh[idx_fh_F_ord4(iF-2,JF,KF_DUMMY,ex)] - F672*fh[idx_fh_F_ord4(iF-1,JF,KF_DUMMY,ex)] + F672*fh[idx_fh_F_ord4(iF+1,JF,KF_DUMMY,ex)] - F168*fh[idx_fh_F_ord4(iF+2,JF,KF_DUMMY,ex)] + F32*fh[idx_fh_F_ord4(iF+3,JF,KF_DUMMY,ex)] - (double)3*fh[idx_fh_F_ord4(iF+4,JF,KF_DUMMY,ex)])
|
|
|
|
fxy[p] = Edxdy * (
|
|
+(double)3*XSTEN8(jF-4,kF) - F32*XSTEN8(jF-3,kF) + F168*XSTEN8(jF-2,kF) - F672*XSTEN8(jF-1,kF) + F672*XSTEN8(jF+1,kF) - F168*XSTEN8(jF+2,kF) + F32*XSTEN8(jF+3,kF) - (double)3*XSTEN8(jF+4,kF));
|
|
|
|
fxz[p] = Edxdz * (
|
|
+(double)3*XSTEN8(jF,kF-4) - F32*XSTEN8(jF,kF-3) + F168*XSTEN8(jF,kF-2) - F672*XSTEN8(jF,kF-1) + F672*XSTEN8(jF,kF+1) - F168*XSTEN8(jF,kF+2) + F32*XSTEN8(jF,kF+3) - (double)3*XSTEN8(jF,kF+4));
|
|
|
|
#undef XSTEN8
|
|
}
|
|
{
|
|
#define YSTEN8(JF, KF_DUMMY) \
|
|
(+(double)3*fh[idx_fh_F_ord4(iF,JF-4,KF_DUMMY,ex)] - F32*fh[idx_fh_F_ord4(iF,JF-3,KF_DUMMY,ex)] + F168*fh[idx_fh_F_ord4(iF,JF-2,KF_DUMMY,ex)] - F672*fh[idx_fh_F_ord4(iF,JF-1,KF_DUMMY,ex)] + F672*fh[idx_fh_F_ord4(iF,JF+1,KF_DUMMY,ex)] - F168*fh[idx_fh_F_ord4(iF,JF+2,KF_DUMMY,ex)] + F32*fh[idx_fh_F_ord4(iF,JF+3,KF_DUMMY,ex)] - (double)3*fh[idx_fh_F_ord4(iF,JF+4,KF_DUMMY,ex)])
|
|
|
|
fyz[p] = Edydz * (
|
|
+(double)3*YSTEN8(jF,kF-4) - F32*YSTEN8(jF,kF-3) + F168*YSTEN8(jF,kF-2) - F672*YSTEN8(jF,kF-1) + F672*YSTEN8(jF,kF+1) - F168*YSTEN8(jF,kF+2) + F32*YSTEN8(jF,kF+3) - (double)3*YSTEN8(jF,kF+4));
|
|
|
|
#undef YSTEN8
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
return;
|
|
}
|
|
#else
|
|
#error "fdderivs_c.C: unsupported ghost_width (must be 2, 3, 4, or 5)"
|
|
#endif
|
|
}
|