Wrap each C kernel in #if (ghost_width == N) blocks matching Fortran stencil coefficients from diff_new.f90, kodiss.f90, and lopsidediff.f90. Add fast-path indexing for ord=1,4,5 in share_func.h. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
617 lines
26 KiB
C
617 lines
26 KiB
C
#include "macrodef.h"
|
|
#include "tool.h"
|
|
|
|
/*
|
|
* C 版 fderivs — first derivatives df/dx, df/dy, df/dz.
|
|
*
|
|
* Finite difference order is selected at compile time via the ghost_width macro
|
|
* (defined in macrodef.fh):
|
|
* ghost_width = 2 → 2nd-order
|
|
* ghost_width = 3 → 4th-order
|
|
* ghost_width = 4 → 6th-order
|
|
* ghost_width = 5 → 8th-order
|
|
*
|
|
* Multi-pass overwrite strategy: compute the widest (lowest-order) stencil first,
|
|
* then overwrite interior regions with progressively higher-order stencils.
|
|
*/
|
|
void fderivs(const int ex[3],
|
|
const double *f,
|
|
double *fx, double *fy, double *fz,
|
|
const double *X, const double *Y, const double *Z,
|
|
double SYM1, double SYM2, double SYM3,
|
|
int Symmetry, int onoff)
|
|
{
|
|
(void)onoff;
|
|
|
|
const double ZEO = 0.0, ONE = 1.0, TWO = 2.0, EIT = 8.0;
|
|
const double F9 = 9.0, F12 = 12.0, F45 = 45.0, F60 = 60.0;
|
|
const double F32 = 32.0, F168 = 168.0, F672 = 672.0, F840 = 840.0;
|
|
|
|
const int NO_SYMM = 0, EQ_SYMM = 1;
|
|
|
|
const int ex1 = ex[0], ex2 = ex[1], ex3 = ex[2];
|
|
const double dX = X[1] - X[0];
|
|
const double dY = Y[1] - Y[0];
|
|
const double dZ = Z[1] - Z[0];
|
|
|
|
const int imaxF = ex1, jmaxF = ex2, kmaxF = ex3;
|
|
|
|
const int gw = ghost_width; // compile-time constant
|
|
|
|
#if (ghost_width == 2)
|
|
/* ---- 2nd-order ------------------------------------------------------ */
|
|
{
|
|
const int ord = 1; // symmetry_bd ord = ghost_width - 1
|
|
|
|
int iminF = 1, jminF = 1, kminF = 1;
|
|
if (Symmetry > NO_SYMM && fabs(Z[0]) < dZ) kminF = 0;
|
|
if (Symmetry > EQ_SYMM && fabs(X[0]) < dX) iminF = 0;
|
|
if (Symmetry > EQ_SYMM && fabs(Y[0]) < dY) jminF = 0;
|
|
|
|
const double SoA[3] = { SYM1, SYM2, SYM3 };
|
|
|
|
const size_t nx = (size_t)ex1 + ord;
|
|
const size_t ny = (size_t)ex2 + ord;
|
|
const size_t nz = (size_t)ex3 + ord;
|
|
const size_t fh_size = nx * ny * nz;
|
|
|
|
static double *fh_buf = NULL;
|
|
static size_t cap = 0;
|
|
if (fh_size > cap) {
|
|
free(fh_buf);
|
|
fh_buf = (double*)aligned_alloc(64, fh_size * sizeof(double));
|
|
cap = fh_size;
|
|
}
|
|
double *fh = fh_buf;
|
|
if (!fh) return;
|
|
|
|
symmetry_bd(ord, ex, f, fh, SoA);
|
|
|
|
const double d2dx = ONE / TWO / dX;
|
|
const double d2dy = ONE / TWO / dY;
|
|
const double d2dz = ONE / TWO / dZ;
|
|
|
|
const size_t all = (size_t)ex1 * (size_t)ex2 * (size_t)ex3;
|
|
for (size_t p = 0; p < all; ++p) {
|
|
fx[p] = ZEO; fy[p] = ZEO; fz[p] = ZEO;
|
|
}
|
|
|
|
/* 2nd-order pass: [-1, 0, +1] / (2*dx) */
|
|
const int i2_lo = (iminF > 0) ? iminF : 0;
|
|
const int j2_lo = (jminF > 0) ? jminF : 0;
|
|
const int k2_lo = (kminF > 0) ? kminF : 0;
|
|
const int i2_hi = ex1 - 2;
|
|
const int j2_hi = ex2 - 2;
|
|
const int k2_hi = ex3 - 2;
|
|
|
|
if (i2_lo <= i2_hi && j2_lo <= j2_hi && k2_lo <= k2_hi) {
|
|
for (int k0 = k2_lo; k0 <= k2_hi; ++k0) {
|
|
const int kF = k0; // Fortran index for fh: kF = k0 (shift=0)
|
|
for (int j0 = j2_lo; j0 <= j2_hi; ++j0) {
|
|
const int jF = j0;
|
|
for (int i0 = i2_lo; i0 <= i2_hi; ++i0) {
|
|
const int iF = i0;
|
|
const size_t p = idx_ex(i0, j0, k0, ex);
|
|
|
|
fx[p] = d2dx * (
|
|
-fh[idx_fh_F_ord1(iF - 1, jF, kF, ex)] +
|
|
fh[idx_fh_F_ord1(iF + 1, jF, kF, ex)]
|
|
);
|
|
|
|
fy[p] = d2dy * (
|
|
-fh[idx_fh_F_ord1(iF, jF - 1, kF, ex)] +
|
|
fh[idx_fh_F_ord1(iF, jF + 1, kF, ex)]
|
|
);
|
|
|
|
fz[p] = d2dz * (
|
|
-fh[idx_fh_F_ord1(iF, jF, kF - 1, ex)] +
|
|
fh[idx_fh_F_ord1(iF, jF, kF + 1, ex)]
|
|
);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
return;
|
|
}
|
|
#elif (ghost_width == 3)
|
|
/* ---- 4th-order (original code) ------------------------------------ */
|
|
{
|
|
const int ord = 2; // symmetry_bd ord
|
|
|
|
int iminF = 1, jminF = 1, kminF = 1;
|
|
if (Symmetry > NO_SYMM && fabs(Z[0]) < dZ) kminF = -1;
|
|
if (Symmetry > EQ_SYMM && fabs(X[0]) < dX) iminF = -1;
|
|
if (Symmetry > EQ_SYMM && fabs(Y[0]) < dY) jminF = -1;
|
|
|
|
const double SoA[3] = { SYM1, SYM2, SYM3 };
|
|
|
|
const size_t nx = (size_t)ex1 + ord;
|
|
const size_t ny = (size_t)ex2 + ord;
|
|
const size_t nz = (size_t)ex3 + ord;
|
|
const size_t fh_size = nx * ny * nz;
|
|
|
|
static double *fh_buf = NULL;
|
|
static size_t cap = 0;
|
|
if (fh_size > cap) {
|
|
free(fh_buf);
|
|
fh_buf = (double*)aligned_alloc(64, fh_size * sizeof(double));
|
|
cap = fh_size;
|
|
}
|
|
double *fh = fh_buf;
|
|
if (!fh) return;
|
|
|
|
symmetry_bd(ord, ex, f, fh, SoA);
|
|
|
|
const double d12dx = ONE / F12 / dX;
|
|
const double d12dy = ONE / F12 / dY;
|
|
const double d12dz = ONE / F12 / dZ;
|
|
const double d2dx = ONE / TWO / dX;
|
|
const double d2dy = ONE / TWO / dY;
|
|
const double d2dz = ONE / TWO / dZ;
|
|
|
|
const size_t all = (size_t)ex1 * (size_t)ex2 * (size_t)ex3;
|
|
for (size_t p = 0; p < all; ++p) {
|
|
fx[p] = ZEO; fy[p] = ZEO; fz[p] = ZEO;
|
|
}
|
|
|
|
const int i2_lo = (iminF > 0) ? iminF : 0;
|
|
const int j2_lo = (jminF > 0) ? jminF : 0;
|
|
const int k2_lo = (kminF > 0) ? kminF : 0;
|
|
const int i2_hi = ex1 - 2;
|
|
const int j2_hi = ex2 - 2;
|
|
const int k2_hi = ex3 - 2;
|
|
|
|
const int i4_lo = (iminF + 1 > 0) ? (iminF + 1) : 0;
|
|
const int j4_lo = (jminF + 1 > 0) ? (jminF + 1) : 0;
|
|
const int k4_lo = (kminF + 1 > 0) ? (kminF + 1) : 0;
|
|
const int i4_hi = ex1 - 3;
|
|
const int j4_hi = ex2 - 3;
|
|
const int k4_hi = ex3 - 3;
|
|
|
|
if (i2_lo <= i2_hi && j2_lo <= j2_hi && k2_lo <= k2_hi) {
|
|
for (int k0 = k2_lo; k0 <= k2_hi; ++k0) {
|
|
const int kF = k0 + 1;
|
|
for (int j0 = j2_lo; j0 <= j2_hi; ++j0) {
|
|
const int jF = j0 + 1;
|
|
for (int i0 = i2_lo; i0 <= i2_hi; ++i0) {
|
|
const int iF = i0 + 1;
|
|
const size_t p = idx_ex(i0, j0, k0, ex);
|
|
|
|
fx[p] = d2dx * (
|
|
-fh[idx_fh_F_ord2(iF - 1, jF, kF, ex)] +
|
|
fh[idx_fh_F_ord2(iF + 1, jF, kF, ex)]
|
|
);
|
|
|
|
fy[p] = d2dy * (
|
|
-fh[idx_fh_F_ord2(iF, jF - 1, kF, ex)] +
|
|
fh[idx_fh_F_ord2(iF, jF + 1, kF, ex)]
|
|
);
|
|
|
|
fz[p] = d2dz * (
|
|
-fh[idx_fh_F_ord2(iF, jF, kF - 1, ex)] +
|
|
fh[idx_fh_F_ord2(iF, jF, kF + 1, ex)]
|
|
);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
if (i4_lo <= i4_hi && j4_lo <= j4_hi && k4_lo <= k4_hi) {
|
|
for (int k0 = k4_lo; k0 <= k4_hi; ++k0) {
|
|
const int kF = k0 + 1;
|
|
for (int j0 = j4_lo; j0 <= j4_hi; ++j0) {
|
|
const int jF = j0 + 1;
|
|
for (int i0 = i4_lo; i0 <= i4_hi; ++i0) {
|
|
const int iF = i0 + 1;
|
|
const size_t p = idx_ex(i0, j0, k0, ex);
|
|
|
|
fx[p] = d12dx * (
|
|
fh[idx_fh_F_ord2(iF - 2, jF, kF, ex)] -
|
|
EIT * fh[idx_fh_F_ord2(iF - 1, jF, kF, ex)] +
|
|
EIT * fh[idx_fh_F_ord2(iF + 1, jF, kF, ex)] -
|
|
fh[idx_fh_F_ord2(iF + 2, jF, kF, ex)]
|
|
);
|
|
|
|
fy[p] = d12dy * (
|
|
fh[idx_fh_F_ord2(iF, jF - 2, kF, ex)] -
|
|
EIT * fh[idx_fh_F_ord2(iF, jF - 1, kF, ex)] +
|
|
EIT * fh[idx_fh_F_ord2(iF, jF + 1, kF, ex)] -
|
|
fh[idx_fh_F_ord2(iF, jF + 2, kF, ex)]
|
|
);
|
|
|
|
fz[p] = d12dz * (
|
|
fh[idx_fh_F_ord2(iF, jF, kF - 2, ex)] -
|
|
EIT * fh[idx_fh_F_ord2(iF, jF, kF - 1, ex)] +
|
|
EIT * fh[idx_fh_F_ord2(iF, jF, kF + 1, ex)] -
|
|
fh[idx_fh_F_ord2(iF, jF, kF + 2, ex)]
|
|
);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
return;
|
|
}
|
|
#elif (ghost_width == 4)
|
|
/* ---- 6th-order ----------------------------------------------------- */
|
|
{
|
|
const int ord = 3;
|
|
|
|
int iminF = 1, jminF = 1, kminF = 1;
|
|
if (Symmetry > NO_SYMM && fabs(Z[0]) < dZ) kminF = -2;
|
|
if (Symmetry > EQ_SYMM && fabs(X[0]) < dX) iminF = -2;
|
|
if (Symmetry > EQ_SYMM && fabs(Y[0]) < dY) jminF = -2;
|
|
|
|
const double SoA[3] = { SYM1, SYM2, SYM3 };
|
|
|
|
const size_t nx = (size_t)ex1 + ord;
|
|
const size_t ny = (size_t)ex2 + ord;
|
|
const size_t nz = (size_t)ex3 + ord;
|
|
const size_t fh_size = nx * ny * nz;
|
|
|
|
static double *fh_buf = NULL;
|
|
static size_t cap = 0;
|
|
if (fh_size > cap) {
|
|
free(fh_buf);
|
|
fh_buf = (double*)aligned_alloc(64, fh_size * sizeof(double));
|
|
cap = fh_size;
|
|
}
|
|
double *fh = fh_buf;
|
|
if (!fh) return;
|
|
|
|
symmetry_bd(ord, ex, f, fh, SoA);
|
|
|
|
/* Denominators */
|
|
const double d60dx = ONE / F60 / dX;
|
|
const double d60dy = ONE / F60 / dY;
|
|
const double d60dz = ONE / F60 / dZ;
|
|
const double d12dx = ONE / F12 / dX;
|
|
const double d12dy = ONE / F12 / dY;
|
|
const double d12dz = ONE / F12 / dZ;
|
|
const double d2dx = ONE / TWO / dX;
|
|
const double d2dy = ONE / TWO / dY;
|
|
const double d2dz = ONE / TWO / dZ;
|
|
|
|
const size_t all = (size_t)ex1 * (size_t)ex2 * (size_t)ex3;
|
|
for (size_t p = 0; p < all; ++p) {
|
|
fx[p] = ZEO; fy[p] = ZEO; fz[p] = ZEO;
|
|
}
|
|
|
|
/* 2nd-order pass: 3pt, widest */
|
|
const int i2_lo = (iminF + 1 > 0) ? (iminF + 1) : 0;
|
|
const int j2_lo = (jminF + 1 > 0) ? (jminF + 1) : 0;
|
|
const int k2_lo = (kminF + 1 > 0) ? (kminF + 1) : 0;
|
|
const int i2_hi = ex1 - 2;
|
|
const int j2_hi = ex2 - 2;
|
|
const int k2_hi = ex3 - 2;
|
|
|
|
/* 4th-order pass: 5pt */
|
|
const int i4_lo = (iminF + 2 > 0) ? (iminF + 2) : 0;
|
|
const int j4_lo = (jminF + 2 > 0) ? (jminF + 2) : 0;
|
|
const int k4_lo = (kminF + 2 > 0) ? (kminF + 2) : 0;
|
|
const int i4_hi = ex1 - 3;
|
|
const int j4_hi = ex2 - 3;
|
|
const int k4_hi = ex3 - 3;
|
|
|
|
/* 6th-order pass: 7pt, narrowest interior */
|
|
const int i6_lo = (iminF + 3 > 0) ? (iminF + 3) : 0;
|
|
const int j6_lo = (jminF + 3 > 0) ? (jminF + 3) : 0;
|
|
const int k6_lo = (kminF + 3 > 0) ? (kminF + 3) : 0;
|
|
const int i6_hi = ex1 - 4;
|
|
const int j6_hi = ex2 - 4;
|
|
const int k6_hi = ex3 - 4;
|
|
|
|
/* 2nd-order */
|
|
if (i2_lo <= i2_hi && j2_lo <= j2_hi && k2_lo <= k2_hi) {
|
|
for (int k0 = k2_lo; k0 <= k2_hi; ++k0) {
|
|
const int kF = k0 + 2;
|
|
for (int j0 = j2_lo; j0 <= j2_hi; ++j0) {
|
|
const int jF = j0 + 2;
|
|
for (int i0 = i2_lo; i0 <= i2_hi; ++i0) {
|
|
const int iF = i0 + 2;
|
|
const size_t p = idx_ex(i0, j0, k0, ex);
|
|
|
|
fx[p] = d2dx * (
|
|
-fh[idx_fh_F(iF - 1, jF, kF, ex)] +
|
|
fh[idx_fh_F(iF + 1, jF, kF, ex)]);
|
|
fy[p] = d2dy * (
|
|
-fh[idx_fh_F(iF, jF - 1, kF, ex)] +
|
|
fh[idx_fh_F(iF, jF + 1, kF, ex)]);
|
|
fz[p] = d2dz * (
|
|
-fh[idx_fh_F(iF, jF, kF - 1, ex)] +
|
|
fh[idx_fh_F(iF, jF, kF + 1, ex)]);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
/* 4th-order overwrite */
|
|
if (i4_lo <= i4_hi && j4_lo <= j4_hi && k4_lo <= k4_hi) {
|
|
for (int k0 = k4_lo; k0 <= k4_hi; ++k0) {
|
|
const int kF = k0 + 2;
|
|
for (int j0 = j4_lo; j0 <= j4_hi; ++j0) {
|
|
const int jF = j0 + 2;
|
|
for (int i0 = i4_lo; i0 <= i4_hi; ++i0) {
|
|
const int iF = i0 + 2;
|
|
const size_t p = idx_ex(i0, j0, k0, ex);
|
|
|
|
fx[p] = d12dx * (
|
|
fh[idx_fh_F(iF - 2, jF, kF, ex)] -
|
|
EIT * fh[idx_fh_F(iF - 1, jF, kF, ex)] +
|
|
EIT * fh[idx_fh_F(iF + 1, jF, kF, ex)] -
|
|
fh[idx_fh_F(iF + 2, jF, kF, ex)]);
|
|
|
|
fy[p] = d12dy * (
|
|
fh[idx_fh_F(iF, jF - 2, kF, ex)] -
|
|
EIT * fh[idx_fh_F(iF, jF - 1, kF, ex)] +
|
|
EIT * fh[idx_fh_F(iF, jF + 1, kF, ex)] -
|
|
fh[idx_fh_F(iF, jF + 2, kF, ex)]);
|
|
|
|
fz[p] = d12dz * (
|
|
fh[idx_fh_F(iF, jF, kF - 2, ex)] -
|
|
EIT * fh[idx_fh_F(iF, jF, kF - 1, ex)] +
|
|
EIT * fh[idx_fh_F(iF, jF, kF + 1, ex)] -
|
|
fh[idx_fh_F(iF, jF, kF + 2, ex)]);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
/* 6th-order overwrite: [-1,+9,-45,0,+45,-9,+1] / (60*dx) */
|
|
if (i6_lo <= i6_hi && j6_lo <= j6_hi && k6_lo <= k6_hi) {
|
|
for (int k0 = k6_lo; k0 <= k6_hi; ++k0) {
|
|
const int kF = k0 + 2;
|
|
for (int j0 = j6_lo; j0 <= j6_hi; ++j0) {
|
|
const int jF = j0 + 2;
|
|
for (int i0 = i6_lo; i0 <= i6_hi; ++i0) {
|
|
const int iF = i0 + 2;
|
|
const size_t p = idx_ex(i0, j0, k0, ex);
|
|
|
|
fx[p] = d60dx * (
|
|
-fh[idx_fh_F(iF - 3, jF, kF, ex)] +
|
|
F9 * fh[idx_fh_F(iF - 2, jF, kF, ex)] -
|
|
F45 * fh[idx_fh_F(iF - 1, jF, kF, ex)] +
|
|
F45 * fh[idx_fh_F(iF + 1, jF, kF, ex)] -
|
|
F9 * fh[idx_fh_F(iF + 2, jF, kF, ex)] +
|
|
fh[idx_fh_F(iF + 3, jF, kF, ex)]);
|
|
|
|
fy[p] = d60dy * (
|
|
-fh[idx_fh_F(iF, jF - 3, kF, ex)] +
|
|
F9 * fh[idx_fh_F(iF, jF - 2, kF, ex)] -
|
|
F45 * fh[idx_fh_F(iF, jF - 1, kF, ex)] +
|
|
F45 * fh[idx_fh_F(iF, jF + 1, kF, ex)] -
|
|
F9 * fh[idx_fh_F(iF, jF + 2, kF, ex)] +
|
|
fh[idx_fh_F(iF, jF + 3, kF, ex)]);
|
|
|
|
fz[p] = d60dz * (
|
|
-fh[idx_fh_F(iF, jF, kF - 3, ex)] +
|
|
F9 * fh[idx_fh_F(iF, jF, kF - 2, ex)] -
|
|
F45 * fh[idx_fh_F(iF, jF, kF - 1, ex)] +
|
|
F45 * fh[idx_fh_F(iF, jF, kF + 1, ex)] -
|
|
F9 * fh[idx_fh_F(iF, jF, kF + 2, ex)] +
|
|
fh[idx_fh_F(iF, jF, kF + 3, ex)]);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
return;
|
|
}
|
|
#elif (ghost_width == 5)
|
|
/* ---- 8th-order ----------------------------------------------------- */
|
|
{
|
|
const int ord = 4;
|
|
|
|
int iminF = 1, jminF = 1, kminF = 1;
|
|
if (Symmetry > NO_SYMM && fabs(Z[0]) < dZ) kminF = -3;
|
|
if (Symmetry > EQ_SYMM && fabs(X[0]) < dX) iminF = -3;
|
|
if (Symmetry > EQ_SYMM && fabs(Y[0]) < dY) jminF = -3;
|
|
|
|
const double SoA[3] = { SYM1, SYM2, SYM3 };
|
|
|
|
const size_t nx = (size_t)ex1 + ord;
|
|
const size_t ny = (size_t)ex2 + ord;
|
|
const size_t nz = (size_t)ex3 + ord;
|
|
const size_t fh_size = nx * ny * nz;
|
|
|
|
static double *fh_buf = NULL;
|
|
static size_t cap = 0;
|
|
if (fh_size > cap) {
|
|
free(fh_buf);
|
|
fh_buf = (double*)aligned_alloc(64, fh_size * sizeof(double));
|
|
cap = fh_size;
|
|
}
|
|
double *fh = fh_buf;
|
|
if (!fh) return;
|
|
|
|
symmetry_bd(ord, ex, f, fh, SoA);
|
|
|
|
const double d840dx = ONE / F840 / dX;
|
|
const double d840dy = ONE / F840 / dY;
|
|
const double d840dz = ONE / F840 / dZ;
|
|
const double d60dx = ONE / F60 / dX;
|
|
const double d60dy = ONE / F60 / dY;
|
|
const double d60dz = ONE / F60 / dZ;
|
|
const double d12dx = ONE / F12 / dX;
|
|
const double d12dy = ONE / F12 / dY;
|
|
const double d12dz = ONE / F12 / dZ;
|
|
const double d2dx = ONE / TWO / dX;
|
|
const double d2dy = ONE / TWO / dY;
|
|
const double d2dz = ONE / TWO / dZ;
|
|
|
|
const size_t all = (size_t)ex1 * (size_t)ex2 * (size_t)ex3;
|
|
for (size_t p = 0; p < all; ++p) {
|
|
fx[p] = ZEO; fy[p] = ZEO; fz[p] = ZEO;
|
|
}
|
|
|
|
/* 2nd: 3pt, widest */
|
|
const int i2_lo = (iminF + 2 > 0) ? (iminF + 2) : 0;
|
|
const int j2_lo = (jminF + 2 > 0) ? (jminF + 2) : 0;
|
|
const int k2_lo = (kminF + 2 > 0) ? (kminF + 2) : 0;
|
|
const int i2_hi = ex1 - 2;
|
|
const int j2_hi = ex2 - 2;
|
|
const int k2_hi = ex3 - 2;
|
|
|
|
/* 4th: 5pt */
|
|
const int i4_lo = (iminF + 3 > 0) ? (iminF + 3) : 0;
|
|
const int j4_lo = (jminF + 3 > 0) ? (jminF + 3) : 0;
|
|
const int k4_lo = (kminF + 3 > 0) ? (kminF + 3) : 0;
|
|
const int i4_hi = ex1 - 3;
|
|
const int j4_hi = ex2 - 3;
|
|
const int k4_hi = ex3 - 3;
|
|
|
|
/* 6th: 7pt */
|
|
const int i6_lo = (iminF + 4 > 0) ? (iminF + 4) : 0;
|
|
const int j6_lo = (jminF + 4 > 0) ? (jminF + 4) : 0;
|
|
const int k6_lo = (kminF + 4 > 0) ? (kminF + 4) : 0;
|
|
const int i6_hi = ex1 - 4;
|
|
const int j6_hi = ex2 - 4;
|
|
const int k6_hi = ex3 - 4;
|
|
|
|
/* 8th: 9pt, narrowest */
|
|
const int i8_lo = (iminF + 5 > 0) ? (iminF + 5) : 0;
|
|
const int j8_lo = (jminF + 5 > 0) ? (jminF + 5) : 0;
|
|
const int k8_lo = (kminF + 5 > 0) ? (kminF + 5) : 0;
|
|
const int i8_hi = ex1 - 5;
|
|
const int j8_hi = ex2 - 5;
|
|
const int k8_hi = ex3 - 5;
|
|
|
|
if (i2_lo <= i2_hi && j2_lo <= j2_hi && k2_lo <= k2_hi) {
|
|
for (int k0 = k2_lo; k0 <= k2_hi; ++k0) {
|
|
const int kF = k0 + 3;
|
|
for (int j0 = j2_lo; j0 <= j2_hi; ++j0) {
|
|
const int jF = j0 + 3;
|
|
for (int i0 = i2_lo; i0 <= i2_hi; ++i0) {
|
|
const int iF = i0 + 3;
|
|
const size_t p = idx_ex(i0, j0, k0, ex);
|
|
|
|
fx[p] = d2dx * (
|
|
-fh[idx_fh_F_ord4(iF - 1, jF, kF, ex)] +
|
|
fh[idx_fh_F_ord4(iF + 1, jF, kF, ex)]);
|
|
fy[p] = d2dy * (
|
|
-fh[idx_fh_F_ord4(iF, jF - 1, kF, ex)] +
|
|
fh[idx_fh_F_ord4(iF, jF + 1, kF, ex)]);
|
|
fz[p] = d2dz * (
|
|
-fh[idx_fh_F_ord4(iF, jF, kF - 1, ex)] +
|
|
fh[idx_fh_F_ord4(iF, jF, kF + 1, ex)]);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
if (i4_lo <= i4_hi && j4_lo <= j4_hi && k4_lo <= k4_hi) {
|
|
for (int k0 = k4_lo; k0 <= k4_hi; ++k0) {
|
|
const int kF = k0 + 3;
|
|
for (int j0 = j4_lo; j0 <= j4_hi; ++j0) {
|
|
const int jF = j0 + 3;
|
|
for (int i0 = i4_lo; i0 <= i4_hi; ++i0) {
|
|
const int iF = i0 + 3;
|
|
const size_t p = idx_ex(i0, j0, k0, ex);
|
|
|
|
fx[p] = d12dx * (
|
|
fh[idx_fh_F_ord4(iF - 2, jF, kF, ex)] -
|
|
EIT * fh[idx_fh_F_ord4(iF - 1, jF, kF, ex)] +
|
|
EIT * fh[idx_fh_F_ord4(iF + 1, jF, kF, ex)] -
|
|
fh[idx_fh_F_ord4(iF + 2, jF, kF, ex)]);
|
|
|
|
fy[p] = d12dy * (
|
|
fh[idx_fh_F_ord4(iF, jF - 2, kF, ex)] -
|
|
EIT * fh[idx_fh_F_ord4(iF, jF - 1, kF, ex)] +
|
|
EIT * fh[idx_fh_F_ord4(iF, jF + 1, kF, ex)] -
|
|
fh[idx_fh_F_ord4(iF, jF + 2, kF, ex)]);
|
|
|
|
fz[p] = d12dz * (
|
|
fh[idx_fh_F_ord4(iF, jF, kF - 2, ex)] -
|
|
EIT * fh[idx_fh_F_ord4(iF, jF, kF - 1, ex)] +
|
|
EIT * fh[idx_fh_F_ord4(iF, jF, kF + 1, ex)] -
|
|
fh[idx_fh_F_ord4(iF, jF, kF + 2, ex)]);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
if (i6_lo <= i6_hi && j6_lo <= j6_hi && k6_lo <= k6_hi) {
|
|
for (int k0 = k6_lo; k0 <= k6_hi; ++k0) {
|
|
const int kF = k0 + 3;
|
|
for (int j0 = j6_lo; j0 <= j6_hi; ++j0) {
|
|
const int jF = j0 + 3;
|
|
for (int i0 = i6_lo; i0 <= i6_hi; ++i0) {
|
|
const int iF = i0 + 3;
|
|
const size_t p = idx_ex(i0, j0, k0, ex);
|
|
|
|
fx[p] = d60dx * (
|
|
-fh[idx_fh_F_ord4(iF - 3, jF, kF, ex)] +
|
|
F9 * fh[idx_fh_F_ord4(iF - 2, jF, kF, ex)] -
|
|
F45 * fh[idx_fh_F_ord4(iF - 1, jF, kF, ex)] +
|
|
F45 * fh[idx_fh_F_ord4(iF + 1, jF, kF, ex)] -
|
|
F9 * fh[idx_fh_F_ord4(iF + 2, jF, kF, ex)] +
|
|
fh[idx_fh_F_ord4(iF + 3, jF, kF, ex)]);
|
|
|
|
fy[p] = d60dy * (
|
|
-fh[idx_fh_F_ord4(iF, jF - 3, kF, ex)] +
|
|
F9 * fh[idx_fh_F_ord4(iF, jF - 2, kF, ex)] -
|
|
F45 * fh[idx_fh_F_ord4(iF, jF - 1, kF, ex)] +
|
|
F45 * fh[idx_fh_F_ord4(iF, jF + 1, kF, ex)] -
|
|
F9 * fh[idx_fh_F_ord4(iF, jF + 2, kF, ex)] +
|
|
fh[idx_fh_F_ord4(iF, jF + 3, kF, ex)]);
|
|
|
|
fz[p] = d60dz * (
|
|
-fh[idx_fh_F_ord4(iF, jF, kF - 3, ex)] +
|
|
F9 * fh[idx_fh_F_ord4(iF, jF, kF - 2, ex)] -
|
|
F45 * fh[idx_fh_F_ord4(iF, jF, kF - 1, ex)] +
|
|
F45 * fh[idx_fh_F_ord4(iF, jF, kF + 1, ex)] -
|
|
F9 * fh[idx_fh_F_ord4(iF, jF, kF + 2, ex)] +
|
|
fh[idx_fh_F_ord4(iF, jF, kF + 3, ex)]);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
/* 8th-order overwrite: [+3,-32,+168,-672,0,+672,-168,+32,-3] / (840*dx) */
|
|
if (i8_lo <= i8_hi && j8_lo <= j8_hi && k8_lo <= k8_hi) {
|
|
for (int k0 = k8_lo; k0 <= k8_hi; ++k0) {
|
|
const int kF = k0 + 3;
|
|
for (int j0 = j8_lo; j0 <= j8_hi; ++j0) {
|
|
const int jF = j0 + 3;
|
|
for (int i0 = i8_lo; i0 <= i8_hi; ++i0) {
|
|
const int iF = i0 + 3;
|
|
const size_t p = idx_ex(i0, j0, k0, ex);
|
|
|
|
fx[p] = d840dx * (
|
|
+(double)3 * fh[idx_fh_F_ord4(iF - 4, jF, kF, ex)] -
|
|
F32 * fh[idx_fh_F_ord4(iF - 3, jF, kF, ex)] +
|
|
F168 * fh[idx_fh_F_ord4(iF - 2, jF, kF, ex)] -
|
|
F672 * fh[idx_fh_F_ord4(iF - 1, jF, kF, ex)] +
|
|
F672 * fh[idx_fh_F_ord4(iF + 1, jF, kF, ex)] -
|
|
F168 * fh[idx_fh_F_ord4(iF + 2, jF, kF, ex)] +
|
|
F32 * fh[idx_fh_F_ord4(iF + 3, jF, kF, ex)] -
|
|
(double)3 * fh[idx_fh_F_ord4(iF + 4, jF, kF, ex)]);
|
|
|
|
fy[p] = d840dy * (
|
|
+(double)3 * fh[idx_fh_F_ord4(iF, jF - 4, kF, ex)] -
|
|
F32 * fh[idx_fh_F_ord4(iF, jF - 3, kF, ex)] +
|
|
F168 * fh[idx_fh_F_ord4(iF, jF - 2, kF, ex)] -
|
|
F672 * fh[idx_fh_F_ord4(iF, jF - 1, kF, ex)] +
|
|
F672 * fh[idx_fh_F_ord4(iF, jF + 1, kF, ex)] -
|
|
F168 * fh[idx_fh_F_ord4(iF, jF + 2, kF, ex)] +
|
|
F32 * fh[idx_fh_F_ord4(iF, jF + 3, kF, ex)] -
|
|
(double)3 * fh[idx_fh_F_ord4(iF, jF + 4, kF, ex)]);
|
|
|
|
fz[p] = d840dz * (
|
|
+(double)3 * fh[idx_fh_F_ord4(iF, jF, kF - 4, ex)] -
|
|
F32 * fh[idx_fh_F_ord4(iF, jF, kF - 3, ex)] +
|
|
F168 * fh[idx_fh_F_ord4(iF, jF, kF - 2, ex)] -
|
|
F672 * fh[idx_fh_F_ord4(iF, jF, kF - 1, ex)] +
|
|
F672 * fh[idx_fh_F_ord4(iF, jF, kF + 1, ex)] -
|
|
F168 * fh[idx_fh_F_ord4(iF, jF, kF + 2, ex)] +
|
|
F32 * fh[idx_fh_F_ord4(iF, jF, kF + 3, ex)] -
|
|
(double)3 * fh[idx_fh_F_ord4(iF, jF, kF + 4, ex)]);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
return;
|
|
}
|
|
#else
|
|
#error "fderivs_c.C: unsupported ghost_width (must be 2, 3, 4, or 5)"
|
|
#endif
|
|
}
|