Add full FD order support (2nd/4th/6th/8th) to C derivative kernels via ghost_width dispatch
Wrap each C kernel in #if (ghost_width == N) blocks matching Fortran stencil coefficients from diff_new.f90, kodiss.f90, and lopsidediff.f90. Add fast-path indexing for ord=1,4,5 in share_func.h. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
File diff suppressed because it is too large
Load Diff
@@ -1,14 +1,18 @@
|
|||||||
|
#include "macrodef.h"
|
||||||
#include "tool.h"
|
#include "tool.h"
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* C 版 fderivs
|
* C 版 fderivs — first derivatives df/dx, df/dy, df/dz.
|
||||||
*
|
*
|
||||||
* Fortran:
|
* Finite difference order is selected at compile time via the ghost_width macro
|
||||||
* subroutine fderivs(ex,f,fx,fy,fz,X,Y,Z,SYM1,SYM2,SYM3,symmetry,onoff)
|
* (defined in macrodef.fh):
|
||||||
|
* ghost_width = 2 → 2nd-order
|
||||||
|
* ghost_width = 3 → 4th-order
|
||||||
|
* ghost_width = 4 → 6th-order
|
||||||
|
* ghost_width = 5 → 8th-order
|
||||||
*
|
*
|
||||||
* 约定:
|
* Multi-pass overwrite strategy: compute the widest (lowest-order) stencil first,
|
||||||
* f, fx, fy, fz: ex1*ex2*ex3,按 idx_ex 布局
|
* then overwrite interior regions with progressively higher-order stencils.
|
||||||
* X: ex1, Y: ex2, Z: ex3
|
|
||||||
*/
|
*/
|
||||||
void fderivs(const int ex[3],
|
void fderivs(const int ex[3],
|
||||||
const double *f,
|
const double *f,
|
||||||
@@ -17,151 +21,596 @@ void fderivs(const int ex[3],
|
|||||||
double SYM1, double SYM2, double SYM3,
|
double SYM1, double SYM2, double SYM3,
|
||||||
int Symmetry, int onoff)
|
int Symmetry, int onoff)
|
||||||
{
|
{
|
||||||
(void)onoff; // Fortran 里没用到
|
(void)onoff;
|
||||||
|
|
||||||
const double ZEO = 0.0, ONE = 1.0;
|
const double ZEO = 0.0, ONE = 1.0, TWO = 2.0, EIT = 8.0;
|
||||||
const double TWO = 2.0, EIT = 8.0;
|
const double F9 = 9.0, F12 = 12.0, F45 = 45.0, F60 = 60.0;
|
||||||
const double F12 = 12.0;
|
const double F32 = 32.0, F168 = 168.0, F672 = 672.0, F840 = 840.0;
|
||||||
|
|
||||||
const int NO_SYMM = 0, EQ_SYMM = 1; // OCTANT=2 在本子程序里不直接用
|
const int NO_SYMM = 0, EQ_SYMM = 1;
|
||||||
|
|
||||||
const int ex1 = ex[0], ex2 = ex[1], ex3 = ex[2];
|
const int ex1 = ex[0], ex2 = ex[1], ex3 = ex[2];
|
||||||
|
|
||||||
// dX = X(2)-X(1) -> C: X[1]-X[0]
|
|
||||||
const double dX = X[1] - X[0];
|
const double dX = X[1] - X[0];
|
||||||
const double dY = Y[1] - Y[0];
|
const double dY = Y[1] - Y[0];
|
||||||
const double dZ = Z[1] - Z[0];
|
const double dZ = Z[1] - Z[0];
|
||||||
|
|
||||||
// Fortran 1-based bounds
|
const int imaxF = ex1, jmaxF = ex2, kmaxF = ex3;
|
||||||
const int imaxF = ex1;
|
|
||||||
const int jmaxF = ex2;
|
|
||||||
const int kmaxF = ex3;
|
|
||||||
|
|
||||||
int iminF = 1, jminF = 1, kminF = 1;
|
const int gw = ghost_width; // compile-time constant
|
||||||
if (Symmetry > NO_SYMM && fabs(Z[0]) < dZ) kminF = -1;
|
|
||||||
if (Symmetry > EQ_SYMM && fabs(X[0]) < dX) iminF = -1;
|
|
||||||
if (Symmetry > EQ_SYMM && fabs(Y[0]) < dY) jminF = -1;
|
|
||||||
|
|
||||||
// SoA(1:3) = SYM1,SYM2,SYM3
|
#if (ghost_width == 2)
|
||||||
const double SoA[3] = { SYM1, SYM2, SYM3 };
|
/* ---- 2nd-order ------------------------------------------------------ */
|
||||||
|
{
|
||||||
|
const int ord = 1; // symmetry_bd ord = ghost_width - 1
|
||||||
|
|
||||||
// fh: (ex1+2)*(ex2+2)*(ex3+2) because ord=2
|
int iminF = 1, jminF = 1, kminF = 1;
|
||||||
const size_t nx = (size_t)ex1 + 2;
|
if (Symmetry > NO_SYMM && fabs(Z[0]) < dZ) kminF = 0;
|
||||||
const size_t ny = (size_t)ex2 + 2;
|
if (Symmetry > EQ_SYMM && fabs(X[0]) < dX) iminF = 0;
|
||||||
const size_t nz = (size_t)ex3 + 2;
|
if (Symmetry > EQ_SYMM && fabs(Y[0]) < dY) jminF = 0;
|
||||||
const size_t fh_size = nx * ny * nz;
|
|
||||||
static double *fh = NULL;
|
|
||||||
static size_t cap = 0;
|
|
||||||
|
|
||||||
if (fh_size > cap) {
|
const double SoA[3] = { SYM1, SYM2, SYM3 };
|
||||||
free(fh);
|
|
||||||
fh = (double*)aligned_alloc(64, fh_size * sizeof(double));
|
|
||||||
cap = fh_size;
|
|
||||||
}
|
|
||||||
// double *fh = (double*)malloc(fh_size * sizeof(double));
|
|
||||||
if (!fh) return;
|
|
||||||
|
|
||||||
// call symmetry_bd(2,ex,f,fh,SoA)
|
const size_t nx = (size_t)ex1 + ord;
|
||||||
symmetry_bd(2, ex, f, fh, SoA);
|
const size_t ny = (size_t)ex2 + ord;
|
||||||
|
const size_t nz = (size_t)ex3 + ord;
|
||||||
|
const size_t fh_size = nx * ny * nz;
|
||||||
|
|
||||||
const double d12dx = ONE / F12 / dX;
|
static double *fh_buf = NULL;
|
||||||
const double d12dy = ONE / F12 / dY;
|
static size_t cap = 0;
|
||||||
const double d12dz = ONE / F12 / dZ;
|
if (fh_size > cap) {
|
||||||
|
free(fh_buf);
|
||||||
|
fh_buf = (double*)aligned_alloc(64, fh_size * sizeof(double));
|
||||||
|
cap = fh_size;
|
||||||
|
}
|
||||||
|
double *fh = fh_buf;
|
||||||
|
if (!fh) return;
|
||||||
|
|
||||||
const double d2dx = ONE / TWO / dX;
|
symmetry_bd(ord, ex, f, fh, SoA);
|
||||||
const double d2dy = ONE / TWO / dY;
|
|
||||||
const double d2dz = ONE / TWO / dZ;
|
|
||||||
|
|
||||||
// fx = fy = fz = 0
|
const double d2dx = ONE / TWO / dX;
|
||||||
const size_t all = (size_t)ex1 * (size_t)ex2 * (size_t)ex3;
|
const double d2dy = ONE / TWO / dY;
|
||||||
for (size_t p = 0; p < all; ++p) {
|
const double d2dz = ONE / TWO / dZ;
|
||||||
fx[p] = ZEO;
|
|
||||||
fy[p] = ZEO;
|
|
||||||
fz[p] = ZEO;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
const size_t all = (size_t)ex1 * (size_t)ex2 * (size_t)ex3;
|
||||||
* 两段式:
|
for (size_t p = 0; p < all; ++p) {
|
||||||
* 1) 先在二阶可用区域计算二阶模板
|
fx[p] = ZEO; fy[p] = ZEO; fz[p] = ZEO;
|
||||||
* 2) 再在高阶可用区域覆盖为四阶模板
|
}
|
||||||
*
|
|
||||||
* 与原 if/elseif 逻辑等价,但减少逐点分支判断。
|
|
||||||
*/
|
|
||||||
const int i2_lo = (iminF > 0) ? iminF : 0;
|
|
||||||
const int j2_lo = (jminF > 0) ? jminF : 0;
|
|
||||||
const int k2_lo = (kminF > 0) ? kminF : 0;
|
|
||||||
const int i2_hi = ex1 - 2;
|
|
||||||
const int j2_hi = ex2 - 2;
|
|
||||||
const int k2_hi = ex3 - 2;
|
|
||||||
|
|
||||||
const int i4_lo = (iminF + 1 > 0) ? (iminF + 1) : 0;
|
/* 2nd-order pass: [-1, 0, +1] / (2*dx) */
|
||||||
const int j4_lo = (jminF + 1 > 0) ? (jminF + 1) : 0;
|
const int i2_lo = (iminF > 0) ? iminF : 0;
|
||||||
const int k4_lo = (kminF + 1 > 0) ? (kminF + 1) : 0;
|
const int j2_lo = (jminF > 0) ? jminF : 0;
|
||||||
const int i4_hi = ex1 - 3;
|
const int k2_lo = (kminF > 0) ? kminF : 0;
|
||||||
const int j4_hi = ex2 - 3;
|
const int i2_hi = ex1 - 2;
|
||||||
const int k4_hi = ex3 - 3;
|
const int j2_hi = ex2 - 2;
|
||||||
|
const int k2_hi = ex3 - 2;
|
||||||
|
|
||||||
if (i2_lo <= i2_hi && j2_lo <= j2_hi && k2_lo <= k2_hi) {
|
if (i2_lo <= i2_hi && j2_lo <= j2_hi && k2_lo <= k2_hi) {
|
||||||
for (int k0 = k2_lo; k0 <= k2_hi; ++k0) {
|
for (int k0 = k2_lo; k0 <= k2_hi; ++k0) {
|
||||||
const int kF = k0 + 1;
|
const int kF = k0; // Fortran index for fh: kF = k0 (shift=0)
|
||||||
for (int j0 = j2_lo; j0 <= j2_hi; ++j0) {
|
for (int j0 = j2_lo; j0 <= j2_hi; ++j0) {
|
||||||
const int jF = j0 + 1;
|
const int jF = j0;
|
||||||
for (int i0 = i2_lo; i0 <= i2_hi; ++i0) {
|
for (int i0 = i2_lo; i0 <= i2_hi; ++i0) {
|
||||||
const int iF = i0 + 1;
|
const int iF = i0;
|
||||||
const size_t p = idx_ex(i0, j0, k0, ex);
|
const size_t p = idx_ex(i0, j0, k0, ex);
|
||||||
|
|
||||||
fx[p] = d2dx * (
|
fx[p] = d2dx * (
|
||||||
-fh[idx_fh_F_ord2(iF - 1, jF, kF, ex)] +
|
-fh[idx_fh_F_ord1(iF - 1, jF, kF, ex)] +
|
||||||
fh[idx_fh_F_ord2(iF + 1, jF, kF, ex)]
|
fh[idx_fh_F_ord1(iF + 1, jF, kF, ex)]
|
||||||
);
|
);
|
||||||
|
|
||||||
fy[p] = d2dy * (
|
fy[p] = d2dy * (
|
||||||
-fh[idx_fh_F_ord2(iF, jF - 1, kF, ex)] +
|
-fh[idx_fh_F_ord1(iF, jF - 1, kF, ex)] +
|
||||||
fh[idx_fh_F_ord2(iF, jF + 1, kF, ex)]
|
fh[idx_fh_F_ord1(iF, jF + 1, kF, ex)]
|
||||||
);
|
);
|
||||||
|
|
||||||
fz[p] = d2dz * (
|
fz[p] = d2dz * (
|
||||||
-fh[idx_fh_F_ord2(iF, jF, kF - 1, ex)] +
|
-fh[idx_fh_F_ord1(iF, jF, kF - 1, ex)] +
|
||||||
fh[idx_fh_F_ord2(iF, jF, kF + 1, ex)]
|
fh[idx_fh_F_ord1(iF, jF, kF + 1, ex)]
|
||||||
);
|
);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
return;
|
||||||
}
|
}
|
||||||
|
#elif (ghost_width == 3)
|
||||||
|
/* ---- 4th-order (original code) ------------------------------------ */
|
||||||
|
{
|
||||||
|
const int ord = 2; // symmetry_bd ord
|
||||||
|
|
||||||
if (i4_lo <= i4_hi && j4_lo <= j4_hi && k4_lo <= k4_hi) {
|
int iminF = 1, jminF = 1, kminF = 1;
|
||||||
for (int k0 = k4_lo; k0 <= k4_hi; ++k0) {
|
if (Symmetry > NO_SYMM && fabs(Z[0]) < dZ) kminF = -1;
|
||||||
const int kF = k0 + 1;
|
if (Symmetry > EQ_SYMM && fabs(X[0]) < dX) iminF = -1;
|
||||||
for (int j0 = j4_lo; j0 <= j4_hi; ++j0) {
|
if (Symmetry > EQ_SYMM && fabs(Y[0]) < dY) jminF = -1;
|
||||||
const int jF = j0 + 1;
|
|
||||||
for (int i0 = i4_lo; i0 <= i4_hi; ++i0) {
|
|
||||||
const int iF = i0 + 1;
|
|
||||||
const size_t p = idx_ex(i0, j0, k0, ex);
|
|
||||||
|
|
||||||
fx[p] = d12dx * (
|
const double SoA[3] = { SYM1, SYM2, SYM3 };
|
||||||
fh[idx_fh_F_ord2(iF - 2, jF, kF, ex)] -
|
|
||||||
EIT * fh[idx_fh_F_ord2(iF - 1, jF, kF, ex)] +
|
|
||||||
EIT * fh[idx_fh_F_ord2(iF + 1, jF, kF, ex)] -
|
|
||||||
fh[idx_fh_F_ord2(iF + 2, jF, kF, ex)]
|
|
||||||
);
|
|
||||||
|
|
||||||
fy[p] = d12dy * (
|
const size_t nx = (size_t)ex1 + ord;
|
||||||
fh[idx_fh_F_ord2(iF, jF - 2, kF, ex)] -
|
const size_t ny = (size_t)ex2 + ord;
|
||||||
EIT * fh[idx_fh_F_ord2(iF, jF - 1, kF, ex)] +
|
const size_t nz = (size_t)ex3 + ord;
|
||||||
EIT * fh[idx_fh_F_ord2(iF, jF + 1, kF, ex)] -
|
const size_t fh_size = nx * ny * nz;
|
||||||
fh[idx_fh_F_ord2(iF, jF + 2, kF, ex)]
|
|
||||||
);
|
|
||||||
|
|
||||||
fz[p] = d12dz * (
|
static double *fh_buf = NULL;
|
||||||
fh[idx_fh_F_ord2(iF, jF, kF - 2, ex)] -
|
static size_t cap = 0;
|
||||||
EIT * fh[idx_fh_F_ord2(iF, jF, kF - 1, ex)] +
|
if (fh_size > cap) {
|
||||||
EIT * fh[idx_fh_F_ord2(iF, jF, kF + 1, ex)] -
|
free(fh_buf);
|
||||||
fh[idx_fh_F_ord2(iF, jF, kF + 2, ex)]
|
fh_buf = (double*)aligned_alloc(64, fh_size * sizeof(double));
|
||||||
);
|
cap = fh_size;
|
||||||
|
}
|
||||||
|
double *fh = fh_buf;
|
||||||
|
if (!fh) return;
|
||||||
|
|
||||||
|
symmetry_bd(ord, ex, f, fh, SoA);
|
||||||
|
|
||||||
|
const double d12dx = ONE / F12 / dX;
|
||||||
|
const double d12dy = ONE / F12 / dY;
|
||||||
|
const double d12dz = ONE / F12 / dZ;
|
||||||
|
const double d2dx = ONE / TWO / dX;
|
||||||
|
const double d2dy = ONE / TWO / dY;
|
||||||
|
const double d2dz = ONE / TWO / dZ;
|
||||||
|
|
||||||
|
const size_t all = (size_t)ex1 * (size_t)ex2 * (size_t)ex3;
|
||||||
|
for (size_t p = 0; p < all; ++p) {
|
||||||
|
fx[p] = ZEO; fy[p] = ZEO; fz[p] = ZEO;
|
||||||
|
}
|
||||||
|
|
||||||
|
const int i2_lo = (iminF > 0) ? iminF : 0;
|
||||||
|
const int j2_lo = (jminF > 0) ? jminF : 0;
|
||||||
|
const int k2_lo = (kminF > 0) ? kminF : 0;
|
||||||
|
const int i2_hi = ex1 - 2;
|
||||||
|
const int j2_hi = ex2 - 2;
|
||||||
|
const int k2_hi = ex3 - 2;
|
||||||
|
|
||||||
|
const int i4_lo = (iminF + 1 > 0) ? (iminF + 1) : 0;
|
||||||
|
const int j4_lo = (jminF + 1 > 0) ? (jminF + 1) : 0;
|
||||||
|
const int k4_lo = (kminF + 1 > 0) ? (kminF + 1) : 0;
|
||||||
|
const int i4_hi = ex1 - 3;
|
||||||
|
const int j4_hi = ex2 - 3;
|
||||||
|
const int k4_hi = ex3 - 3;
|
||||||
|
|
||||||
|
if (i2_lo <= i2_hi && j2_lo <= j2_hi && k2_lo <= k2_hi) {
|
||||||
|
for (int k0 = k2_lo; k0 <= k2_hi; ++k0) {
|
||||||
|
const int kF = k0 + 1;
|
||||||
|
for (int j0 = j2_lo; j0 <= j2_hi; ++j0) {
|
||||||
|
const int jF = j0 + 1;
|
||||||
|
for (int i0 = i2_lo; i0 <= i2_hi; ++i0) {
|
||||||
|
const int iF = i0 + 1;
|
||||||
|
const size_t p = idx_ex(i0, j0, k0, ex);
|
||||||
|
|
||||||
|
fx[p] = d2dx * (
|
||||||
|
-fh[idx_fh_F_ord2(iF - 1, jF, kF, ex)] +
|
||||||
|
fh[idx_fh_F_ord2(iF + 1, jF, kF, ex)]
|
||||||
|
);
|
||||||
|
|
||||||
|
fy[p] = d2dy * (
|
||||||
|
-fh[idx_fh_F_ord2(iF, jF - 1, kF, ex)] +
|
||||||
|
fh[idx_fh_F_ord2(iF, jF + 1, kF, ex)]
|
||||||
|
);
|
||||||
|
|
||||||
|
fz[p] = d2dz * (
|
||||||
|
-fh[idx_fh_F_ord2(iF, jF, kF - 1, ex)] +
|
||||||
|
fh[idx_fh_F_ord2(iF, jF, kF + 1, ex)]
|
||||||
|
);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
// free(fh);
|
if (i4_lo <= i4_hi && j4_lo <= j4_hi && k4_lo <= k4_hi) {
|
||||||
|
for (int k0 = k4_lo; k0 <= k4_hi; ++k0) {
|
||||||
|
const int kF = k0 + 1;
|
||||||
|
for (int j0 = j4_lo; j0 <= j4_hi; ++j0) {
|
||||||
|
const int jF = j0 + 1;
|
||||||
|
for (int i0 = i4_lo; i0 <= i4_hi; ++i0) {
|
||||||
|
const int iF = i0 + 1;
|
||||||
|
const size_t p = idx_ex(i0, j0, k0, ex);
|
||||||
|
|
||||||
|
fx[p] = d12dx * (
|
||||||
|
fh[idx_fh_F_ord2(iF - 2, jF, kF, ex)] -
|
||||||
|
EIT * fh[idx_fh_F_ord2(iF - 1, jF, kF, ex)] +
|
||||||
|
EIT * fh[idx_fh_F_ord2(iF + 1, jF, kF, ex)] -
|
||||||
|
fh[idx_fh_F_ord2(iF + 2, jF, kF, ex)]
|
||||||
|
);
|
||||||
|
|
||||||
|
fy[p] = d12dy * (
|
||||||
|
fh[idx_fh_F_ord2(iF, jF - 2, kF, ex)] -
|
||||||
|
EIT * fh[idx_fh_F_ord2(iF, jF - 1, kF, ex)] +
|
||||||
|
EIT * fh[idx_fh_F_ord2(iF, jF + 1, kF, ex)] -
|
||||||
|
fh[idx_fh_F_ord2(iF, jF + 2, kF, ex)]
|
||||||
|
);
|
||||||
|
|
||||||
|
fz[p] = d12dz * (
|
||||||
|
fh[idx_fh_F_ord2(iF, jF, kF - 2, ex)] -
|
||||||
|
EIT * fh[idx_fh_F_ord2(iF, jF, kF - 1, ex)] +
|
||||||
|
EIT * fh[idx_fh_F_ord2(iF, jF, kF + 1, ex)] -
|
||||||
|
fh[idx_fh_F_ord2(iF, jF, kF + 2, ex)]
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
#elif (ghost_width == 4)
|
||||||
|
/* ---- 6th-order ----------------------------------------------------- */
|
||||||
|
{
|
||||||
|
const int ord = 3;
|
||||||
|
|
||||||
|
int iminF = 1, jminF = 1, kminF = 1;
|
||||||
|
if (Symmetry > NO_SYMM && fabs(Z[0]) < dZ) kminF = -2;
|
||||||
|
if (Symmetry > EQ_SYMM && fabs(X[0]) < dX) iminF = -2;
|
||||||
|
if (Symmetry > EQ_SYMM && fabs(Y[0]) < dY) jminF = -2;
|
||||||
|
|
||||||
|
const double SoA[3] = { SYM1, SYM2, SYM3 };
|
||||||
|
|
||||||
|
const size_t nx = (size_t)ex1 + ord;
|
||||||
|
const size_t ny = (size_t)ex2 + ord;
|
||||||
|
const size_t nz = (size_t)ex3 + ord;
|
||||||
|
const size_t fh_size = nx * ny * nz;
|
||||||
|
|
||||||
|
static double *fh_buf = NULL;
|
||||||
|
static size_t cap = 0;
|
||||||
|
if (fh_size > cap) {
|
||||||
|
free(fh_buf);
|
||||||
|
fh_buf = (double*)aligned_alloc(64, fh_size * sizeof(double));
|
||||||
|
cap = fh_size;
|
||||||
|
}
|
||||||
|
double *fh = fh_buf;
|
||||||
|
if (!fh) return;
|
||||||
|
|
||||||
|
symmetry_bd(ord, ex, f, fh, SoA);
|
||||||
|
|
||||||
|
/* Denominators */
|
||||||
|
const double d60dx = ONE / F60 / dX;
|
||||||
|
const double d60dy = ONE / F60 / dY;
|
||||||
|
const double d60dz = ONE / F60 / dZ;
|
||||||
|
const double d12dx = ONE / F12 / dX;
|
||||||
|
const double d12dy = ONE / F12 / dY;
|
||||||
|
const double d12dz = ONE / F12 / dZ;
|
||||||
|
const double d2dx = ONE / TWO / dX;
|
||||||
|
const double d2dy = ONE / TWO / dY;
|
||||||
|
const double d2dz = ONE / TWO / dZ;
|
||||||
|
|
||||||
|
const size_t all = (size_t)ex1 * (size_t)ex2 * (size_t)ex3;
|
||||||
|
for (size_t p = 0; p < all; ++p) {
|
||||||
|
fx[p] = ZEO; fy[p] = ZEO; fz[p] = ZEO;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* 2nd-order pass: 3pt, widest */
|
||||||
|
const int i2_lo = (iminF + 1 > 0) ? (iminF + 1) : 0;
|
||||||
|
const int j2_lo = (jminF + 1 > 0) ? (jminF + 1) : 0;
|
||||||
|
const int k2_lo = (kminF + 1 > 0) ? (kminF + 1) : 0;
|
||||||
|
const int i2_hi = ex1 - 2;
|
||||||
|
const int j2_hi = ex2 - 2;
|
||||||
|
const int k2_hi = ex3 - 2;
|
||||||
|
|
||||||
|
/* 4th-order pass: 5pt */
|
||||||
|
const int i4_lo = (iminF + 2 > 0) ? (iminF + 2) : 0;
|
||||||
|
const int j4_lo = (jminF + 2 > 0) ? (jminF + 2) : 0;
|
||||||
|
const int k4_lo = (kminF + 2 > 0) ? (kminF + 2) : 0;
|
||||||
|
const int i4_hi = ex1 - 3;
|
||||||
|
const int j4_hi = ex2 - 3;
|
||||||
|
const int k4_hi = ex3 - 3;
|
||||||
|
|
||||||
|
/* 6th-order pass: 7pt, narrowest interior */
|
||||||
|
const int i6_lo = (iminF + 3 > 0) ? (iminF + 3) : 0;
|
||||||
|
const int j6_lo = (jminF + 3 > 0) ? (jminF + 3) : 0;
|
||||||
|
const int k6_lo = (kminF + 3 > 0) ? (kminF + 3) : 0;
|
||||||
|
const int i6_hi = ex1 - 4;
|
||||||
|
const int j6_hi = ex2 - 4;
|
||||||
|
const int k6_hi = ex3 - 4;
|
||||||
|
|
||||||
|
/* 2nd-order */
|
||||||
|
if (i2_lo <= i2_hi && j2_lo <= j2_hi && k2_lo <= k2_hi) {
|
||||||
|
for (int k0 = k2_lo; k0 <= k2_hi; ++k0) {
|
||||||
|
const int kF = k0 + 2;
|
||||||
|
for (int j0 = j2_lo; j0 <= j2_hi; ++j0) {
|
||||||
|
const int jF = j0 + 2;
|
||||||
|
for (int i0 = i2_lo; i0 <= i2_hi; ++i0) {
|
||||||
|
const int iF = i0 + 2;
|
||||||
|
const size_t p = idx_ex(i0, j0, k0, ex);
|
||||||
|
|
||||||
|
fx[p] = d2dx * (
|
||||||
|
-fh[idx_fh_F(iF - 1, jF, kF, ex)] +
|
||||||
|
fh[idx_fh_F(iF + 1, jF, kF, ex)]);
|
||||||
|
fy[p] = d2dy * (
|
||||||
|
-fh[idx_fh_F(iF, jF - 1, kF, ex)] +
|
||||||
|
fh[idx_fh_F(iF, jF + 1, kF, ex)]);
|
||||||
|
fz[p] = d2dz * (
|
||||||
|
-fh[idx_fh_F(iF, jF, kF - 1, ex)] +
|
||||||
|
fh[idx_fh_F(iF, jF, kF + 1, ex)]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* 4th-order overwrite */
|
||||||
|
if (i4_lo <= i4_hi && j4_lo <= j4_hi && k4_lo <= k4_hi) {
|
||||||
|
for (int k0 = k4_lo; k0 <= k4_hi; ++k0) {
|
||||||
|
const int kF = k0 + 2;
|
||||||
|
for (int j0 = j4_lo; j0 <= j4_hi; ++j0) {
|
||||||
|
const int jF = j0 + 2;
|
||||||
|
for (int i0 = i4_lo; i0 <= i4_hi; ++i0) {
|
||||||
|
const int iF = i0 + 2;
|
||||||
|
const size_t p = idx_ex(i0, j0, k0, ex);
|
||||||
|
|
||||||
|
fx[p] = d12dx * (
|
||||||
|
fh[idx_fh_F(iF - 2, jF, kF, ex)] -
|
||||||
|
EIT * fh[idx_fh_F(iF - 1, jF, kF, ex)] +
|
||||||
|
EIT * fh[idx_fh_F(iF + 1, jF, kF, ex)] -
|
||||||
|
fh[idx_fh_F(iF + 2, jF, kF, ex)]);
|
||||||
|
|
||||||
|
fy[p] = d12dy * (
|
||||||
|
fh[idx_fh_F(iF, jF - 2, kF, ex)] -
|
||||||
|
EIT * fh[idx_fh_F(iF, jF - 1, kF, ex)] +
|
||||||
|
EIT * fh[idx_fh_F(iF, jF + 1, kF, ex)] -
|
||||||
|
fh[idx_fh_F(iF, jF + 2, kF, ex)]);
|
||||||
|
|
||||||
|
fz[p] = d12dz * (
|
||||||
|
fh[idx_fh_F(iF, jF, kF - 2, ex)] -
|
||||||
|
EIT * fh[idx_fh_F(iF, jF, kF - 1, ex)] +
|
||||||
|
EIT * fh[idx_fh_F(iF, jF, kF + 1, ex)] -
|
||||||
|
fh[idx_fh_F(iF, jF, kF + 2, ex)]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* 6th-order overwrite: [-1,+9,-45,0,+45,-9,+1] / (60*dx) */
|
||||||
|
if (i6_lo <= i6_hi && j6_lo <= j6_hi && k6_lo <= k6_hi) {
|
||||||
|
for (int k0 = k6_lo; k0 <= k6_hi; ++k0) {
|
||||||
|
const int kF = k0 + 2;
|
||||||
|
for (int j0 = j6_lo; j0 <= j6_hi; ++j0) {
|
||||||
|
const int jF = j0 + 2;
|
||||||
|
for (int i0 = i6_lo; i0 <= i6_hi; ++i0) {
|
||||||
|
const int iF = i0 + 2;
|
||||||
|
const size_t p = idx_ex(i0, j0, k0, ex);
|
||||||
|
|
||||||
|
fx[p] = d60dx * (
|
||||||
|
-fh[idx_fh_F(iF - 3, jF, kF, ex)] +
|
||||||
|
F9 * fh[idx_fh_F(iF - 2, jF, kF, ex)] -
|
||||||
|
F45 * fh[idx_fh_F(iF - 1, jF, kF, ex)] +
|
||||||
|
F45 * fh[idx_fh_F(iF + 1, jF, kF, ex)] -
|
||||||
|
F9 * fh[idx_fh_F(iF + 2, jF, kF, ex)] +
|
||||||
|
fh[idx_fh_F(iF + 3, jF, kF, ex)]);
|
||||||
|
|
||||||
|
fy[p] = d60dy * (
|
||||||
|
-fh[idx_fh_F(iF, jF - 3, kF, ex)] +
|
||||||
|
F9 * fh[idx_fh_F(iF, jF - 2, kF, ex)] -
|
||||||
|
F45 * fh[idx_fh_F(iF, jF - 1, kF, ex)] +
|
||||||
|
F45 * fh[idx_fh_F(iF, jF + 1, kF, ex)] -
|
||||||
|
F9 * fh[idx_fh_F(iF, jF + 2, kF, ex)] +
|
||||||
|
fh[idx_fh_F(iF, jF + 3, kF, ex)]);
|
||||||
|
|
||||||
|
fz[p] = d60dz * (
|
||||||
|
-fh[idx_fh_F(iF, jF, kF - 3, ex)] +
|
||||||
|
F9 * fh[idx_fh_F(iF, jF, kF - 2, ex)] -
|
||||||
|
F45 * fh[idx_fh_F(iF, jF, kF - 1, ex)] +
|
||||||
|
F45 * fh[idx_fh_F(iF, jF, kF + 1, ex)] -
|
||||||
|
F9 * fh[idx_fh_F(iF, jF, kF + 2, ex)] +
|
||||||
|
fh[idx_fh_F(iF, jF, kF + 3, ex)]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
#elif (ghost_width == 5)
|
||||||
|
/* ---- 8th-order ----------------------------------------------------- */
|
||||||
|
{
|
||||||
|
const int ord = 4;
|
||||||
|
|
||||||
|
int iminF = 1, jminF = 1, kminF = 1;
|
||||||
|
if (Symmetry > NO_SYMM && fabs(Z[0]) < dZ) kminF = -3;
|
||||||
|
if (Symmetry > EQ_SYMM && fabs(X[0]) < dX) iminF = -3;
|
||||||
|
if (Symmetry > EQ_SYMM && fabs(Y[0]) < dY) jminF = -3;
|
||||||
|
|
||||||
|
const double SoA[3] = { SYM1, SYM2, SYM3 };
|
||||||
|
|
||||||
|
const size_t nx = (size_t)ex1 + ord;
|
||||||
|
const size_t ny = (size_t)ex2 + ord;
|
||||||
|
const size_t nz = (size_t)ex3 + ord;
|
||||||
|
const size_t fh_size = nx * ny * nz;
|
||||||
|
|
||||||
|
static double *fh_buf = NULL;
|
||||||
|
static size_t cap = 0;
|
||||||
|
if (fh_size > cap) {
|
||||||
|
free(fh_buf);
|
||||||
|
fh_buf = (double*)aligned_alloc(64, fh_size * sizeof(double));
|
||||||
|
cap = fh_size;
|
||||||
|
}
|
||||||
|
double *fh = fh_buf;
|
||||||
|
if (!fh) return;
|
||||||
|
|
||||||
|
symmetry_bd(ord, ex, f, fh, SoA);
|
||||||
|
|
||||||
|
const double d840dx = ONE / F840 / dX;
|
||||||
|
const double d840dy = ONE / F840 / dY;
|
||||||
|
const double d840dz = ONE / F840 / dZ;
|
||||||
|
const double d60dx = ONE / F60 / dX;
|
||||||
|
const double d60dy = ONE / F60 / dY;
|
||||||
|
const double d60dz = ONE / F60 / dZ;
|
||||||
|
const double d12dx = ONE / F12 / dX;
|
||||||
|
const double d12dy = ONE / F12 / dY;
|
||||||
|
const double d12dz = ONE / F12 / dZ;
|
||||||
|
const double d2dx = ONE / TWO / dX;
|
||||||
|
const double d2dy = ONE / TWO / dY;
|
||||||
|
const double d2dz = ONE / TWO / dZ;
|
||||||
|
|
||||||
|
const size_t all = (size_t)ex1 * (size_t)ex2 * (size_t)ex3;
|
||||||
|
for (size_t p = 0; p < all; ++p) {
|
||||||
|
fx[p] = ZEO; fy[p] = ZEO; fz[p] = ZEO;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* 2nd: 3pt, widest */
|
||||||
|
const int i2_lo = (iminF + 2 > 0) ? (iminF + 2) : 0;
|
||||||
|
const int j2_lo = (jminF + 2 > 0) ? (jminF + 2) : 0;
|
||||||
|
const int k2_lo = (kminF + 2 > 0) ? (kminF + 2) : 0;
|
||||||
|
const int i2_hi = ex1 - 2;
|
||||||
|
const int j2_hi = ex2 - 2;
|
||||||
|
const int k2_hi = ex3 - 2;
|
||||||
|
|
||||||
|
/* 4th: 5pt */
|
||||||
|
const int i4_lo = (iminF + 3 > 0) ? (iminF + 3) : 0;
|
||||||
|
const int j4_lo = (jminF + 3 > 0) ? (jminF + 3) : 0;
|
||||||
|
const int k4_lo = (kminF + 3 > 0) ? (kminF + 3) : 0;
|
||||||
|
const int i4_hi = ex1 - 3;
|
||||||
|
const int j4_hi = ex2 - 3;
|
||||||
|
const int k4_hi = ex3 - 3;
|
||||||
|
|
||||||
|
/* 6th: 7pt */
|
||||||
|
const int i6_lo = (iminF + 4 > 0) ? (iminF + 4) : 0;
|
||||||
|
const int j6_lo = (jminF + 4 > 0) ? (jminF + 4) : 0;
|
||||||
|
const int k6_lo = (kminF + 4 > 0) ? (kminF + 4) : 0;
|
||||||
|
const int i6_hi = ex1 - 4;
|
||||||
|
const int j6_hi = ex2 - 4;
|
||||||
|
const int k6_hi = ex3 - 4;
|
||||||
|
|
||||||
|
/* 8th: 9pt, narrowest */
|
||||||
|
const int i8_lo = (iminF + 5 > 0) ? (iminF + 5) : 0;
|
||||||
|
const int j8_lo = (jminF + 5 > 0) ? (jminF + 5) : 0;
|
||||||
|
const int k8_lo = (kminF + 5 > 0) ? (kminF + 5) : 0;
|
||||||
|
const int i8_hi = ex1 - 5;
|
||||||
|
const int j8_hi = ex2 - 5;
|
||||||
|
const int k8_hi = ex3 - 5;
|
||||||
|
|
||||||
|
if (i2_lo <= i2_hi && j2_lo <= j2_hi && k2_lo <= k2_hi) {
|
||||||
|
for (int k0 = k2_lo; k0 <= k2_hi; ++k0) {
|
||||||
|
const int kF = k0 + 3;
|
||||||
|
for (int j0 = j2_lo; j0 <= j2_hi; ++j0) {
|
||||||
|
const int jF = j0 + 3;
|
||||||
|
for (int i0 = i2_lo; i0 <= i2_hi; ++i0) {
|
||||||
|
const int iF = i0 + 3;
|
||||||
|
const size_t p = idx_ex(i0, j0, k0, ex);
|
||||||
|
|
||||||
|
fx[p] = d2dx * (
|
||||||
|
-fh[idx_fh_F_ord4(iF - 1, jF, kF, ex)] +
|
||||||
|
fh[idx_fh_F_ord4(iF + 1, jF, kF, ex)]);
|
||||||
|
fy[p] = d2dy * (
|
||||||
|
-fh[idx_fh_F_ord4(iF, jF - 1, kF, ex)] +
|
||||||
|
fh[idx_fh_F_ord4(iF, jF + 1, kF, ex)]);
|
||||||
|
fz[p] = d2dz * (
|
||||||
|
-fh[idx_fh_F_ord4(iF, jF, kF - 1, ex)] +
|
||||||
|
fh[idx_fh_F_ord4(iF, jF, kF + 1, ex)]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (i4_lo <= i4_hi && j4_lo <= j4_hi && k4_lo <= k4_hi) {
|
||||||
|
for (int k0 = k4_lo; k0 <= k4_hi; ++k0) {
|
||||||
|
const int kF = k0 + 3;
|
||||||
|
for (int j0 = j4_lo; j0 <= j4_hi; ++j0) {
|
||||||
|
const int jF = j0 + 3;
|
||||||
|
for (int i0 = i4_lo; i0 <= i4_hi; ++i0) {
|
||||||
|
const int iF = i0 + 3;
|
||||||
|
const size_t p = idx_ex(i0, j0, k0, ex);
|
||||||
|
|
||||||
|
fx[p] = d12dx * (
|
||||||
|
fh[idx_fh_F_ord4(iF - 2, jF, kF, ex)] -
|
||||||
|
EIT * fh[idx_fh_F_ord4(iF - 1, jF, kF, ex)] +
|
||||||
|
EIT * fh[idx_fh_F_ord4(iF + 1, jF, kF, ex)] -
|
||||||
|
fh[idx_fh_F_ord4(iF + 2, jF, kF, ex)]);
|
||||||
|
|
||||||
|
fy[p] = d12dy * (
|
||||||
|
fh[idx_fh_F_ord4(iF, jF - 2, kF, ex)] -
|
||||||
|
EIT * fh[idx_fh_F_ord4(iF, jF - 1, kF, ex)] +
|
||||||
|
EIT * fh[idx_fh_F_ord4(iF, jF + 1, kF, ex)] -
|
||||||
|
fh[idx_fh_F_ord4(iF, jF + 2, kF, ex)]);
|
||||||
|
|
||||||
|
fz[p] = d12dz * (
|
||||||
|
fh[idx_fh_F_ord4(iF, jF, kF - 2, ex)] -
|
||||||
|
EIT * fh[idx_fh_F_ord4(iF, jF, kF - 1, ex)] +
|
||||||
|
EIT * fh[idx_fh_F_ord4(iF, jF, kF + 1, ex)] -
|
||||||
|
fh[idx_fh_F_ord4(iF, jF, kF + 2, ex)]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (i6_lo <= i6_hi && j6_lo <= j6_hi && k6_lo <= k6_hi) {
|
||||||
|
for (int k0 = k6_lo; k0 <= k6_hi; ++k0) {
|
||||||
|
const int kF = k0 + 3;
|
||||||
|
for (int j0 = j6_lo; j0 <= j6_hi; ++j0) {
|
||||||
|
const int jF = j0 + 3;
|
||||||
|
for (int i0 = i6_lo; i0 <= i6_hi; ++i0) {
|
||||||
|
const int iF = i0 + 3;
|
||||||
|
const size_t p = idx_ex(i0, j0, k0, ex);
|
||||||
|
|
||||||
|
fx[p] = d60dx * (
|
||||||
|
-fh[idx_fh_F_ord4(iF - 3, jF, kF, ex)] +
|
||||||
|
F9 * fh[idx_fh_F_ord4(iF - 2, jF, kF, ex)] -
|
||||||
|
F45 * fh[idx_fh_F_ord4(iF - 1, jF, kF, ex)] +
|
||||||
|
F45 * fh[idx_fh_F_ord4(iF + 1, jF, kF, ex)] -
|
||||||
|
F9 * fh[idx_fh_F_ord4(iF + 2, jF, kF, ex)] +
|
||||||
|
fh[idx_fh_F_ord4(iF + 3, jF, kF, ex)]);
|
||||||
|
|
||||||
|
fy[p] = d60dy * (
|
||||||
|
-fh[idx_fh_F_ord4(iF, jF - 3, kF, ex)] +
|
||||||
|
F9 * fh[idx_fh_F_ord4(iF, jF - 2, kF, ex)] -
|
||||||
|
F45 * fh[idx_fh_F_ord4(iF, jF - 1, kF, ex)] +
|
||||||
|
F45 * fh[idx_fh_F_ord4(iF, jF + 1, kF, ex)] -
|
||||||
|
F9 * fh[idx_fh_F_ord4(iF, jF + 2, kF, ex)] +
|
||||||
|
fh[idx_fh_F_ord4(iF, jF + 3, kF, ex)]);
|
||||||
|
|
||||||
|
fz[p] = d60dz * (
|
||||||
|
-fh[idx_fh_F_ord4(iF, jF, kF - 3, ex)] +
|
||||||
|
F9 * fh[idx_fh_F_ord4(iF, jF, kF - 2, ex)] -
|
||||||
|
F45 * fh[idx_fh_F_ord4(iF, jF, kF - 1, ex)] +
|
||||||
|
F45 * fh[idx_fh_F_ord4(iF, jF, kF + 1, ex)] -
|
||||||
|
F9 * fh[idx_fh_F_ord4(iF, jF, kF + 2, ex)] +
|
||||||
|
fh[idx_fh_F_ord4(iF, jF, kF + 3, ex)]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* 8th-order overwrite: [+3,-32,+168,-672,0,+672,-168,+32,-3] / (840*dx) */
|
||||||
|
if (i8_lo <= i8_hi && j8_lo <= j8_hi && k8_lo <= k8_hi) {
|
||||||
|
for (int k0 = k8_lo; k0 <= k8_hi; ++k0) {
|
||||||
|
const int kF = k0 + 3;
|
||||||
|
for (int j0 = j8_lo; j0 <= j8_hi; ++j0) {
|
||||||
|
const int jF = j0 + 3;
|
||||||
|
for (int i0 = i8_lo; i0 <= i8_hi; ++i0) {
|
||||||
|
const int iF = i0 + 3;
|
||||||
|
const size_t p = idx_ex(i0, j0, k0, ex);
|
||||||
|
|
||||||
|
fx[p] = d840dx * (
|
||||||
|
+(double)3 * fh[idx_fh_F_ord4(iF - 4, jF, kF, ex)] -
|
||||||
|
F32 * fh[idx_fh_F_ord4(iF - 3, jF, kF, ex)] +
|
||||||
|
F168 * fh[idx_fh_F_ord4(iF - 2, jF, kF, ex)] -
|
||||||
|
F672 * fh[idx_fh_F_ord4(iF - 1, jF, kF, ex)] +
|
||||||
|
F672 * fh[idx_fh_F_ord4(iF + 1, jF, kF, ex)] -
|
||||||
|
F168 * fh[idx_fh_F_ord4(iF + 2, jF, kF, ex)] +
|
||||||
|
F32 * fh[idx_fh_F_ord4(iF + 3, jF, kF, ex)] -
|
||||||
|
(double)3 * fh[idx_fh_F_ord4(iF + 4, jF, kF, ex)]);
|
||||||
|
|
||||||
|
fy[p] = d840dy * (
|
||||||
|
+(double)3 * fh[idx_fh_F_ord4(iF, jF - 4, kF, ex)] -
|
||||||
|
F32 * fh[idx_fh_F_ord4(iF, jF - 3, kF, ex)] +
|
||||||
|
F168 * fh[idx_fh_F_ord4(iF, jF - 2, kF, ex)] -
|
||||||
|
F672 * fh[idx_fh_F_ord4(iF, jF - 1, kF, ex)] +
|
||||||
|
F672 * fh[idx_fh_F_ord4(iF, jF + 1, kF, ex)] -
|
||||||
|
F168 * fh[idx_fh_F_ord4(iF, jF + 2, kF, ex)] +
|
||||||
|
F32 * fh[idx_fh_F_ord4(iF, jF + 3, kF, ex)] -
|
||||||
|
(double)3 * fh[idx_fh_F_ord4(iF, jF + 4, kF, ex)]);
|
||||||
|
|
||||||
|
fz[p] = d840dz * (
|
||||||
|
+(double)3 * fh[idx_fh_F_ord4(iF, jF, kF - 4, ex)] -
|
||||||
|
F32 * fh[idx_fh_F_ord4(iF, jF, kF - 3, ex)] +
|
||||||
|
F168 * fh[idx_fh_F_ord4(iF, jF, kF - 2, ex)] -
|
||||||
|
F672 * fh[idx_fh_F_ord4(iF, jF, kF - 1, ex)] +
|
||||||
|
F672 * fh[idx_fh_F_ord4(iF, jF, kF + 1, ex)] -
|
||||||
|
F168 * fh[idx_fh_F_ord4(iF, jF, kF + 2, ex)] +
|
||||||
|
F32 * fh[idx_fh_F_ord4(iF, jF, kF + 3, ex)] -
|
||||||
|
(double)3 * fh[idx_fh_F_ord4(iF, jF, kF + 4, ex)]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
#error "fderivs_c.C: unsupported ghost_width (must be 2, 3, 4, or 5)"
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,16 +1,16 @@
|
|||||||
|
#include "macrodef.h"
|
||||||
#include "tool.h"
|
#include "tool.h"
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* C 版 kodis
|
* C 版 kodis — Kreiss-Oliger numerical dissipation (Cartesian patches).
|
||||||
*
|
*
|
||||||
* Fortran signature:
|
* The KO operator is (D₊D₋)^r applied to f_rhs with alternating sign (-1)^(r-1).
|
||||||
* subroutine kodis(ex,X,Y,Z,f,f_rhs,SoA,Symmetry,eps)
|
|
||||||
*
|
*
|
||||||
* 约定:
|
* FD order → r → cof=2^(2r) mapping:
|
||||||
* X: ex1, Y: ex2, Z: ex3
|
* ghost_width=2 (2nd) → r=2, cof=16, sign=-
|
||||||
* f, f_rhs: ex1*ex2*ex3 按 idx_ex 布局
|
* ghost_width=3 (4th) → r=3, cof=64, sign=+
|
||||||
* SoA[3]
|
* ghost_width=4 (6th) → r=4, cof=256, sign=-
|
||||||
* eps: double
|
* ghost_width=5 (8th) → r=5, cof=1024,sign=+
|
||||||
*/
|
*/
|
||||||
void kodis(const int ex[3],
|
void kodis(const int ex[3],
|
||||||
const double *X, const double *Y, const double *Z,
|
const double *X, const double *Y, const double *Z,
|
||||||
@@ -18,100 +18,304 @@ void kodis(const int ex[3],
|
|||||||
const double SoA[3],
|
const double SoA[3],
|
||||||
int Symmetry, double eps)
|
int Symmetry, double eps)
|
||||||
{
|
{
|
||||||
const double ONE = 1.0, SIX = 6.0, FIT = 15.0, TWT = 20.0;
|
const double ZEO = 0.0;
|
||||||
const double cof = 64.0; // 2^6
|
|
||||||
const int NO_SYMM = 0, OCTANT = 2;
|
|
||||||
|
|
||||||
const int ex1 = ex[0], ex2 = ex[1], ex3 = ex[2];
|
const int ex1 = ex[0], ex2 = ex[1], ex3 = ex[2];
|
||||||
|
|
||||||
// Fortran: dX = X(2)-X(1) -> C: X[1]-X[0]
|
|
||||||
const double dX = X[1] - X[0];
|
const double dX = X[1] - X[0];
|
||||||
const double dY = Y[1] - Y[0];
|
const double dY = Y[1] - Y[0];
|
||||||
const double dZ = Z[1] - Z[0];
|
const double dZ = Z[1] - Z[0];
|
||||||
(void)ONE; // ONE 在原 Fortran 里只是参数,这里不一定用得上
|
|
||||||
|
|
||||||
// Fortran: imax=ex(1) 等是 1-based 上界
|
const int imaxF = ex1, jmaxF = ex2, kmaxF = ex3;
|
||||||
const int imaxF = ex1;
|
|
||||||
const int jmaxF = ex2;
|
|
||||||
const int kmaxF = ex3;
|
|
||||||
|
|
||||||
// Fortran: imin=jmin=kmin=1,某些对称情况变 -2
|
#if (ghost_width == 2)
|
||||||
int iminF = 1, jminF = 1, kminF = 1;
|
/* ---- r=2, cof=16, sign=-, 5pt stencil ----------------------------- */
|
||||||
|
{
|
||||||
|
const int ord = 2;
|
||||||
|
const int r = 2;
|
||||||
|
const double cof = 16.0;
|
||||||
|
const double F4 = 4.0, F6 = 6.0;
|
||||||
|
const int NO_SYMM = 0, EQ_SYMM = 1;
|
||||||
|
|
||||||
if (Symmetry > NO_SYMM && fabs(Z[0]) < dZ) kminF = -2;
|
int iminF = 1, jminF = 1, kminF = 1;
|
||||||
if (Symmetry == OCTANT && fabs(X[0]) < dX) iminF = -2;
|
if (Symmetry > NO_SYMM && fabs(Z[0]) < dZ) kminF = -1;
|
||||||
if (Symmetry == OCTANT && fabs(Y[0]) < dY) jminF = -2;
|
if (Symmetry > EQ_SYMM && fabs(X[0]) < dX) iminF = -1;
|
||||||
|
if (Symmetry > EQ_SYMM && fabs(Y[0]) < dY) jminF = -1;
|
||||||
|
|
||||||
// 分配 fh:大小 (ex1+3)*(ex2+3)*(ex3+3),对应 ord=3
|
const size_t nx = (size_t)ex1 + ord;
|
||||||
const size_t nx = (size_t)ex1 + 3;
|
const size_t ny = (size_t)ex2 + ord;
|
||||||
const size_t ny = (size_t)ex2 + 3;
|
const size_t nz = (size_t)ex3 + ord;
|
||||||
const size_t nz = (size_t)ex3 + 3;
|
const size_t fh_size = nx * ny * nz;
|
||||||
const size_t fh_size = nx * ny * nz;
|
|
||||||
|
|
||||||
double *fh = (double*)malloc(fh_size * sizeof(double));
|
double *fh = (double*)malloc(fh_size * sizeof(double));
|
||||||
if (!fh) return;
|
if (!fh) return;
|
||||||
|
|
||||||
// Fortran: call symmetry_bd(3,ex,f,fh,SoA)
|
symmetry_bd(ord, ex, f, fh, SoA);
|
||||||
symmetry_bd(3, ex, f, fh, SoA);
|
|
||||||
|
|
||||||
/*
|
/* i±2 must be valid: i-2 >= iminF && i+2 <= imaxF
|
||||||
* Fortran loops:
|
C 0-based: i0 >= iminF+1, i0 <= ex1-3 */
|
||||||
* do k=1,ex3
|
const int i0_lo = (iminF + 1 > 0) ? (iminF + 1) : 0;
|
||||||
* do j=1,ex2
|
const int j0_lo = (jminF + 1 > 0) ? (jminF + 1) : 0;
|
||||||
* do i=1,ex1
|
const int k0_lo = (kminF + 1 > 0) ? (kminF + 1) : 0;
|
||||||
*
|
const int i0_hi = imaxF - 3;
|
||||||
* C: k0=0..ex3-1, j0=0..ex2-1, i0=0..ex1-1
|
const int j0_hi = jmaxF - 3;
|
||||||
* 并定义 Fortran index: iF=i0+1, ...
|
const int k0_hi = kmaxF - 3;
|
||||||
*/
|
|
||||||
// 收紧循环范围:只遍历满足 iF±3/jF±3/kF±3 条件的内部点
|
|
||||||
// iF-3 >= iminF => iF >= iminF+3 => i0 >= iminF+2 (因为 iF=i0+1)
|
|
||||||
// iF+3 <= imaxF => iF <= imaxF-3 => i0 <= imaxF-4
|
|
||||||
const int i0_lo = (iminF + 2 > 0) ? iminF + 2 : 0;
|
|
||||||
const int j0_lo = (jminF + 2 > 0) ? jminF + 2 : 0;
|
|
||||||
const int k0_lo = (kminF + 2 > 0) ? kminF + 2 : 0;
|
|
||||||
const int i0_hi = imaxF - 4; // inclusive
|
|
||||||
const int j0_hi = jmaxF - 4;
|
|
||||||
const int k0_hi = kmaxF - 4;
|
|
||||||
|
|
||||||
if (i0_lo > i0_hi || j0_lo > j0_hi || k0_lo > k0_hi) {
|
if (!(i0_lo > i0_hi || j0_lo > j0_hi || k0_lo > k0_hi)) {
|
||||||
|
for (int k0 = k0_lo; k0 <= k0_hi; ++k0) {
|
||||||
|
const int kF = k0 + 1;
|
||||||
|
for (int j0 = j0_lo; j0 <= j0_hi; ++j0) {
|
||||||
|
const int jF = j0 + 1;
|
||||||
|
for (int i0 = i0_lo; i0 <= i0_hi; ++i0) {
|
||||||
|
const int iF = i0 + 1;
|
||||||
|
const size_t p = idx_ex(i0, j0, k0, ex);
|
||||||
|
|
||||||
|
const double Dx = (
|
||||||
|
(fh[idx_fh_F_ord2(iF - 2, jF, kF, ex)] + fh[idx_fh_F_ord2(iF + 2, jF, kF, ex)]) -
|
||||||
|
F4 * (fh[idx_fh_F_ord2(iF - 1, jF, kF, ex)] + fh[idx_fh_F_ord2(iF + 1, jF, kF, ex)]) +
|
||||||
|
F6 * fh[idx_fh_F_ord2(iF, jF, kF, ex)]
|
||||||
|
) / dX;
|
||||||
|
|
||||||
|
const double Dy = (
|
||||||
|
(fh[idx_fh_F_ord2(iF, jF - 2, kF, ex)] + fh[idx_fh_F_ord2(iF, jF + 2, kF, ex)]) -
|
||||||
|
F4 * (fh[idx_fh_F_ord2(iF, jF - 1, kF, ex)] + fh[idx_fh_F_ord2(iF, jF + 1, kF, ex)]) +
|
||||||
|
F6 * fh[idx_fh_F_ord2(iF, jF, kF, ex)]
|
||||||
|
) / dY;
|
||||||
|
|
||||||
|
const double Dz = (
|
||||||
|
(fh[idx_fh_F_ord2(iF, jF, kF - 2, ex)] + fh[idx_fh_F_ord2(iF, jF, kF + 2, ex)]) -
|
||||||
|
F4 * (fh[idx_fh_F_ord2(iF, jF, kF - 1, ex)] + fh[idx_fh_F_ord2(iF, jF, kF + 1, ex)]) +
|
||||||
|
F6 * fh[idx_fh_F_ord2(iF, jF, kF, ex)]
|
||||||
|
) / dZ;
|
||||||
|
|
||||||
|
f_rhs[p] -= (eps / cof) * (Dx + Dy + Dz); /* sign=- */
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
free(fh);
|
free(fh);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
#elif (ghost_width == 3)
|
||||||
|
/* ---- r=3, cof=64, sign=+, 7pt stencil (current default) ---------- */
|
||||||
|
{
|
||||||
|
const int ord = 3;
|
||||||
|
const int r = 3;
|
||||||
|
const double cof = 64.0;
|
||||||
|
const double SIX = 6.0, FIT = 15.0, TWT = 20.0;
|
||||||
|
const int NO_SYMM = 0, OCTANT = 2;
|
||||||
|
|
||||||
for (int k0 = k0_lo; k0 <= k0_hi; ++k0) {
|
int iminF = 1, jminF = 1, kminF = 1;
|
||||||
const int kF = k0 + 1;
|
if (Symmetry > NO_SYMM && fabs(Z[0]) < dZ) kminF = -2;
|
||||||
for (int j0 = j0_lo; j0 <= j0_hi; ++j0) {
|
if (Symmetry == OCTANT && fabs(X[0]) < dX) iminF = -2;
|
||||||
const int jF = j0 + 1;
|
if (Symmetry == OCTANT && fabs(Y[0]) < dY) jminF = -2;
|
||||||
for (int i0 = i0_lo; i0 <= i0_hi; ++i0) {
|
|
||||||
const int iF = i0 + 1;
|
|
||||||
|
|
||||||
const size_t p = idx_ex(i0, j0, k0, ex);
|
const size_t nx = (size_t)ex1 + ord;
|
||||||
|
const size_t ny = (size_t)ex2 + ord;
|
||||||
|
const size_t nz = (size_t)ex3 + ord;
|
||||||
|
const size_t fh_size = nx * ny * nz;
|
||||||
|
|
||||||
// 三个方向各一份同型的 7 点组合(实际上是对称的 6th-order dissipation/filter 核)
|
double *fh = (double*)malloc(fh_size * sizeof(double));
|
||||||
const double Dx_term =
|
if (!fh) return;
|
||||||
( (fh[idx_fh_F(iF - 3, jF, kF, ex)] + fh[idx_fh_F(iF + 3, jF, kF, ex)]) -
|
|
||||||
SIX * (fh[idx_fh_F(iF - 2, jF, kF, ex)] + fh[idx_fh_F(iF + 2, jF, kF, ex)]) +
|
|
||||||
FIT * (fh[idx_fh_F(iF - 1, jF, kF, ex)] + fh[idx_fh_F(iF + 1, jF, kF, ex)]) -
|
|
||||||
TWT * fh[idx_fh_F(iF , jF, kF, ex)] ) / dX;
|
|
||||||
|
|
||||||
const double Dy_term =
|
symmetry_bd(ord, ex, f, fh, SoA);
|
||||||
( (fh[idx_fh_F(iF, jF - 3, kF, ex)] + fh[idx_fh_F(iF, jF + 3, kF, ex)]) -
|
|
||||||
SIX * (fh[idx_fh_F(iF, jF - 2, kF, ex)] + fh[idx_fh_F(iF, jF + 2, kF, ex)]) +
|
|
||||||
FIT * (fh[idx_fh_F(iF, jF - 1, kF, ex)] + fh[idx_fh_F(iF, jF + 1, kF, ex)]) -
|
|
||||||
TWT * fh[idx_fh_F(iF, jF , kF, ex)] ) / dY;
|
|
||||||
|
|
||||||
const double Dz_term =
|
const int i0_lo = (iminF + 2 > 0) ? iminF + 2 : 0;
|
||||||
( (fh[idx_fh_F(iF, jF, kF - 3, ex)] + fh[idx_fh_F(iF, jF, kF + 3, ex)]) -
|
const int j0_lo = (jminF + 2 > 0) ? jminF + 2 : 0;
|
||||||
SIX * (fh[idx_fh_F(iF, jF, kF - 2, ex)] + fh[idx_fh_F(iF, jF, kF + 2, ex)]) +
|
const int k0_lo = (kminF + 2 > 0) ? kminF + 2 : 0;
|
||||||
FIT * (fh[idx_fh_F(iF, jF, kF - 1, ex)] + fh[idx_fh_F(iF, jF, kF + 1, ex)]) -
|
const int i0_hi = imaxF - 4;
|
||||||
TWT * fh[idx_fh_F(iF, jF, kF , ex)] ) / dZ;
|
const int j0_hi = jmaxF - 4;
|
||||||
|
const int k0_hi = kmaxF - 4;
|
||||||
|
|
||||||
// Fortran:
|
if (!(i0_lo > i0_hi || j0_lo > j0_hi || k0_lo > k0_hi)) {
|
||||||
// f_rhs(i,j,k) = f_rhs(i,j,k) + eps/cof*(Dx_term + Dy_term + Dz_term)
|
for (int k0 = k0_lo; k0 <= k0_hi; ++k0) {
|
||||||
f_rhs[p] += (eps / cof) * (Dx_term + Dy_term + Dz_term);
|
const int kF = k0 + 2;
|
||||||
|
for (int j0 = j0_lo; j0 <= j0_hi; ++j0) {
|
||||||
|
const int jF = j0 + 2;
|
||||||
|
for (int i0 = i0_lo; i0 <= i0_hi; ++i0) {
|
||||||
|
const int iF = i0 + 2;
|
||||||
|
const size_t p = idx_ex(i0, j0, k0, ex);
|
||||||
|
|
||||||
|
const double Dx = (
|
||||||
|
(fh[idx_fh_F(iF - 3, jF, kF, ex)] + fh[idx_fh_F(iF + 3, jF, kF, ex)]) -
|
||||||
|
SIX * (fh[idx_fh_F(iF - 2, jF, kF, ex)] + fh[idx_fh_F(iF + 2, jF, kF, ex)]) +
|
||||||
|
FIT * (fh[idx_fh_F(iF - 1, jF, kF, ex)] + fh[idx_fh_F(iF + 1, jF, kF, ex)]) -
|
||||||
|
TWT * fh[idx_fh_F(iF, jF, kF, ex)]
|
||||||
|
) / dX;
|
||||||
|
|
||||||
|
const double Dy = (
|
||||||
|
(fh[idx_fh_F(iF, jF - 3, kF, ex)] + fh[idx_fh_F(iF, jF + 3, kF, ex)]) -
|
||||||
|
SIX * (fh[idx_fh_F(iF, jF - 2, kF, ex)] + fh[idx_fh_F(iF, jF + 2, kF, ex)]) +
|
||||||
|
FIT * (fh[idx_fh_F(iF, jF - 1, kF, ex)] + fh[idx_fh_F(iF, jF + 1, kF, ex)]) -
|
||||||
|
TWT * fh[idx_fh_F(iF, jF, kF, ex)]
|
||||||
|
) / dY;
|
||||||
|
|
||||||
|
const double Dz = (
|
||||||
|
(fh[idx_fh_F(iF, jF, kF - 3, ex)] + fh[idx_fh_F(iF, jF, kF + 3, ex)]) -
|
||||||
|
SIX * (fh[idx_fh_F(iF, jF, kF - 2, ex)] + fh[idx_fh_F(iF, jF, kF + 2, ex)]) +
|
||||||
|
FIT * (fh[idx_fh_F(iF, jF, kF - 1, ex)] + fh[idx_fh_F(iF, jF, kF + 1, ex)]) -
|
||||||
|
TWT * fh[idx_fh_F(iF, jF, kF, ex)]
|
||||||
|
) / dZ;
|
||||||
|
|
||||||
|
f_rhs[p] += (eps / cof) * (Dx + Dy + Dz); /* sign=+ */
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
free(fh);
|
||||||
|
return;
|
||||||
}
|
}
|
||||||
|
#elif (ghost_width == 4)
|
||||||
|
/* ---- r=4, cof=256, sign=-, 9pt stencil ---------------------------- */
|
||||||
|
{
|
||||||
|
const int ord = 4;
|
||||||
|
const int r = 4;
|
||||||
|
const double cof = 256.0;
|
||||||
|
const double F8 = 8.0, F28 = 28.0, F56 = 56.0, F70 = 70.0;
|
||||||
|
const int NO_SYMM = 0, EQ_SYMM = 1;
|
||||||
|
|
||||||
free(fh);
|
int iminF = 1, jminF = 1, kminF = 1;
|
||||||
}
|
if (Symmetry > NO_SYMM && fabs(Z[0]) < dZ) kminF = -3;
|
||||||
|
if (Symmetry > EQ_SYMM && fabs(X[0]) < dX) iminF = -3;
|
||||||
|
if (Symmetry > EQ_SYMM && fabs(Y[0]) < dY) jminF = -3;
|
||||||
|
|
||||||
|
const size_t nx = (size_t)ex1 + ord;
|
||||||
|
const size_t ny = (size_t)ex2 + ord;
|
||||||
|
const size_t nz = (size_t)ex3 + ord;
|
||||||
|
const size_t fh_size = nx * ny * nz;
|
||||||
|
|
||||||
|
double *fh = (double*)malloc(fh_size * sizeof(double));
|
||||||
|
if (!fh) return;
|
||||||
|
|
||||||
|
symmetry_bd(ord, ex, f, fh, SoA);
|
||||||
|
|
||||||
|
/* i±4 valid: i-4>=iminF → i0>=iminF+3, i+4<=imaxF → i0<=ex1-5 */
|
||||||
|
const int i0_lo = (iminF + 3 > 0) ? iminF + 3 : 0;
|
||||||
|
const int j0_lo = (jminF + 3 > 0) ? jminF + 3 : 0;
|
||||||
|
const int k0_lo = (kminF + 3 > 0) ? kminF + 3 : 0;
|
||||||
|
const int i0_hi = imaxF - 5;
|
||||||
|
const int j0_hi = jmaxF - 5;
|
||||||
|
const int k0_hi = kmaxF - 5;
|
||||||
|
|
||||||
|
if (!(i0_lo > i0_hi || j0_lo > j0_hi || k0_lo > k0_hi)) {
|
||||||
|
for (int k0 = k0_lo; k0 <= k0_hi; ++k0) {
|
||||||
|
const int kF = k0 + 3;
|
||||||
|
for (int j0 = j0_lo; j0 <= j0_hi; ++j0) {
|
||||||
|
const int jF = j0 + 3;
|
||||||
|
for (int i0 = i0_lo; i0 <= i0_hi; ++i0) {
|
||||||
|
const int iF = i0 + 3;
|
||||||
|
const size_t p = idx_ex(i0, j0, k0, ex);
|
||||||
|
|
||||||
|
/* Stencil: [1,-8,28,-56,70,-56,28,-8,1] */
|
||||||
|
const double Dx = (
|
||||||
|
(fh[idx_fh_F_ord4(iF - 4, jF, kF, ex)] + fh[idx_fh_F_ord4(iF + 4, jF, kF, ex)]) -
|
||||||
|
F8 * (fh[idx_fh_F_ord4(iF - 3, jF, kF, ex)] + fh[idx_fh_F_ord4(iF + 3, jF, kF, ex)]) +
|
||||||
|
F28* (fh[idx_fh_F_ord4(iF - 2, jF, kF, ex)] + fh[idx_fh_F_ord4(iF + 2, jF, kF, ex)]) -
|
||||||
|
F56* (fh[idx_fh_F_ord4(iF - 1, jF, kF, ex)] + fh[idx_fh_F_ord4(iF + 1, jF, kF, ex)]) +
|
||||||
|
F70* fh[idx_fh_F_ord4(iF, jF, kF, ex)]
|
||||||
|
) / dX;
|
||||||
|
|
||||||
|
const double Dy = (
|
||||||
|
(fh[idx_fh_F_ord4(iF, jF - 4, kF, ex)] + fh[idx_fh_F_ord4(iF, jF + 4, kF, ex)]) -
|
||||||
|
F8 * (fh[idx_fh_F_ord4(iF, jF - 3, kF, ex)] + fh[idx_fh_F_ord4(iF, jF + 3, kF, ex)]) +
|
||||||
|
F28* (fh[idx_fh_F_ord4(iF, jF - 2, kF, ex)] + fh[idx_fh_F_ord4(iF, jF + 2, kF, ex)]) -
|
||||||
|
F56* (fh[idx_fh_F_ord4(iF, jF - 1, kF, ex)] + fh[idx_fh_F_ord4(iF, jF + 1, kF, ex)]) +
|
||||||
|
F70* fh[idx_fh_F_ord4(iF, jF, kF, ex)]
|
||||||
|
) / dY;
|
||||||
|
|
||||||
|
const double Dz = (
|
||||||
|
(fh[idx_fh_F_ord4(iF, jF, kF - 4, ex)] + fh[idx_fh_F_ord4(iF, jF, kF + 4, ex)]) -
|
||||||
|
F8 * (fh[idx_fh_F_ord4(iF, jF, kF - 3, ex)] + fh[idx_fh_F_ord4(iF, jF, kF + 3, ex)]) +
|
||||||
|
F28* (fh[idx_fh_F_ord4(iF, jF, kF - 2, ex)] + fh[idx_fh_F_ord4(iF, jF, kF + 2, ex)]) -
|
||||||
|
F56* (fh[idx_fh_F_ord4(iF, jF, kF - 1, ex)] + fh[idx_fh_F_ord4(iF, jF, kF + 1, ex)]) +
|
||||||
|
F70* fh[idx_fh_F_ord4(iF, jF, kF, ex)]
|
||||||
|
) / dZ;
|
||||||
|
|
||||||
|
f_rhs[p] -= (eps / cof) * (Dx + Dy + Dz); /* sign=- */
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
free(fh);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
#elif (ghost_width == 5)
|
||||||
|
/* ---- r=5, cof=1024, sign=+, 11pt stencil ------------------------- */
|
||||||
|
{
|
||||||
|
const int ord = 5;
|
||||||
|
const int r = 5;
|
||||||
|
const double cof = 1024.0;
|
||||||
|
const double F10 = 10.0, F45 = 45.0, F120 = 120.0;
|
||||||
|
const double F210 = 210.0, F252 = 252.0;
|
||||||
|
const int NO_SYMM = 0, EQ_SYMM = 1;
|
||||||
|
|
||||||
|
int iminF = 1, jminF = 1, kminF = 1;
|
||||||
|
if (Symmetry > NO_SYMM && fabs(Z[0]) < dZ) kminF = -4;
|
||||||
|
if (Symmetry > EQ_SYMM && fabs(X[0]) < dX) iminF = -4;
|
||||||
|
if (Symmetry > EQ_SYMM && fabs(Y[0]) < dY) jminF = -4;
|
||||||
|
|
||||||
|
const size_t nx = (size_t)ex1 + ord;
|
||||||
|
const size_t ny = (size_t)ex2 + ord;
|
||||||
|
const size_t nz = (size_t)ex3 + ord;
|
||||||
|
const size_t fh_size = nx * ny * nz;
|
||||||
|
|
||||||
|
double *fh = (double*)malloc(fh_size * sizeof(double));
|
||||||
|
if (!fh) return;
|
||||||
|
|
||||||
|
symmetry_bd(ord, ex, f, fh, SoA);
|
||||||
|
|
||||||
|
/* i±5 valid: i0>=iminF+4, i0<=ex1-6 */
|
||||||
|
const int i0_lo = (iminF + 4 > 0) ? iminF + 4 : 0;
|
||||||
|
const int j0_lo = (jminF + 4 > 0) ? jminF + 4 : 0;
|
||||||
|
const int k0_lo = (kminF + 4 > 0) ? kminF + 4 : 0;
|
||||||
|
const int i0_hi = imaxF - 6;
|
||||||
|
const int j0_hi = jmaxF - 6;
|
||||||
|
const int k0_hi = kmaxF - 6;
|
||||||
|
|
||||||
|
if (!(i0_lo > i0_hi || j0_lo > j0_hi || k0_lo > k0_hi)) {
|
||||||
|
for (int k0 = k0_lo; k0 <= k0_hi; ++k0) {
|
||||||
|
const int kF = k0 + 4;
|
||||||
|
for (int j0 = j0_lo; j0 <= j0_hi; ++j0) {
|
||||||
|
const int jF = j0 + 4;
|
||||||
|
for (int i0 = i0_lo; i0 <= i0_hi; ++i0) {
|
||||||
|
const int iF = i0 + 4;
|
||||||
|
const size_t p = idx_ex(i0, j0, k0, ex);
|
||||||
|
|
||||||
|
/* Stencil: [1,-10,45,-120,210,-252,210,-120,45,-10,1] */
|
||||||
|
const double Dx = (
|
||||||
|
(fh[idx_fh_F_ord5(iF - 5, jF, kF, ex)] + fh[idx_fh_F_ord5(iF + 5, jF, kF, ex)]) -
|
||||||
|
F10 * (fh[idx_fh_F_ord5(iF - 4, jF, kF, ex)] + fh[idx_fh_F_ord5(iF + 4, jF, kF, ex)]) +
|
||||||
|
F45 * (fh[idx_fh_F_ord5(iF - 3, jF, kF, ex)] + fh[idx_fh_F_ord5(iF + 3, jF, kF, ex)]) -
|
||||||
|
F120* (fh[idx_fh_F_ord5(iF - 2, jF, kF, ex)] + fh[idx_fh_F_ord5(iF + 2, jF, kF, ex)]) +
|
||||||
|
F210* (fh[idx_fh_F_ord5(iF - 1, jF, kF, ex)] + fh[idx_fh_F_ord5(iF + 1, jF, kF, ex)]) -
|
||||||
|
F252* fh[idx_fh_F_ord5(iF, jF, kF, ex)]
|
||||||
|
) / dX;
|
||||||
|
|
||||||
|
const double Dy = (
|
||||||
|
(fh[idx_fh_F_ord5(iF, jF - 5, kF, ex)] + fh[idx_fh_F_ord5(iF, jF + 5, kF, ex)]) -
|
||||||
|
F10 * (fh[idx_fh_F_ord5(iF, jF - 4, kF, ex)] + fh[idx_fh_F_ord5(iF, jF + 4, kF, ex)]) +
|
||||||
|
F45 * (fh[idx_fh_F_ord5(iF, jF - 3, kF, ex)] + fh[idx_fh_F_ord5(iF, jF + 3, kF, ex)]) -
|
||||||
|
F120* (fh[idx_fh_F_ord5(iF, jF - 2, kF, ex)] + fh[idx_fh_F_ord5(iF, jF + 2, kF, ex)]) +
|
||||||
|
F210* (fh[idx_fh_F_ord5(iF, jF - 1, kF, ex)] + fh[idx_fh_F_ord5(iF, jF + 1, kF, ex)]) -
|
||||||
|
F252* fh[idx_fh_F_ord5(iF, jF, kF, ex)]
|
||||||
|
) / dY;
|
||||||
|
|
||||||
|
const double Dz = (
|
||||||
|
(fh[idx_fh_F_ord5(iF, jF, kF - 5, ex)] + fh[idx_fh_F_ord5(iF, jF, kF + 5, ex)]) -
|
||||||
|
F10 * (fh[idx_fh_F_ord5(iF, jF, kF - 4, ex)] + fh[idx_fh_F_ord5(iF, jF, kF + 4, ex)]) +
|
||||||
|
F45 * (fh[idx_fh_F_ord5(iF, jF, kF - 3, ex)] + fh[idx_fh_F_ord5(iF, jF, kF + 3, ex)]) -
|
||||||
|
F120* (fh[idx_fh_F_ord5(iF, jF, kF - 2, ex)] + fh[idx_fh_F_ord5(iF, jF, kF + 2, ex)]) +
|
||||||
|
F210* (fh[idx_fh_F_ord5(iF, jF, kF - 1, ex)] + fh[idx_fh_F_ord5(iF, jF, kF + 1, ex)]) -
|
||||||
|
F252* fh[idx_fh_F_ord5(iF, jF, kF, ex)]
|
||||||
|
) / dZ;
|
||||||
|
|
||||||
|
f_rhs[p] += (eps / cof) * (Dx + Dy + Dz); /* sign=+ */
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
free(fh);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
#error "kodiss_c.C: unsupported ghost_width (must be 2, 3, 4, or 5)"
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|||||||
@@ -1,14 +1,13 @@
|
|||||||
|
#include "macrodef.h"
|
||||||
#include "tool.h"
|
#include "tool.h"
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* 你需要提供 symmetry_bd 的 C 版本(或 Fortran 绑到 C 的接口)。
|
* C 版 lopsided — upwind (lopsided) advection derivatives.
|
||||||
* Fortran: call symmetry_bd(3,ex,f,fh,SoA)
|
|
||||||
*
|
*
|
||||||
* 约定:
|
* Adds advection terms to f_rhs for all three spatial directions.
|
||||||
* nghost = 3
|
* Uses sign-biased (one-sided) stencils with centered fallbacks.
|
||||||
* ex[3] = {ex1,ex2,ex3}
|
*
|
||||||
* f = 原始网格 (ex1*ex2*ex3)
|
* For lopsided, symmetry_bd ord = ghost_width (same as kodiss).
|
||||||
* fh = 扩展网格 ((ex1+3)*(ex2+3)*(ex3+3)),对应 Fortran 的 (-2:ex1, ...)
|
|
||||||
* SoA[3] = 输入参数
|
|
||||||
*/
|
*/
|
||||||
void lopsided(const int ex[3],
|
void lopsided(const int ex[3],
|
||||||
const double *X, const double *Y, const double *Z,
|
const double *X, const double *Y, const double *Z,
|
||||||
@@ -16,240 +15,577 @@ void lopsided(const int ex[3],
|
|||||||
const double *Sfx, const double *Sfy, const double *Sfz,
|
const double *Sfx, const double *Sfy, const double *Sfz,
|
||||||
int Symmetry, const double SoA[3])
|
int Symmetry, const double SoA[3])
|
||||||
{
|
{
|
||||||
const double ZEO = 0.0, ONE = 1.0, F3 = 3.0;
|
const double ZEO = 0.0, ONE = 1.0;
|
||||||
const double TWO = 2.0, F6 = 6.0, F18 = 18.0;
|
const double TWO = 2.0, F6 = 6.0, EIT = 8.0;
|
||||||
const double F12 = 12.0, F10 = 10.0, EIT = 8.0;
|
const double F3 = 3.0, F4 = 4.0, F5 = 5.0, F10 = 10.0, F12 = 12.0, F18 = 18.0;
|
||||||
|
const double F9 = 9.0, F45 = 45.0, F60 = 60.0;
|
||||||
const int NO_SYMM = 0, EQ_SYMM = 1, OCTANT = 2;
|
const double F2 = 2.0, F15 = 15.0, F24 = 24.0, F30 = 30.0, F35 = 35.0;
|
||||||
(void)OCTANT; // 这里和 Fortran 一样只是定义了不用也没关系
|
const double F50 = 50.0, F77 = 77.0, F80 = 80.0, F100 = 100.0, F150 = 150.0;
|
||||||
|
const double F32 = 32.0, F168 = 168.0, F672 = 672.0, F840 = 840.0;
|
||||||
|
const double F140=140.0, F378=378.0, F420=420.0, F1050=1050.0;
|
||||||
|
|
||||||
|
const int NO_SYMM = 0, EQ_SYMM = 1;
|
||||||
const int ex1 = ex[0], ex2 = ex[1], ex3 = ex[2];
|
const int ex1 = ex[0], ex2 = ex[1], ex3 = ex[2];
|
||||||
|
|
||||||
// 对应 Fortran: dX = X(2)-X(1) (Fortran 1-based)
|
|
||||||
// C: X[1]-X[0]
|
|
||||||
const double dX = X[1] - X[0];
|
const double dX = X[1] - X[0];
|
||||||
const double dY = Y[1] - Y[0];
|
const double dY = Y[1] - Y[0];
|
||||||
const double dZ = Z[1] - Z[0];
|
const double dZ = Z[1] - Z[0];
|
||||||
|
|
||||||
const double d12dx = ONE / F12 / dX;
|
#if (ghost_width == 2)
|
||||||
const double d12dy = ONE / F12 / dY;
|
/* ---- 2nd-order lopsided --------------------------------------------- */
|
||||||
const double d12dz = ONE / F12 / dZ;
|
{
|
||||||
|
const int ord = 2;
|
||||||
|
int iminF = 1, jminF = 1, kminF = 1;
|
||||||
|
if (Symmetry > NO_SYMM && fabs(Z[0]) < dZ) kminF = -1;
|
||||||
|
if (Symmetry > EQ_SYMM && fabs(X[0]) < dX) iminF = -1;
|
||||||
|
if (Symmetry > EQ_SYMM && fabs(Y[0]) < dY) jminF = -1;
|
||||||
|
|
||||||
// Fortran 里算了 d2dx/d2dy/d2dz 但本 subroutine 里没用到(保持一致也算出来)
|
const size_t nx = (size_t)ex1 + ord;
|
||||||
const double d2dx = ONE / TWO / dX;
|
const size_t ny = (size_t)ex2 + ord;
|
||||||
const double d2dy = ONE / TWO / dY;
|
const size_t nz = (size_t)ex3 + ord;
|
||||||
const double d2dz = ONE / TWO / dZ;
|
const size_t fh_size = nx * ny * nz;
|
||||||
(void)d2dx; (void)d2dy; (void)d2dz;
|
|
||||||
|
|
||||||
// Fortran:
|
double *fh = (double*)malloc(fh_size * sizeof(double));
|
||||||
// imax = ex(1); jmax = ex(2); kmax = ex(3)
|
if (!fh) return;
|
||||||
const int imaxF = ex1;
|
symmetry_bd(ord, ex, f, fh, SoA);
|
||||||
const int jmaxF = ex2;
|
|
||||||
const int kmaxF = ex3;
|
|
||||||
|
|
||||||
// Fortran:
|
const double d2dx = ONE / TWO / dX;
|
||||||
// imin=jmin=kmin=1; 若满足对称条件则设为 -2
|
const double d2dy = ONE / TWO / dY;
|
||||||
int iminF = 1, jminF = 1, kminF = 1;
|
const double d2dz = ONE / TWO / dZ;
|
||||||
if (Symmetry > NO_SYMM && fabs(Z[0]) < dZ) kminF = -2;
|
|
||||||
if (Symmetry > EQ_SYMM && fabs(X[0]) < dX) iminF = -2;
|
|
||||||
if (Symmetry > EQ_SYMM && fabs(Y[0]) < dY) jminF = -2;
|
|
||||||
|
|
||||||
// 分配 fh:大小 (ex1+3)*(ex2+3)*(ex3+3)
|
const int imaxF = ex1, jmaxF = ex2, kmaxF = ex3;
|
||||||
const size_t nx = (size_t)ex1 + 3;
|
|
||||||
const size_t ny = (size_t)ex2 + 3;
|
|
||||||
const size_t nz = (size_t)ex3 + 3;
|
|
||||||
const size_t fh_size = nx * ny * nz;
|
|
||||||
|
|
||||||
double *fh = (double*)malloc(fh_size * sizeof(double));
|
for (int k0 = 0; k0 <= ex3 - 2; ++k0) {
|
||||||
if (!fh) return; // 内存不足:直接返回(你也可以改成 abort/报错)
|
const int kF = k0 + 1;
|
||||||
|
for (int j0 = 0; j0 <= ex2 - 2; ++j0) {
|
||||||
|
const int jF = j0 + 1;
|
||||||
|
for (int i0 = 0; i0 <= ex1 - 2; ++i0) {
|
||||||
|
const int iF = i0 + 1;
|
||||||
|
const size_t p = idx_ex(i0, j0, k0, ex);
|
||||||
|
|
||||||
// Fortran: call symmetry_bd(3,ex,f,fh,SoA)
|
/* x-direction */
|
||||||
symmetry_bd(3, ex, f, fh, SoA);
|
const double sfx = Sfx[p];
|
||||||
|
if (sfx > ZEO) {
|
||||||
/*
|
if (i0 <= ex1 - 3) // i+2 <= imax
|
||||||
* Fortran 主循环:
|
f_rhs[p] += sfx * d2dx * (
|
||||||
* do k=1,ex(3)-1
|
-F3*fh[idx_fh_F_ord2(iF, jF, kF, ex)] +
|
||||||
* do j=1,ex(2)-1
|
F4*fh[idx_fh_F_ord2(iF+1, jF, kF, ex)] -
|
||||||
* do i=1,ex(1)-1
|
fh[idx_fh_F_ord2(iF+2, jF, kF, ex)]);
|
||||||
*
|
else if (i0 <= ex1 - 2) // i+1 <= imax
|
||||||
* 转成 C 0-based:
|
f_rhs[p] += sfx * d2dx * (
|
||||||
* k0 = 0..ex3-2, j0 = 0..ex2-2, i0 = 0..ex1-2
|
-fh[idx_fh_F_ord2(iF, jF, kF, ex)] +
|
||||||
*
|
fh[idx_fh_F_ord2(iF+1, jF, kF, ex)]);
|
||||||
* 并且 Fortran 里的 i/j/k 在 fh 访问时,仍然是 Fortran 索引值:
|
} else if (sfx < ZEO) {
|
||||||
* iF=i0+1, jF=j0+1, kF=k0+1
|
if ((i0 - 1) >= iminF) // i-2 >= imin
|
||||||
*/
|
f_rhs[p] -= sfx * d2dx * (
|
||||||
for (int k0 = 0; k0 <= ex3 - 2; ++k0) {
|
-F3*fh[idx_fh_F_ord2(iF, jF, kF, ex)] +
|
||||||
const int kF = k0 + 1;
|
F4*fh[idx_fh_F_ord2(iF-1, jF, kF, ex)] -
|
||||||
for (int j0 = 0; j0 <= ex2 - 2; ++j0) {
|
fh[idx_fh_F_ord2(iF-2, jF, kF, ex)]);
|
||||||
const int jF = j0 + 1;
|
else if (i0 >= iminF) // i-1 >= imin
|
||||||
for (int i0 = 0; i0 <= ex1 - 2; ++i0) {
|
f_rhs[p] -= sfx * d2dx * (
|
||||||
const int iF = i0 + 1;
|
-fh[idx_fh_F_ord2(iF, jF, kF, ex)] +
|
||||||
|
fh[idx_fh_F_ord2(iF-1, jF, kF, ex)]);
|
||||||
const size_t p = idx_ex(i0, j0, k0, ex);
|
|
||||||
|
|
||||||
// ---------------- x direction ----------------
|
|
||||||
const double sfx = Sfx[p];
|
|
||||||
if (sfx > ZEO) {
|
|
||||||
// Fortran: if(i+3 <= imax)
|
|
||||||
// iF+3 <= ex1 <=> i0+4 <= ex1 <=> i0 <= ex1-4
|
|
||||||
if (i0 <= ex1 - 4) {
|
|
||||||
f_rhs[p] += sfx * d12dx *
|
|
||||||
(-F3 * fh[idx_fh_F(iF - 1, jF, kF, ex)]
|
|
||||||
-F10 * fh[idx_fh_F(iF , jF, kF, ex)]
|
|
||||||
+F18 * fh[idx_fh_F(iF + 1, jF, kF, ex)]
|
|
||||||
-F6 * fh[idx_fh_F(iF + 2, jF, kF, ex)]
|
|
||||||
+ fh[idx_fh_F(iF + 3, jF, kF, ex)]);
|
|
||||||
}
|
}
|
||||||
// elseif(i+2 <= imax) <=> i0 <= ex1-3
|
|
||||||
else if (i0 <= ex1 - 3) {
|
|
||||||
f_rhs[p] += sfx * d12dx *
|
|
||||||
( fh[idx_fh_F(iF - 2, jF, kF, ex)]
|
|
||||||
-EIT * fh[idx_fh_F(iF - 1, jF, kF, ex)]
|
|
||||||
+EIT * fh[idx_fh_F(iF + 1, jF, kF, ex)]
|
|
||||||
- fh[idx_fh_F(iF + 2, jF, kF, ex)]);
|
|
||||||
}
|
|
||||||
// elseif(i+1 <= imax) <=> i0 <= ex1-2(循环里总成立)
|
|
||||||
else if (i0 <= ex1 - 2) {
|
|
||||||
f_rhs[p] -= sfx * d12dx *
|
|
||||||
(-F3 * fh[idx_fh_F(iF + 1, jF, kF, ex)]
|
|
||||||
-F10 * fh[idx_fh_F(iF , jF, kF, ex)]
|
|
||||||
+F18 * fh[idx_fh_F(iF - 1, jF, kF, ex)]
|
|
||||||
-F6 * fh[idx_fh_F(iF - 2, jF, kF, ex)]
|
|
||||||
+ fh[idx_fh_F(iF - 3, jF, kF, ex)]);
|
|
||||||
}
|
|
||||||
} else if (sfx < ZEO) {
|
|
||||||
// Fortran: if(i-3 >= imin)
|
|
||||||
// (iF-3) >= iminF <=> (i0-2) >= iminF
|
|
||||||
if ((i0 - 2) >= iminF) {
|
|
||||||
f_rhs[p] -= sfx * d12dx *
|
|
||||||
(-F3 * fh[idx_fh_F(iF + 1, jF, kF, ex)]
|
|
||||||
-F10 * fh[idx_fh_F(iF , jF, kF, ex)]
|
|
||||||
+F18 * fh[idx_fh_F(iF - 1, jF, kF, ex)]
|
|
||||||
-F6 * fh[idx_fh_F(iF - 2, jF, kF, ex)]
|
|
||||||
+ fh[idx_fh_F(iF - 3, jF, kF, ex)]);
|
|
||||||
}
|
|
||||||
// elseif(i-2 >= imin) <=> (i0-1) >= iminF
|
|
||||||
else if ((i0 - 1) >= iminF) {
|
|
||||||
f_rhs[p] += sfx * d12dx *
|
|
||||||
( fh[idx_fh_F(iF - 2, jF, kF, ex)]
|
|
||||||
-EIT * fh[idx_fh_F(iF - 1, jF, kF, ex)]
|
|
||||||
+EIT * fh[idx_fh_F(iF + 1, jF, kF, ex)]
|
|
||||||
- fh[idx_fh_F(iF + 2, jF, kF, ex)]);
|
|
||||||
}
|
|
||||||
// elseif(i-1 >= imin) <=> i0 >= iminF
|
|
||||||
else if (i0 >= iminF) {
|
|
||||||
f_rhs[p] += sfx * d12dx *
|
|
||||||
(-F3 * fh[idx_fh_F(iF - 1, jF, kF, ex)]
|
|
||||||
-F10 * fh[idx_fh_F(iF , jF, kF, ex)]
|
|
||||||
+F18 * fh[idx_fh_F(iF + 1, jF, kF, ex)]
|
|
||||||
-F6 * fh[idx_fh_F(iF + 2, jF, kF, ex)]
|
|
||||||
+ fh[idx_fh_F(iF + 3, jF, kF, ex)]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// ---------------- y direction ----------------
|
/* y-direction */
|
||||||
const double sfy = Sfy[p];
|
const double sfy = Sfy[p];
|
||||||
if (sfy > ZEO) {
|
if (sfy > ZEO) {
|
||||||
// jF+3 <= ex2 <=> j0+4 <= ex2 <=> j0 <= ex2-4
|
if (j0 <= ex2-3)
|
||||||
if (j0 <= ex2 - 4) {
|
f_rhs[p] += sfy * d2dy * (
|
||||||
f_rhs[p] += sfy * d12dy *
|
-F3*fh[idx_fh_F_ord2(iF, jF, kF, ex)] +
|
||||||
(-F3 * fh[idx_fh_F(iF, jF - 1, kF, ex)]
|
F4*fh[idx_fh_F_ord2(iF, jF+1, kF, ex)] -
|
||||||
-F10 * fh[idx_fh_F(iF, jF , kF, ex)]
|
fh[idx_fh_F_ord2(iF, jF+2, kF, ex)]);
|
||||||
+F18 * fh[idx_fh_F(iF, jF + 1, kF, ex)]
|
else if (j0 <= ex2-2)
|
||||||
-F6 * fh[idx_fh_F(iF, jF + 2, kF, ex)]
|
f_rhs[p] += sfy * d2dy * (
|
||||||
+ fh[idx_fh_F(iF, jF + 3, kF, ex)]);
|
-fh[idx_fh_F_ord2(iF, jF, kF, ex)] +
|
||||||
} else if (j0 <= ex2 - 3) {
|
fh[idx_fh_F_ord2(iF, jF+1, kF, ex)]);
|
||||||
f_rhs[p] += sfy * d12dy *
|
} else if (sfy < ZEO) {
|
||||||
( fh[idx_fh_F(iF, jF - 2, kF, ex)]
|
if ((j0-1) >= jminF)
|
||||||
-EIT * fh[idx_fh_F(iF, jF - 1, kF, ex)]
|
f_rhs[p] -= sfy * d2dy * (
|
||||||
+EIT * fh[idx_fh_F(iF, jF + 1, kF, ex)]
|
-F3*fh[idx_fh_F_ord2(iF, jF, kF, ex)] +
|
||||||
- fh[idx_fh_F(iF, jF + 2, kF, ex)]);
|
F4*fh[idx_fh_F_ord2(iF, jF-1, kF, ex)] -
|
||||||
} else if (j0 <= ex2 - 2) {
|
fh[idx_fh_F_ord2(iF, jF-2, kF, ex)]);
|
||||||
f_rhs[p] -= sfy * d12dy *
|
else if (j0 >= jminF)
|
||||||
(-F3 * fh[idx_fh_F(iF, jF + 1, kF, ex)]
|
f_rhs[p] -= sfy * d2dy * (
|
||||||
-F10 * fh[idx_fh_F(iF, jF , kF, ex)]
|
-fh[idx_fh_F_ord2(iF, jF, kF, ex)] +
|
||||||
+F18 * fh[idx_fh_F(iF, jF - 1, kF, ex)]
|
fh[idx_fh_F_ord2(iF, jF-1, kF, ex)]);
|
||||||
-F6 * fh[idx_fh_F(iF, jF - 2, kF, ex)]
|
|
||||||
+ fh[idx_fh_F(iF, jF - 3, kF, ex)]);
|
|
||||||
}
|
}
|
||||||
} else if (sfy < ZEO) {
|
|
||||||
if ((j0 - 2) >= jminF) {
|
|
||||||
f_rhs[p] -= sfy * d12dy *
|
|
||||||
(-F3 * fh[idx_fh_F(iF, jF + 1, kF, ex)]
|
|
||||||
-F10 * fh[idx_fh_F(iF, jF , kF, ex)]
|
|
||||||
+F18 * fh[idx_fh_F(iF, jF - 1, kF, ex)]
|
|
||||||
-F6 * fh[idx_fh_F(iF, jF - 2, kF, ex)]
|
|
||||||
+ fh[idx_fh_F(iF, jF - 3, kF, ex)]);
|
|
||||||
} else if ((j0 - 1) >= jminF) {
|
|
||||||
f_rhs[p] += sfy * d12dy *
|
|
||||||
( fh[idx_fh_F(iF, jF - 2, kF, ex)]
|
|
||||||
-EIT * fh[idx_fh_F(iF, jF - 1, kF, ex)]
|
|
||||||
+EIT * fh[idx_fh_F(iF, jF + 1, kF, ex)]
|
|
||||||
- fh[idx_fh_F(iF, jF + 2, kF, ex)]);
|
|
||||||
} else if (j0 >= jminF) {
|
|
||||||
f_rhs[p] += sfy * d12dy *
|
|
||||||
(-F3 * fh[idx_fh_F(iF, jF - 1, kF, ex)]
|
|
||||||
-F10 * fh[idx_fh_F(iF, jF , kF, ex)]
|
|
||||||
+F18 * fh[idx_fh_F(iF, jF + 1, kF, ex)]
|
|
||||||
-F6 * fh[idx_fh_F(iF, jF + 2, kF, ex)]
|
|
||||||
+ fh[idx_fh_F(iF, jF + 3, kF, ex)]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// ---------------- z direction ----------------
|
/* z-direction */
|
||||||
const double sfz = Sfz[p];
|
const double sfz = Sfz[p];
|
||||||
if (sfz > ZEO) {
|
if (sfz > ZEO) {
|
||||||
if (k0 <= ex3 - 4) {
|
if (k0 <= ex3-3)
|
||||||
f_rhs[p] += sfz * d12dz *
|
f_rhs[p] += sfz * d2dz * (
|
||||||
(-F3 * fh[idx_fh_F(iF, jF, kF - 1, ex)]
|
-F3*fh[idx_fh_F_ord2(iF, jF, kF, ex)] +
|
||||||
-F10 * fh[idx_fh_F(iF, jF, kF , ex)]
|
F4*fh[idx_fh_F_ord2(iF, jF, kF+1, ex)] -
|
||||||
+F18 * fh[idx_fh_F(iF, jF, kF + 1, ex)]
|
fh[idx_fh_F_ord2(iF, jF, kF+2, ex)]);
|
||||||
-F6 * fh[idx_fh_F(iF, jF, kF + 2, ex)]
|
else if (k0 <= ex3-2)
|
||||||
+ fh[idx_fh_F(iF, jF, kF + 3, ex)]);
|
f_rhs[p] += sfz * d2dz * (
|
||||||
} else if (k0 <= ex3 - 3) {
|
-fh[idx_fh_F_ord2(iF, jF, kF, ex)] +
|
||||||
f_rhs[p] += sfz * d12dz *
|
fh[idx_fh_F_ord2(iF, jF, kF+1, ex)]);
|
||||||
( fh[idx_fh_F(iF, jF, kF - 2, ex)]
|
} else if (sfz < ZEO) {
|
||||||
-EIT * fh[idx_fh_F(iF, jF, kF - 1, ex)]
|
if ((k0-1) >= kminF)
|
||||||
+EIT * fh[idx_fh_F(iF, jF, kF + 1, ex)]
|
f_rhs[p] -= sfz * d2dz * (
|
||||||
- fh[idx_fh_F(iF, jF, kF + 2, ex)]);
|
-F3*fh[idx_fh_F_ord2(iF, jF, kF, ex)] +
|
||||||
} else if (k0 <= ex3 - 2) {
|
F4*fh[idx_fh_F_ord2(iF, jF, kF-1, ex)] -
|
||||||
f_rhs[p] -= sfz * d12dz *
|
fh[idx_fh_F_ord2(iF, jF, kF-2, ex)]);
|
||||||
(-F3 * fh[idx_fh_F(iF, jF, kF + 1, ex)]
|
else if (k0 >= kminF)
|
||||||
-F10 * fh[idx_fh_F(iF, jF, kF , ex)]
|
f_rhs[p] -= sfz * d2dz * (
|
||||||
+F18 * fh[idx_fh_F(iF, jF, kF - 1, ex)]
|
-fh[idx_fh_F_ord2(iF, jF, kF, ex)] +
|
||||||
-F6 * fh[idx_fh_F(iF, jF, kF - 2, ex)]
|
fh[idx_fh_F_ord2(iF, jF, kF-1, ex)]);
|
||||||
+ fh[idx_fh_F(iF, jF, kF - 3, ex)]);
|
|
||||||
}
|
|
||||||
} else if (sfz < ZEO) {
|
|
||||||
if ((k0 - 2) >= kminF) {
|
|
||||||
f_rhs[p] -= sfz * d12dz *
|
|
||||||
(-F3 * fh[idx_fh_F(iF, jF, kF + 1, ex)]
|
|
||||||
-F10 * fh[idx_fh_F(iF, jF, kF , ex)]
|
|
||||||
+F18 * fh[idx_fh_F(iF, jF, kF - 1, ex)]
|
|
||||||
-F6 * fh[idx_fh_F(iF, jF, kF - 2, ex)]
|
|
||||||
+ fh[idx_fh_F(iF, jF, kF - 3, ex)]);
|
|
||||||
} else if ((k0 - 1) >= kminF) {
|
|
||||||
f_rhs[p] += sfz * d12dz *
|
|
||||||
( fh[idx_fh_F(iF, jF, kF - 2, ex)]
|
|
||||||
-EIT * fh[idx_fh_F(iF, jF, kF - 1, ex)]
|
|
||||||
+EIT * fh[idx_fh_F(iF, jF, kF + 1, ex)]
|
|
||||||
- fh[idx_fh_F(iF, jF, kF + 2, ex)]);
|
|
||||||
} else if (k0 >= kminF) {
|
|
||||||
f_rhs[p] += sfz * d12dz *
|
|
||||||
(-F3 * fh[idx_fh_F(iF, jF, kF - 1, ex)]
|
|
||||||
-F10 * fh[idx_fh_F(iF, jF, kF , ex)]
|
|
||||||
+F18 * fh[idx_fh_F(iF, jF, kF + 1, ex)]
|
|
||||||
-F6 * fh[idx_fh_F(iF, jF, kF + 2, ex)]
|
|
||||||
+ fh[idx_fh_F(iF, jF, kF + 3, ex)]);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
free(fh);
|
||||||
|
return;
|
||||||
}
|
}
|
||||||
free(fh);
|
#elif (ghost_width == 3)
|
||||||
|
/* ---- 4th-order lopsided (original code) ---------------------------- */
|
||||||
|
{
|
||||||
|
const int ord = 3;
|
||||||
|
int iminF = 1, jminF = 1, kminF = 1;
|
||||||
|
if (Symmetry > NO_SYMM && fabs(Z[0]) < dZ) kminF = -2;
|
||||||
|
if (Symmetry > EQ_SYMM && fabs(X[0]) < dX) iminF = -2;
|
||||||
|
if (Symmetry > EQ_SYMM && fabs(Y[0]) < dY) jminF = -2;
|
||||||
|
|
||||||
|
const size_t nx = (size_t)ex1 + ord;
|
||||||
|
const size_t ny = (size_t)ex2 + ord;
|
||||||
|
const size_t nz = (size_t)ex3 + ord;
|
||||||
|
const size_t fh_size = nx * ny * nz;
|
||||||
|
|
||||||
|
double *fh = (double*)malloc(fh_size * sizeof(double));
|
||||||
|
if (!fh) return;
|
||||||
|
symmetry_bd(ord, ex, f, fh, SoA);
|
||||||
|
|
||||||
|
const double d12dx = ONE / F12 / dX;
|
||||||
|
const double d12dy = ONE / F12 / dY;
|
||||||
|
const double d12dz = ONE / F12 / dZ;
|
||||||
|
|
||||||
|
const int imaxF = ex1, jmaxF = ex2, kmaxF = ex3;
|
||||||
|
|
||||||
|
for (int k0 = 0; k0 <= ex3 - 2; ++k0) {
|
||||||
|
const int kF = k0 + 2;
|
||||||
|
for (int j0 = 0; j0 <= ex2 - 2; ++j0) {
|
||||||
|
const int jF = j0 + 2;
|
||||||
|
for (int i0 = 0; i0 <= ex1 - 2; ++i0) {
|
||||||
|
const int iF = i0 + 2;
|
||||||
|
const size_t p = idx_ex(i0, j0, k0, ex);
|
||||||
|
|
||||||
|
const double sfx = Sfx[p];
|
||||||
|
if (sfx > ZEO) {
|
||||||
|
if (i0 <= ex1 - 4) // i+3 <= imax
|
||||||
|
f_rhs[p] += sfx * d12dx * (
|
||||||
|
-F3 *fh[idx_fh_F(iF-1, jF, kF, ex)]
|
||||||
|
-F10*fh[idx_fh_F(iF, jF, kF, ex)]
|
||||||
|
+F18*fh[idx_fh_F(iF+1, jF, kF, ex)]
|
||||||
|
-F6 *fh[idx_fh_F(iF+2, jF, kF, ex)]
|
||||||
|
+ fh[idx_fh_F(iF+3, jF, kF, ex)]);
|
||||||
|
else if (i0 <= ex1 - 3) // i+2 <= imax
|
||||||
|
f_rhs[p] += sfx * d12dx * (
|
||||||
|
fh[idx_fh_F(iF-2, jF, kF, ex)]
|
||||||
|
-EIT*fh[idx_fh_F(iF-1, jF, kF, ex)]
|
||||||
|
+EIT*fh[idx_fh_F(iF+1, jF, kF, ex)]
|
||||||
|
- fh[idx_fh_F(iF+2, jF, kF, ex)]);
|
||||||
|
else if (i0 <= ex1 - 2) // i+1 <= imax → mirrored
|
||||||
|
f_rhs[p] -= sfx * d12dx * (
|
||||||
|
-F3 *fh[idx_fh_F(iF+1, jF, kF, ex)]
|
||||||
|
-F10*fh[idx_fh_F(iF, jF, kF, ex)]
|
||||||
|
+F18*fh[idx_fh_F(iF-1, jF, kF, ex)]
|
||||||
|
-F6 *fh[idx_fh_F(iF-2, jF, kF, ex)]
|
||||||
|
+ fh[idx_fh_F(iF-3, jF, kF, ex)]);
|
||||||
|
} else if (sfx < ZEO) {
|
||||||
|
if ((i0 - 2) >= iminF) // i-3 >= imin
|
||||||
|
f_rhs[p] -= sfx * d12dx * (
|
||||||
|
-F3 *fh[idx_fh_F(iF+1, jF, kF, ex)]
|
||||||
|
-F10*fh[idx_fh_F(iF, jF, kF, ex)]
|
||||||
|
+F18*fh[idx_fh_F(iF-1, jF, kF, ex)]
|
||||||
|
-F6 *fh[idx_fh_F(iF-2, jF, kF, ex)]
|
||||||
|
+ fh[idx_fh_F(iF-3, jF, kF, ex)]);
|
||||||
|
else if ((i0 - 1) >= iminF) // i-2 >= imin
|
||||||
|
f_rhs[p] += sfx * d12dx * (
|
||||||
|
fh[idx_fh_F(iF-2, jF, kF, ex)]
|
||||||
|
-EIT*fh[idx_fh_F(iF-1, jF, kF, ex)]
|
||||||
|
+EIT*fh[idx_fh_F(iF+1, jF, kF, ex)]
|
||||||
|
- fh[idx_fh_F(iF+2, jF, kF, ex)]);
|
||||||
|
else if (i0 >= iminF) // i-1 >= imin → mirrored
|
||||||
|
f_rhs[p] += sfx * d12dx * (
|
||||||
|
-F3 *fh[idx_fh_F(iF-1, jF, kF, ex)]
|
||||||
|
-F10*fh[idx_fh_F(iF, jF, kF, ex)]
|
||||||
|
+F18*fh[idx_fh_F(iF+1, jF, kF, ex)]
|
||||||
|
-F6 *fh[idx_fh_F(iF+2, jF, kF, ex)]
|
||||||
|
+ fh[idx_fh_F(iF+3, jF, kF, ex)]);
|
||||||
|
}
|
||||||
|
|
||||||
|
const double sfy = Sfy[p];
|
||||||
|
if (sfy > ZEO) {
|
||||||
|
if (j0 <= ex2-4)
|
||||||
|
f_rhs[p] += sfy * d12dy * (
|
||||||
|
-F3*fh[idx_fh_F(iF,jF-1,kF,ex)]-F10*fh[idx_fh_F(iF,jF,kF,ex)]
|
||||||
|
+F18*fh[idx_fh_F(iF,jF+1,kF,ex)]-F6*fh[idx_fh_F(iF,jF+2,kF,ex)]
|
||||||
|
+fh[idx_fh_F(iF,jF+3,kF,ex)]);
|
||||||
|
else if (j0 <= ex2-3)
|
||||||
|
f_rhs[p] += sfy * d12dy * (fh[idx_fh_F(iF,jF-2,kF,ex)]-EIT*fh[idx_fh_F(iF,jF-1,kF,ex)]+EIT*fh[idx_fh_F(iF,jF+1,kF,ex)]-fh[idx_fh_F(iF,jF+2,kF,ex)]);
|
||||||
|
else if (j0 <= ex2-2)
|
||||||
|
f_rhs[p] -= sfy * d12dy * (
|
||||||
|
-F3*fh[idx_fh_F(iF,jF+1,kF,ex)]-F10*fh[idx_fh_F(iF,jF,kF,ex)]
|
||||||
|
+F18*fh[idx_fh_F(iF,jF-1,kF,ex)]-F6*fh[idx_fh_F(iF,jF-2,kF,ex)]
|
||||||
|
+fh[idx_fh_F(iF,jF-3,kF,ex)]);
|
||||||
|
} else if (sfy < ZEO) {
|
||||||
|
if ((j0-2) >= jminF)
|
||||||
|
f_rhs[p] -= sfy * d12dy * (
|
||||||
|
-F3*fh[idx_fh_F(iF,jF+1,kF,ex)]-F10*fh[idx_fh_F(iF,jF,kF,ex)]
|
||||||
|
+F18*fh[idx_fh_F(iF,jF-1,kF,ex)]-F6*fh[idx_fh_F(iF,jF-2,kF,ex)]
|
||||||
|
+fh[idx_fh_F(iF,jF-3,kF,ex)]);
|
||||||
|
else if ((j0-1) >= jminF)
|
||||||
|
f_rhs[p] += sfy * d12dy * (fh[idx_fh_F(iF,jF-2,kF,ex)]-EIT*fh[idx_fh_F(iF,jF-1,kF,ex)]+EIT*fh[idx_fh_F(iF,jF+1,kF,ex)]-fh[idx_fh_F(iF,jF+2,kF,ex)]);
|
||||||
|
else if (j0 >= jminF)
|
||||||
|
f_rhs[p] += sfy * d12dy * (
|
||||||
|
-F3*fh[idx_fh_F(iF,jF-1,kF,ex)]-F10*fh[idx_fh_F(iF,jF,kF,ex)]
|
||||||
|
+F18*fh[idx_fh_F(iF,jF+1,kF,ex)]-F6*fh[idx_fh_F(iF,jF+2,kF,ex)]
|
||||||
|
+fh[idx_fh_F(iF,jF+3,kF,ex)]);
|
||||||
|
}
|
||||||
|
|
||||||
|
const double sfz = Sfz[p];
|
||||||
|
if (sfz > ZEO) {
|
||||||
|
if (k0 <= ex3-4)
|
||||||
|
f_rhs[p] += sfz * d12dz * (
|
||||||
|
-F3*fh[idx_fh_F(iF,jF,kF-1,ex)]-F10*fh[idx_fh_F(iF,jF,kF,ex)]
|
||||||
|
+F18*fh[idx_fh_F(iF,jF,kF+1,ex)]-F6*fh[idx_fh_F(iF,jF,kF+2,ex)]
|
||||||
|
+fh[idx_fh_F(iF,jF,kF+3,ex)]);
|
||||||
|
else if (k0 <= ex3-3)
|
||||||
|
f_rhs[p] += sfz * d12dz * (fh[idx_fh_F(iF,jF,kF-2,ex)]-EIT*fh[idx_fh_F(iF,jF,kF-1,ex)]+EIT*fh[idx_fh_F(iF,jF,kF+1,ex)]-fh[idx_fh_F(iF,jF,kF+2,ex)]);
|
||||||
|
else if (k0 <= ex3-2)
|
||||||
|
f_rhs[p] -= sfz * d12dz * (
|
||||||
|
-F3*fh[idx_fh_F(iF,jF,kF+1,ex)]-F10*fh[idx_fh_F(iF,jF,kF,ex)]
|
||||||
|
+F18*fh[idx_fh_F(iF,jF,kF-1,ex)]-F6*fh[idx_fh_F(iF,jF,kF-2,ex)]
|
||||||
|
+fh[idx_fh_F(iF,jF,kF-3,ex)]);
|
||||||
|
} else if (sfz < ZEO) {
|
||||||
|
if ((k0-2) >= kminF)
|
||||||
|
f_rhs[p] -= sfz * d12dz * (
|
||||||
|
-F3*fh[idx_fh_F(iF,jF,kF+1,ex)]-F10*fh[idx_fh_F(iF,jF,kF,ex)]
|
||||||
|
+F18*fh[idx_fh_F(iF,jF,kF-1,ex)]-F6*fh[idx_fh_F(iF,jF,kF-2,ex)]
|
||||||
|
+fh[idx_fh_F(iF,jF,kF-3,ex)]);
|
||||||
|
else if ((k0-1) >= kminF)
|
||||||
|
f_rhs[p] += sfz * d12dz * (fh[idx_fh_F(iF,jF,kF-2,ex)]-EIT*fh[idx_fh_F(iF,jF,kF-1,ex)]+EIT*fh[idx_fh_F(iF,jF,kF+1,ex)]-fh[idx_fh_F(iF,jF,kF+2,ex)]);
|
||||||
|
else if (k0 >= kminF)
|
||||||
|
f_rhs[p] += sfz * d12dz * (
|
||||||
|
-F3*fh[idx_fh_F(iF,jF,kF-1,ex)]-F10*fh[idx_fh_F(iF,jF,kF,ex)]
|
||||||
|
+F18*fh[idx_fh_F(iF,jF,kF+1,ex)]-F6*fh[idx_fh_F(iF,jF,kF+2,ex)]
|
||||||
|
+fh[idx_fh_F(iF,jF,kF+3,ex)]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
free(fh);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
#elif (ghost_width == 4)
|
||||||
|
/* ---- 6th-order lopsided --------------------------------------------- */
|
||||||
|
{
|
||||||
|
const int ord = 4;
|
||||||
|
int iminF = 1, jminF = 1, kminF = 1;
|
||||||
|
if (Symmetry > NO_SYMM && fabs(Z[0]) < dZ) kminF = -3;
|
||||||
|
if (Symmetry > EQ_SYMM && fabs(X[0]) < dX) iminF = -3;
|
||||||
|
if (Symmetry > EQ_SYMM && fabs(Y[0]) < dY) jminF = -3;
|
||||||
|
|
||||||
|
const size_t nx = (size_t)ex1 + ord;
|
||||||
|
const size_t ny = (size_t)ex2 + ord;
|
||||||
|
const size_t nz = (size_t)ex3 + ord;
|
||||||
|
const size_t fh_size = nx * ny * nz;
|
||||||
|
|
||||||
|
double *fh = (double*)malloc(fh_size * sizeof(double));
|
||||||
|
if (!fh) return;
|
||||||
|
symmetry_bd(ord, ex, f, fh, SoA);
|
||||||
|
|
||||||
|
const double d60dx = ONE / F60 / dX;
|
||||||
|
const double d60dy = ONE / F60 / dY;
|
||||||
|
const double d60dz = ONE / F60 / dZ;
|
||||||
|
const double d12dx = ONE / F12 / dX;
|
||||||
|
const double d12dy = ONE / F12 / dY;
|
||||||
|
const double d12dz = ONE / F12 / dZ;
|
||||||
|
const double d2dx = ONE / TWO / dX;
|
||||||
|
const double d2dy = ONE / TWO / dY;
|
||||||
|
const double d2dz = ONE / TWO / dZ;
|
||||||
|
|
||||||
|
const int imaxF = ex1, jmaxF = ex2, kmaxF = ex3;
|
||||||
|
|
||||||
|
for (int k0 = 0; k0 <= ex3 - 2; ++k0) {
|
||||||
|
const int kF = k0 + 3;
|
||||||
|
for (int j0 = 0; j0 <= ex2 - 2; ++j0) {
|
||||||
|
const int jF = j0 + 3;
|
||||||
|
for (int i0 = 0; i0 <= ex1 - 2; ++i0) {
|
||||||
|
const int iF = i0 + 3;
|
||||||
|
const size_t p = idx_ex(i0, j0, k0, ex);
|
||||||
|
|
||||||
|
/* ---- x-direction ---- */
|
||||||
|
const double sfx = Sfx[p];
|
||||||
|
if (sfx > ZEO) {
|
||||||
|
/* Primary biased: 2*f(i-2)-24*f(i-1)-35*f(i)+80*f(i+1)-30*f(i+2)+8*f(i+3)-f(i+4) */
|
||||||
|
if (i0 <= ex1-5 && (i0-1)>=iminF) // i+4<=imax && i-2>=imin
|
||||||
|
f_rhs[p] += sfx * d60dx * (
|
||||||
|
+F2*fh[idx_fh_F_ord4(iF-2,jF,kF,ex)]-F24*fh[idx_fh_F_ord4(iF-1,jF,kF,ex)]
|
||||||
|
-F35*fh[idx_fh_F_ord4(iF,jF,kF,ex)]+F80*fh[idx_fh_F_ord4(iF+1,jF,kF,ex)]
|
||||||
|
-F30*fh[idx_fh_F_ord4(iF+2,jF,kF,ex)]+EIT*fh[idx_fh_F_ord4(iF+3,jF,kF,ex)]
|
||||||
|
-fh[idx_fh_F_ord4(iF+4,jF,kF,ex)]);
|
||||||
|
/* Boundary-adapted: -10*f(i-1)-77*f(i)+150*f(i+1)-100*f(i+2)+50*f(i+3)-15*f(i+4)+2*f(i+5) */
|
||||||
|
else if (i0 <= ex1-6 && i0 >= iminF) // i+5<=imax && i-1>=imin
|
||||||
|
f_rhs[p] += sfx * d60dx * (
|
||||||
|
-F10*fh[idx_fh_F_ord4(iF-1,jF,kF,ex)]-F77*fh[idx_fh_F_ord4(iF,jF,kF,ex)]
|
||||||
|
+F150*fh[idx_fh_F_ord4(iF+1,jF,kF,ex)]-F100*fh[idx_fh_F_ord4(iF+2,jF,kF,ex)]
|
||||||
|
+F50*fh[idx_fh_F_ord4(iF+3,jF,kF,ex)]-F15*fh[idx_fh_F_ord4(iF+4,jF,kF,ex)]
|
||||||
|
+F2*fh[idx_fh_F_ord4(iF+5,jF,kF,ex)]);
|
||||||
|
/* Centered fallbacks */
|
||||||
|
else if (i0 <= ex1-4 && (i0-2)>=iminF) // 6th: i+3<=imax && i-3>=imin
|
||||||
|
f_rhs[p] += sfx * d60dx * (
|
||||||
|
-fh[idx_fh_F_ord4(iF-3,jF,kF,ex)]+F9*fh[idx_fh_F_ord4(iF-2,jF,kF,ex)]
|
||||||
|
-F45*fh[idx_fh_F_ord4(iF-1,jF,kF,ex)]+F45*fh[idx_fh_F_ord4(iF+1,jF,kF,ex)]
|
||||||
|
-F9*fh[idx_fh_F_ord4(iF+2,jF,kF,ex)]+fh[idx_fh_F_ord4(iF+3,jF,kF,ex)]);
|
||||||
|
else if (i0 <= ex1-3 && (i0-1)>=iminF) // 4th
|
||||||
|
f_rhs[p] += sfx * d12dx * (
|
||||||
|
fh[idx_fh_F_ord4(iF-2,jF,kF,ex)]-EIT*fh[idx_fh_F_ord4(iF-1,jF,kF,ex)]
|
||||||
|
+EIT*fh[idx_fh_F_ord4(iF+1,jF,kF,ex)]-fh[idx_fh_F_ord4(iF+2,jF,kF,ex)]);
|
||||||
|
else if (i0 <= ex1-2 && i0>=iminF) // 2nd
|
||||||
|
f_rhs[p] += sfx * d2dx * (
|
||||||
|
-fh[idx_fh_F_ord4(iF-1,jF,kF,ex)]+fh[idx_fh_F_ord4(iF+1,jF,kF,ex)]);
|
||||||
|
} else if (sfx < ZEO) {
|
||||||
|
if ((i0-4)>=iminF && i0<=ex1-2) // i-4>=imin && i+2<=imax
|
||||||
|
f_rhs[p] -= sfx * d60dx * (
|
||||||
|
+F2*fh[idx_fh_F_ord4(iF+2,jF,kF,ex)]-F24*fh[idx_fh_F_ord4(iF+1,jF,kF,ex)]
|
||||||
|
-F35*fh[idx_fh_F_ord4(iF,jF,kF,ex)]+F80*fh[idx_fh_F_ord4(iF-1,jF,kF,ex)]
|
||||||
|
-F30*fh[idx_fh_F_ord4(iF-2,jF,kF,ex)]+EIT*fh[idx_fh_F_ord4(iF-3,jF,kF,ex)]
|
||||||
|
-fh[idx_fh_F_ord4(iF-4,jF,kF,ex)]);
|
||||||
|
else if ((i0-5)>=iminF && i0<=ex1-2) // i-5>=imin && i+1<=imax
|
||||||
|
f_rhs[p] -= sfx * d60dx * (
|
||||||
|
-F10*fh[idx_fh_F_ord4(iF+1,jF,kF,ex)]-F77*fh[idx_fh_F_ord4(iF,jF,kF,ex)]
|
||||||
|
+F150*fh[idx_fh_F_ord4(iF-1,jF,kF,ex)]-F100*fh[idx_fh_F_ord4(iF-2,jF,kF,ex)]
|
||||||
|
+F50*fh[idx_fh_F_ord4(iF-3,jF,kF,ex)]-F15*fh[idx_fh_F_ord4(iF-4,jF,kF,ex)]
|
||||||
|
+F2*fh[idx_fh_F_ord4(iF-5,jF,kF,ex)]);
|
||||||
|
else if ((i0-3)>=iminF && i0<=ex1-2) // 6th centered
|
||||||
|
f_rhs[p] -= sfx * d60dx * (
|
||||||
|
-fh[idx_fh_F_ord4(iF-3,jF,kF,ex)]+F9*fh[idx_fh_F_ord4(iF-2,jF,kF,ex)]
|
||||||
|
-F45*fh[idx_fh_F_ord4(iF-1,jF,kF,ex)]+F45*fh[idx_fh_F_ord4(iF+1,jF,kF,ex)]
|
||||||
|
-F9*fh[idx_fh_F_ord4(iF+2,jF,kF,ex)]+fh[idx_fh_F_ord4(iF+3,jF,kF,ex)]);
|
||||||
|
else if ((i0-2)>=iminF && i0<=ex1-2) // 4th
|
||||||
|
f_rhs[p] -= sfx * d12dx * (
|
||||||
|
fh[idx_fh_F_ord4(iF-2,jF,kF,ex)]-EIT*fh[idx_fh_F_ord4(iF-1,jF,kF,ex)]
|
||||||
|
+EIT*fh[idx_fh_F_ord4(iF+1,jF,kF,ex)]-fh[idx_fh_F_ord4(iF+2,jF,kF,ex)]);
|
||||||
|
else if ((i0-1)>=iminF && i0<=ex1-2) // 2nd
|
||||||
|
f_rhs[p] -= sfx * d2dx * (
|
||||||
|
-fh[idx_fh_F_ord4(iF-1,jF,kF,ex)]+fh[idx_fh_F_ord4(iF+1,jF,kF,ex)]);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ---- y-direction ---- */
|
||||||
|
const double sfy = Sfy[p];
|
||||||
|
if (sfy > ZEO) {
|
||||||
|
if (j0<=ex2-5 && (j0-1)>=jminF)
|
||||||
|
f_rhs[p] += sfy * d60dy*(F2*fh[idx_fh_F_ord4(iF,jF-2,kF,ex)]-F24*fh[idx_fh_F_ord4(iF,jF-1,kF,ex)]-F35*fh[idx_fh_F_ord4(iF,jF,kF,ex)]+F80*fh[idx_fh_F_ord4(iF,jF+1,kF,ex)]-F30*fh[idx_fh_F_ord4(iF,jF+2,kF,ex)]+EIT*fh[idx_fh_F_ord4(iF,jF+3,kF,ex)]-fh[idx_fh_F_ord4(iF,jF+4,kF,ex)]);
|
||||||
|
else if (j0<=ex2-6 && j0>=jminF)
|
||||||
|
f_rhs[p] += sfy * d60dy*(-F10*fh[idx_fh_F_ord4(iF,jF-1,kF,ex)]-F77*fh[idx_fh_F_ord4(iF,jF,kF,ex)]+F150*fh[idx_fh_F_ord4(iF,jF+1,kF,ex)]-F100*fh[idx_fh_F_ord4(iF,jF+2,kF,ex)]+F50*fh[idx_fh_F_ord4(iF,jF+3,kF,ex)]-F15*fh[idx_fh_F_ord4(iF,jF+4,kF,ex)]+F2*fh[idx_fh_F_ord4(iF,jF+5,kF,ex)]);
|
||||||
|
else if (j0<=ex2-4 && (j0-2)>=jminF)
|
||||||
|
f_rhs[p] += sfy * d60dy*(-fh[idx_fh_F_ord4(iF,jF-3,kF,ex)]+F9*fh[idx_fh_F_ord4(iF,jF-2,kF,ex)]-F45*fh[idx_fh_F_ord4(iF,jF-1,kF,ex)]+F45*fh[idx_fh_F_ord4(iF,jF+1,kF,ex)]-F9*fh[idx_fh_F_ord4(iF,jF+2,kF,ex)]+fh[idx_fh_F_ord4(iF,jF+3,kF,ex)]);
|
||||||
|
else if (j0<=ex2-3 && (j0-1)>=jminF)
|
||||||
|
f_rhs[p] += sfy * d12dy*(fh[idx_fh_F_ord4(iF,jF-2,kF,ex)]-EIT*fh[idx_fh_F_ord4(iF,jF-1,kF,ex)]+EIT*fh[idx_fh_F_ord4(iF,jF+1,kF,ex)]-fh[idx_fh_F_ord4(iF,jF+2,kF,ex)]);
|
||||||
|
else if (j0<=ex2-2 && j0>=jminF)
|
||||||
|
f_rhs[p] += sfy * d2dy*(-fh[idx_fh_F_ord4(iF,jF-1,kF,ex)]+fh[idx_fh_F_ord4(iF,jF+1,kF,ex)]);
|
||||||
|
} else if (sfy < ZEO) {
|
||||||
|
if ((j0-4)>=jminF && j0<=ex2-2)
|
||||||
|
f_rhs[p] -= sfy * d60dy*(F2*fh[idx_fh_F_ord4(iF,jF+2,kF,ex)]-F24*fh[idx_fh_F_ord4(iF,jF+1,kF,ex)]-F35*fh[idx_fh_F_ord4(iF,jF,kF,ex)]+F80*fh[idx_fh_F_ord4(iF,jF-1,kF,ex)]-F30*fh[idx_fh_F_ord4(iF,jF-2,kF,ex)]+EIT*fh[idx_fh_F_ord4(iF,jF-3,kF,ex)]-fh[idx_fh_F_ord4(iF,jF-4,kF,ex)]);
|
||||||
|
else if ((j0-5)>=jminF && j0<=ex2-2)
|
||||||
|
f_rhs[p] -= sfy * d60dy*(-F10*fh[idx_fh_F_ord4(iF,jF+1,kF,ex)]-F77*fh[idx_fh_F_ord4(iF,jF,kF,ex)]+F150*fh[idx_fh_F_ord4(iF,jF-1,kF,ex)]-F100*fh[idx_fh_F_ord4(iF,jF-2,kF,ex)]+F50*fh[idx_fh_F_ord4(iF,jF-3,kF,ex)]-F15*fh[idx_fh_F_ord4(iF,jF-4,kF,ex)]+F2*fh[idx_fh_F_ord4(iF,jF-5,kF,ex)]);
|
||||||
|
else if ((j0-3)>=jminF && j0<=ex2-2)
|
||||||
|
f_rhs[p] -= sfy * d60dy*(-fh[idx_fh_F_ord4(iF,jF-3,kF,ex)]+F9*fh[idx_fh_F_ord4(iF,jF-2,kF,ex)]-F45*fh[idx_fh_F_ord4(iF,jF-1,kF,ex)]+F45*fh[idx_fh_F_ord4(iF,jF+1,kF,ex)]-F9*fh[idx_fh_F_ord4(iF,jF+2,kF,ex)]+fh[idx_fh_F_ord4(iF,jF+3,kF,ex)]);
|
||||||
|
else if ((j0-2)>=jminF && j0<=ex2-2)
|
||||||
|
f_rhs[p] -= sfy * d12dy*(fh[idx_fh_F_ord4(iF,jF-2,kF,ex)]-EIT*fh[idx_fh_F_ord4(iF,jF-1,kF,ex)]+EIT*fh[idx_fh_F_ord4(iF,jF+1,kF,ex)]-fh[idx_fh_F_ord4(iF,jF+2,kF,ex)]);
|
||||||
|
else if ((j0-1)>=jminF && j0<=ex2-2)
|
||||||
|
f_rhs[p] -= sfy * d2dy*(-fh[idx_fh_F_ord4(iF,jF-1,kF,ex)]+fh[idx_fh_F_ord4(iF,jF+1,kF,ex)]);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ---- z-direction ---- */
|
||||||
|
const double sfz = Sfz[p];
|
||||||
|
if (sfz > ZEO) {
|
||||||
|
if (k0<=ex3-5 && (k0-1)>=kminF)
|
||||||
|
f_rhs[p] += sfz * d60dz*(F2*fh[idx_fh_F_ord4(iF,jF,kF-2,ex)]-F24*fh[idx_fh_F_ord4(iF,jF,kF-1,ex)]-F35*fh[idx_fh_F_ord4(iF,jF,kF,ex)]+F80*fh[idx_fh_F_ord4(iF,jF,kF+1,ex)]-F30*fh[idx_fh_F_ord4(iF,jF,kF+2,ex)]+EIT*fh[idx_fh_F_ord4(iF,jF,kF+3,ex)]-fh[idx_fh_F_ord4(iF,jF,kF+4,ex)]);
|
||||||
|
else if (k0<=ex3-6 && k0>=kminF)
|
||||||
|
f_rhs[p] += sfz * d60dz*(-F10*fh[idx_fh_F_ord4(iF,jF,kF-1,ex)]-F77*fh[idx_fh_F_ord4(iF,jF,kF,ex)]+F150*fh[idx_fh_F_ord4(iF,jF,kF+1,ex)]-F100*fh[idx_fh_F_ord4(iF,jF,kF+2,ex)]+F50*fh[idx_fh_F_ord4(iF,jF,kF+3,ex)]-F15*fh[idx_fh_F_ord4(iF,jF,kF+4,ex)]+F2*fh[idx_fh_F_ord4(iF,jF,kF+5,ex)]);
|
||||||
|
else if (k0<=ex3-4 && (k0-2)>=kminF)
|
||||||
|
f_rhs[p] += sfz * d60dz*(-fh[idx_fh_F_ord4(iF,jF,kF-3,ex)]+F9*fh[idx_fh_F_ord4(iF,jF,kF-2,ex)]-F45*fh[idx_fh_F_ord4(iF,jF,kF-1,ex)]+F45*fh[idx_fh_F_ord4(iF,jF,kF+1,ex)]-F9*fh[idx_fh_F_ord4(iF,jF,kF+2,ex)]+fh[idx_fh_F_ord4(iF,jF,kF+3,ex)]);
|
||||||
|
else if (k0<=ex3-3 && (k0-1)>=kminF)
|
||||||
|
f_rhs[p] += sfz * d12dz*(fh[idx_fh_F_ord4(iF,jF,kF-2,ex)]-EIT*fh[idx_fh_F_ord4(iF,jF,kF-1,ex)]+EIT*fh[idx_fh_F_ord4(iF,jF,kF+1,ex)]-fh[idx_fh_F_ord4(iF,jF,kF+2,ex)]);
|
||||||
|
else if (k0<=ex3-2 && k0>=kminF)
|
||||||
|
f_rhs[p] += sfz * d2dz*(-fh[idx_fh_F_ord4(iF,jF,kF-1,ex)]+fh[idx_fh_F_ord4(iF,jF,kF+1,ex)]);
|
||||||
|
} else if (sfz < ZEO) {
|
||||||
|
if ((k0-4)>=kminF && k0<=ex3-2)
|
||||||
|
f_rhs[p] -= sfz * d60dz*(F2*fh[idx_fh_F_ord4(iF,jF,kF+2,ex)]-F24*fh[idx_fh_F_ord4(iF,jF,kF+1,ex)]-F35*fh[idx_fh_F_ord4(iF,jF,kF,ex)]+F80*fh[idx_fh_F_ord4(iF,jF,kF-1,ex)]-F30*fh[idx_fh_F_ord4(iF,jF,kF-2,ex)]+EIT*fh[idx_fh_F_ord4(iF,jF,kF-3,ex)]-fh[idx_fh_F_ord4(iF,jF,kF-4,ex)]);
|
||||||
|
else if ((k0-5)>=kminF && k0<=ex3-2)
|
||||||
|
f_rhs[p] -= sfz * d60dz*(-F10*fh[idx_fh_F_ord4(iF,jF,kF+1,ex)]-F77*fh[idx_fh_F_ord4(iF,jF,kF,ex)]+F150*fh[idx_fh_F_ord4(iF,jF,kF-1,ex)]-F100*fh[idx_fh_F_ord4(iF,jF,kF-2,ex)]+F50*fh[idx_fh_F_ord4(iF,jF,kF-3,ex)]-F15*fh[idx_fh_F_ord4(iF,jF,kF-4,ex)]+F2*fh[idx_fh_F_ord4(iF,jF,kF-5,ex)]);
|
||||||
|
else if ((k0-3)>=kminF && k0<=ex3-2)
|
||||||
|
f_rhs[p] -= sfz * d60dz*(-fh[idx_fh_F_ord4(iF,jF,kF-3,ex)]+F9*fh[idx_fh_F_ord4(iF,jF,kF-2,ex)]-F45*fh[idx_fh_F_ord4(iF,jF,kF-1,ex)]+F45*fh[idx_fh_F_ord4(iF,jF,kF+1,ex)]-F9*fh[idx_fh_F_ord4(iF,jF,kF+2,ex)]+fh[idx_fh_F_ord4(iF,jF,kF+3,ex)]);
|
||||||
|
else if ((k0-2)>=kminF && k0<=ex3-2)
|
||||||
|
f_rhs[p] -= sfz * d12dz*(fh[idx_fh_F_ord4(iF,jF,kF-2,ex)]-EIT*fh[idx_fh_F_ord4(iF,jF,kF-1,ex)]+EIT*fh[idx_fh_F_ord4(iF,jF,kF+1,ex)]-fh[idx_fh_F_ord4(iF,jF,kF+2,ex)]);
|
||||||
|
else if ((k0-1)>=kminF && k0<=ex3-2)
|
||||||
|
f_rhs[p] -= sfz * d2dz*(-fh[idx_fh_F_ord4(iF,jF,kF-1,ex)]+fh[idx_fh_F_ord4(iF,jF,kF+1,ex)]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
free(fh);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
#elif (ghost_width == 5)
|
||||||
|
/* ---- 8th-order lopsided --------------------------------------------- */
|
||||||
|
{
|
||||||
|
const int ord = 5;
|
||||||
|
int iminF = 1, jminF = 1, kminF = 1;
|
||||||
|
if (Symmetry > NO_SYMM && fabs(Z[0]) < dZ) kminF = -4;
|
||||||
|
if (Symmetry > EQ_SYMM && fabs(X[0]) < dX) iminF = -4;
|
||||||
|
if (Symmetry > EQ_SYMM && fabs(Y[0]) < dY) jminF = -4;
|
||||||
|
|
||||||
|
const size_t nx = (size_t)ex1 + ord;
|
||||||
|
const size_t ny = (size_t)ex2 + ord;
|
||||||
|
const size_t nz = (size_t)ex3 + ord;
|
||||||
|
const size_t fh_size = nx * ny * nz;
|
||||||
|
|
||||||
|
double *fh = (double*)malloc(fh_size * sizeof(double));
|
||||||
|
if (!fh) return;
|
||||||
|
symmetry_bd(ord, ex, f, fh, SoA);
|
||||||
|
|
||||||
|
const double d840dx = ONE / F840 / dX;
|
||||||
|
const double d840dy = ONE / F840 / dY;
|
||||||
|
const double d840dz = ONE / F840 / dZ;
|
||||||
|
const double d60dx = ONE / F60 / dX;
|
||||||
|
const double d60dy = ONE / F60 / dY;
|
||||||
|
const double d60dz = ONE / F60 / dZ;
|
||||||
|
const double d12dx = ONE / F12 / dX;
|
||||||
|
const double d12dy = ONE / F12 / dY;
|
||||||
|
const double d12dz = ONE / F12 / dZ;
|
||||||
|
const double d2dx = ONE / TWO / dX;
|
||||||
|
const double d2dy = ONE / TWO / dY;
|
||||||
|
const double d2dz = ONE / TWO / dZ;
|
||||||
|
|
||||||
|
const int imaxF = ex1, jmaxF = ex2, kmaxF = ex3;
|
||||||
|
|
||||||
|
for (int k0 = 0; k0 <= ex3 - 2; ++k0) {
|
||||||
|
const int kF = k0 + 4;
|
||||||
|
for (int j0 = 0; j0 <= ex2 - 2; ++j0) {
|
||||||
|
const int jF = j0 + 4;
|
||||||
|
for (int i0 = 0; i0 <= ex1 - 2; ++i0) {
|
||||||
|
const int iF = i0 + 4;
|
||||||
|
const size_t p = idx_ex(i0, j0, k0, ex);
|
||||||
|
|
||||||
|
const double sfx = Sfx[p];
|
||||||
|
if (sfx > ZEO) {
|
||||||
|
/* 8th biased: -5*f(i-3)+60*f(i-2)-420*f(i-1)-378*f(i)+1050*f(i+1)-420*f(i+2)+140*f(i+3)-30*f(i+4)+3*f(i+5) */
|
||||||
|
if (i0 <= ex1-6 && (i0-2)>=iminF) // i+5<=imax && i-3>=imin
|
||||||
|
f_rhs[p] += sfx * d840dx * (
|
||||||
|
-F5*fh[idx_fh_F_ord5(iF-3,jF,kF,ex)]+F60*fh[idx_fh_F_ord5(iF-2,jF,kF,ex)]
|
||||||
|
-F420*fh[idx_fh_F_ord5(iF-1,jF,kF,ex)]-F378*fh[idx_fh_F_ord5(iF,jF,kF,ex)]
|
||||||
|
+F1050*fh[idx_fh_F_ord5(iF+1,jF,kF,ex)]-F420*fh[idx_fh_F_ord5(iF+2,jF,kF,ex)]
|
||||||
|
+F140*fh[idx_fh_F_ord5(iF+3,jF,kF,ex)]-F30*fh[idx_fh_F_ord5(iF+4,jF,kF,ex)]
|
||||||
|
+F3*fh[idx_fh_F_ord5(iF+5,jF,kF,ex)]);
|
||||||
|
/* 8th centered: +3*f(i-4)-32*f(i-3)+168*f(i-2)-672*f(i-1)+672*f(i+1)-168*f(i+2)+32*f(i+3)-3*f(i+4) */
|
||||||
|
else if (i0 <= ex1-5 && (i0-3)>=iminF)
|
||||||
|
f_rhs[p] += sfx * d840dx * (
|
||||||
|
+F3*fh[idx_fh_F_ord5(iF-4,jF,kF,ex)]-F32*fh[idx_fh_F_ord5(iF-3,jF,kF,ex)]
|
||||||
|
+F168*fh[idx_fh_F_ord5(iF-2,jF,kF,ex)]-F672*fh[idx_fh_F_ord5(iF-1,jF,kF,ex)]
|
||||||
|
+F672*fh[idx_fh_F_ord5(iF+1,jF,kF,ex)]-F168*fh[idx_fh_F_ord5(iF+2,jF,kF,ex)]
|
||||||
|
+F32*fh[idx_fh_F_ord5(iF+3,jF,kF,ex)]-F3*fh[idx_fh_F_ord5(iF+4,jF,kF,ex)]);
|
||||||
|
else if (i0 <= ex1-4 && (i0-2)>=iminF) // 6th centered
|
||||||
|
f_rhs[p] += sfx * d60dx * (
|
||||||
|
-fh[idx_fh_F_ord5(iF-3,jF,kF,ex)]+F9*fh[idx_fh_F_ord5(iF-2,jF,kF,ex)]
|
||||||
|
-F45*fh[idx_fh_F_ord5(iF-1,jF,kF,ex)]+F45*fh[idx_fh_F_ord5(iF+1,jF,kF,ex)]
|
||||||
|
-F9*fh[idx_fh_F_ord5(iF+2,jF,kF,ex)]+fh[idx_fh_F_ord5(iF+3,jF,kF,ex)]);
|
||||||
|
else if (i0 <= ex1-3 && (i0-1)>=iminF) // 4th centered
|
||||||
|
f_rhs[p] += sfx * d12dx * (
|
||||||
|
fh[idx_fh_F_ord5(iF-2,jF,kF,ex)]-EIT*fh[idx_fh_F_ord5(iF-1,jF,kF,ex)]
|
||||||
|
+EIT*fh[idx_fh_F_ord5(iF+1,jF,kF,ex)]-fh[idx_fh_F_ord5(iF+2,jF,kF,ex)]);
|
||||||
|
else if (i0 <= ex1-2 && i0>=iminF) // 2nd centered
|
||||||
|
f_rhs[p] += sfx * d2dx * (
|
||||||
|
-fh[idx_fh_F_ord5(iF-1,jF,kF,ex)]+fh[idx_fh_F_ord5(iF+1,jF,kF,ex)]);
|
||||||
|
} else if (sfx < ZEO) {
|
||||||
|
if ((i0-5)>=iminF && i0<=ex1-2) // i-5>=imin && i+3<=imax
|
||||||
|
f_rhs[p] -= sfx * d840dx * (
|
||||||
|
-F5*fh[idx_fh_F_ord5(iF+3,jF,kF,ex)]+F60*fh[idx_fh_F_ord5(iF+2,jF,kF,ex)]
|
||||||
|
-F420*fh[idx_fh_F_ord5(iF+1,jF,kF,ex)]-F378*fh[idx_fh_F_ord5(iF,jF,kF,ex)]
|
||||||
|
+F1050*fh[idx_fh_F_ord5(iF-1,jF,kF,ex)]-F420*fh[idx_fh_F_ord5(iF-2,jF,kF,ex)]
|
||||||
|
+F140*fh[idx_fh_F_ord5(iF-3,jF,kF,ex)]-F30*fh[idx_fh_F_ord5(iF-4,jF,kF,ex)]
|
||||||
|
+F3*fh[idx_fh_F_ord5(iF-5,jF,kF,ex)]);
|
||||||
|
else if ((i0-4)>=iminF && i0<=ex1-2) // 8th centered
|
||||||
|
f_rhs[p] -= sfx * d840dx * (
|
||||||
|
+F3*fh[idx_fh_F_ord5(iF-4,jF,kF,ex)]-F32*fh[idx_fh_F_ord5(iF-3,jF,kF,ex)]
|
||||||
|
+F168*fh[idx_fh_F_ord5(iF-2,jF,kF,ex)]-F672*fh[idx_fh_F_ord5(iF-1,jF,kF,ex)]
|
||||||
|
+F672*fh[idx_fh_F_ord5(iF+1,jF,kF,ex)]-F168*fh[idx_fh_F_ord5(iF+2,jF,kF,ex)]
|
||||||
|
+F32*fh[idx_fh_F_ord5(iF+3,jF,kF,ex)]-F3*fh[idx_fh_F_ord5(iF+4,jF,kF,ex)]);
|
||||||
|
else if ((i0-3)>=iminF && i0<=ex1-2) // 6th centered
|
||||||
|
f_rhs[p] -= sfx * d60dx * (
|
||||||
|
-fh[idx_fh_F_ord5(iF-3,jF,kF,ex)]+F9*fh[idx_fh_F_ord5(iF-2,jF,kF,ex)]
|
||||||
|
-F45*fh[idx_fh_F_ord5(iF-1,jF,kF,ex)]+F45*fh[idx_fh_F_ord5(iF+1,jF,kF,ex)]
|
||||||
|
-F9*fh[idx_fh_F_ord5(iF+2,jF,kF,ex)]+fh[idx_fh_F_ord5(iF+3,jF,kF,ex)]);
|
||||||
|
else if ((i0-2)>=iminF && i0<=ex1-2) // 4th centered
|
||||||
|
f_rhs[p] -= sfx * d12dx * (
|
||||||
|
fh[idx_fh_F_ord5(iF-2,jF,kF,ex)]-EIT*fh[idx_fh_F_ord5(iF-1,jF,kF,ex)]
|
||||||
|
+EIT*fh[idx_fh_F_ord5(iF+1,jF,kF,ex)]-fh[idx_fh_F_ord5(iF+2,jF,kF,ex)]);
|
||||||
|
else if ((i0-1)>=iminF && i0<=ex1-2) // 2nd centered
|
||||||
|
f_rhs[p] -= sfx * d2dx * (
|
||||||
|
-fh[idx_fh_F_ord5(iF-1,jF,kF,ex)]+fh[idx_fh_F_ord5(iF+1,jF,kF,ex)]);
|
||||||
|
}
|
||||||
|
|
||||||
|
const double sfy = Sfy[p];
|
||||||
|
if (sfy > ZEO) {
|
||||||
|
if (j0<=ex2-6 && (j0-2)>=jminF)
|
||||||
|
f_rhs[p] += sfy * d840dy*(-F5*fh[idx_fh_F_ord5(iF,jF-3,kF,ex)]+F60*fh[idx_fh_F_ord5(iF,jF-2,kF,ex)]-F420*fh[idx_fh_F_ord5(iF,jF-1,kF,ex)]-F378*fh[idx_fh_F_ord5(iF,jF,kF,ex)]+F1050*fh[idx_fh_F_ord5(iF,jF+1,kF,ex)]-F420*fh[idx_fh_F_ord5(iF,jF+2,kF,ex)]+F140*fh[idx_fh_F_ord5(iF,jF+3,kF,ex)]-F30*fh[idx_fh_F_ord5(iF,jF+4,kF,ex)]+F3*fh[idx_fh_F_ord5(iF,jF+5,kF,ex)]);
|
||||||
|
else if (j0<=ex2-5 && (j0-3)>=jminF)
|
||||||
|
f_rhs[p] += sfy * d840dy*(+F3*fh[idx_fh_F_ord5(iF,jF-4,kF,ex)]-F32*fh[idx_fh_F_ord5(iF,jF-3,kF,ex)]+F168*fh[idx_fh_F_ord5(iF,jF-2,kF,ex)]-F672*fh[idx_fh_F_ord5(iF,jF-1,kF,ex)]+F672*fh[idx_fh_F_ord5(iF,jF+1,kF,ex)]-F168*fh[idx_fh_F_ord5(iF,jF+2,kF,ex)]+F32*fh[idx_fh_F_ord5(iF,jF+3,kF,ex)]-F3*fh[idx_fh_F_ord5(iF,jF+4,kF,ex)]);
|
||||||
|
else if (j0<=ex2-4 && (j0-2)>=jminF)
|
||||||
|
f_rhs[p] += sfy * d60dy*(-fh[idx_fh_F_ord5(iF,jF-3,kF,ex)]+F9*fh[idx_fh_F_ord5(iF,jF-2,kF,ex)]-F45*fh[idx_fh_F_ord5(iF,jF-1,kF,ex)]+F45*fh[idx_fh_F_ord5(iF,jF+1,kF,ex)]-F9*fh[idx_fh_F_ord5(iF,jF+2,kF,ex)]+fh[idx_fh_F_ord5(iF,jF+3,kF,ex)]);
|
||||||
|
else if (j0<=ex2-3 && (j0-1)>=jminF)
|
||||||
|
f_rhs[p] += sfy * d12dy*(fh[idx_fh_F_ord5(iF,jF-2,kF,ex)]-EIT*fh[idx_fh_F_ord5(iF,jF-1,kF,ex)]+EIT*fh[idx_fh_F_ord5(iF,jF+1,kF,ex)]-fh[idx_fh_F_ord5(iF,jF+2,kF,ex)]);
|
||||||
|
else if (j0<=ex2-2 && j0>=jminF)
|
||||||
|
f_rhs[p] += sfy * d2dy*(-fh[idx_fh_F_ord5(iF,jF-1,kF,ex)]+fh[idx_fh_F_ord5(iF,jF+1,kF,ex)]);
|
||||||
|
} else if (sfy < ZEO) {
|
||||||
|
if ((j0-5)>=jminF && j0<=ex2-2)
|
||||||
|
f_rhs[p] -= sfy * d840dy*(-F5*fh[idx_fh_F_ord5(iF,jF+3,kF,ex)]+F60*fh[idx_fh_F_ord5(iF,jF+2,kF,ex)]-F420*fh[idx_fh_F_ord5(iF,jF+1,kF,ex)]-F378*fh[idx_fh_F_ord5(iF,jF,kF,ex)]+F1050*fh[idx_fh_F_ord5(iF,jF-1,kF,ex)]-F420*fh[idx_fh_F_ord5(iF,jF-2,kF,ex)]+F140*fh[idx_fh_F_ord5(iF,jF-3,kF,ex)]-F30*fh[idx_fh_F_ord5(iF,jF-4,kF,ex)]+F3*fh[idx_fh_F_ord5(iF,jF-5,kF,ex)]);
|
||||||
|
else if ((j0-4)>=jminF && j0<=ex2-2)
|
||||||
|
f_rhs[p] -= sfy * d840dy*(+F3*fh[idx_fh_F_ord5(iF,jF-4,kF,ex)]-F32*fh[idx_fh_F_ord5(iF,jF-3,kF,ex)]+F168*fh[idx_fh_F_ord5(iF,jF-2,kF,ex)]-F672*fh[idx_fh_F_ord5(iF,jF-1,kF,ex)]+F672*fh[idx_fh_F_ord5(iF,jF+1,kF,ex)]-F168*fh[idx_fh_F_ord5(iF,jF+2,kF,ex)]+F32*fh[idx_fh_F_ord5(iF,jF+3,kF,ex)]-F3*fh[idx_fh_F_ord5(iF,jF+4,kF,ex)]);
|
||||||
|
else if ((j0-3)>=jminF && j0<=ex2-2)
|
||||||
|
f_rhs[p] -= sfy * d60dy*(-fh[idx_fh_F_ord5(iF,jF-3,kF,ex)]+F9*fh[idx_fh_F_ord5(iF,jF-2,kF,ex)]-F45*fh[idx_fh_F_ord5(iF,jF-1,kF,ex)]+F45*fh[idx_fh_F_ord5(iF,jF+1,kF,ex)]-F9*fh[idx_fh_F_ord5(iF,jF+2,kF,ex)]+fh[idx_fh_F_ord5(iF,jF+3,kF,ex)]);
|
||||||
|
else if ((j0-2)>=jminF && j0<=ex2-2)
|
||||||
|
f_rhs[p] -= sfy * d12dy*(fh[idx_fh_F_ord5(iF,jF-2,kF,ex)]-EIT*fh[idx_fh_F_ord5(iF,jF-1,kF,ex)]+EIT*fh[idx_fh_F_ord5(iF,jF+1,kF,ex)]-fh[idx_fh_F_ord5(iF,jF+2,kF,ex)]);
|
||||||
|
else if ((j0-1)>=jminF && j0<=ex2-2)
|
||||||
|
f_rhs[p] -= sfy * d2dy*(-fh[idx_fh_F_ord5(iF,jF-1,kF,ex)]+fh[idx_fh_F_ord5(iF,jF+1,kF,ex)]);
|
||||||
|
}
|
||||||
|
|
||||||
|
const double sfz = Sfz[p];
|
||||||
|
if (sfz > ZEO) {
|
||||||
|
if (k0<=ex3-6 && (k0-2)>=kminF)
|
||||||
|
f_rhs[p] += sfz * d840dz*(-F5*fh[idx_fh_F_ord5(iF,jF,kF-3,ex)]+F60*fh[idx_fh_F_ord5(iF,jF,kF-2,ex)]-F420*fh[idx_fh_F_ord5(iF,jF,kF-1,ex)]-F378*fh[idx_fh_F_ord5(iF,jF,kF,ex)]+F1050*fh[idx_fh_F_ord5(iF,jF,kF+1,ex)]-F420*fh[idx_fh_F_ord5(iF,jF,kF+2,ex)]+F140*fh[idx_fh_F_ord5(iF,jF,kF+3,ex)]-F30*fh[idx_fh_F_ord5(iF,jF,kF+4,ex)]+F3*fh[idx_fh_F_ord5(iF,jF,kF+5,ex)]);
|
||||||
|
else if (k0<=ex3-5 && (k0-3)>=kminF)
|
||||||
|
f_rhs[p] += sfz * d840dz*(+F3*fh[idx_fh_F_ord5(iF,jF,kF-4,ex)]-F32*fh[idx_fh_F_ord5(iF,jF,kF-3,ex)]+F168*fh[idx_fh_F_ord5(iF,jF,kF-2,ex)]-F672*fh[idx_fh_F_ord5(iF,jF,kF-1,ex)]+F672*fh[idx_fh_F_ord5(iF,jF,kF+1,ex)]-F168*fh[idx_fh_F_ord5(iF,jF,kF+2,ex)]+F32*fh[idx_fh_F_ord5(iF,jF,kF+3,ex)]-F3*fh[idx_fh_F_ord5(iF,jF,kF+4,ex)]);
|
||||||
|
else if (k0<=ex3-4 && (k0-2)>=kminF)
|
||||||
|
f_rhs[p] += sfz * d60dz*(-fh[idx_fh_F_ord5(iF,jF,kF-3,ex)]+F9*fh[idx_fh_F_ord5(iF,jF,kF-2,ex)]-F45*fh[idx_fh_F_ord5(iF,jF,kF-1,ex)]+F45*fh[idx_fh_F_ord5(iF,jF,kF+1,ex)]-F9*fh[idx_fh_F_ord5(iF,jF,kF+2,ex)]+fh[idx_fh_F_ord5(iF,jF,kF+3,ex)]);
|
||||||
|
else if (k0<=ex3-3 && (k0-1)>=kminF)
|
||||||
|
f_rhs[p] += sfz * d12dz*(fh[idx_fh_F_ord5(iF,jF,kF-2,ex)]-EIT*fh[idx_fh_F_ord5(iF,jF,kF-1,ex)]+EIT*fh[idx_fh_F_ord5(iF,jF,kF+1,ex)]-fh[idx_fh_F_ord5(iF,jF,kF+2,ex)]);
|
||||||
|
else if (k0<=ex3-2 && k0>=kminF)
|
||||||
|
f_rhs[p] += sfz * d2dz*(-fh[idx_fh_F_ord5(iF,jF,kF-1,ex)]+fh[idx_fh_F_ord5(iF,jF,kF+1,ex)]);
|
||||||
|
} else if (sfz < ZEO) {
|
||||||
|
if ((k0-5)>=kminF && k0<=ex3-2)
|
||||||
|
f_rhs[p] -= sfz * d840dz*(-F5*fh[idx_fh_F_ord5(iF,jF,kF+3,ex)]+F60*fh[idx_fh_F_ord5(iF,jF,kF+2,ex)]-F420*fh[idx_fh_F_ord5(iF,jF,kF+1,ex)]-F378*fh[idx_fh_F_ord5(iF,jF,kF,ex)]+F1050*fh[idx_fh_F_ord5(iF,jF,kF-1,ex)]-F420*fh[idx_fh_F_ord5(iF,jF,kF-2,ex)]+F140*fh[idx_fh_F_ord5(iF,jF,kF-3,ex)]-F30*fh[idx_fh_F_ord5(iF,jF,kF-4,ex)]+F3*fh[idx_fh_F_ord5(iF,jF,kF-5,ex)]);
|
||||||
|
else if ((k0-4)>=kminF && k0<=ex3-2)
|
||||||
|
f_rhs[p] -= sfz * d840dz*(+F3*fh[idx_fh_F_ord5(iF,jF,kF-4,ex)]-F32*fh[idx_fh_F_ord5(iF,jF,kF-3,ex)]+F168*fh[idx_fh_F_ord5(iF,jF,kF-2,ex)]-F672*fh[idx_fh_F_ord5(iF,jF,kF-1,ex)]+F672*fh[idx_fh_F_ord5(iF,jF,kF+1,ex)]-F168*fh[idx_fh_F_ord5(iF,jF,kF+2,ex)]+F32*fh[idx_fh_F_ord5(iF,jF,kF+3,ex)]-F3*fh[idx_fh_F_ord5(iF,jF,kF+4,ex)]);
|
||||||
|
else if ((k0-3)>=kminF && k0<=ex3-2)
|
||||||
|
f_rhs[p] -= sfz * d60dz*(-fh[idx_fh_F_ord5(iF,jF,kF-3,ex)]+F9*fh[idx_fh_F_ord5(iF,jF,kF-2,ex)]-F45*fh[idx_fh_F_ord5(iF,jF,kF-1,ex)]+F45*fh[idx_fh_F_ord5(iF,jF,kF+1,ex)]-F9*fh[idx_fh_F_ord5(iF,jF,kF+2,ex)]+fh[idx_fh_F_ord5(iF,jF,kF+3,ex)]);
|
||||||
|
else if ((k0-2)>=kminF && k0<=ex3-2)
|
||||||
|
f_rhs[p] -= sfz * d12dz*(fh[idx_fh_F_ord5(iF,jF,kF-2,ex)]-EIT*fh[idx_fh_F_ord5(iF,jF,kF-1,ex)]+EIT*fh[idx_fh_F_ord5(iF,jF,kF+1,ex)]-fh[idx_fh_F_ord5(iF,jF,kF+2,ex)]);
|
||||||
|
else if ((k0-1)>=kminF && k0<=ex3-2)
|
||||||
|
f_rhs[p] -= sfz * d2dz*(-fh[idx_fh_F_ord5(iF,jF,kF-1,ex)]+fh[idx_fh_F_ord5(iF,jF,kF+1,ex)]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
free(fh);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
#error "lopsided_c.C: unsupported ghost_width (must be 2, 3, 4, or 5)"
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -1,8 +1,15 @@
|
|||||||
|
#include "macrodef.h"
|
||||||
#include "tool.h"
|
#include "tool.h"
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Combined advection (lopsided) + KO dissipation (kodis).
|
* C 版 lopsided_kodis — combined upwind advection + KO dissipation.
|
||||||
* Uses one shared symmetry_bd buffer per call.
|
* Uses one shared symmetry_bd buffer (ord = ghost_width for both components).
|
||||||
|
*
|
||||||
|
* FD order selection via ghost_width:
|
||||||
|
* 2 → 2nd-order advection + r=2 KO (cof=16, sign=-)
|
||||||
|
* 3 → 4th-order advection + r=3 KO (cof=64, sign=+)
|
||||||
|
* 4 → 6th-order advection + r=4 KO (cof=256, sign=-)
|
||||||
|
* 5 → 8th-order advection + r=5 KO (cof=1024, sign=+)
|
||||||
*/
|
*/
|
||||||
void lopsided_kodis(const int ex[3],
|
void lopsided_kodis(const int ex[3],
|
||||||
const double *X, const double *Y, const double *Z,
|
const double *X, const double *Y, const double *Z,
|
||||||
@@ -10,239 +17,370 @@ void lopsided_kodis(const int ex[3],
|
|||||||
const double *Sfx, const double *Sfy, const double *Sfz,
|
const double *Sfx, const double *Sfy, const double *Sfz,
|
||||||
int Symmetry, const double SoA[3], double eps)
|
int Symmetry, const double SoA[3], double eps)
|
||||||
{
|
{
|
||||||
const double ZEO = 0.0, ONE = 1.0, F3 = 3.0;
|
const double ZEO = 0.0, ONE = 1.0;
|
||||||
const double F6 = 6.0, F18 = 18.0;
|
const double TWO = 2.0, F6 = 6.0, EIT = 8.0;
|
||||||
const double F12 = 12.0, F10 = 10.0, EIT = 8.0;
|
const double F3 = 3.0, F4 = 4.0, F5 = 5.0, F10 = 10.0, F12 = 12.0, F18 = 18.0;
|
||||||
const double SIX = 6.0, FIT = 15.0, TWT = 20.0;
|
const double F9 = 9.0, F45 = 45.0, F60 = 60.0;
|
||||||
const double cof = 64.0; // 2^6
|
const double F2 = 2.0, F15 = 15.0, F24 = 24.0, F30 = 30.0, F35 = 35.0;
|
||||||
|
const double F50 = 50.0, F77 = 77.0, F80 = 80.0, F100 = 100.0, F150 = 150.0;
|
||||||
|
const double F32 = 32.0, F168 = 168.0, F672 = 672.0, F840 = 840.0;
|
||||||
|
const double F140=140.0, F378=378.0, F420=420.0, F1050=1050.0;
|
||||||
|
|
||||||
const int NO_SYMM = 0, EQ_SYMM = 1;
|
const int NO_SYMM = 0, EQ_SYMM = 1;
|
||||||
|
|
||||||
const int ex1 = ex[0], ex2 = ex[1], ex3 = ex[2];
|
const int ex1 = ex[0], ex2 = ex[1], ex3 = ex[2];
|
||||||
|
|
||||||
const double dX = X[1] - X[0];
|
const double dX = X[1] - X[0];
|
||||||
const double dY = Y[1] - Y[0];
|
const double dY = Y[1] - Y[0];
|
||||||
const double dZ = Z[1] - Z[0];
|
const double dZ = Z[1] - Z[0];
|
||||||
|
|
||||||
const double d12dx = ONE / F12 / dX;
|
const int imaxF = ex1, jmaxF = ex2, kmaxF = ex3;
|
||||||
const double d12dy = ONE / F12 / dY;
|
|
||||||
const double d12dz = ONE / F12 / dZ;
|
|
||||||
|
|
||||||
const int imaxF = ex1;
|
#if (ghost_width == 2)
|
||||||
const int jmaxF = ex2;
|
{
|
||||||
const int kmaxF = ex3;
|
const int ord = 2;
|
||||||
|
int iminF = 1, jminF = 1, kminF = 1;
|
||||||
|
if (Symmetry > NO_SYMM && fabs(Z[0]) < dZ) kminF = -1;
|
||||||
|
if (Symmetry > EQ_SYMM && fabs(X[0]) < dX) iminF = -1;
|
||||||
|
if (Symmetry > EQ_SYMM && fabs(Y[0]) < dY) jminF = -1;
|
||||||
|
|
||||||
int iminF = 1, jminF = 1, kminF = 1;
|
const size_t nx = (size_t)ex1 + ord;
|
||||||
if (Symmetry > NO_SYMM && fabs(Z[0]) < dZ) kminF = -2;
|
const size_t ny = (size_t)ex2 + ord;
|
||||||
if (Symmetry > EQ_SYMM && fabs(X[0]) < dX) iminF = -2;
|
const size_t nz = (size_t)ex3 + ord;
|
||||||
if (Symmetry > EQ_SYMM && fabs(Y[0]) < dY) jminF = -2;
|
double *fh = (double*)malloc(nx*ny*nz*sizeof(double));
|
||||||
|
if (!fh) return;
|
||||||
|
symmetry_bd(ord, ex, f, fh, SoA);
|
||||||
|
|
||||||
// fh for Fortran-style domain (-2:ex1,-2:ex2,-2:ex3)
|
const double d2dx = ONE/TWO/dX, d2dy = ONE/TWO/dY, d2dz = ONE/TWO/dZ;
|
||||||
const size_t nx = (size_t)ex1 + 3;
|
|
||||||
const size_t ny = (size_t)ex2 + 3;
|
|
||||||
const size_t nz = (size_t)ex3 + 3;
|
|
||||||
const size_t fh_size = nx * ny * nz;
|
|
||||||
|
|
||||||
double *fh = (double*)malloc(fh_size * sizeof(double));
|
/* ---- advection (2nd-order) ---- */
|
||||||
if (!fh) return;
|
for (int k0 = 0; k0 <= ex3-2; ++k0) {
|
||||||
|
const int kF = k0+1;
|
||||||
|
for (int j0 = 0; j0 <= ex2-2; ++j0) {
|
||||||
|
const int jF = j0+1;
|
||||||
|
for (int i0 = 0; i0 <= ex1-2; ++i0) {
|
||||||
|
const int iF = i0+1;
|
||||||
|
const size_t p = idx_ex(i0,j0,k0,ex);
|
||||||
|
|
||||||
symmetry_bd(3, ex, f, fh, SoA);
|
const double sfx = Sfx[p];
|
||||||
|
if (sfx > ZEO) {
|
||||||
// Advection (same stencil logic as lopsided_c.C)
|
if (i0<=ex1-3) f_rhs[p] += sfx*d2dx*(-F3*fh[idx_fh_F_ord2(iF,jF,kF,ex)]+F4*fh[idx_fh_F_ord2(iF+1,jF,kF,ex)]-fh[idx_fh_F_ord2(iF+2,jF,kF,ex)]);
|
||||||
for (int k0 = 0; k0 <= ex3 - 2; ++k0) {
|
else if (i0<=ex1-2) f_rhs[p] += sfx*d2dx*(-fh[idx_fh_F_ord2(iF,jF,kF,ex)]+fh[idx_fh_F_ord2(iF+1,jF,kF,ex)]);
|
||||||
const int kF = k0 + 1;
|
} else if (sfx < ZEO) {
|
||||||
for (int j0 = 0; j0 <= ex2 - 2; ++j0) {
|
if ((i0-1)>=iminF) f_rhs[p] -= sfx*d2dx*(-F3*fh[idx_fh_F_ord2(iF,jF,kF,ex)]+F4*fh[idx_fh_F_ord2(iF-1,jF,kF,ex)]-fh[idx_fh_F_ord2(iF-2,jF,kF,ex)]);
|
||||||
const int jF = j0 + 1;
|
else if (i0>=iminF) f_rhs[p] -= sfx*d2dx*(-fh[idx_fh_F_ord2(iF,jF,kF,ex)]+fh[idx_fh_F_ord2(iF-1,jF,kF,ex)]);
|
||||||
for (int i0 = 0; i0 <= ex1 - 2; ++i0) {
|
|
||||||
const int iF = i0 + 1;
|
|
||||||
const size_t p = idx_ex(i0, j0, k0, ex);
|
|
||||||
|
|
||||||
const double sfx = Sfx[p];
|
|
||||||
if (sfx > ZEO) {
|
|
||||||
if (i0 <= ex1 - 4) {
|
|
||||||
f_rhs[p] += sfx * d12dx *
|
|
||||||
(-F3 * fh[idx_fh_F(iF - 1, jF, kF, ex)]
|
|
||||||
-F10 * fh[idx_fh_F(iF , jF, kF, ex)]
|
|
||||||
+F18 * fh[idx_fh_F(iF + 1, jF, kF, ex)]
|
|
||||||
-F6 * fh[idx_fh_F(iF + 2, jF, kF, ex)]
|
|
||||||
+ fh[idx_fh_F(iF + 3, jF, kF, ex)]);
|
|
||||||
} else if (i0 <= ex1 - 3) {
|
|
||||||
f_rhs[p] += sfx * d12dx *
|
|
||||||
( fh[idx_fh_F(iF - 2, jF, kF, ex)]
|
|
||||||
-EIT * fh[idx_fh_F(iF - 1, jF, kF, ex)]
|
|
||||||
+EIT * fh[idx_fh_F(iF + 1, jF, kF, ex)]
|
|
||||||
- fh[idx_fh_F(iF + 2, jF, kF, ex)]);
|
|
||||||
} else if (i0 <= ex1 - 2) {
|
|
||||||
f_rhs[p] -= sfx * d12dx *
|
|
||||||
(-F3 * fh[idx_fh_F(iF + 1, jF, kF, ex)]
|
|
||||||
-F10 * fh[idx_fh_F(iF , jF, kF, ex)]
|
|
||||||
+F18 * fh[idx_fh_F(iF - 1, jF, kF, ex)]
|
|
||||||
-F6 * fh[idx_fh_F(iF - 2, jF, kF, ex)]
|
|
||||||
+ fh[idx_fh_F(iF - 3, jF, kF, ex)]);
|
|
||||||
}
|
}
|
||||||
} else if (sfx < ZEO) {
|
const double sfy = Sfy[p];
|
||||||
if ((i0 - 2) >= iminF) {
|
if (sfy > ZEO) {
|
||||||
f_rhs[p] -= sfx * d12dx *
|
if (j0<=ex2-3) f_rhs[p] += sfy*d2dy*(-F3*fh[idx_fh_F_ord2(iF,jF,kF,ex)]+F4*fh[idx_fh_F_ord2(iF,jF+1,kF,ex)]-fh[idx_fh_F_ord2(iF,jF+2,kF,ex)]);
|
||||||
(-F3 * fh[idx_fh_F(iF + 1, jF, kF, ex)]
|
else if (j0<=ex2-2) f_rhs[p] += sfy*d2dy*(-fh[idx_fh_F_ord2(iF,jF,kF,ex)]+fh[idx_fh_F_ord2(iF,jF+1,kF,ex)]);
|
||||||
-F10 * fh[idx_fh_F(iF , jF, kF, ex)]
|
} else if (sfy < ZEO) {
|
||||||
+F18 * fh[idx_fh_F(iF - 1, jF, kF, ex)]
|
if ((j0-1)>=jminF) f_rhs[p] -= sfy*d2dy*(-F3*fh[idx_fh_F_ord2(iF,jF,kF,ex)]+F4*fh[idx_fh_F_ord2(iF,jF-1,kF,ex)]-fh[idx_fh_F_ord2(iF,jF-2,kF,ex)]);
|
||||||
-F6 * fh[idx_fh_F(iF - 2, jF, kF, ex)]
|
else if (j0>=jminF) f_rhs[p] -= sfy*d2dy*(-fh[idx_fh_F_ord2(iF,jF,kF,ex)]+fh[idx_fh_F_ord2(iF,jF-1,kF,ex)]);
|
||||||
+ fh[idx_fh_F(iF - 3, jF, kF, ex)]);
|
|
||||||
} else if ((i0 - 1) >= iminF) {
|
|
||||||
f_rhs[p] += sfx * d12dx *
|
|
||||||
( fh[idx_fh_F(iF - 2, jF, kF, ex)]
|
|
||||||
-EIT * fh[idx_fh_F(iF - 1, jF, kF, ex)]
|
|
||||||
+EIT * fh[idx_fh_F(iF + 1, jF, kF, ex)]
|
|
||||||
- fh[idx_fh_F(iF + 2, jF, kF, ex)]);
|
|
||||||
} else if (i0 >= iminF) {
|
|
||||||
f_rhs[p] += sfx * d12dx *
|
|
||||||
(-F3 * fh[idx_fh_F(iF - 1, jF, kF, ex)]
|
|
||||||
-F10 * fh[idx_fh_F(iF , jF, kF, ex)]
|
|
||||||
+F18 * fh[idx_fh_F(iF + 1, jF, kF, ex)]
|
|
||||||
-F6 * fh[idx_fh_F(iF + 2, jF, kF, ex)]
|
|
||||||
+ fh[idx_fh_F(iF + 3, jF, kF, ex)]);
|
|
||||||
}
|
}
|
||||||
}
|
const double sfz = Sfz[p];
|
||||||
|
if (sfz > ZEO) {
|
||||||
const double sfy = Sfy[p];
|
if (k0<=ex3-3) f_rhs[p] += sfz*d2dz*(-F3*fh[idx_fh_F_ord2(iF,jF,kF,ex)]+F4*fh[idx_fh_F_ord2(iF,jF,kF+1,ex)]-fh[idx_fh_F_ord2(iF,jF,kF+2,ex)]);
|
||||||
if (sfy > ZEO) {
|
else if (k0<=ex3-2) f_rhs[p] += sfz*d2dz*(-fh[idx_fh_F_ord2(iF,jF,kF,ex)]+fh[idx_fh_F_ord2(iF,jF,kF+1,ex)]);
|
||||||
if (j0 <= ex2 - 4) {
|
} else if (sfz < ZEO) {
|
||||||
f_rhs[p] += sfy * d12dy *
|
if ((k0-1)>=kminF) f_rhs[p] -= sfz*d2dz*(-F3*fh[idx_fh_F_ord2(iF,jF,kF,ex)]+F4*fh[idx_fh_F_ord2(iF,jF,kF-1,ex)]-fh[idx_fh_F_ord2(iF,jF,kF-2,ex)]);
|
||||||
(-F3 * fh[idx_fh_F(iF, jF - 1, kF, ex)]
|
else if (k0>=kminF) f_rhs[p] -= sfz*d2dz*(-fh[idx_fh_F_ord2(iF,jF,kF,ex)]+fh[idx_fh_F_ord2(iF,jF,kF-1,ex)]);
|
||||||
-F10 * fh[idx_fh_F(iF, jF , kF, ex)]
|
|
||||||
+F18 * fh[idx_fh_F(iF, jF + 1, kF, ex)]
|
|
||||||
-F6 * fh[idx_fh_F(iF, jF + 2, kF, ex)]
|
|
||||||
+ fh[idx_fh_F(iF, jF + 3, kF, ex)]);
|
|
||||||
} else if (j0 <= ex2 - 3) {
|
|
||||||
f_rhs[p] += sfy * d12dy *
|
|
||||||
( fh[idx_fh_F(iF, jF - 2, kF, ex)]
|
|
||||||
-EIT * fh[idx_fh_F(iF, jF - 1, kF, ex)]
|
|
||||||
+EIT * fh[idx_fh_F(iF, jF + 1, kF, ex)]
|
|
||||||
- fh[idx_fh_F(iF, jF + 2, kF, ex)]);
|
|
||||||
} else if (j0 <= ex2 - 2) {
|
|
||||||
f_rhs[p] -= sfy * d12dy *
|
|
||||||
(-F3 * fh[idx_fh_F(iF, jF + 1, kF, ex)]
|
|
||||||
-F10 * fh[idx_fh_F(iF, jF , kF, ex)]
|
|
||||||
+F18 * fh[idx_fh_F(iF, jF - 1, kF, ex)]
|
|
||||||
-F6 * fh[idx_fh_F(iF, jF - 2, kF, ex)]
|
|
||||||
+ fh[idx_fh_F(iF, jF - 3, kF, ex)]);
|
|
||||||
}
|
|
||||||
} else if (sfy < ZEO) {
|
|
||||||
if ((j0 - 2) >= jminF) {
|
|
||||||
f_rhs[p] -= sfy * d12dy *
|
|
||||||
(-F3 * fh[idx_fh_F(iF, jF + 1, kF, ex)]
|
|
||||||
-F10 * fh[idx_fh_F(iF, jF , kF, ex)]
|
|
||||||
+F18 * fh[idx_fh_F(iF, jF - 1, kF, ex)]
|
|
||||||
-F6 * fh[idx_fh_F(iF, jF - 2, kF, ex)]
|
|
||||||
+ fh[idx_fh_F(iF, jF - 3, kF, ex)]);
|
|
||||||
} else if ((j0 - 1) >= jminF) {
|
|
||||||
f_rhs[p] += sfy * d12dy *
|
|
||||||
( fh[idx_fh_F(iF, jF - 2, kF, ex)]
|
|
||||||
-EIT * fh[idx_fh_F(iF, jF - 1, kF, ex)]
|
|
||||||
+EIT * fh[idx_fh_F(iF, jF + 1, kF, ex)]
|
|
||||||
- fh[idx_fh_F(iF, jF + 2, kF, ex)]);
|
|
||||||
} else if (j0 >= jminF) {
|
|
||||||
f_rhs[p] += sfy * d12dy *
|
|
||||||
(-F3 * fh[idx_fh_F(iF, jF - 1, kF, ex)]
|
|
||||||
-F10 * fh[idx_fh_F(iF, jF , kF, ex)]
|
|
||||||
+F18 * fh[idx_fh_F(iF, jF + 1, kF, ex)]
|
|
||||||
-F6 * fh[idx_fh_F(iF, jF + 2, kF, ex)]
|
|
||||||
+ fh[idx_fh_F(iF, jF + 3, kF, ex)]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
const double sfz = Sfz[p];
|
|
||||||
if (sfz > ZEO) {
|
|
||||||
if (k0 <= ex3 - 4) {
|
|
||||||
f_rhs[p] += sfz * d12dz *
|
|
||||||
(-F3 * fh[idx_fh_F(iF, jF, kF - 1, ex)]
|
|
||||||
-F10 * fh[idx_fh_F(iF, jF, kF , ex)]
|
|
||||||
+F18 * fh[idx_fh_F(iF, jF, kF + 1, ex)]
|
|
||||||
-F6 * fh[idx_fh_F(iF, jF, kF + 2, ex)]
|
|
||||||
+ fh[idx_fh_F(iF, jF, kF + 3, ex)]);
|
|
||||||
} else if (k0 <= ex3 - 3) {
|
|
||||||
f_rhs[p] += sfz * d12dz *
|
|
||||||
( fh[idx_fh_F(iF, jF, kF - 2, ex)]
|
|
||||||
-EIT * fh[idx_fh_F(iF, jF, kF - 1, ex)]
|
|
||||||
+EIT * fh[idx_fh_F(iF, jF, kF + 1, ex)]
|
|
||||||
- fh[idx_fh_F(iF, jF, kF + 2, ex)]);
|
|
||||||
} else if (k0 <= ex3 - 2) {
|
|
||||||
f_rhs[p] -= sfz * d12dz *
|
|
||||||
(-F3 * fh[idx_fh_F(iF, jF, kF + 1, ex)]
|
|
||||||
-F10 * fh[idx_fh_F(iF, jF, kF , ex)]
|
|
||||||
+F18 * fh[idx_fh_F(iF, jF, kF - 1, ex)]
|
|
||||||
-F6 * fh[idx_fh_F(iF, jF, kF - 2, ex)]
|
|
||||||
+ fh[idx_fh_F(iF, jF, kF - 3, ex)]);
|
|
||||||
}
|
|
||||||
} else if (sfz < ZEO) {
|
|
||||||
if ((k0 - 2) >= kminF) {
|
|
||||||
f_rhs[p] -= sfz * d12dz *
|
|
||||||
(-F3 * fh[idx_fh_F(iF, jF, kF + 1, ex)]
|
|
||||||
-F10 * fh[idx_fh_F(iF, jF, kF , ex)]
|
|
||||||
+F18 * fh[idx_fh_F(iF, jF, kF - 1, ex)]
|
|
||||||
-F6 * fh[idx_fh_F(iF, jF, kF - 2, ex)]
|
|
||||||
+ fh[idx_fh_F(iF, jF, kF - 3, ex)]);
|
|
||||||
} else if ((k0 - 1) >= kminF) {
|
|
||||||
f_rhs[p] += sfz * d12dz *
|
|
||||||
( fh[idx_fh_F(iF, jF, kF - 2, ex)]
|
|
||||||
-EIT * fh[idx_fh_F(iF, jF, kF - 1, ex)]
|
|
||||||
+EIT * fh[idx_fh_F(iF, jF, kF + 1, ex)]
|
|
||||||
- fh[idx_fh_F(iF, jF, kF + 2, ex)]);
|
|
||||||
} else if (k0 >= kminF) {
|
|
||||||
f_rhs[p] += sfz * d12dz *
|
|
||||||
(-F3 * fh[idx_fh_F(iF, jF, kF - 1, ex)]
|
|
||||||
-F10 * fh[idx_fh_F(iF, jF, kF , ex)]
|
|
||||||
+F18 * fh[idx_fh_F(iF, jF, kF + 1, ex)]
|
|
||||||
-F6 * fh[idx_fh_F(iF, jF, kF + 2, ex)]
|
|
||||||
+ fh[idx_fh_F(iF, jF, kF + 3, ex)]);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* ---- KO dissipation (r=2, cof=16, sign=-) ---- */
|
||||||
|
if (eps > ZEO) {
|
||||||
|
const double cof = 16.0;
|
||||||
|
const double F4k = 4.0, F6k = 6.0;
|
||||||
|
const int i0_lo = (iminF+1>0)?iminF+1:0, j0_lo=(jminF+1>0)?jminF+1:0, k0_lo=(kminF+1>0)?kminF+1:0;
|
||||||
|
const int i0_hi=imaxF-3, j0_hi=jmaxF-3, k0_hi=kmaxF-3;
|
||||||
|
if (!(i0_lo>i0_hi||j0_lo>j0_hi||k0_lo>k0_hi)) {
|
||||||
|
for (int k0=k0_lo;k0<=k0_hi;++k0) { const int kF=k0+1;
|
||||||
|
for (int j0=j0_lo;j0<=j0_hi;++j0) { const int jF=j0+1;
|
||||||
|
for (int i0=i0_lo;i0<=i0_hi;++i0) { const int iF=i0+1;
|
||||||
|
const size_t p=idx_ex(i0,j0,k0,ex);
|
||||||
|
const double Dx=((fh[idx_fh_F_ord2(iF-2,jF,kF,ex)]+fh[idx_fh_F_ord2(iF+2,jF,kF,ex)])-F4k*(fh[idx_fh_F_ord2(iF-1,jF,kF,ex)]+fh[idx_fh_F_ord2(iF+1,jF,kF,ex)])+F6k*fh[idx_fh_F_ord2(iF,jF,kF,ex)])/dX;
|
||||||
|
const double Dy=((fh[idx_fh_F_ord2(iF,jF-2,kF,ex)]+fh[idx_fh_F_ord2(iF,jF+2,kF,ex)])-F4k*(fh[idx_fh_F_ord2(iF,jF-1,kF,ex)]+fh[idx_fh_F_ord2(iF,jF+1,kF,ex)])+F6k*fh[idx_fh_F_ord2(iF,jF,kF,ex)])/dY;
|
||||||
|
const double Dz=((fh[idx_fh_F_ord2(iF,jF,kF-2,ex)]+fh[idx_fh_F_ord2(iF,jF,kF+2,ex)])-F4k*(fh[idx_fh_F_ord2(iF,jF,kF-1,ex)]+fh[idx_fh_F_ord2(iF,jF,kF+1,ex)])+F6k*fh[idx_fh_F_ord2(iF,jF,kF,ex)])/dZ;
|
||||||
|
f_rhs[p] -= (eps/cof)*(Dx+Dy+Dz);
|
||||||
|
}}}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
free(fh);
|
||||||
|
return;
|
||||||
}
|
}
|
||||||
|
#elif (ghost_width == 3)
|
||||||
|
/* ---- 4th-order advection + r=3 KO (original code) ----------------- */
|
||||||
|
{
|
||||||
|
const int ord = 3;
|
||||||
|
int iminF = 1, jminF = 1, kminF = 1;
|
||||||
|
if (Symmetry > NO_SYMM && fabs(Z[0]) < dZ) kminF = -2;
|
||||||
|
if (Symmetry > EQ_SYMM && fabs(X[0]) < dX) iminF = -2;
|
||||||
|
if (Symmetry > EQ_SYMM && fabs(Y[0]) < dY) jminF = -2;
|
||||||
|
|
||||||
// KO dissipation (same domain restriction as kodiss_c.C)
|
const size_t nx = (size_t)ex1 + ord;
|
||||||
if (eps > ZEO) {
|
const size_t ny = (size_t)ex2 + ord;
|
||||||
const int i0_lo = (iminF + 2 > 0) ? iminF + 2 : 0;
|
const size_t nz = (size_t)ex3 + ord;
|
||||||
const int j0_lo = (jminF + 2 > 0) ? jminF + 2 : 0;
|
double *fh = (double*)malloc(nx*ny*nz*sizeof(double));
|
||||||
const int k0_lo = (kminF + 2 > 0) ? kminF + 2 : 0;
|
if (!fh) return;
|
||||||
const int i0_hi = imaxF - 4; // inclusive
|
symmetry_bd(ord, ex, f, fh, SoA);
|
||||||
const int j0_hi = jmaxF - 4;
|
|
||||||
const int k0_hi = kmaxF - 4;
|
|
||||||
|
|
||||||
if (!(i0_lo > i0_hi || j0_lo > j0_hi || k0_lo > k0_hi)) {
|
const double d12dx = ONE/F12/dX, d12dy = ONE/F12/dY, d12dz = ONE/F12/dZ;
|
||||||
for (int k0 = k0_lo; k0 <= k0_hi; ++k0) {
|
|
||||||
const int kF = k0 + 1;
|
|
||||||
for (int j0 = j0_lo; j0 <= j0_hi; ++j0) {
|
|
||||||
const int jF = j0 + 1;
|
|
||||||
for (int i0 = i0_lo; i0 <= i0_hi; ++i0) {
|
|
||||||
const int iF = i0 + 1;
|
|
||||||
const size_t p = idx_ex(i0, j0, k0, ex);
|
|
||||||
|
|
||||||
const double Dx_term =
|
/* ---- advection ---- */
|
||||||
((fh[idx_fh_F(iF - 3, jF, kF, ex)] + fh[idx_fh_F(iF + 3, jF, kF, ex)]) -
|
for (int k0 = 0; k0 <= ex3-2; ++k0) {
|
||||||
SIX * (fh[idx_fh_F(iF - 2, jF, kF, ex)] + fh[idx_fh_F(iF + 2, jF, kF, ex)]) +
|
const int kF = k0+2;
|
||||||
FIT * (fh[idx_fh_F(iF - 1, jF, kF, ex)] + fh[idx_fh_F(iF + 1, jF, kF, ex)]) -
|
for (int j0 = 0; j0 <= ex2-2; ++j0) {
|
||||||
TWT * fh[idx_fh_F(iF, jF, kF, ex)]) / dX;
|
const int jF = j0+2;
|
||||||
|
for (int i0 = 0; i0 <= ex1-2; ++i0) {
|
||||||
|
const int iF = i0+2;
|
||||||
|
const size_t p = idx_ex(i0,j0,k0,ex);
|
||||||
|
|
||||||
const double Dy_term =
|
const double sfx = Sfx[p];
|
||||||
((fh[idx_fh_F(iF, jF - 3, kF, ex)] + fh[idx_fh_F(iF, jF + 3, kF, ex)]) -
|
if (sfx > ZEO) {
|
||||||
SIX * (fh[idx_fh_F(iF, jF - 2, kF, ex)] + fh[idx_fh_F(iF, jF + 2, kF, ex)]) +
|
if (i0 <= ex1-4)
|
||||||
FIT * (fh[idx_fh_F(iF, jF - 1, kF, ex)] + fh[idx_fh_F(iF, jF + 1, kF, ex)]) -
|
f_rhs[p] += sfx*d12dx*(-F3*fh[idx_fh_F(iF-1,jF,kF,ex)]-F10*fh[idx_fh_F(iF,jF,kF,ex)]+F18*fh[idx_fh_F(iF+1,jF,kF,ex)]-F6*fh[idx_fh_F(iF+2,jF,kF,ex)]+fh[idx_fh_F(iF+3,jF,kF,ex)]);
|
||||||
TWT * fh[idx_fh_F(iF, jF, kF, ex)]) / dY;
|
else if (i0 <= ex1-3)
|
||||||
|
f_rhs[p] += sfx*d12dx*(fh[idx_fh_F(iF-2,jF,kF,ex)]-EIT*fh[idx_fh_F(iF-1,jF,kF,ex)]+EIT*fh[idx_fh_F(iF+1,jF,kF,ex)]-fh[idx_fh_F(iF+2,jF,kF,ex)]);
|
||||||
const double Dz_term =
|
else if (i0 <= ex1-2)
|
||||||
((fh[idx_fh_F(iF, jF, kF - 3, ex)] + fh[idx_fh_F(iF, jF, kF + 3, ex)]) -
|
f_rhs[p] -= sfx*d12dx*(-F3*fh[idx_fh_F(iF+1,jF,kF,ex)]-F10*fh[idx_fh_F(iF,jF,kF,ex)]+F18*fh[idx_fh_F(iF-1,jF,kF,ex)]-F6*fh[idx_fh_F(iF-2,jF,kF,ex)]+fh[idx_fh_F(iF-3,jF,kF,ex)]);
|
||||||
SIX * (fh[idx_fh_F(iF, jF, kF - 2, ex)] + fh[idx_fh_F(iF, jF, kF + 2, ex)]) +
|
} else if (sfx < ZEO) {
|
||||||
FIT * (fh[idx_fh_F(iF, jF, kF - 1, ex)] + fh[idx_fh_F(iF, jF, kF + 1, ex)]) -
|
if ((i0-2) >= iminF)
|
||||||
TWT * fh[idx_fh_F(iF, jF, kF, ex)]) / dZ;
|
f_rhs[p] -= sfx*d12dx*(-F3*fh[idx_fh_F(iF+1,jF,kF,ex)]-F10*fh[idx_fh_F(iF,jF,kF,ex)]+F18*fh[idx_fh_F(iF-1,jF,kF,ex)]-F6*fh[idx_fh_F(iF-2,jF,kF,ex)]+fh[idx_fh_F(iF-3,jF,kF,ex)]);
|
||||||
|
else if ((i0-1) >= iminF)
|
||||||
f_rhs[p] += (eps / cof) * (Dx_term + Dy_term + Dz_term);
|
f_rhs[p] += sfx*d12dx*(fh[idx_fh_F(iF-2,jF,kF,ex)]-EIT*fh[idx_fh_F(iF-1,jF,kF,ex)]+EIT*fh[idx_fh_F(iF+1,jF,kF,ex)]-fh[idx_fh_F(iF+2,jF,kF,ex)]);
|
||||||
|
else if (i0 >= iminF)
|
||||||
|
f_rhs[p] += sfx*d12dx*(-F3*fh[idx_fh_F(iF-1,jF,kF,ex)]-F10*fh[idx_fh_F(iF,jF,kF,ex)]+F18*fh[idx_fh_F(iF+1,jF,kF,ex)]-F6*fh[idx_fh_F(iF+2,jF,kF,ex)]+fh[idx_fh_F(iF+3,jF,kF,ex)]);
|
||||||
|
}
|
||||||
|
const double sfy = Sfy[p];
|
||||||
|
if (sfy > ZEO) {
|
||||||
|
if (j0<=ex2-4) f_rhs[p] += sfy*d12dy*(-F3*fh[idx_fh_F(iF,jF-1,kF,ex)]-F10*fh[idx_fh_F(iF,jF,kF,ex)]+F18*fh[idx_fh_F(iF,jF+1,kF,ex)]-F6*fh[idx_fh_F(iF,jF+2,kF,ex)]+fh[idx_fh_F(iF,jF+3,kF,ex)]);
|
||||||
|
else if (j0<=ex2-3) f_rhs[p] += sfy*d12dy*(fh[idx_fh_F(iF,jF-2,kF,ex)]-EIT*fh[idx_fh_F(iF,jF-1,kF,ex)]+EIT*fh[idx_fh_F(iF,jF+1,kF,ex)]-fh[idx_fh_F(iF,jF+2,kF,ex)]);
|
||||||
|
else if (j0<=ex2-2) f_rhs[p] -= sfy*d12dy*(-F3*fh[idx_fh_F(iF,jF+1,kF,ex)]-F10*fh[idx_fh_F(iF,jF,kF,ex)]+F18*fh[idx_fh_F(iF,jF-1,kF,ex)]-F6*fh[idx_fh_F(iF,jF-2,kF,ex)]+fh[idx_fh_F(iF,jF-3,kF,ex)]);
|
||||||
|
} else if (sfy < ZEO) {
|
||||||
|
if ((j0-2)>=jminF) f_rhs[p] -= sfy*d12dy*(-F3*fh[idx_fh_F(iF,jF+1,kF,ex)]-F10*fh[idx_fh_F(iF,jF,kF,ex)]+F18*fh[idx_fh_F(iF,jF-1,kF,ex)]-F6*fh[idx_fh_F(iF,jF-2,kF,ex)]+fh[idx_fh_F(iF,jF-3,kF,ex)]);
|
||||||
|
else if ((j0-1)>=jminF) f_rhs[p] += sfy*d12dy*(fh[idx_fh_F(iF,jF-2,kF,ex)]-EIT*fh[idx_fh_F(iF,jF-1,kF,ex)]+EIT*fh[idx_fh_F(iF,jF+1,kF,ex)]-fh[idx_fh_F(iF,jF+2,kF,ex)]);
|
||||||
|
else if (j0>=jminF) f_rhs[p] += sfy*d12dy*(-F3*fh[idx_fh_F(iF,jF-1,kF,ex)]-F10*fh[idx_fh_F(iF,jF,kF,ex)]+F18*fh[idx_fh_F(iF,jF+1,kF,ex)]-F6*fh[idx_fh_F(iF,jF+2,kF,ex)]+fh[idx_fh_F(iF,jF+3,kF,ex)]);
|
||||||
|
}
|
||||||
|
const double sfz = Sfz[p];
|
||||||
|
if (sfz > ZEO) {
|
||||||
|
if (k0<=ex3-4) f_rhs[p] += sfz*d12dz*(-F3*fh[idx_fh_F(iF,jF,kF-1,ex)]-F10*fh[idx_fh_F(iF,jF,kF,ex)]+F18*fh[idx_fh_F(iF,jF,kF+1,ex)]-F6*fh[idx_fh_F(iF,jF,kF+2,ex)]+fh[idx_fh_F(iF,jF,kF+3,ex)]);
|
||||||
|
else if (k0<=ex3-3) f_rhs[p] += sfz*d12dz*(fh[idx_fh_F(iF,jF,kF-2,ex)]-EIT*fh[idx_fh_F(iF,jF,kF-1,ex)]+EIT*fh[idx_fh_F(iF,jF,kF+1,ex)]-fh[idx_fh_F(iF,jF,kF+2,ex)]);
|
||||||
|
else if (k0<=ex3-2) f_rhs[p] -= sfz*d12dz*(-F3*fh[idx_fh_F(iF,jF,kF+1,ex)]-F10*fh[idx_fh_F(iF,jF,kF,ex)]+F18*fh[idx_fh_F(iF,jF,kF-1,ex)]-F6*fh[idx_fh_F(iF,jF,kF-2,ex)]+fh[idx_fh_F(iF,jF,kF-3,ex)]);
|
||||||
|
} else if (sfz < ZEO) {
|
||||||
|
if ((k0-2)>=kminF) f_rhs[p] -= sfz*d12dz*(-F3*fh[idx_fh_F(iF,jF,kF+1,ex)]-F10*fh[idx_fh_F(iF,jF,kF,ex)]+F18*fh[idx_fh_F(iF,jF,kF-1,ex)]-F6*fh[idx_fh_F(iF,jF,kF-2,ex)]+fh[idx_fh_F(iF,jF,kF-3,ex)]);
|
||||||
|
else if ((k0-1)>=kminF) f_rhs[p] += sfz*d12dz*(fh[idx_fh_F(iF,jF,kF-2,ex)]-EIT*fh[idx_fh_F(iF,jF,kF-1,ex)]+EIT*fh[idx_fh_F(iF,jF,kF+1,ex)]-fh[idx_fh_F(iF,jF,kF+2,ex)]);
|
||||||
|
else if (k0>=kminF) f_rhs[p] += sfz*d12dz*(-F3*fh[idx_fh_F(iF,jF,kF-1,ex)]-F10*fh[idx_fh_F(iF,jF,kF,ex)]+F18*fh[idx_fh_F(iF,jF,kF+1,ex)]-F6*fh[idx_fh_F(iF,jF,kF+2,ex)]+fh[idx_fh_F(iF,jF,kF+3,ex)]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
free(fh);
|
/* ---- KO dissipation (r=3, cof=64, sign=+) ---- */
|
||||||
|
if (eps > ZEO) {
|
||||||
|
const double cof = 64.0;
|
||||||
|
const double SIX = 6.0, FIT = 15.0, TWT = 20.0;
|
||||||
|
const int i0_lo=(iminF+2>0)?iminF+2:0, j0_lo=(jminF+2>0)?jminF+2:0, k0_lo=(kminF+2>0)?kminF+2:0;
|
||||||
|
const int i0_hi=imaxF-4, j0_hi=jmaxF-4, k0_hi=kmaxF-4;
|
||||||
|
if (!(i0_lo>i0_hi||j0_lo>j0_hi||k0_lo>k0_hi)) {
|
||||||
|
for (int k0=k0_lo;k0<=k0_hi;++k0) { const int kF=k0+2;
|
||||||
|
for (int j0=j0_lo;j0<=j0_hi;++j0) { const int jF=j0+2;
|
||||||
|
for (int i0=i0_lo;i0<=i0_hi;++i0) { const int iF=i0+2;
|
||||||
|
const size_t p=idx_ex(i0,j0,k0,ex);
|
||||||
|
const double Dx=((fh[idx_fh_F(iF-3,jF,kF,ex)]+fh[idx_fh_F(iF+3,jF,kF,ex)])-SIX*(fh[idx_fh_F(iF-2,jF,kF,ex)]+fh[idx_fh_F(iF+2,jF,kF,ex)])+FIT*(fh[idx_fh_F(iF-1,jF,kF,ex)]+fh[idx_fh_F(iF+1,jF,kF,ex)])-TWT*fh[idx_fh_F(iF,jF,kF,ex)])/dX;
|
||||||
|
const double Dy=((fh[idx_fh_F(iF,jF-3,kF,ex)]+fh[idx_fh_F(iF,jF+3,kF,ex)])-SIX*(fh[idx_fh_F(iF,jF-2,kF,ex)]+fh[idx_fh_F(iF,jF+2,kF,ex)])+FIT*(fh[idx_fh_F(iF,jF-1,kF,ex)]+fh[idx_fh_F(iF,jF+1,kF,ex)])-TWT*fh[idx_fh_F(iF,jF,kF,ex)])/dY;
|
||||||
|
const double Dz=((fh[idx_fh_F(iF,jF,kF-3,ex)]+fh[idx_fh_F(iF,jF,kF+3,ex)])-SIX*(fh[idx_fh_F(iF,jF,kF-2,ex)]+fh[idx_fh_F(iF,jF,kF+2,ex)])+FIT*(fh[idx_fh_F(iF,jF,kF-1,ex)]+fh[idx_fh_F(iF,jF,kF+1,ex)])-TWT*fh[idx_fh_F(iF,jF,kF,ex)])/dZ;
|
||||||
|
f_rhs[p] += (eps/cof)*(Dx+Dy+Dz);
|
||||||
|
}}}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
free(fh);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
#elif (ghost_width == 4)
|
||||||
|
{
|
||||||
|
const int ord = 4;
|
||||||
|
int iminF = 1, jminF = 1, kminF = 1;
|
||||||
|
if (Symmetry > NO_SYMM && fabs(Z[0]) < dZ) kminF = -3;
|
||||||
|
if (Symmetry > EQ_SYMM && fabs(X[0]) < dX) iminF = -3;
|
||||||
|
if (Symmetry > EQ_SYMM && fabs(Y[0]) < dY) jminF = -3;
|
||||||
|
|
||||||
|
const size_t nx = (size_t)ex1 + ord;
|
||||||
|
const size_t ny = (size_t)ex2 + ord;
|
||||||
|
const size_t nz = (size_t)ex3 + ord;
|
||||||
|
double *fh = (double*)malloc(nx*ny*nz*sizeof(double));
|
||||||
|
if (!fh) return;
|
||||||
|
symmetry_bd(ord, ex, f, fh, SoA);
|
||||||
|
|
||||||
|
const double d60dx=ONE/F60/dX, d60dy=ONE/F60/dY, d60dz=ONE/F60/dZ;
|
||||||
|
const double d12dx=ONE/F12/dX, d12dy=ONE/F12/dY, d12dz=ONE/F12/dZ;
|
||||||
|
const double d2dx=ONE/TWO/dX, d2dy=ONE/TWO/dY, d2dz=ONE/TWO/dZ;
|
||||||
|
|
||||||
|
/* ---- advection (6th-order lopsided) ---- */
|
||||||
|
for (int k0=0;k0<=ex3-2;++k0) { const int kF=k0+3;
|
||||||
|
for (int j0=0;j0<=ex2-2;++j0) { const int jF=j0+3;
|
||||||
|
for (int i0=0;i0<=ex1-2;++i0) { const int iF=i0+3;
|
||||||
|
const size_t p=idx_ex(i0,j0,k0,ex);
|
||||||
|
/* x */
|
||||||
|
const double sfx=Sfx[p];
|
||||||
|
if (sfx>ZEO) {
|
||||||
|
if (i0<=ex1-5&&(i0-1)>=iminF) f_rhs[p]+=sfx*d60dx*(+F2*fh[idx_fh_F_ord4(iF-2,jF,kF,ex)]-F24*fh[idx_fh_F_ord4(iF-1,jF,kF,ex)]-F35*fh[idx_fh_F_ord4(iF,jF,kF,ex)]+F80*fh[idx_fh_F_ord4(iF+1,jF,kF,ex)]-F30*fh[idx_fh_F_ord4(iF+2,jF,kF,ex)]+EIT*fh[idx_fh_F_ord4(iF+3,jF,kF,ex)]-fh[idx_fh_F_ord4(iF+4,jF,kF,ex)]);
|
||||||
|
else if (i0<=ex1-6&&i0>=iminF) f_rhs[p]+=sfx*d60dx*(-F10*fh[idx_fh_F_ord4(iF-1,jF,kF,ex)]-F77*fh[idx_fh_F_ord4(iF,jF,kF,ex)]+F150*fh[idx_fh_F_ord4(iF+1,jF,kF,ex)]-F100*fh[idx_fh_F_ord4(iF+2,jF,kF,ex)]+F50*fh[idx_fh_F_ord4(iF+3,jF,kF,ex)]-F15*fh[idx_fh_F_ord4(iF+4,jF,kF,ex)]+F2*fh[idx_fh_F_ord4(iF+5,jF,kF,ex)]);
|
||||||
|
else if (i0<=ex1-4&&(i0-2)>=iminF) f_rhs[p]+=sfx*d60dx*(-fh[idx_fh_F_ord4(iF-3,jF,kF,ex)]+F9*fh[idx_fh_F_ord4(iF-2,jF,kF,ex)]-F45*fh[idx_fh_F_ord4(iF-1,jF,kF,ex)]+F45*fh[idx_fh_F_ord4(iF+1,jF,kF,ex)]-F9*fh[idx_fh_F_ord4(iF+2,jF,kF,ex)]+fh[idx_fh_F_ord4(iF+3,jF,kF,ex)]);
|
||||||
|
else if (i0<=ex1-3&&(i0-1)>=iminF) f_rhs[p]+=sfx*d12dx*(fh[idx_fh_F_ord4(iF-2,jF,kF,ex)]-EIT*fh[idx_fh_F_ord4(iF-1,jF,kF,ex)]+EIT*fh[idx_fh_F_ord4(iF+1,jF,kF,ex)]-fh[idx_fh_F_ord4(iF+2,jF,kF,ex)]);
|
||||||
|
else if (i0<=ex1-2&&i0>=iminF) f_rhs[p]+=sfx*d2dx*(-fh[idx_fh_F_ord4(iF-1,jF,kF,ex)]+fh[idx_fh_F_ord4(iF+1,jF,kF,ex)]);
|
||||||
|
} else if (sfx<ZEO) {
|
||||||
|
if ((i0-4)>=iminF&&i0<=ex1-2) f_rhs[p]-=sfx*d60dx*(+F2*fh[idx_fh_F_ord4(iF+2,jF,kF,ex)]-F24*fh[idx_fh_F_ord4(iF+1,jF,kF,ex)]-F35*fh[idx_fh_F_ord4(iF,jF,kF,ex)]+F80*fh[idx_fh_F_ord4(iF-1,jF,kF,ex)]-F30*fh[idx_fh_F_ord4(iF-2,jF,kF,ex)]+EIT*fh[idx_fh_F_ord4(iF-3,jF,kF,ex)]-fh[idx_fh_F_ord4(iF-4,jF,kF,ex)]);
|
||||||
|
else if ((i0-5)>=iminF&&i0<=ex1-2) f_rhs[p]-=sfx*d60dx*(-F10*fh[idx_fh_F_ord4(iF+1,jF,kF,ex)]-F77*fh[idx_fh_F_ord4(iF,jF,kF,ex)]+F150*fh[idx_fh_F_ord4(iF-1,jF,kF,ex)]-F100*fh[idx_fh_F_ord4(iF-2,jF,kF,ex)]+F50*fh[idx_fh_F_ord4(iF-3,jF,kF,ex)]-F15*fh[idx_fh_F_ord4(iF-4,jF,kF,ex)]+F2*fh[idx_fh_F_ord4(iF-5,jF,kF,ex)]);
|
||||||
|
else if ((i0-3)>=iminF&&i0<=ex1-2) f_rhs[p]-=sfx*d60dx*(-fh[idx_fh_F_ord4(iF-3,jF,kF,ex)]+F9*fh[idx_fh_F_ord4(iF-2,jF,kF,ex)]-F45*fh[idx_fh_F_ord4(iF-1,jF,kF,ex)]+F45*fh[idx_fh_F_ord4(iF+1,jF,kF,ex)]-F9*fh[idx_fh_F_ord4(iF+2,jF,kF,ex)]+fh[idx_fh_F_ord4(iF+3,jF,kF,ex)]);
|
||||||
|
else if ((i0-2)>=iminF&&i0<=ex1-2) f_rhs[p]-=sfx*d12dx*(fh[idx_fh_F_ord4(iF-2,jF,kF,ex)]-EIT*fh[idx_fh_F_ord4(iF-1,jF,kF,ex)]+EIT*fh[idx_fh_F_ord4(iF+1,jF,kF,ex)]-fh[idx_fh_F_ord4(iF+2,jF,kF,ex)]);
|
||||||
|
else if ((i0-1)>=iminF&&i0<=ex1-2) f_rhs[p]-=sfx*d2dx*(-fh[idx_fh_F_ord4(iF-1,jF,kF,ex)]+fh[idx_fh_F_ord4(iF+1,jF,kF,ex)]);
|
||||||
|
}
|
||||||
|
/* y */
|
||||||
|
const double sfy=Sfy[p];
|
||||||
|
if (sfy>ZEO) {
|
||||||
|
if (j0<=ex2-5&&(j0-1)>=jminF) f_rhs[p]+=sfy*d60dy*(F2*fh[idx_fh_F_ord4(iF,jF-2,kF,ex)]-F24*fh[idx_fh_F_ord4(iF,jF-1,kF,ex)]-F35*fh[idx_fh_F_ord4(iF,jF,kF,ex)]+F80*fh[idx_fh_F_ord4(iF,jF+1,kF,ex)]-F30*fh[idx_fh_F_ord4(iF,jF+2,kF,ex)]+EIT*fh[idx_fh_F_ord4(iF,jF+3,kF,ex)]-fh[idx_fh_F_ord4(iF,jF+4,kF,ex)]);
|
||||||
|
else if (j0<=ex2-6&&j0>=jminF) f_rhs[p]+=sfy*d60dy*(-F10*fh[idx_fh_F_ord4(iF,jF-1,kF,ex)]-F77*fh[idx_fh_F_ord4(iF,jF,kF,ex)]+F150*fh[idx_fh_F_ord4(iF,jF+1,kF,ex)]-F100*fh[idx_fh_F_ord4(iF,jF+2,kF,ex)]+F50*fh[idx_fh_F_ord4(iF,jF+3,kF,ex)]-F15*fh[idx_fh_F_ord4(iF,jF+4,kF,ex)]+F2*fh[idx_fh_F_ord4(iF,jF+5,kF,ex)]);
|
||||||
|
else if (j0<=ex2-4&&(j0-2)>=jminF) f_rhs[p]+=sfy*d60dy*(-fh[idx_fh_F_ord4(iF,jF-3,kF,ex)]+F9*fh[idx_fh_F_ord4(iF,jF-2,kF,ex)]-F45*fh[idx_fh_F_ord4(iF,jF-1,kF,ex)]+F45*fh[idx_fh_F_ord4(iF,jF+1,kF,ex)]-F9*fh[idx_fh_F_ord4(iF,jF+2,kF,ex)]+fh[idx_fh_F_ord4(iF,jF+3,kF,ex)]);
|
||||||
|
else if (j0<=ex2-3&&(j0-1)>=jminF) f_rhs[p]+=sfy*d12dy*(fh[idx_fh_F_ord4(iF,jF-2,kF,ex)]-EIT*fh[idx_fh_F_ord4(iF,jF-1,kF,ex)]+EIT*fh[idx_fh_F_ord4(iF,jF+1,kF,ex)]-fh[idx_fh_F_ord4(iF,jF+2,kF,ex)]);
|
||||||
|
else if (j0<=ex2-2&&j0>=jminF) f_rhs[p]+=sfy*d2dy*(-fh[idx_fh_F_ord4(iF,jF-1,kF,ex)]+fh[idx_fh_F_ord4(iF,jF+1,kF,ex)]);
|
||||||
|
} else if (sfy<ZEO) {
|
||||||
|
if ((j0-4)>=jminF&&j0<=ex2-2) f_rhs[p]-=sfy*d60dy*(F2*fh[idx_fh_F_ord4(iF,jF+2,kF,ex)]-F24*fh[idx_fh_F_ord4(iF,jF+1,kF,ex)]-F35*fh[idx_fh_F_ord4(iF,jF,kF,ex)]+F80*fh[idx_fh_F_ord4(iF,jF-1,kF,ex)]-F30*fh[idx_fh_F_ord4(iF,jF-2,kF,ex)]+EIT*fh[idx_fh_F_ord4(iF,jF-3,kF,ex)]-fh[idx_fh_F_ord4(iF,jF-4,kF,ex)]);
|
||||||
|
else if ((j0-5)>=jminF&&j0<=ex2-2) f_rhs[p]-=sfy*d60dy*(-F10*fh[idx_fh_F_ord4(iF,jF+1,kF,ex)]-F77*fh[idx_fh_F_ord4(iF,jF,kF,ex)]+F150*fh[idx_fh_F_ord4(iF,jF-1,kF,ex)]-F100*fh[idx_fh_F_ord4(iF,jF-2,kF,ex)]+F50*fh[idx_fh_F_ord4(iF,jF-3,kF,ex)]-F15*fh[idx_fh_F_ord4(iF,jF-4,kF,ex)]+F2*fh[idx_fh_F_ord4(iF,jF-5,kF,ex)]);
|
||||||
|
else if ((j0-3)>=jminF&&j0<=ex2-2) f_rhs[p]-=sfy*d60dy*(-fh[idx_fh_F_ord4(iF,jF-3,kF,ex)]+F9*fh[idx_fh_F_ord4(iF,jF-2,kF,ex)]-F45*fh[idx_fh_F_ord4(iF,jF-1,kF,ex)]+F45*fh[idx_fh_F_ord4(iF,jF+1,kF,ex)]-F9*fh[idx_fh_F_ord4(iF,jF+2,kF,ex)]+fh[idx_fh_F_ord4(iF,jF+3,kF,ex)]);
|
||||||
|
else if ((j0-2)>=jminF&&j0<=ex2-2) f_rhs[p]-=sfy*d12dy*(fh[idx_fh_F_ord4(iF,jF-2,kF,ex)]-EIT*fh[idx_fh_F_ord4(iF,jF-1,kF,ex)]+EIT*fh[idx_fh_F_ord4(iF,jF+1,kF,ex)]-fh[idx_fh_F_ord4(iF,jF+2,kF,ex)]);
|
||||||
|
else if ((j0-1)>=jminF&&j0<=ex2-2) f_rhs[p]-=sfy*d2dy*(-fh[idx_fh_F_ord4(iF,jF-1,kF,ex)]+fh[idx_fh_F_ord4(iF,jF+1,kF,ex)]);
|
||||||
|
}
|
||||||
|
/* z */
|
||||||
|
const double sfz=Sfz[p];
|
||||||
|
if (sfz>ZEO) {
|
||||||
|
if (k0<=ex3-5&&(k0-1)>=kminF) f_rhs[p]+=sfz*d60dz*(F2*fh[idx_fh_F_ord4(iF,jF,kF-2,ex)]-F24*fh[idx_fh_F_ord4(iF,jF,kF-1,ex)]-F35*fh[idx_fh_F_ord4(iF,jF,kF,ex)]+F80*fh[idx_fh_F_ord4(iF,jF,kF+1,ex)]-F30*fh[idx_fh_F_ord4(iF,jF,kF+2,ex)]+EIT*fh[idx_fh_F_ord4(iF,jF,kF+3,ex)]-fh[idx_fh_F_ord4(iF,jF,kF+4,ex)]);
|
||||||
|
else if (k0<=ex3-6&&k0>=kminF) f_rhs[p]+=sfz*d60dz*(-F10*fh[idx_fh_F_ord4(iF,jF,kF-1,ex)]-F77*fh[idx_fh_F_ord4(iF,jF,kF,ex)]+F150*fh[idx_fh_F_ord4(iF,jF,kF+1,ex)]-F100*fh[idx_fh_F_ord4(iF,jF,kF+2,ex)]+F50*fh[idx_fh_F_ord4(iF,jF,kF+3,ex)]-F15*fh[idx_fh_F_ord4(iF,jF,kF+4,ex)]+F2*fh[idx_fh_F_ord4(iF,jF,kF+5,ex)]);
|
||||||
|
else if (k0<=ex3-4&&(k0-2)>=kminF) f_rhs[p]+=sfz*d60dz*(-fh[idx_fh_F_ord4(iF,jF,kF-3,ex)]+F9*fh[idx_fh_F_ord4(iF,jF,kF-2,ex)]-F45*fh[idx_fh_F_ord4(iF,jF,kF-1,ex)]+F45*fh[idx_fh_F_ord4(iF,jF,kF+1,ex)]-F9*fh[idx_fh_F_ord4(iF,jF,kF+2,ex)]+fh[idx_fh_F_ord4(iF,jF,kF+3,ex)]);
|
||||||
|
else if (k0<=ex3-3&&(k0-1)>=kminF) f_rhs[p]+=sfz*d12dz*(fh[idx_fh_F_ord4(iF,jF,kF-2,ex)]-EIT*fh[idx_fh_F_ord4(iF,jF,kF-1,ex)]+EIT*fh[idx_fh_F_ord4(iF,jF,kF+1,ex)]-fh[idx_fh_F_ord4(iF,jF,kF+2,ex)]);
|
||||||
|
else if (k0<=ex3-2&&k0>=kminF) f_rhs[p]+=sfz*d2dz*(-fh[idx_fh_F_ord4(iF,jF,kF-1,ex)]+fh[idx_fh_F_ord4(iF,jF,kF+1,ex)]);
|
||||||
|
} else if (sfz<ZEO) {
|
||||||
|
if ((k0-4)>=kminF&&k0<=ex3-2) f_rhs[p]-=sfz*d60dz*(F2*fh[idx_fh_F_ord4(iF,jF,kF+2,ex)]-F24*fh[idx_fh_F_ord4(iF,jF,kF+1,ex)]-F35*fh[idx_fh_F_ord4(iF,jF,kF,ex)]+F80*fh[idx_fh_F_ord4(iF,jF,kF-1,ex)]-F30*fh[idx_fh_F_ord4(iF,jF,kF-2,ex)]+EIT*fh[idx_fh_F_ord4(iF,jF,kF-3,ex)]-fh[idx_fh_F_ord4(iF,jF,kF-4,ex)]);
|
||||||
|
else if ((k0-5)>=kminF&&k0<=ex3-2) f_rhs[p]-=sfz*d60dz*(-F10*fh[idx_fh_F_ord4(iF,jF,kF+1,ex)]-F77*fh[idx_fh_F_ord4(iF,jF,kF,ex)]+F150*fh[idx_fh_F_ord4(iF,jF,kF-1,ex)]-F100*fh[idx_fh_F_ord4(iF,jF,kF-2,ex)]+F50*fh[idx_fh_F_ord4(iF,jF,kF-3,ex)]-F15*fh[idx_fh_F_ord4(iF,jF,kF-4,ex)]+F2*fh[idx_fh_F_ord4(iF,jF,kF-5,ex)]);
|
||||||
|
else if ((k0-3)>=kminF&&k0<=ex3-2) f_rhs[p]-=sfz*d60dz*(-fh[idx_fh_F_ord4(iF,jF,kF-3,ex)]+F9*fh[idx_fh_F_ord4(iF,jF,kF-2,ex)]-F45*fh[idx_fh_F_ord4(iF,jF,kF-1,ex)]+F45*fh[idx_fh_F_ord4(iF,jF,kF+1,ex)]-F9*fh[idx_fh_F_ord4(iF,jF,kF+2,ex)]+fh[idx_fh_F_ord4(iF,jF,kF+3,ex)]);
|
||||||
|
else if ((k0-2)>=kminF&&k0<=ex3-2) f_rhs[p]-=sfz*d12dz*(fh[idx_fh_F_ord4(iF,jF,kF-2,ex)]-EIT*fh[idx_fh_F_ord4(iF,jF,kF-1,ex)]+EIT*fh[idx_fh_F_ord4(iF,jF,kF+1,ex)]-fh[idx_fh_F_ord4(iF,jF,kF+2,ex)]);
|
||||||
|
else if ((k0-1)>=kminF&&k0<=ex3-2) f_rhs[p]-=sfz*d2dz*(-fh[idx_fh_F_ord4(iF,jF,kF-1,ex)]+fh[idx_fh_F_ord4(iF,jF,kF+1,ex)]);
|
||||||
|
}
|
||||||
|
}}}
|
||||||
|
|
||||||
|
/* ---- KO dissipation (r=4, cof=256, sign=-) ---- */
|
||||||
|
if (eps > ZEO) {
|
||||||
|
const double cof = 256.0;
|
||||||
|
const double F8k = 8.0, F28 = 28.0, F56 = 56.0, F70 = 70.0;
|
||||||
|
const int i0_lo=(iminF+3>0)?iminF+3:0, j0_lo=(jminF+3>0)?jminF+3:0, k0_lo=(kminF+3>0)?kminF+3:0;
|
||||||
|
const int i0_hi=imaxF-5, j0_hi=jmaxF-5, k0_hi=kmaxF-5;
|
||||||
|
if (!(i0_lo>i0_hi||j0_lo>j0_hi||k0_lo>k0_hi)) {
|
||||||
|
for (int k0=k0_lo;k0<=k0_hi;++k0) { const int kF=k0+3;
|
||||||
|
for (int j0=j0_lo;j0<=j0_hi;++j0) { const int jF=j0+3;
|
||||||
|
for (int i0=i0_lo;i0<=i0_hi;++i0) { const int iF=i0+3;
|
||||||
|
const size_t p=idx_ex(i0,j0,k0,ex);
|
||||||
|
const double Dx=((fh[idx_fh_F_ord4(iF-4,jF,kF,ex)]+fh[idx_fh_F_ord4(iF+4,jF,kF,ex)])-F8k*(fh[idx_fh_F_ord4(iF-3,jF,kF,ex)]+fh[idx_fh_F_ord4(iF+3,jF,kF,ex)])+F28*(fh[idx_fh_F_ord4(iF-2,jF,kF,ex)]+fh[idx_fh_F_ord4(iF+2,jF,kF,ex)])-F56*(fh[idx_fh_F_ord4(iF-1,jF,kF,ex)]+fh[idx_fh_F_ord4(iF+1,jF,kF,ex)])+F70*fh[idx_fh_F_ord4(iF,jF,kF,ex)])/dX;
|
||||||
|
const double Dy=((fh[idx_fh_F_ord4(iF,jF-4,kF,ex)]+fh[idx_fh_F_ord4(iF,jF+4,kF,ex)])-F8k*(fh[idx_fh_F_ord4(iF,jF-3,kF,ex)]+fh[idx_fh_F_ord4(iF,jF+3,kF,ex)])+F28*(fh[idx_fh_F_ord4(iF,jF-2,kF,ex)]+fh[idx_fh_F_ord4(iF,jF+2,kF,ex)])-F56*(fh[idx_fh_F_ord4(iF,jF-1,kF,ex)]+fh[idx_fh_F_ord4(iF,jF+1,kF,ex)])+F70*fh[idx_fh_F_ord4(iF,jF,kF,ex)])/dY;
|
||||||
|
const double Dz=((fh[idx_fh_F_ord4(iF,jF,kF-4,ex)]+fh[idx_fh_F_ord4(iF,jF,kF+4,ex)])-F8k*(fh[idx_fh_F_ord4(iF,jF,kF-3,ex)]+fh[idx_fh_F_ord4(iF,jF,kF+3,ex)])+F28*(fh[idx_fh_F_ord4(iF,jF,kF-2,ex)]+fh[idx_fh_F_ord4(iF,jF,kF+2,ex)])-F56*(fh[idx_fh_F_ord4(iF,jF,kF-1,ex)]+fh[idx_fh_F_ord4(iF,jF,kF+1,ex)])+F70*fh[idx_fh_F_ord4(iF,jF,kF,ex)])/dZ;
|
||||||
|
f_rhs[p] -= (eps/cof)*(Dx+Dy+Dz);
|
||||||
|
}}}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
free(fh);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
#elif (ghost_width == 5)
|
||||||
|
{
|
||||||
|
const int ord = 5;
|
||||||
|
int iminF = 1, jminF = 1, kminF = 1;
|
||||||
|
if (Symmetry > NO_SYMM && fabs(Z[0]) < dZ) kminF = -4;
|
||||||
|
if (Symmetry > EQ_SYMM && fabs(X[0]) < dX) iminF = -4;
|
||||||
|
if (Symmetry > EQ_SYMM && fabs(Y[0]) < dY) jminF = -4;
|
||||||
|
|
||||||
|
const size_t nx = (size_t)ex1 + ord;
|
||||||
|
const size_t ny = (size_t)ex2 + ord;
|
||||||
|
const size_t nz = (size_t)ex3 + ord;
|
||||||
|
double *fh = (double*)malloc(nx*ny*nz*sizeof(double));
|
||||||
|
if (!fh) return;
|
||||||
|
symmetry_bd(ord, ex, f, fh, SoA);
|
||||||
|
|
||||||
|
const double d840dx=ONE/F840/dX, d840dy=ONE/F840/dY, d840dz=ONE/F840/dZ;
|
||||||
|
const double d60dx=ONE/F60/dX, d60dy=ONE/F60/dY, d60dz=ONE/F60/dZ;
|
||||||
|
const double d12dx=ONE/F12/dX, d12dy=ONE/F12/dY, d12dz=ONE/F12/dZ;
|
||||||
|
const double d2dx=ONE/TWO/dX, d2dy=ONE/TWO/dY, d2dz=ONE/TWO/dZ;
|
||||||
|
|
||||||
|
/* ---- advection (8th-order lopsided) ---- */
|
||||||
|
for (int k0=0;k0<=ex3-2;++k0) { const int kF=k0+4;
|
||||||
|
for (int j0=0;j0<=ex2-2;++j0) { const int jF=j0+4;
|
||||||
|
for (int i0=0;i0<=ex1-2;++i0) { const int iF=i0+4;
|
||||||
|
const size_t p=idx_ex(i0,j0,k0,ex);
|
||||||
|
const double sfx=Sfx[p];
|
||||||
|
if (sfx>ZEO) {
|
||||||
|
if (i0<=ex1-6&&(i0-2)>=iminF) f_rhs[p]+=sfx*d840dx*(-F5*fh[idx_fh_F_ord5(iF-3,jF,kF,ex)]+F60*fh[idx_fh_F_ord5(iF-2,jF,kF,ex)]-F420*fh[idx_fh_F_ord5(iF-1,jF,kF,ex)]-F378*fh[idx_fh_F_ord5(iF,jF,kF,ex)]+F1050*fh[idx_fh_F_ord5(iF+1,jF,kF,ex)]-F420*fh[idx_fh_F_ord5(iF+2,jF,kF,ex)]+F140*fh[idx_fh_F_ord5(iF+3,jF,kF,ex)]-F30*fh[idx_fh_F_ord5(iF+4,jF,kF,ex)]+F3*fh[idx_fh_F_ord5(iF+5,jF,kF,ex)]);
|
||||||
|
else if (i0<=ex1-5&&(i0-3)>=iminF) f_rhs[p]+=sfx*d840dx*(+F3*fh[idx_fh_F_ord5(iF-4,jF,kF,ex)]-F32*fh[idx_fh_F_ord5(iF-3,jF,kF,ex)]+F168*fh[idx_fh_F_ord5(iF-2,jF,kF,ex)]-F672*fh[idx_fh_F_ord5(iF-1,jF,kF,ex)]+F672*fh[idx_fh_F_ord5(iF+1,jF,kF,ex)]-F168*fh[idx_fh_F_ord5(iF+2,jF,kF,ex)]+F32*fh[idx_fh_F_ord5(iF+3,jF,kF,ex)]-F3*fh[idx_fh_F_ord5(iF+4,jF,kF,ex)]);
|
||||||
|
else if (i0<=ex1-4&&(i0-2)>=iminF) f_rhs[p]+=sfx*d60dx*(-fh[idx_fh_F_ord5(iF-3,jF,kF,ex)]+F9*fh[idx_fh_F_ord5(iF-2,jF,kF,ex)]-F45*fh[idx_fh_F_ord5(iF-1,jF,kF,ex)]+F45*fh[idx_fh_F_ord5(iF+1,jF,kF,ex)]-F9*fh[idx_fh_F_ord5(iF+2,jF,kF,ex)]+fh[idx_fh_F_ord5(iF+3,jF,kF,ex)]);
|
||||||
|
else if (i0<=ex1-3&&(i0-1)>=iminF) f_rhs[p]+=sfx*d12dx*(fh[idx_fh_F_ord5(iF-2,jF,kF,ex)]-EIT*fh[idx_fh_F_ord5(iF-1,jF,kF,ex)]+EIT*fh[idx_fh_F_ord5(iF+1,jF,kF,ex)]-fh[idx_fh_F_ord5(iF+2,jF,kF,ex)]);
|
||||||
|
else if (i0<=ex1-2&&i0>=iminF) f_rhs[p]+=sfx*d2dx*(-fh[idx_fh_F_ord5(iF-1,jF,kF,ex)]+fh[idx_fh_F_ord5(iF+1,jF,kF,ex)]);
|
||||||
|
} else if (sfx<ZEO) {
|
||||||
|
if ((i0-5)>=iminF&&i0<=ex1-2) f_rhs[p]-=sfx*d840dx*(-F5*fh[idx_fh_F_ord5(iF+3,jF,kF,ex)]+F60*fh[idx_fh_F_ord5(iF+2,jF,kF,ex)]-F420*fh[idx_fh_F_ord5(iF+1,jF,kF,ex)]-F378*fh[idx_fh_F_ord5(iF,jF,kF,ex)]+F1050*fh[idx_fh_F_ord5(iF-1,jF,kF,ex)]-F420*fh[idx_fh_F_ord5(iF-2,jF,kF,ex)]+F140*fh[idx_fh_F_ord5(iF-3,jF,kF,ex)]-F30*fh[idx_fh_F_ord5(iF-4,jF,kF,ex)]+F3*fh[idx_fh_F_ord5(iF-5,jF,kF,ex)]);
|
||||||
|
else if ((i0-4)>=iminF&&i0<=ex1-2) f_rhs[p]-=sfx*d840dx*(+F3*fh[idx_fh_F_ord5(iF-4,jF,kF,ex)]-F32*fh[idx_fh_F_ord5(iF-3,jF,kF,ex)]+F168*fh[idx_fh_F_ord5(iF-2,jF,kF,ex)]-F672*fh[idx_fh_F_ord5(iF-1,jF,kF,ex)]+F672*fh[idx_fh_F_ord5(iF+1,jF,kF,ex)]-F168*fh[idx_fh_F_ord5(iF+2,jF,kF,ex)]+F32*fh[idx_fh_F_ord5(iF+3,jF,kF,ex)]-F3*fh[idx_fh_F_ord5(iF+4,jF,kF,ex)]);
|
||||||
|
else if ((i0-3)>=iminF&&i0<=ex1-2) f_rhs[p]-=sfx*d60dx*(-fh[idx_fh_F_ord5(iF-3,jF,kF,ex)]+F9*fh[idx_fh_F_ord5(iF-2,jF,kF,ex)]-F45*fh[idx_fh_F_ord5(iF-1,jF,kF,ex)]+F45*fh[idx_fh_F_ord5(iF+1,jF,kF,ex)]-F9*fh[idx_fh_F_ord5(iF+2,jF,kF,ex)]+fh[idx_fh_F_ord5(iF+3,jF,kF,ex)]);
|
||||||
|
else if ((i0-2)>=iminF&&i0<=ex1-2) f_rhs[p]-=sfx*d12dx*(fh[idx_fh_F_ord5(iF-2,jF,kF,ex)]-EIT*fh[idx_fh_F_ord5(iF-1,jF,kF,ex)]+EIT*fh[idx_fh_F_ord5(iF+1,jF,kF,ex)]-fh[idx_fh_F_ord5(iF+2,jF,kF,ex)]);
|
||||||
|
else if ((i0-1)>=iminF&&i0<=ex1-2) f_rhs[p]-=sfx*d2dx*(-fh[idx_fh_F_ord5(iF-1,jF,kF,ex)]+fh[idx_fh_F_ord5(iF+1,jF,kF,ex)]);
|
||||||
|
}
|
||||||
|
const double sfy=Sfy[p];
|
||||||
|
if (sfy>ZEO) {
|
||||||
|
if (j0<=ex2-6&&(j0-2)>=jminF) f_rhs[p]+=sfy*d840dy*(-F5*fh[idx_fh_F_ord5(iF,jF-3,kF,ex)]+F60*fh[idx_fh_F_ord5(iF,jF-2,kF,ex)]-F420*fh[idx_fh_F_ord5(iF,jF-1,kF,ex)]-F378*fh[idx_fh_F_ord5(iF,jF,kF,ex)]+F1050*fh[idx_fh_F_ord5(iF,jF+1,kF,ex)]-F420*fh[idx_fh_F_ord5(iF,jF+2,kF,ex)]+F140*fh[idx_fh_F_ord5(iF,jF+3,kF,ex)]-F30*fh[idx_fh_F_ord5(iF,jF+4,kF,ex)]+F3*fh[idx_fh_F_ord5(iF,jF+5,kF,ex)]);
|
||||||
|
else if (j0<=ex2-5&&(j0-3)>=jminF) f_rhs[p]+=sfy*d840dy*(+F3*fh[idx_fh_F_ord5(iF,jF-4,kF,ex)]-F32*fh[idx_fh_F_ord5(iF,jF-3,kF,ex)]+F168*fh[idx_fh_F_ord5(iF,jF-2,kF,ex)]-F672*fh[idx_fh_F_ord5(iF,jF-1,kF,ex)]+F672*fh[idx_fh_F_ord5(iF,jF+1,kF,ex)]-F168*fh[idx_fh_F_ord5(iF,jF+2,kF,ex)]+F32*fh[idx_fh_F_ord5(iF,jF+3,kF,ex)]-F3*fh[idx_fh_F_ord5(iF,jF+4,kF,ex)]);
|
||||||
|
else if (j0<=ex2-4&&(j0-2)>=jminF) f_rhs[p]+=sfy*d60dy*(-fh[idx_fh_F_ord5(iF,jF-3,kF,ex)]+F9*fh[idx_fh_F_ord5(iF,jF-2,kF,ex)]-F45*fh[idx_fh_F_ord5(iF,jF-1,kF,ex)]+F45*fh[idx_fh_F_ord5(iF,jF+1,kF,ex)]-F9*fh[idx_fh_F_ord5(iF,jF+2,kF,ex)]+fh[idx_fh_F_ord5(iF,jF+3,kF,ex)]);
|
||||||
|
else if (j0<=ex2-3&&(j0-1)>=jminF) f_rhs[p]+=sfy*d12dy*(fh[idx_fh_F_ord5(iF,jF-2,kF,ex)]-EIT*fh[idx_fh_F_ord5(iF,jF-1,kF,ex)]+EIT*fh[idx_fh_F_ord5(iF,jF+1,kF,ex)]-fh[idx_fh_F_ord5(iF,jF+2,kF,ex)]);
|
||||||
|
else if (j0<=ex2-2&&j0>=jminF) f_rhs[p]+=sfy*d2dy*(-fh[idx_fh_F_ord5(iF,jF-1,kF,ex)]+fh[idx_fh_F_ord5(iF,jF+1,kF,ex)]);
|
||||||
|
} else if (sfy<ZEO) {
|
||||||
|
if ((j0-5)>=jminF&&j0<=ex2-2) f_rhs[p]-=sfy*d840dy*(-F5*fh[idx_fh_F_ord5(iF,jF+3,kF,ex)]+F60*fh[idx_fh_F_ord5(iF,jF+2,kF,ex)]-F420*fh[idx_fh_F_ord5(iF,jF+1,kF,ex)]-F378*fh[idx_fh_F_ord5(iF,jF,kF,ex)]+F1050*fh[idx_fh_F_ord5(iF,jF-1,kF,ex)]-F420*fh[idx_fh_F_ord5(iF,jF-2,kF,ex)]+F140*fh[idx_fh_F_ord5(iF,jF-3,kF,ex)]-F30*fh[idx_fh_F_ord5(iF,jF-4,kF,ex)]+F3*fh[idx_fh_F_ord5(iF,jF-5,kF,ex)]);
|
||||||
|
else if ((j0-4)>=jminF&&j0<=ex2-2) f_rhs[p]-=sfy*d840dy*(+F3*fh[idx_fh_F_ord5(iF,jF-4,kF,ex)]-F32*fh[idx_fh_F_ord5(iF,jF-3,kF,ex)]+F168*fh[idx_fh_F_ord5(iF,jF-2,kF,ex)]-F672*fh[idx_fh_F_ord5(iF,jF-1,kF,ex)]+F672*fh[idx_fh_F_ord5(iF,jF+1,kF,ex)]-F168*fh[idx_fh_F_ord5(iF,jF+2,kF,ex)]+F32*fh[idx_fh_F_ord5(iF,jF+3,kF,ex)]-F3*fh[idx_fh_F_ord5(iF,jF+4,kF,ex)]);
|
||||||
|
else if ((j0-3)>=jminF&&j0<=ex2-2) f_rhs[p]-=sfy*d60dy*(-fh[idx_fh_F_ord5(iF,jF-3,kF,ex)]+F9*fh[idx_fh_F_ord5(iF,jF-2,kF,ex)]-F45*fh[idx_fh_F_ord5(iF,jF-1,kF,ex)]+F45*fh[idx_fh_F_ord5(iF,jF+1,kF,ex)]-F9*fh[idx_fh_F_ord5(iF,jF+2,kF,ex)]+fh[idx_fh_F_ord5(iF,jF+3,kF,ex)]);
|
||||||
|
else if ((j0-2)>=jminF&&j0<=ex2-2) f_rhs[p]-=sfy*d12dy*(fh[idx_fh_F_ord5(iF,jF-2,kF,ex)]-EIT*fh[idx_fh_F_ord5(iF,jF-1,kF,ex)]+EIT*fh[idx_fh_F_ord5(iF,jF+1,kF,ex)]-fh[idx_fh_F_ord5(iF,jF+2,kF,ex)]);
|
||||||
|
else if ((j0-1)>=jminF&&j0<=ex2-2) f_rhs[p]-=sfy*d2dy*(-fh[idx_fh_F_ord5(iF,jF-1,kF,ex)]+fh[idx_fh_F_ord5(iF,jF+1,kF,ex)]);
|
||||||
|
}
|
||||||
|
const double sfz=Sfz[p];
|
||||||
|
if (sfz>ZEO) {
|
||||||
|
if (k0<=ex3-6&&(k0-2)>=kminF) f_rhs[p]+=sfz*d840dz*(-F5*fh[idx_fh_F_ord5(iF,jF,kF-3,ex)]+F60*fh[idx_fh_F_ord5(iF,jF,kF-2,ex)]-F420*fh[idx_fh_F_ord5(iF,jF,kF-1,ex)]-F378*fh[idx_fh_F_ord5(iF,jF,kF,ex)]+F1050*fh[idx_fh_F_ord5(iF,jF,kF+1,ex)]-F420*fh[idx_fh_F_ord5(iF,jF,kF+2,ex)]+F140*fh[idx_fh_F_ord5(iF,jF,kF+3,ex)]-F30*fh[idx_fh_F_ord5(iF,jF,kF+4,ex)]+F3*fh[idx_fh_F_ord5(iF,jF,kF+5,ex)]);
|
||||||
|
else if (k0<=ex3-5&&(k0-3)>=kminF) f_rhs[p]+=sfz*d840dz*(+F3*fh[idx_fh_F_ord5(iF,jF,kF-4,ex)]-F32*fh[idx_fh_F_ord5(iF,jF,kF-3,ex)]+F168*fh[idx_fh_F_ord5(iF,jF,kF-2,ex)]-F672*fh[idx_fh_F_ord5(iF,jF,kF-1,ex)]+F672*fh[idx_fh_F_ord5(iF,jF,kF+1,ex)]-F168*fh[idx_fh_F_ord5(iF,jF,kF+2,ex)]+F32*fh[idx_fh_F_ord5(iF,jF,kF+3,ex)]-F3*fh[idx_fh_F_ord5(iF,jF,kF+4,ex)]);
|
||||||
|
else if (k0<=ex3-4&&(k0-2)>=kminF) f_rhs[p]+=sfz*d60dz*(-fh[idx_fh_F_ord5(iF,jF,kF-3,ex)]+F9*fh[idx_fh_F_ord5(iF,jF,kF-2,ex)]-F45*fh[idx_fh_F_ord5(iF,jF,kF-1,ex)]+F45*fh[idx_fh_F_ord5(iF,jF,kF+1,ex)]-F9*fh[idx_fh_F_ord5(iF,jF,kF+2,ex)]+fh[idx_fh_F_ord5(iF,jF,kF+3,ex)]);
|
||||||
|
else if (k0<=ex3-3&&(k0-1)>=kminF) f_rhs[p]+=sfz*d12dz*(fh[idx_fh_F_ord5(iF,jF,kF-2,ex)]-EIT*fh[idx_fh_F_ord5(iF,jF,kF-1,ex)]+EIT*fh[idx_fh_F_ord5(iF,jF,kF+1,ex)]-fh[idx_fh_F_ord5(iF,jF,kF+2,ex)]);
|
||||||
|
else if (k0<=ex3-2&&k0>=kminF) f_rhs[p]+=sfz*d2dz*(-fh[idx_fh_F_ord5(iF,jF,kF-1,ex)]+fh[idx_fh_F_ord5(iF,jF,kF+1,ex)]);
|
||||||
|
} else if (sfz<ZEO) {
|
||||||
|
if ((k0-5)>=kminF&&k0<=ex3-2) f_rhs[p]-=sfz*d840dz*(-F5*fh[idx_fh_F_ord5(iF,jF,kF+3,ex)]+F60*fh[idx_fh_F_ord5(iF,jF,kF+2,ex)]-F420*fh[idx_fh_F_ord5(iF,jF,kF+1,ex)]-F378*fh[idx_fh_F_ord5(iF,jF,kF,ex)]+F1050*fh[idx_fh_F_ord5(iF,jF,kF-1,ex)]-F420*fh[idx_fh_F_ord5(iF,jF,kF-2,ex)]+F140*fh[idx_fh_F_ord5(iF,jF,kF-3,ex)]-F30*fh[idx_fh_F_ord5(iF,jF,kF-4,ex)]+F3*fh[idx_fh_F_ord5(iF,jF,kF-5,ex)]);
|
||||||
|
else if ((k0-4)>=kminF&&k0<=ex3-2) f_rhs[p]-=sfz*d840dz*(+F3*fh[idx_fh_F_ord5(iF,jF,kF-4,ex)]-F32*fh[idx_fh_F_ord5(iF,jF,kF-3,ex)]+F168*fh[idx_fh_F_ord5(iF,jF,kF-2,ex)]-F672*fh[idx_fh_F_ord5(iF,jF,kF-1,ex)]+F672*fh[idx_fh_F_ord5(iF,jF,kF+1,ex)]-F168*fh[idx_fh_F_ord5(iF,jF,kF+2,ex)]+F32*fh[idx_fh_F_ord5(iF,jF,kF+3,ex)]-F3*fh[idx_fh_F_ord5(iF,jF,kF+4,ex)]);
|
||||||
|
else if ((k0-3)>=kminF&&k0<=ex3-2) f_rhs[p]-=sfz*d60dz*(-fh[idx_fh_F_ord5(iF,jF,kF-3,ex)]+F9*fh[idx_fh_F_ord5(iF,jF,kF-2,ex)]-F45*fh[idx_fh_F_ord5(iF,jF,kF-1,ex)]+F45*fh[idx_fh_F_ord5(iF,jF,kF+1,ex)]-F9*fh[idx_fh_F_ord5(iF,jF,kF+2,ex)]+fh[idx_fh_F_ord5(iF,jF,kF+3,ex)]);
|
||||||
|
else if ((k0-2)>=kminF&&k0<=ex3-2) f_rhs[p]-=sfz*d12dz*(fh[idx_fh_F_ord5(iF,jF,kF-2,ex)]-EIT*fh[idx_fh_F_ord5(iF,jF,kF-1,ex)]+EIT*fh[idx_fh_F_ord5(iF,jF,kF+1,ex)]-fh[idx_fh_F_ord5(iF,jF,kF+2,ex)]);
|
||||||
|
else if ((k0-1)>=kminF&&k0<=ex3-2) f_rhs[p]-=sfz*d2dz*(-fh[idx_fh_F_ord5(iF,jF,kF-1,ex)]+fh[idx_fh_F_ord5(iF,jF,kF+1,ex)]);
|
||||||
|
}
|
||||||
|
}}}
|
||||||
|
|
||||||
|
/* ---- KO dissipation (r=5, cof=1024, sign=+) ---- */
|
||||||
|
if (eps > ZEO) {
|
||||||
|
const double cof = 1024.0;
|
||||||
|
const double F10k=10.0, F45k=45.0, F120=120.0, F210=210.0, F252=252.0;
|
||||||
|
const int i0_lo=(iminF+4>0)?iminF+4:0, j0_lo=(jminF+4>0)?jminF+4:0, k0_lo=(kminF+4>0)?kminF+4:0;
|
||||||
|
const int i0_hi=imaxF-6, j0_hi=jmaxF-6, k0_hi=kmaxF-6;
|
||||||
|
if (!(i0_lo>i0_hi||j0_lo>j0_hi||k0_lo>k0_hi)) {
|
||||||
|
for (int k0=k0_lo;k0<=k0_hi;++k0) { const int kF=k0+4;
|
||||||
|
for (int j0=j0_lo;j0<=j0_hi;++j0) { const int jF=j0+4;
|
||||||
|
for (int i0=i0_lo;i0<=i0_hi;++i0) { const int iF=i0+4;
|
||||||
|
const size_t p=idx_ex(i0,j0,k0,ex);
|
||||||
|
const double Dx=((fh[idx_fh_F_ord5(iF-5,jF,kF,ex)]+fh[idx_fh_F_ord5(iF+5,jF,kF,ex)])-F10k*(fh[idx_fh_F_ord5(iF-4,jF,kF,ex)]+fh[idx_fh_F_ord5(iF+4,jF,kF,ex)])+F45k*(fh[idx_fh_F_ord5(iF-3,jF,kF,ex)]+fh[idx_fh_F_ord5(iF+3,jF,kF,ex)])-F120*(fh[idx_fh_F_ord5(iF-2,jF,kF,ex)]+fh[idx_fh_F_ord5(iF+2,jF,kF,ex)])+F210*(fh[idx_fh_F_ord5(iF-1,jF,kF,ex)]+fh[idx_fh_F_ord5(iF+1,jF,kF,ex)])-F252*fh[idx_fh_F_ord5(iF,jF,kF,ex)])/dX;
|
||||||
|
const double Dy=((fh[idx_fh_F_ord5(iF,jF-5,kF,ex)]+fh[idx_fh_F_ord5(iF,jF+5,kF,ex)])-F10k*(fh[idx_fh_F_ord5(iF,jF-4,kF,ex)]+fh[idx_fh_F_ord5(iF,jF+4,kF,ex)])+F45k*(fh[idx_fh_F_ord5(iF,jF-3,kF,ex)]+fh[idx_fh_F_ord5(iF,jF+3,kF,ex)])-F120*(fh[idx_fh_F_ord5(iF,jF-2,kF,ex)]+fh[idx_fh_F_ord5(iF,jF+2,kF,ex)])+F210*(fh[idx_fh_F_ord5(iF,jF-1,kF,ex)]+fh[idx_fh_F_ord5(iF,jF+1,kF,ex)])-F252*fh[idx_fh_F_ord5(iF,jF,kF,ex)])/dY;
|
||||||
|
const double Dz=((fh[idx_fh_F_ord5(iF,jF,kF-5,ex)]+fh[idx_fh_F_ord5(iF,jF,kF+5,ex)])-F10k*(fh[idx_fh_F_ord5(iF,jF,kF-4,ex)]+fh[idx_fh_F_ord5(iF,jF,kF+4,ex)])+F45k*(fh[idx_fh_F_ord5(iF,jF,kF-3,ex)]+fh[idx_fh_F_ord5(iF,jF,kF+3,ex)])-F120*(fh[idx_fh_F_ord5(iF,jF,kF-2,ex)]+fh[idx_fh_F_ord5(iF,jF,kF+2,ex)])+F210*(fh[idx_fh_F_ord5(iF,jF,kF-1,ex)]+fh[idx_fh_F_ord5(iF,jF,kF+1,ex)])-F252*fh[idx_fh_F_ord5(iF,jF,kF,ex)])/dZ;
|
||||||
|
f_rhs[p] += (eps/cof)*(Dx+Dy+Dz);
|
||||||
|
}}}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
free(fh);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
#error "lopsided_kodis_c.C: unsupported ghost_width (must be 2, 3, 4, or 5)"
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -46,6 +46,45 @@ static inline size_t idx_fh_F(int iF, int jF, int kF, const int ex[3]) {
|
|||||||
return (size_t)ii + (size_t)jj * (size_t)nx + (size_t)kk * (size_t)nx * (size_t)ny;
|
return (size_t)ii + (size_t)jj * (size_t)nx + (size_t)kk * (size_t)nx * (size_t)ny;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* fh 对应 Fortran: fh(0:ex1, 0:ex2, 0:ex3)
|
||||||
|
* ord=1 => shift=0
|
||||||
|
* iF/jF/kF 为 Fortran 索引 (0..ex)
|
||||||
|
*/
|
||||||
|
static inline size_t idx_fh_F_ord1(int iF, int jF, int kF, const int ex[3]) {
|
||||||
|
const int nx = ex[0] + 1; // ex1 + ord
|
||||||
|
const int ny = ex[1] + 1;
|
||||||
|
return (size_t)iF + (size_t)jF * (size_t)nx + (size_t)kF * (size_t)nx * (size_t)ny;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* fh 对应 Fortran: fh(-3:ex1, -3:ex2, -3:ex3)
|
||||||
|
* ord=4 => shift=3
|
||||||
|
*/
|
||||||
|
static inline size_t idx_fh_F_ord4(int iF, int jF, int kF, const int ex[3]) {
|
||||||
|
const int shift = 3;
|
||||||
|
const int nx = ex[0] + 4; // ex1 + ord
|
||||||
|
const int ny = ex[1] + 4;
|
||||||
|
const int ii = iF + shift; // 0..ex1+3
|
||||||
|
const int jj = jF + shift; // 0..ex2+3
|
||||||
|
const int kk = kF + shift; // 0..ex3+3
|
||||||
|
return (size_t)ii + (size_t)jj * (size_t)nx + (size_t)kk * (size_t)nx * (size_t)ny;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* fh 对应 Fortran: fh(-4:ex1, -4:ex2, -4:ex3)
|
||||||
|
* ord=5 => shift=4
|
||||||
|
*/
|
||||||
|
static inline size_t idx_fh_F_ord5(int iF, int jF, int kF, const int ex[3]) {
|
||||||
|
const int shift = 4;
|
||||||
|
const int nx = ex[0] + 5; // ex1 + ord
|
||||||
|
const int ny = ex[1] + 5;
|
||||||
|
const int ii = iF + shift; // 0..ex1+4
|
||||||
|
const int jj = jF + shift; // 0..ex2+4
|
||||||
|
const int kk = kF + shift; // 0..ex3+4
|
||||||
|
return (size_t)ii + (size_t)jj * (size_t)nx + (size_t)kk * (size_t)nx * (size_t)ny;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* func: (1..extc1, 1..extc2, 1..extc3) 1-based in Fortran
|
* func: (1..extc1, 1..extc2, 1..extc3) 1-based in Fortran
|
||||||
* funcc: (-ord+1..extc1, -ord+1..extc2, -ord+1..extc3) in Fortran
|
* funcc: (-ord+1..extc1, -ord+1..extc2, -ord+1..extc3) in Fortran
|
||||||
@@ -231,7 +270,10 @@ static inline void symmetry_bd(int ord,
|
|||||||
{
|
{
|
||||||
if (ord <= 0) return;
|
if (ord <= 0) return;
|
||||||
|
|
||||||
/* Fast paths used by current C kernels: ord=2 (derivs), ord=3 (lopsided/KO). */
|
if (ord == 1) {
|
||||||
|
symmetry_bd_impl(1, 0, extc, func, funcc, SoA);
|
||||||
|
return;
|
||||||
|
}
|
||||||
if (ord == 2) {
|
if (ord == 2) {
|
||||||
symmetry_bd_impl(2, 1, extc, func, funcc, SoA);
|
symmetry_bd_impl(2, 1, extc, func, funcc, SoA);
|
||||||
return;
|
return;
|
||||||
@@ -240,6 +282,10 @@ static inline void symmetry_bd(int ord,
|
|||||||
symmetry_bd_impl(3, 2, extc, func, funcc, SoA);
|
symmetry_bd_impl(3, 2, extc, func, funcc, SoA);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
if (ord == 4) {
|
||||||
|
symmetry_bd_impl(4, 3, extc, func, funcc, SoA);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
symmetry_bd_impl(ord, ord - 1, extc, func, funcc, SoA);
|
symmetry_bd_impl(ord, ord - 1, extc, func, funcc, SoA);
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user