Shell C functions must export Fortran-compatible symbols with trailing underscore so bssn_rhs_ss.f90 and getnp4.f90 can link when WithShell is active and USE_CXX_SHELL_KERNELS=1 replaces Fortran diff_new_sh.o. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
322 lines
23 KiB
C
322 lines
23 KiB
C
#include "macrodef.h"
|
|
#include "share_func.h"
|
|
|
|
/*
|
|
* fdderivs_sh — second derivatives on shell patch in (rho, sigma, R) coords.
|
|
* Same stencil coefficients as Cartesian fdderivs. Uses symmetry_stbd.
|
|
*/
|
|
extern "C" void fdderivs_sh_(const int ex[3],
|
|
const double *f,
|
|
double *fxx, double *fxy, double *fxz,
|
|
double *fyy, double *fyz, double *fzz,
|
|
const double *X, const double *Y, const double *Z,
|
|
double SYM1, double SYM2, double SYM3,
|
|
int Symmetry, int onoff, int sst)
|
|
{
|
|
(void)SYM3; (void)onoff; (void)sst;
|
|
|
|
const int NO_SYMM=0, EQ_SYMM=1, OCTANT=2;
|
|
const double ZEO=0.0, ONE=1.0, TWO=2.0, F1o4=2.5e-1;
|
|
const double F8=8.0, F16=16.0, F30=30.0, F1o12=ONE/12.0, F1o144=ONE/144.0;
|
|
const double F9=9.0, F45=45.0, F60=60.0, F27=27.0, F270=270.0, F490=490.0;
|
|
const double F1o180=ONE/180.0, F1o3600=ONE/3600.0;
|
|
const double F32=32.0, F128=128.0, F168=168.0, F672=672.0, F840=840.0;
|
|
const double F1008=1008.0, F8064=8064.0, F14350=14350.0;
|
|
const double F1o5040=ONE/5040.0, F1o705600=ONE/705600.0;
|
|
|
|
const int ex1=ex[0], ex2=ex[1], ex3=ex[2];
|
|
const double dX=X[1]-X[0], dY=Y[1]-Y[0], dZ=Z[1]-Z[0];
|
|
const int imaxF=ex1, jmaxF=ex2, kmaxF=ex3;
|
|
const double SoA[2]={SYM1,SYM2};
|
|
|
|
#if (ghost_width == 2)
|
|
{
|
|
const int ord=1;
|
|
int iminF=1,jminF=1,kminF=1;
|
|
if(Symmetry==OCTANT&&fabs(X[0])<dX)iminF=0;
|
|
if(Symmetry==OCTANT&&fabs(Y[0])<dY)jminF=0;
|
|
if((sst==2||sst==4)&&fabs(Y[0])<dY)jminF=0;
|
|
|
|
const size_t nx=(size_t)ex1+2*ord,ny=(size_t)ex2+2*ord,nz=(size_t)ex3,fh_size=nx*ny*nz;
|
|
static double *fh_buf=NULL;static size_t cap=0;
|
|
if(fh_size>cap){free(fh_buf);fh_buf=(double*)aligned_alloc(64,fh_size*sizeof(double));cap=fh_size;}
|
|
double *fh=fh_buf;if(!fh)return;
|
|
symmetry_stbd(ord,ex,f,fh,SoA);
|
|
|
|
const double Sdxdx=ONE/(dX*dX),Sdydy=ONE/(dY*dY),Sdzdz=ONE/(dZ*dZ);
|
|
const double Sdxdy=F1o4/(dX*dY),Sdxdz=F1o4/(dX*dZ),Sdydz=F1o4/(dY*dZ);
|
|
const size_t all=(size_t)ex1*ex2*ex3;
|
|
for(size_t p=0;p<all;++p){fxx[p]=fyy[p]=fzz[p]=ZEO;fxy[p]=fxz[p]=fyz[p]=ZEO;}
|
|
|
|
const int i2_lo=(iminF>0)?iminF:0,j2_lo=(jminF>0)?jminF:0,k2_lo=0,i2_hi=ex1-2,j2_hi=ex2-2,k2_hi=ex3-2;
|
|
#define FH(iF,jF,kF) fh[idx_fh_stbd(iF,jF,kF,ord,ex)]
|
|
if(i2_lo<=i2_hi&&j2_lo<=j2_hi&&k2_lo<=k2_hi){
|
|
for(int k0=k2_lo;k0<=k2_hi;++k0){const int kF=k0+1;
|
|
for(int j0=j2_lo;j0<=j2_hi;++j0){const int jF=j0+1;
|
|
for(int i0=i2_lo;i0<=i2_hi;++i0){const int iF=i0+1;const size_t p=idx_ex(i0,j0,k0,ex);
|
|
fxx[p]=Sdxdx*(FH(iF-1,jF,kF)-TWO*FH(iF,jF,kF)+FH(iF+1,jF,kF));
|
|
fyy[p]=Sdydy*(FH(iF,jF-1,kF)-TWO*FH(iF,jF,kF)+FH(iF,jF+1,kF));
|
|
fzz[p]=Sdzdz*(FH(iF,jF,kF-1)-TWO*FH(iF,jF,kF)+FH(iF,jF,kF+1));
|
|
fxy[p]=Sdxdy*(FH(iF-1,jF-1,kF)-FH(iF+1,jF-1,kF)-FH(iF-1,jF+1,kF)+FH(iF+1,jF+1,kF));
|
|
fxz[p]=Sdxdz*(FH(iF-1,jF,kF-1)-FH(iF+1,jF,kF-1)-FH(iF-1,jF,kF+1)+FH(iF+1,jF,kF+1));
|
|
fyz[p]=Sdydz*(FH(iF,jF-1,kF-1)-FH(iF,jF+1,kF-1)-FH(iF,jF-1,kF+1)+FH(iF,jF+1,kF+1));
|
|
}}}
|
|
}
|
|
#undef FH
|
|
return;
|
|
}
|
|
#elif (ghost_width == 3)
|
|
{
|
|
const int ord=2;
|
|
int iminF=1,jminF=1,kminF=1;
|
|
if(Symmetry==OCTANT&&fabs(X[0])<dX)iminF=-1;
|
|
if(Symmetry==OCTANT&&fabs(Y[0])<dY)jminF=-1;
|
|
if((sst==2||sst==4)&&fabs(Y[0])<dY)jminF=-1;
|
|
|
|
const size_t nx=(size_t)ex1+2*ord,ny=(size_t)ex2+2*ord,nz=(size_t)ex3,fh_size=nx*ny*nz;
|
|
static double *fh_buf=NULL;static size_t cap=0;
|
|
if(fh_size>cap){free(fh_buf);fh_buf=(double*)aligned_alloc(64,fh_size*sizeof(double));cap=fh_size;}
|
|
double *fh=fh_buf;if(!fh)return;
|
|
symmetry_stbd(ord,ex,f,fh,SoA);
|
|
|
|
const double Sdxdx=ONE/(dX*dX),Sdydy=ONE/(dY*dY),Sdzdz=ONE/(dZ*dZ);
|
|
const double Fdxdx=F1o12/(dX*dX),Fdydy=F1o12/(dY*dY),Fdzdz=F1o12/(dZ*dZ);
|
|
const double Sdxdy=F1o4/(dX*dY),Sdxdz=F1o4/(dX*dZ),Sdydz=F1o4/(dY*dZ);
|
|
const double Fdxdy=F1o144/(dX*dY),Fdxdz=F1o144/(dX*dZ),Fdydz=F1o144/(dY*dZ);
|
|
const size_t all=(size_t)ex1*ex2*ex3;
|
|
for(size_t p=0;p<all;++p){fxx[p]=fyy[p]=fzz[p]=fxy[p]=fxz[p]=fyz[p]=ZEO;}
|
|
|
|
const int i2_lo=(iminF>0)?iminF:0,j2_lo=(jminF>0)?jminF:0,k2_lo=0,i2_hi=ex1-2,j2_hi=ex2-2,k2_hi=ex3-2;
|
|
const int i4_lo=(iminF+1>0)?iminF+1:0,j4_lo=(jminF+1>0)?jminF+1:0,k4_lo=0,i4_hi=ex1-3,j4_hi=ex2-3,k4_hi=ex3-3;
|
|
const int has4=(i4_lo<=i4_hi&&j4_lo<=j4_hi&&k4_lo<=k4_hi);
|
|
#define FH(iF,jF,kF) fh[idx_fh_stbd(iF,jF,kF,ord,ex)]
|
|
|
|
if(i2_lo<=i2_hi&&j2_lo<=j2_hi&&k2_lo<=k2_hi){
|
|
for(int k0=k2_lo;k0<=k2_hi;++k0){const int kF=k0+1;
|
|
for(int j0=j2_lo;j0<=j2_hi;++j0){const int jF=j0+1;
|
|
for(int i0=i2_lo;i0<=i2_hi;++i0){
|
|
if(has4&&i0>=i4_lo&&i0<=i4_hi&&j0>=j4_lo&&j0<=j4_hi&&k0>=k4_lo&&k0<=k4_hi)continue;
|
|
const int iF=i0+1;const size_t p=idx_ex(i0,j0,k0,ex);
|
|
fxx[p]=Sdxdx*(FH(iF-1,jF,kF)-TWO*FH(iF,jF,kF)+FH(iF+1,jF,kF));
|
|
fyy[p]=Sdydy*(FH(iF,jF-1,kF)-TWO*FH(iF,jF,kF)+FH(iF,jF+1,kF));
|
|
fzz[p]=Sdzdz*(FH(iF,jF,kF-1)-TWO*FH(iF,jF,kF)+FH(iF,jF,kF+1));
|
|
fxy[p]=Sdxdy*(FH(iF-1,jF-1,kF)-FH(iF+1,jF-1,kF)-FH(iF-1,jF+1,kF)+FH(iF+1,jF+1,kF));
|
|
fxz[p]=Sdxdz*(FH(iF-1,jF,kF-1)-FH(iF+1,jF,kF-1)-FH(iF-1,jF,kF+1)+FH(iF+1,jF,kF+1));
|
|
fyz[p]=Sdydz*(FH(iF,jF-1,kF-1)-FH(iF,jF+1,kF-1)-FH(iF,jF-1,kF+1)+FH(iF,jF+1,kF+1));
|
|
}}}
|
|
}
|
|
if(has4){
|
|
for(int k0=k4_lo;k0<=k4_hi;++k0){const int kF=k0+1;
|
|
for(int j0=j4_lo;j0<=j4_hi;++j0){const int jF=j0+1;
|
|
for(int i0=i4_lo;i0<=i4_hi;++i0){const int iF=i0+1;const size_t p=idx_ex(i0,j0,k0,ex);
|
|
fxx[p]=Fdxdx*(-FH(iF-2,jF,kF)+F16*FH(iF-1,jF,kF)-F30*FH(iF,jF,kF)-FH(iF+2,jF,kF)+F16*FH(iF+1,jF,kF));
|
|
fyy[p]=Fdydy*(-FH(iF,jF-2,kF)+F16*FH(iF,jF-1,kF)-F30*FH(iF,jF,kF)-FH(iF,jF+2,kF)+F16*FH(iF,jF+1,kF));
|
|
fzz[p]=Fdzdz*(-FH(iF,jF,kF-2)+F16*FH(iF,jF,kF-1)-F30*FH(iF,jF,kF)-FH(iF,jF,kF+2)+F16*FH(iF,jF,kF+1));
|
|
{const double t_jm2=(FH(iF-2,jF-2,kF)-F8*FH(iF-1,jF-2,kF)+F8*FH(iF+1,jF-2,kF)-FH(iF+2,jF-2,kF));
|
|
const double t_jm1=(FH(iF-2,jF-1,kF)-F8*FH(iF-1,jF-1,kF)+F8*FH(iF+1,jF-1,kF)-FH(iF+2,jF-1,kF));
|
|
const double t_jp1=(FH(iF-2,jF+1,kF)-F8*FH(iF-1,jF+1,kF)+F8*FH(iF+1,jF+1,kF)-FH(iF+2,jF+1,kF));
|
|
const double t_jp2=(FH(iF-2,jF+2,kF)-F8*FH(iF-1,jF+2,kF)+F8*FH(iF+1,jF+2,kF)-FH(iF+2,jF+2,kF));
|
|
fxy[p]=Fdxdy*(t_jm2-F8*t_jm1+F8*t_jp1-t_jp2);}
|
|
{const double t_km2=(FH(iF-2,jF,kF-2)-F8*FH(iF-1,jF,kF-2)+F8*FH(iF+1,jF,kF-2)-FH(iF+2,jF,kF-2));
|
|
const double t_km1=(FH(iF-2,jF,kF-1)-F8*FH(iF-1,jF,kF-1)+F8*FH(iF+1,jF,kF-1)-FH(iF+2,jF,kF-1));
|
|
const double t_kp1=(FH(iF-2,jF,kF+1)-F8*FH(iF-1,jF,kF+1)+F8*FH(iF+1,jF,kF+1)-FH(iF+2,jF,kF+1));
|
|
const double t_kp2=(FH(iF-2,jF,kF+2)-F8*FH(iF-1,jF,kF+2)+F8*FH(iF+1,jF,kF+2)-FH(iF+2,jF,kF+2));
|
|
fxz[p]=Fdxdz*(t_km2-F8*t_km1+F8*t_kp1-t_kp2);}
|
|
{const double t_km2=(FH(iF,jF-2,kF-2)-F8*FH(iF,jF-1,kF-2)+F8*FH(iF,jF+1,kF-2)-FH(iF,jF+2,kF-2));
|
|
const double t_km1=(FH(iF,jF-2,kF-1)-F8*FH(iF,jF-1,kF-1)+F8*FH(iF,jF+1,kF-1)-FH(iF,jF+2,kF-1));
|
|
const double t_kp1=(FH(iF,jF-2,kF+1)-F8*FH(iF,jF-1,kF+1)+F8*FH(iF,jF+1,kF+1)-FH(iF,jF+2,kF+1));
|
|
const double t_kp2=(FH(iF,jF-2,kF+2)-F8*FH(iF,jF-1,kF+2)+F8*FH(iF,jF+1,kF+2)-FH(iF,jF+2,kF+2));
|
|
fyz[p]=Fdydz*(t_km2-F8*t_km1+F8*t_kp1-t_kp2);}
|
|
}}}
|
|
}
|
|
#undef FH
|
|
return;
|
|
}
|
|
#elif (ghost_width == 4)
|
|
{
|
|
const int ord=3;
|
|
int iminF=1,jminF=1,kminF=1;
|
|
if(Symmetry==OCTANT&&fabs(X[0])<dX)iminF=-2;
|
|
if(Symmetry==OCTANT&&fabs(Y[0])<dY)jminF=-2;
|
|
if((sst==2||sst==4)&&fabs(Y[0])<dY)jminF=-2;
|
|
|
|
const size_t nx=(size_t)ex1+2*ord,ny=(size_t)ex2+2*ord,nz=(size_t)ex3,fh_size=nx*ny*nz;
|
|
static double *fh_buf=NULL;static size_t cap=0;
|
|
if(fh_size>cap){free(fh_buf);fh_buf=(double*)aligned_alloc(64,fh_size*sizeof(double));cap=fh_size;}
|
|
double *fh=fh_buf;if(!fh)return;
|
|
symmetry_stbd(ord,ex,f,fh,SoA);
|
|
|
|
const double Sdxdx=ONE/(dX*dX),Sdydy=ONE/(dY*dY),Sdzdz=ONE/(dZ*dZ);
|
|
const double Fdxdx=F1o12/(dX*dX),Fdydy=F1o12/(dY*dY),Fdzdz=F1o12/(dZ*dZ);
|
|
const double Xdxdx=F1o180/(dX*dX),Xdydy=F1o180/(dY*dY),Xdzdz=F1o180/(dZ*dZ);
|
|
const double Sdxdy=F1o4/(dX*dY),Sdxdz=F1o4/(dX*dZ),Sdydz=F1o4/(dY*dZ);
|
|
const double Fdxdy=F1o144/(dX*dY),Fdxdz=F1o144/(dX*dZ),Fdydz=F1o144/(dY*dZ);
|
|
const double Xdxdy=F1o3600/(dX*dY),Xdxdz=F1o3600/(dX*dZ),Xdydz=F1o3600/(dY*dZ);
|
|
const size_t all=(size_t)ex1*ex2*ex3;
|
|
for(size_t p=0;p<all;++p){fxx[p]=fyy[p]=fzz[p]=fxy[p]=fxz[p]=fyz[p]=ZEO;}
|
|
|
|
const int i2_lo=(iminF+1>0)?iminF+1:0,j2_lo=(jminF+1>0)?jminF+1:0,k2_lo=0,i2_hi=ex1-2,j2_hi=ex2-2,k2_hi=ex3-2;
|
|
const int i4_lo=(iminF+2>0)?iminF+2:0,j4_lo=(jminF+2>0)?jminF+2:0,k4_lo=0,i4_hi=ex1-3,j4_hi=ex2-3,k4_hi=ex3-3;
|
|
const int i6_lo=(iminF+3>0)?iminF+3:0,j6_lo=(jminF+3>0)?jminF+3:0,k6_lo=0,i6_hi=ex1-4,j6_hi=ex2-4,k6_hi=ex3-4;
|
|
const int has4=(i4_lo<=i4_hi&&j4_lo<=j4_hi&&k4_lo<=k4_hi),has6=(i6_lo<=i6_hi&&j6_lo<=j6_hi&&k6_lo<=k6_hi);
|
|
#define FH(iF,jF,kF) fh[idx_fh_stbd(iF,jF,kF,ord,ex)]
|
|
|
|
if(i2_lo<=i2_hi&&j2_lo<=j2_hi&&k2_lo<=k2_hi){for(int k0=k2_lo;k0<=k2_hi;++k0){const int kF=k0+1;
|
|
for(int j0=j2_lo;j0<=j2_hi;++j0){const int jF=j0+1;
|
|
for(int i0=i2_lo;i0<=i2_hi;++i0){_Bool in4=has4&&i0>=i4_lo&&i0<=i4_hi&&j0>=j4_lo&&j0<=j4_hi&&k0>=k4_lo&&k0<=k4_hi;if(in4)continue;
|
|
const int iF=i0+1;const size_t p=idx_ex(i0,j0,k0,ex);
|
|
fxx[p]=Sdxdx*(FH(iF-1,jF,kF)-TWO*FH(iF,jF,kF)+FH(iF+1,jF,kF));
|
|
fyy[p]=Sdydy*(FH(iF,jF-1,kF)-TWO*FH(iF,jF,kF)+FH(iF,jF+1,kF));
|
|
fzz[p]=Sdzdz*(FH(iF,jF,kF-1)-TWO*FH(iF,jF,kF)+FH(iF,jF,kF+1));
|
|
fxy[p]=Sdxdy*(FH(iF-1,jF-1,kF)-FH(iF+1,jF-1,kF)-FH(iF-1,jF+1,kF)+FH(iF+1,jF+1,kF));
|
|
fxz[p]=Sdxdz*(FH(iF-1,jF,kF-1)-FH(iF+1,jF,kF-1)-FH(iF-1,jF,kF+1)+FH(iF+1,jF,kF+1));
|
|
fyz[p]=Sdydz*(FH(iF,jF-1,kF-1)-FH(iF,jF+1,kF-1)-FH(iF,jF-1,kF+1)+FH(iF,jF+1,kF+1));
|
|
}}}}
|
|
if(has4){for(int k0=k4_lo;k0<=k4_hi;++k0){const int kF=k0+1;
|
|
for(int j0=j4_lo;j0<=j4_hi;++j0){const int jF=j0+1;
|
|
for(int i0=i4_lo;i0<=i4_hi;++i0){if(has6&&i0>=i6_lo&&i0<=i6_hi&&j0>=j6_lo&&j0<=j6_hi&&k0>=k6_lo&&k0<=k6_hi)continue;
|
|
const int iF=i0+1;const size_t p=idx_ex(i0,j0,k0,ex);
|
|
fxx[p]=Fdxdx*(-FH(iF-2,jF,kF)+F16*FH(iF-1,jF,kF)-F30*FH(iF,jF,kF)-FH(iF+2,jF,kF)+F16*FH(iF+1,jF,kF));
|
|
fyy[p]=Fdydy*(-FH(iF,jF-2,kF)+F16*FH(iF,jF-1,kF)-F30*FH(iF,jF,kF)-FH(iF,jF+2,kF)+F16*FH(iF,jF+1,kF));
|
|
fzz[p]=Fdzdz*(-FH(iF,jF,kF-2)+F16*FH(iF,jF,kF-1)-F30*FH(iF,jF,kF)-FH(iF,jF,kF+2)+F16*FH(iF,jF,kF+1));
|
|
{const double t_jm2=(FH(iF-2,jF-2,kF)-F8*FH(iF-1,jF-2,kF)+F8*FH(iF+1,jF-2,kF)-FH(iF+2,jF-2,kF));
|
|
const double t_jm1=(FH(iF-2,jF-1,kF)-F8*FH(iF-1,jF-1,kF)+F8*FH(iF+1,jF-1,kF)-FH(iF+2,jF-1,kF));
|
|
const double t_jp1=(FH(iF-2,jF+1,kF)-F8*FH(iF-1,jF+1,kF)+F8*FH(iF+1,jF+1,kF)-FH(iF+2,jF+1,kF));
|
|
const double t_jp2=(FH(iF-2,jF+2,kF)-F8*FH(iF-1,jF+2,kF)+F8*FH(iF+1,jF+2,kF)-FH(iF+2,jF+2,kF));
|
|
fxy[p]=Fdxdy*(t_jm2-F8*t_jm1+F8*t_jp1-t_jp2);}
|
|
{const double t_km2=(FH(iF-2,jF,kF-2)-F8*FH(iF-1,jF,kF-2)+F8*FH(iF+1,jF,kF-2)-FH(iF+2,jF,kF-2));
|
|
const double t_km1=(FH(iF-2,jF,kF-1)-F8*FH(iF-1,jF,kF-1)+F8*FH(iF+1,jF,kF-1)-FH(iF+2,jF,kF-1));
|
|
const double t_kp1=(FH(iF-2,jF,kF+1)-F8*FH(iF-1,jF,kF+1)+F8*FH(iF+1,jF,kF+1)-FH(iF+2,jF,kF+1));
|
|
const double t_kp2=(FH(iF-2,jF,kF+2)-F8*FH(iF-1,jF,kF+2)+F8*FH(iF+1,jF,kF+2)-FH(iF+2,jF,kF+2));
|
|
fxz[p]=Fdxdz*(t_km2-F8*t_km1+F8*t_kp1-t_kp2);}
|
|
{const double t_km2=(FH(iF,jF-2,kF-2)-F8*FH(iF,jF-1,kF-2)+F8*FH(iF,jF+1,kF-2)-FH(iF,jF+2,kF-2));
|
|
const double t_km1=(FH(iF,jF-2,kF-1)-F8*FH(iF,jF-1,kF-1)+F8*FH(iF,jF+1,kF-1)-FH(iF,jF+2,kF-1));
|
|
const double t_kp1=(FH(iF,jF-2,kF+1)-F8*FH(iF,jF-1,kF+1)+F8*FH(iF,jF+1,kF+1)-FH(iF,jF+2,kF+1));
|
|
const double t_kp2=(FH(iF,jF-2,kF+2)-F8*FH(iF,jF-1,kF+2)+F8*FH(iF,jF+1,kF+2)-FH(iF,jF+2,kF+2));
|
|
fyz[p]=Fdydz*(t_km2-F8*t_km1+F8*t_kp1-t_kp2);}
|
|
}}}}
|
|
if(has6){for(int k0=k6_lo;k0<=k6_hi;++k0){const int kF=k0+1;
|
|
for(int j0=j6_lo;j0<=j6_hi;++j0){const int jF=j0+1;
|
|
for(int i0=i6_lo;i0<=i6_hi;++i0){const int iF=i0+1;const size_t p=idx_ex(i0,j0,k0,ex);
|
|
fxx[p]=Xdxdx*(TWO*FH(iF-3,jF,kF)-F27*FH(iF-2,jF,kF)+F270*FH(iF-1,jF,kF)-F490*FH(iF,jF,kF)+F270*FH(iF+1,jF,kF)-F27*FH(iF+2,jF,kF)+TWO*FH(iF+3,jF,kF));
|
|
fyy[p]=Xdydy*(TWO*FH(iF,jF-3,kF)-F27*FH(iF,jF-2,kF)+F270*FH(iF,jF-1,kF)-F490*FH(iF,jF,kF)+F270*FH(iF,jF+1,kF)-F27*FH(iF,jF+2,kF)+TWO*FH(iF,jF+3,kF));
|
|
fzz[p]=Xdzdz*(TWO*FH(iF,jF,kF-3)-F27*FH(iF,jF,kF-2)+F270*FH(iF,jF,kF-1)-F490*FH(iF,jF,kF)+F270*FH(iF,jF,kF+1)-F27*FH(iF,jF,kF+2)+TWO*FH(iF,jF,kF+3));
|
|
#define XS6(JF,KFDUMMY) (-FH(iF-3,JF,KFDUMMY)+F9*FH(iF-2,JF,KFDUMMY)-F45*FH(iF-1,JF,KFDUMMY)+F45*FH(iF+1,JF,KFDUMMY)-F9*FH(iF+2,JF,KFDUMMY)+FH(iF+3,JF,KFDUMMY))
|
|
fxy[p]=Xdxdy*(-XS6(jF-3,kF)+F9*XS6(jF-2,kF)-F45*XS6(jF-1,kF)+F45*XS6(jF+1,kF)-F9*XS6(jF+2,kF)+XS6(jF+3,kF));
|
|
fxz[p]=Xdxdz*(-XS6(jF,kF-3)+F9*XS6(jF,kF-2)-F45*XS6(jF,kF-1)+F45*XS6(jF,kF+1)-F9*XS6(jF,kF+2)+XS6(jF,kF+3));
|
|
#undef XS6
|
|
#define YS6(JF,KFDUMMY) (-FH(iF,JF-3,KFDUMMY)+F9*FH(iF,JF-2,KFDUMMY)-F45*FH(iF,JF-1,KFDUMMY)+F45*FH(iF,JF+1,KFDUMMY)-F9*FH(iF,JF+2,KFDUMMY)+FH(iF,JF+3,KFDUMMY))
|
|
fyz[p]=Xdydz*(-YS6(jF,kF-3)+F9*YS6(jF,kF-2)-F45*YS6(jF,kF-1)+F45*YS6(jF,kF+1)-F9*YS6(jF,kF+2)+YS6(jF,kF+3));
|
|
#undef YS6
|
|
}}}}
|
|
#undef FH
|
|
return;
|
|
}
|
|
#elif (ghost_width == 5)
|
|
{
|
|
/* 8th-order shell second derivatives — inherits 8th-order stencil coeffs from Cartesian */
|
|
const int ord=4;
|
|
int iminF=1,jminF=1,kminF=1;
|
|
if(Symmetry==OCTANT&&fabs(X[0])<dX)iminF=-3;
|
|
if(Symmetry==OCTANT&&fabs(Y[0])<dY)jminF=-3;
|
|
if((sst==2||sst==4)&&fabs(Y[0])<dY)jminF=-3;
|
|
|
|
const size_t nx=(size_t)ex1+2*ord,ny=(size_t)ex2+2*ord,nz=(size_t)ex3,fh_size=nx*ny*nz;
|
|
static double *fh_buf=NULL;static size_t cap=0;
|
|
if(fh_size>cap){free(fh_buf);fh_buf=(double*)aligned_alloc(64,fh_size*sizeof(double));cap=fh_size;}
|
|
double *fh=fh_buf;if(!fh)return;
|
|
symmetry_stbd(ord,ex,f,fh,SoA);
|
|
|
|
const double Sdxdx=ONE/(dX*dX),Sdydy=ONE/(dY*dY),Sdzdz=ONE/(dZ*dZ);
|
|
const double Fdxdx=F1o12/(dX*dX),Fdydy=F1o12/(dY*dY),Fdzdz=F1o12/(dZ*dZ);
|
|
const double Xdxdx=F1o180/(dX*dX),Xdydy=F1o180/(dY*dY),Xdzdz=F1o180/(dZ*dZ);
|
|
const double Edxdx=F1o5040/(dX*dX),Edydy=F1o5040/(dY*dY),Edzdz=F1o5040/(dZ*dZ);
|
|
const double Sdxdy=F1o4/(dX*dY),Sdxdz=F1o4/(dX*dZ),Sdydz=F1o4/(dY*dZ);
|
|
const double Fdxdy=F1o144/(dX*dY),Fdxdz=F1o144/(dX*dZ),Fdydz=F1o144/(dY*dZ);
|
|
const double Xdxdy=F1o3600/(dX*dY),Xdxdz=F1o3600/(dX*dZ),Xdydz=F1o3600/(dY*dZ);
|
|
const double Edxdy=F1o705600/(dX*dY),Edxdz=F1o705600/(dX*dZ),Edydz=F1o705600/(dY*dZ);
|
|
const size_t all=(size_t)ex1*ex2*ex3;
|
|
for(size_t p=0;p<all;++p){fxx[p]=fyy[p]=fzz[p]=fxy[p]=fxz[p]=fyz[p]=ZEO;}
|
|
|
|
const int i2_lo=(iminF+2>0)?iminF+2:0,j2_lo=(jminF+2>0)?jminF+2:0,k2_lo=0,i2_hi=ex1-2,j2_hi=ex2-2,k2_hi=ex3-2;
|
|
const int i4_lo=(iminF+3>0)?iminF+3:0,j4_lo=(jminF+3>0)?jminF+3:0,k4_lo=0,i4_hi=ex1-3,j4_hi=ex2-3,k4_hi=ex3-3;
|
|
const int i6_lo=(iminF+4>0)?iminF+4:0,j6_lo=(jminF+4>0)?jminF+4:0,k6_lo=0,i6_hi=ex1-4,j6_hi=ex2-4,k6_hi=ex3-4;
|
|
const int i8_lo=(iminF+5>0)?iminF+5:0,j8_lo=(jminF+5>0)?jminF+5:0,k8_lo=0,i8_hi=ex1-5,j8_hi=ex2-5,k8_hi=ex3-5;
|
|
const int has4=(i4_lo<=i4_hi&&j4_lo<=j4_hi&&k4_lo<=k4_hi),has6=(i6_lo<=i6_hi&&j6_lo<=j6_hi&&k6_lo<=k6_hi),has8=(i8_lo<=i8_hi&&j8_lo<=j8_hi&&k8_lo<=k8_hi);
|
|
#define FH(iF,jF,kF) fh[idx_fh_stbd(iF,jF,kF,ord,ex)]
|
|
|
|
/* 2nd-order pass */
|
|
if(i2_lo<=i2_hi&&j2_lo<=j2_hi&&k2_lo<=k2_hi){for(int k0=k2_lo;k0<=k2_hi;++k0){const int kF=k0+1;
|
|
for(int j0=j2_lo;j0<=j2_hi;++j0){const int jF=j0+1;
|
|
for(int i0=i2_lo;i0<=i2_hi;++i0){_Bool in4=has4&&i0>=i4_lo&&i0<=i4_hi&&j0>=j4_lo&&j0<=j4_hi&&k0>=k4_lo&&k0<=k4_hi;if(in4)continue;
|
|
const int iF=i0+1;const size_t p=idx_ex(i0,j0,k0,ex);
|
|
fxx[p]=Sdxdx*(FH(iF-1,jF,kF)-TWO*FH(iF,jF,kF)+FH(iF+1,jF,kF));
|
|
fyy[p]=Sdydy*(FH(iF,jF-1,kF)-TWO*FH(iF,jF,kF)+FH(iF,jF+1,kF));
|
|
fzz[p]=Sdzdz*(FH(iF,jF,kF-1)-TWO*FH(iF,jF,kF)+FH(iF,jF,kF+1));
|
|
fxy[p]=Sdxdy*(FH(iF-1,jF-1,kF)-FH(iF+1,jF-1,kF)-FH(iF-1,jF+1,kF)+FH(iF+1,jF+1,kF));
|
|
fxz[p]=Sdxdz*(FH(iF-1,jF,kF-1)-FH(iF+1,jF,kF-1)-FH(iF-1,jF,kF+1)+FH(iF+1,jF,kF+1));
|
|
fyz[p]=Sdydz*(FH(iF,jF-1,kF-1)-FH(iF,jF+1,kF-1)-FH(iF,jF-1,kF+1)+FH(iF,jF+1,kF+1));
|
|
}}}}
|
|
/* 4th-order pass */
|
|
if(has4){for(int k0=k4_lo;k0<=k4_hi;++k0){const int kF=k0+1;
|
|
for(int j0=j4_lo;j0<=j4_hi;++j0){const int jF=j0+1;
|
|
for(int i0=i4_lo;i0<=i4_hi;++i0){_Bool in6=has6&&i0>=i6_lo&&i0<=i6_hi&&j0>=j6_lo&&j0<=j6_hi&&k0>=k6_lo&&k0<=k6_hi;if(in6)continue;
|
|
const int iF=i0+1;const size_t p=idx_ex(i0,j0,k0,ex);
|
|
fxx[p]=Fdxdx*(-FH(iF-2,jF,kF)+F16*FH(iF-1,jF,kF)-F30*FH(iF,jF,kF)-FH(iF+2,jF,kF)+F16*FH(iF+1,jF,kF));
|
|
fyy[p]=Fdydy*(-FH(iF,jF-2,kF)+F16*FH(iF,jF-1,kF)-F30*FH(iF,jF,kF)-FH(iF,jF+2,kF)+F16*FH(iF,jF+1,kF));
|
|
fzz[p]=Fdzdz*(-FH(iF,jF,kF-2)+F16*FH(iF,jF,kF-1)-F30*FH(iF,jF,kF)-FH(iF,jF,kF+2)+F16*FH(iF,jF,kF+1));
|
|
{const double t_jm2=(FH(iF-2,jF-2,kF)-F8*FH(iF-1,jF-2,kF)+F8*FH(iF+1,jF-2,kF)-FH(iF+2,jF-2,kF));
|
|
const double t_jm1=(FH(iF-2,jF-1,kF)-F8*FH(iF-1,jF-1,kF)+F8*FH(iF+1,jF-1,kF)-FH(iF+2,jF-1,kF));
|
|
const double t_jp1=(FH(iF-2,jF+1,kF)-F8*FH(iF-1,jF+1,kF)+F8*FH(iF+1,jF+1,kF)-FH(iF+2,jF+1,kF));
|
|
const double t_jp2=(FH(iF-2,jF+2,kF)-F8*FH(iF-1,jF+2,kF)+F8*FH(iF+1,jF+2,kF)-FH(iF+2,jF+2,kF));
|
|
fxy[p]=Fdxdy*(t_jm2-F8*t_jm1+F8*t_jp1-t_jp2);}
|
|
{const double t_km2=(FH(iF-2,jF,kF-2)-F8*FH(iF-1,jF,kF-2)+F8*FH(iF+1,jF,kF-2)-FH(iF+2,jF,kF-2));
|
|
const double t_km1=(FH(iF-2,jF,kF-1)-F8*FH(iF-1,jF,kF-1)+F8*FH(iF+1,jF,kF-1)-FH(iF+2,jF,kF-1));
|
|
const double t_kp1=(FH(iF-2,jF,kF+1)-F8*FH(iF-1,jF,kF+1)+F8*FH(iF+1,jF,kF+1)-FH(iF+2,jF,kF+1));
|
|
const double t_kp2=(FH(iF-2,jF,kF+2)-F8*FH(iF-1,jF,kF+2)+F8*FH(iF+1,jF,kF+2)-FH(iF+2,jF,kF+2));
|
|
fxz[p]=Fdxdz*(t_km2-F8*t_km1+F8*t_kp1-t_kp2);}
|
|
{const double t_km2=(FH(iF,jF-2,kF-2)-F8*FH(iF,jF-1,kF-2)+F8*FH(iF,jF+1,kF-2)-FH(iF,jF+2,kF-2));
|
|
const double t_km1=(FH(iF,jF-2,kF-1)-F8*FH(iF,jF-1,kF-1)+F8*FH(iF,jF+1,kF-1)-FH(iF,jF+2,kF-1));
|
|
const double t_kp1=(FH(iF,jF-2,kF+1)-F8*FH(iF,jF-1,kF+1)+F8*FH(iF,jF+1,kF+1)-FH(iF,jF+2,kF+1));
|
|
const double t_kp2=(FH(iF,jF-2,kF+2)-F8*FH(iF,jF-1,kF+2)+F8*FH(iF,jF+1,kF+2)-FH(iF,jF+2,kF+2));
|
|
fyz[p]=Fdydz*(t_km2-F8*t_km1+F8*t_kp1-t_kp2);}
|
|
}}}}
|
|
/* 6th-order pass */
|
|
if(has6){for(int k0=k6_lo;k0<=k6_hi;++k0){const int kF=k0+1;
|
|
for(int j0=j6_lo;j0<=j6_hi;++j0){const int jF=j0+1;
|
|
for(int i0=i6_lo;i0<=i6_hi;++i0){if(has8&&i0>=i8_lo&&i0<=i8_hi&&j0>=j8_lo&&j0<=j8_hi&&k0>=k8_lo&&k0<=k8_hi)continue;
|
|
const int iF=i0+1;const size_t p=idx_ex(i0,j0,k0,ex);
|
|
fxx[p]=Xdxdx*(TWO*FH(iF-3,jF,kF)-F27*FH(iF-2,jF,kF)+F270*FH(iF-1,jF,kF)-F490*FH(iF,jF,kF)+F270*FH(iF+1,jF,kF)-F27*FH(iF+2,jF,kF)+TWO*FH(iF+3,jF,kF));
|
|
fyy[p]=Xdydy*(TWO*FH(iF,jF-3,kF)-F27*FH(iF,jF-2,kF)+F270*FH(iF,jF-1,kF)-F490*FH(iF,jF,kF)+F270*FH(iF,jF+1,kF)-F27*FH(iF,jF+2,kF)+TWO*FH(iF,jF+3,kF));
|
|
fzz[p]=Xdzdz*(TWO*FH(iF,jF,kF-3)-F27*FH(iF,jF,kF-2)+F270*FH(iF,jF,kF-1)-F490*FH(iF,jF,kF)+F270*FH(iF,jF,kF+1)-F27*FH(iF,jF,kF+2)+TWO*FH(iF,jF,kF+3));
|
|
#define XS6_8(JF,KFDUMMY) (-FH(iF-3,JF,KFDUMMY)+F9*FH(iF-2,JF,KFDUMMY)-F45*FH(iF-1,JF,KFDUMMY)+F45*FH(iF+1,JF,KFDUMMY)-F9*FH(iF+2,JF,KFDUMMY)+FH(iF+3,JF,KFDUMMY))
|
|
fxy[p]=Xdxdy*(-XS6_8(jF-3,kF)+F9*XS6_8(jF-2,kF)-F45*XS6_8(jF-1,kF)+F45*XS6_8(jF+1,kF)-F9*XS6_8(jF+2,kF)+XS6_8(jF+3,kF));
|
|
fxz[p]=Xdxdz*(-XS6_8(jF,kF-3)+F9*XS6_8(jF,kF-2)-F45*XS6_8(jF,kF-1)+F45*XS6_8(jF,kF+1)-F9*XS6_8(jF,kF+2)+XS6_8(jF,kF+3));
|
|
#undef XS6_8
|
|
#define YS6_8(JF,KFDUMMY) (-FH(iF,JF-3,KFDUMMY)+F9*FH(iF,JF-2,KFDUMMY)-F45*FH(iF,JF-1,KFDUMMY)+F45*FH(iF,JF+1,KFDUMMY)-F9*FH(iF,JF+2,KFDUMMY)+FH(iF,JF+3,KFDUMMY))
|
|
fyz[p]=Xdydz*(-YS6_8(jF,kF-3)+F9*YS6_8(jF,kF-2)-F45*YS6_8(jF,kF-1)+F45*YS6_8(jF,kF+1)-F9*YS6_8(jF,kF+2)+YS6_8(jF,kF+3));
|
|
#undef YS6_8
|
|
}}}}
|
|
/* 8th-order pass */
|
|
if(has8){for(int k0=k8_lo;k0<=k8_hi;++k0){const int kF=k0+1;
|
|
for(int j0=j8_lo;j0<=j8_hi;++j0){const int jF=j0+1;
|
|
for(int i0=i8_lo;i0<=i8_hi;++i0){const int iF=i0+1;const size_t p=idx_ex(i0,j0,k0,ex);
|
|
fxx[p]=Edxdx*(-(double)9*FH(iF-4,jF,kF)+F128*FH(iF-3,jF,kF)-F1008*FH(iF-2,jF,kF)+F8064*FH(iF-1,jF,kF)-F14350*FH(iF,jF,kF)+F8064*FH(iF+1,jF,kF)-F1008*FH(iF+2,jF,kF)+F128*FH(iF+3,jF,kF)-(double)9*FH(iF+4,jF,kF));
|
|
fyy[p]=Edydy*(-(double)9*FH(iF,jF-4,kF)+F128*FH(iF,jF-3,kF)-F1008*FH(iF,jF-2,kF)+F8064*FH(iF,jF-1,kF)-F14350*FH(iF,jF,kF)+F8064*FH(iF,jF+1,kF)-F1008*FH(iF,jF+2,kF)+F128*FH(iF,jF+3,kF)-(double)9*FH(iF,jF+4,kF));
|
|
fzz[p]=Edzdz*(-(double)9*FH(iF,jF,kF-4)+F128*FH(iF,jF,kF-3)-F1008*FH(iF,jF,kF-2)+F8064*FH(iF,jF,kF-1)-F14350*FH(iF,jF,kF)+F8064*FH(iF,jF,kF+1)-F1008*FH(iF,jF,kF+2)+F128*FH(iF,jF,kF+3)-(double)9*FH(iF,jF,kF+4));
|
|
#define XS8(JF,KFDUMMY) (+(double)3*FH(iF-4,JF,KFDUMMY)-F32*FH(iF-3,JF,KFDUMMY)+F168*FH(iF-2,JF,KFDUMMY)-F672*FH(iF-1,JF,KFDUMMY)+F672*FH(iF+1,JF,KFDUMMY)-F168*FH(iF+2,JF,KFDUMMY)+F32*FH(iF+3,JF,KFDUMMY)-(double)3*FH(iF+4,JF,KFDUMMY))
|
|
fxy[p]=Edxdy*(+(double)3*XS8(jF-4,kF)-F32*XS8(jF-3,kF)+F168*XS8(jF-2,kF)-F672*XS8(jF-1,kF)+F672*XS8(jF+1,kF)-F168*XS8(jF+2,kF)+F32*XS8(jF+3,kF)-(double)3*XS8(jF+4,kF));
|
|
fxz[p]=Edxdz*(+(double)3*XS8(jF,kF-4)-F32*XS8(jF,kF-3)+F168*XS8(jF,kF-2)-F672*XS8(jF,kF-1)+F672*XS8(jF,kF+1)-F168*XS8(jF,kF+2)+F32*XS8(jF,kF+3)-(double)3*XS8(jF,kF+4));
|
|
#undef XS8
|
|
#define YS8(JF,KFDUMMY) (+(double)3*FH(iF,JF-4,KFDUMMY)-F32*FH(iF,JF-3,KFDUMMY)+F168*FH(iF,JF-2,KFDUMMY)-F672*FH(iF,JF-1,KFDUMMY)+F672*FH(iF,JF+1,KFDUMMY)-F168*FH(iF,JF+2,KFDUMMY)+F32*FH(iF,JF+3,KFDUMMY)-(double)3*FH(iF,JF+4,KFDUMMY))
|
|
fyz[p]=Edydz*(+(double)3*YS8(jF,kF-4)-F32*YS8(jF,kF-3)+F168*YS8(jF,kF-2)-F672*YS8(jF,kF-1)+F672*YS8(jF,kF+1)-F168*YS8(jF,kF+2)+F32*YS8(jF,kF+3)-(double)3*YS8(jF,kF+4));
|
|
#undef YS8
|
|
}}}}
|
|
#undef FH
|
|
return;
|
|
}
|
|
#else
|
|
#error "fdderivs_sh_c.C: unsupported ghost_width"
|
|
#endif
|
|
}
|