#include "macrodef.h" #include "share_func.h" /* * C 版 fderivs_sh — first derivatives on shell patch in (rho, sigma, R) coords. * * Same stencil coefficients as Cartesian fderivs, but: * - Uses symmetry_stbd (ghost on BOTH sides of x/y, none in z) * - fh buffer: (-ord+1:ex+ord) in x/y, (1:ex) in z * - SoA is 2-element only (x/y), no z-symmetry * - sst parameter (shell surface type, not used in stencil computation) */ extern "C" void fderivs_sh_(const int ex[3], const double *f, double *fx, double *fy, double *fz, const double *X, const double *Y, const double *Z, double SYM1, double SYM2, double SYM3, int Symmetry, int onoff, int sst) { (void)SYM3; (void)onoff; (void)sst; const double ZEO = 0.0, ONE = 1.0, TWO = 2.0, EIT = 8.0; const double F9 = 9.0, F12 = 12.0, F45 = 45.0, F60 = 60.0; const double F32 = 32.0, F168 = 168.0, F672 = 672.0, F840 = 840.0; const int NO_SYMM = 0, EQ_SYMM = 1, OCTANT = 2; const int ex1 = ex[0], ex2 = ex[1], ex3 = ex[2]; const double dX = X[1] - X[0]; const double dY = Y[1] - Y[0]; const double dZ = Z[1] - Z[0]; const int imaxF = ex1, jmaxF = ex2, kmaxF = ex3; const double SoA[2] = { SYM1, SYM2 }; #if (ghost_width == 2) { const int ord = 1; int iminF = 1, jminF = 1, kminF = 1; if (Symmetry == OCTANT && fabs(X[0]) < dX) iminF = 0; if (Symmetry == OCTANT && fabs(Y[0]) < dY) jminF = 0; if ((sst==2||sst==4) && fabs(Y[0]) < dY) jminF = 0; // EQ reflection const size_t nx = (size_t)ex1 + 2 * ord; const size_t ny = (size_t)ex2 + 2 * ord; const size_t nz = (size_t)ex3; const size_t fh_size = nx * ny * nz; static double *fh_buf = NULL; static size_t cap = 0; if (fh_size > cap) { free(fh_buf); fh_buf = (double*)aligned_alloc(64, fh_size*sizeof(double)); cap = fh_size; } double *fh = fh_buf; if (!fh) return; symmetry_stbd(ord, ex, f, fh, SoA); const double d2dx = ONE/TWO/dX, d2dy = ONE/TWO/dY, d2dz = ONE/TWO/dZ; const size_t all = (size_t)ex1*ex2*ex3; for (size_t p=0;p0)?iminF:0, j2_lo=(jminF>0)?jminF:0, k2_lo=0; const int i2_hi=ex1-2, j2_hi=ex2-2, k2_hi=ex3-2; if (i2_lo<=i2_hi&&j2_lo<=j2_hi&&k2_lo<=k2_hi) { for (int k0=k2_lo;k0<=k2_hi;++k0) { const int kF=k0+1; for (int j0=j2_lo;j0<=j2_hi;++j0) { const int jF=j0+1; for (int i0=i2_lo;i0<=i2_hi;++i0) { const int iF=i0+1; const size_t p=idx_ex(i0,j0,k0,ex); fx[p]=d2dx*(-fh[idx_fh_stbd(iF-1,jF,kF,ord,ex)]+fh[idx_fh_stbd(iF+1,jF,kF,ord,ex)]); fy[p]=d2dy*(-fh[idx_fh_stbd(iF,jF-1,kF,ord,ex)]+fh[idx_fh_stbd(iF,jF+1,kF,ord,ex)]); fz[p]=d2dz*(-fh[idx_fh_stbd(iF,jF,kF-1,ord,ex)]+fh[idx_fh_stbd(iF,jF,kF+1,ord,ex)]); }}} } return; } #elif (ghost_width == 3) { const int ord = 2; int iminF = 1, jminF = 1, kminF = 1; if (Symmetry == OCTANT && fabs(X[0]) < dX) iminF = -1; if (Symmetry == OCTANT && fabs(Y[0]) < dY) jminF = -1; if ((sst==2||sst==4) && fabs(Y[0]) < dY) jminF = -1; const size_t nx=(size_t)ex1+2*ord, ny=(size_t)ex2+2*ord, nz=(size_t)ex3; const size_t fh_size=nx*ny*nz; static double *fh_buf=NULL; static size_t cap=0; if (fh_size>cap){free(fh_buf);fh_buf=(double*)aligned_alloc(64,fh_size*sizeof(double));cap=fh_size;} double *fh=fh_buf; if(!fh)return; symmetry_stbd(ord,ex,f,fh,SoA); const double d12dx=ONE/F12/dX, d12dy=ONE/F12/dY, d12dz=ONE/F12/dZ; const double d2dx=ONE/TWO/dX, d2dy=ONE/TWO/dY, d2dz=ONE/TWO/dZ; const size_t all=(size_t)ex1*ex2*ex3; for(size_t p=0;p0)?iminF:0, j2_lo=(jminF>0)?jminF:0, k2_lo=0; const int i2_hi=ex1-2, j2_hi=ex2-2, k2_hi=ex3-2; const int i4_lo=(iminF+1>0)?iminF+1:0, j4_lo=(jminF+1>0)?jminF+1:0, k4_lo=0; const int i4_hi=ex1-3, j4_hi=ex2-3, k4_hi=ex3-3; if (i2_lo<=i2_hi&&j2_lo<=j2_hi&&k2_lo<=k2_hi) { for(int k0=k2_lo;k0<=k2_hi;++k0){const int kF=k0+1; for(int j0=j2_lo;j0<=j2_hi;++j0){const int jF=j0+1; for(int i0=i2_lo;i0<=i2_hi;++i0){const int iF=i0+1; const size_t p=idx_ex(i0,j0,k0,ex); fx[p]=d2dx*(-fh[idx_fh_stbd(iF-1,jF,kF,ord,ex)]+fh[idx_fh_stbd(iF+1,jF,kF,ord,ex)]); fy[p]=d2dy*(-fh[idx_fh_stbd(iF,jF-1,kF,ord,ex)]+fh[idx_fh_stbd(iF,jF+1,kF,ord,ex)]); fz[p]=d2dz*(-fh[idx_fh_stbd(iF,jF,kF-1,ord,ex)]+fh[idx_fh_stbd(iF,jF,kF+1,ord,ex)]); }}} } if (i4_lo<=i4_hi&&j4_lo<=j4_hi&&k4_lo<=k4_hi) { for(int k0=k4_lo;k0<=k4_hi;++k0){const int kF=k0+1; for(int j0=j4_lo;j0<=j4_hi;++j0){const int jF=j0+1; for(int i0=i4_lo;i0<=i4_hi;++i0){const int iF=i0+1; const size_t p=idx_ex(i0,j0,k0,ex); fx[p]=d12dx*(fh[idx_fh_stbd(iF-2,jF,kF,ord,ex)]-EIT*fh[idx_fh_stbd(iF-1,jF,kF,ord,ex)]+EIT*fh[idx_fh_stbd(iF+1,jF,kF,ord,ex)]-fh[idx_fh_stbd(iF+2,jF,kF,ord,ex)]); fy[p]=d12dy*(fh[idx_fh_stbd(iF,jF-2,kF,ord,ex)]-EIT*fh[idx_fh_stbd(iF,jF-1,kF,ord,ex)]+EIT*fh[idx_fh_stbd(iF,jF+1,kF,ord,ex)]-fh[idx_fh_stbd(iF,jF+2,kF,ord,ex)]); fz[p]=d12dz*(fh[idx_fh_stbd(iF,jF,kF-2,ord,ex)]-EIT*fh[idx_fh_stbd(iF,jF,kF-1,ord,ex)]+EIT*fh[idx_fh_stbd(iF,jF,kF+1,ord,ex)]-fh[idx_fh_stbd(iF,jF,kF+2,ord,ex)]); }}} } return; } #elif (ghost_width == 4) { const int ord = 3; int iminF=1,jminF=1,kminF=1; if(Symmetry==OCTANT&&fabs(X[0])cap){free(fh_buf);fh_buf=(double*)aligned_alloc(64,fh_size*sizeof(double));cap=fh_size;} double *fh=fh_buf;if(!fh)return; symmetry_stbd(ord,ex,f,fh,SoA); const double d60dx=ONE/F60/dX,d60dy=ONE/F60/dY,d60dz=ONE/F60/dZ; const double d12dx=ONE/F12/dX,d12dy=ONE/F12/dY,d12dz=ONE/F12/dZ; const double d2dx=ONE/TWO/dX,d2dy=ONE/TWO/dY,d2dz=ONE/TWO/dZ; const size_t all=(size_t)ex1*ex2*ex3; for(size_t p=0;p0)?iminF+1:0,j2_lo=(jminF+1>0)?jminF+1:0,k2_lo=0,i2_hi=ex1-2,j2_hi=ex2-2,k2_hi=ex3-2; const int i4_lo=(iminF+2>0)?iminF+2:0,j4_lo=(jminF+2>0)?jminF+2:0,k4_lo=0,i4_hi=ex1-3,j4_hi=ex2-3,k4_hi=ex3-3; const int i6_lo=(iminF+3>0)?iminF+3:0,j6_lo=(jminF+3>0)?jminF+3:0,k6_lo=0,i6_hi=ex1-4,j6_hi=ex2-4,k6_hi=ex3-4; if(i2_lo<=i2_hi&&j2_lo<=j2_hi&&k2_lo<=k2_hi){ for(int k0=k2_lo;k0<=k2_hi;++k0){const int kF=k0+1; for(int j0=j2_lo;j0<=j2_hi;++j0){const int jF=j0+1; for(int i0=i2_lo;i0<=i2_hi;++i0){const int iF=i0+1; const size_t p=idx_ex(i0,j0,k0,ex); fx[p]=d2dx*(-fh[idx_fh_stbd(iF-1,jF,kF,ord,ex)]+fh[idx_fh_stbd(iF+1,jF,kF,ord,ex)]); fy[p]=d2dy*(-fh[idx_fh_stbd(iF,jF-1,kF,ord,ex)]+fh[idx_fh_stbd(iF,jF+1,kF,ord,ex)]); fz[p]=d2dz*(-fh[idx_fh_stbd(iF,jF,kF-1,ord,ex)]+fh[idx_fh_stbd(iF,jF,kF+1,ord,ex)]); }}} } if(i4_lo<=i4_hi&&j4_lo<=j4_hi&&k4_lo<=k4_hi){ for(int k0=k4_lo;k0<=k4_hi;++k0){const int kF=k0+1; for(int j0=j4_lo;j0<=j4_hi;++j0){const int jF=j0+1; for(int i0=i4_lo;i0<=i4_hi;++i0){const int iF=i0+1; const size_t p=idx_ex(i0,j0,k0,ex); fx[p]=d12dx*(fh[idx_fh_stbd(iF-2,jF,kF,ord,ex)]-EIT*fh[idx_fh_stbd(iF-1,jF,kF,ord,ex)]+EIT*fh[idx_fh_stbd(iF+1,jF,kF,ord,ex)]-fh[idx_fh_stbd(iF+2,jF,kF,ord,ex)]); fy[p]=d12dy*(fh[idx_fh_stbd(iF,jF-2,kF,ord,ex)]-EIT*fh[idx_fh_stbd(iF,jF-1,kF,ord,ex)]+EIT*fh[idx_fh_stbd(iF,jF+1,kF,ord,ex)]-fh[idx_fh_stbd(iF,jF+2,kF,ord,ex)]); fz[p]=d12dz*(fh[idx_fh_stbd(iF,jF,kF-2,ord,ex)]-EIT*fh[idx_fh_stbd(iF,jF,kF-1,ord,ex)]+EIT*fh[idx_fh_stbd(iF,jF,kF+1,ord,ex)]-fh[idx_fh_stbd(iF,jF,kF+2,ord,ex)]); }}} } if(i6_lo<=i6_hi&&j6_lo<=j6_hi&&k6_lo<=k6_hi){ for(int k0=k6_lo;k0<=k6_hi;++k0){const int kF=k0+1; for(int j0=j6_lo;j0<=j6_hi;++j0){const int jF=j0+1; for(int i0=i6_lo;i0<=i6_hi;++i0){const int iF=i0+1; const size_t p=idx_ex(i0,j0,k0,ex); fx[p]=d60dx*(-fh[idx_fh_stbd(iF-3,jF,kF,ord,ex)]+F9*fh[idx_fh_stbd(iF-2,jF,kF,ord,ex)]-F45*fh[idx_fh_stbd(iF-1,jF,kF,ord,ex)]+F45*fh[idx_fh_stbd(iF+1,jF,kF,ord,ex)]-F9*fh[idx_fh_stbd(iF+2,jF,kF,ord,ex)]+fh[idx_fh_stbd(iF+3,jF,kF,ord,ex)]); fy[p]=d60dy*(-fh[idx_fh_stbd(iF,jF-3,kF,ord,ex)]+F9*fh[idx_fh_stbd(iF,jF-2,kF,ord,ex)]-F45*fh[idx_fh_stbd(iF,jF-1,kF,ord,ex)]+F45*fh[idx_fh_stbd(iF,jF+1,kF,ord,ex)]-F9*fh[idx_fh_stbd(iF,jF+2,kF,ord,ex)]+fh[idx_fh_stbd(iF,jF+3,kF,ord,ex)]); fz[p]=d60dz*(-fh[idx_fh_stbd(iF,jF,kF-3,ord,ex)]+F9*fh[idx_fh_stbd(iF,jF,kF-2,ord,ex)]-F45*fh[idx_fh_stbd(iF,jF,kF-1,ord,ex)]+F45*fh[idx_fh_stbd(iF,jF,kF+1,ord,ex)]-F9*fh[idx_fh_stbd(iF,jF,kF+2,ord,ex)]+fh[idx_fh_stbd(iF,jF,kF+3,ord,ex)]); }}} } return; } #elif (ghost_width == 5) { const int ord = 4; int iminF=1,jminF=1,kminF=1; if(Symmetry==OCTANT&&fabs(X[0])cap){free(fh_buf);fh_buf=(double*)aligned_alloc(64,fh_size*sizeof(double));cap=fh_size;} double *fh=fh_buf;if(!fh)return; symmetry_stbd(ord,ex,f,fh,SoA); const double d840dx=ONE/F840/dX,d840dy=ONE/F840/dY,d840dz=ONE/F840/dZ; const double d60dx=ONE/F60/dX,d60dy=ONE/F60/dY,d60dz=ONE/F60/dZ; const double d12dx=ONE/F12/dX,d12dy=ONE/F12/dY,d12dz=ONE/F12/dZ; const double d2dx=ONE/TWO/dX,d2dy=ONE/TWO/dY,d2dz=ONE/TWO/dZ; const size_t all=(size_t)ex1*ex2*ex3; for(size_t p=0;p0)?iminF+2:0,j2_lo=(jminF+2>0)?jminF+2:0,k2_lo=0,i2_hi=ex1-2,j2_hi=ex2-2,k2_hi=ex3-2; const int i4_lo=(iminF+3>0)?iminF+3:0,j4_lo=(jminF+3>0)?jminF+3:0,k4_lo=0,i4_hi=ex1-3,j4_hi=ex2-3,k4_hi=ex3-3; const int i6_lo=(iminF+4>0)?iminF+4:0,j6_lo=(jminF+4>0)?jminF+4:0,k6_lo=0,i6_hi=ex1-4,j6_hi=ex2-4,k6_hi=ex3-4; const int i8_lo=(iminF+5>0)?iminF+5:0,j8_lo=(jminF+5>0)?jminF+5:0,k8_lo=0,i8_hi=ex1-5,j8_hi=ex2-5,k8_hi=ex3-5; #define FH_S(iF,jF,kF) fh[idx_fh_stbd(iF,jF,kF,ord,ex)] if(i2_lo<=i2_hi&&j2_lo<=j2_hi&&k2_lo<=k2_hi){for(int k0=k2_lo;k0<=k2_hi;++k0){const int kF=k0+1; for(int j0=j2_lo;j0<=j2_hi;++j0){const int jF=j0+1; for(int i0=i2_lo;i0<=i2_hi;++i0){const int iF=i0+1;const size_t p=idx_ex(i0,j0,k0,ex); fx[p]=d2dx*(-FH_S(iF-1,jF,kF)+FH_S(iF+1,jF,kF)); fy[p]=d2dy*(-FH_S(iF,jF-1,kF)+FH_S(iF,jF+1,kF)); fz[p]=d2dz*(-FH_S(iF,jF,kF-1)+FH_S(iF,jF,kF+1));}}}} if(i4_lo<=i4_hi&&j4_lo<=j4_hi&&k4_lo<=k4_hi){for(int k0=k4_lo;k0<=k4_hi;++k0){const int kF=k0+1; for(int j0=j4_lo;j0<=j4_hi;++j0){const int jF=j0+1; for(int i0=i4_lo;i0<=i4_hi;++i0){const int iF=i0+1;const size_t p=idx_ex(i0,j0,k0,ex); fx[p]=d12dx*(FH_S(iF-2,jF,kF)-EIT*FH_S(iF-1,jF,kF)+EIT*FH_S(iF+1,jF,kF)-FH_S(iF+2,jF,kF)); fy[p]=d12dy*(FH_S(iF,jF-2,kF)-EIT*FH_S(iF,jF-1,kF)+EIT*FH_S(iF,jF+1,kF)-FH_S(iF,jF+2,kF)); fz[p]=d12dz*(FH_S(iF,jF,kF-2)-EIT*FH_S(iF,jF,kF-1)+EIT*FH_S(iF,jF,kF+1)-FH_S(iF,jF,kF+2));}}}} if(i6_lo<=i6_hi&&j6_lo<=j6_hi&&k6_lo<=k6_hi){for(int k0=k6_lo;k0<=k6_hi;++k0){const int kF=k0+1; for(int j0=j6_lo;j0<=j6_hi;++j0){const int jF=j0+1; for(int i0=i6_lo;i0<=i6_hi;++i0){const int iF=i0+1;const size_t p=idx_ex(i0,j0,k0,ex); fx[p]=d60dx*(-FH_S(iF-3,jF,kF)+F9*FH_S(iF-2,jF,kF)-F45*FH_S(iF-1,jF,kF)+F45*FH_S(iF+1,jF,kF)-F9*FH_S(iF+2,jF,kF)+FH_S(iF+3,jF,kF)); fy[p]=d60dy*(-FH_S(iF,jF-3,kF)+F9*FH_S(iF,jF-2,kF)-F45*FH_S(iF,jF-1,kF)+F45*FH_S(iF,jF+1,kF)-F9*FH_S(iF,jF+2,kF)+FH_S(iF,jF+3,kF)); fz[p]=d60dz*(-FH_S(iF,jF,kF-3)+F9*FH_S(iF,jF,kF-2)-F45*FH_S(iF,jF,kF-1)+F45*FH_S(iF,jF,kF+1)-F9*FH_S(iF,jF,kF+2)+FH_S(iF,jF,kF+3));}}}} if(i8_lo<=i8_hi&&j8_lo<=j8_hi&&k8_lo<=k8_hi){for(int k0=k8_lo;k0<=k8_hi;++k0){const int kF=k0+1; for(int j0=j8_lo;j0<=j8_hi;++j0){const int jF=j0+1; for(int i0=i8_lo;i0<=i8_hi;++i0){const int iF=i0+1;const size_t p=idx_ex(i0,j0,k0,ex); fx[p]=d840dx*(+(double)3*FH_S(iF-4,jF,kF)-F32*FH_S(iF-3,jF,kF)+F168*FH_S(iF-2,jF,kF)-F672*FH_S(iF-1,jF,kF)+F672*FH_S(iF+1,jF,kF)-F168*FH_S(iF+2,jF,kF)+F32*FH_S(iF+3,jF,kF)-(double)3*FH_S(iF+4,jF,kF)); fy[p]=d840dy*(+(double)3*FH_S(iF,jF-4,kF)-F32*FH_S(iF,jF-3,kF)+F168*FH_S(iF,jF-2,kF)-F672*FH_S(iF,jF-1,kF)+F672*FH_S(iF,jF+1,kF)-F168*FH_S(iF,jF+2,kF)+F32*FH_S(iF,jF+3,kF)-(double)3*FH_S(iF,jF+4,kF)); fz[p]=d840dz*(+(double)3*FH_S(iF,jF,kF-4)-F32*FH_S(iF,jF,kF-3)+F168*FH_S(iF,jF,kF-2)-F672*FH_S(iF,jF,kF-1)+F672*FH_S(iF,jF,kF+1)-F168*FH_S(iF,jF,kF+2)+F32*FH_S(iF,jF,kF+3)-(double)3*FH_S(iF,jF,kF+4));}}}} #undef FH_S return; } #else #error "fderivs_sh_c.C: unsupported ghost_width" #endif }