Compare commits

..

1 Commits

Author SHA1 Message Date
0992e2219a Migrate build system from Intel oneAPI to AMD AOCC/AOCL/OpenMPI
Replace Intel compilers (ifx/icpx/icx) with AOCC (flang/clang++/clang),
Intel MPI (mpiicpx) with AOCC-built OpenMPI (mpicxx), and Intel MKL
with AOCL BLIS/libFLAME. Replace -xHost with -march=znver4, -ipo with
-flto, -fp-model fast=2 with -ffast-math, -qopenmp with -fopenmp.
Remove PGO, TBB allocator, and Intel-specific runtime libraries.
Fix MKL-specific includes in TwoPunctures.C and gaussj.C to use
standard CBLAS/LAPACKE headers from AOCL.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-04-28 02:19:00 +08:00
25 changed files with 4642 additions and 10050 deletions

View File

@@ -1,100 +0,0 @@
##################################################################
##
## AMSS-NCKU Plot-Only Restart Script
## Author: Xiaoqu / Claude
## 2026/05/12
##
## This script checks for existing output data from AMSS_NCKU_Program.py.
## If data exists, it skips all computation and goes directly to plotting,
## saving time when plotting was interrupted.
## If no data is found, it exits with a message.
##
##################################################################
## Guard against re-execution by multiprocessing child processes.
if __name__ != '__main__':
import sys as _sys
_sys.exit(0)
import os
import sys
import AMSS_NCKU_Input as input_data
##################################################################
## Construct paths from input configuration
File_directory = os.path.join(input_data.File_directory)
output_directory = os.path.join(File_directory, "AMSS_NCKU_output")
binary_results_directory = os.path.join(output_directory, input_data.Output_directory)
figure_directory = os.path.join(File_directory, "figure")
##################################################################
## Check whether the required output data files exist
required_files = [
os.path.join(binary_results_directory, "bssn_BH.dat"),
os.path.join(binary_results_directory, "bssn_ADMQs.dat"),
os.path.join(binary_results_directory, "bssn_psi4.dat"),
os.path.join(binary_results_directory, "bssn_constraint.dat"),
]
missing_files = [f for f in required_files if not os.path.exists(f)]
if missing_files:
print(" No existing AMSS_NCKU_Program.py output data found. ")
print(" The following required files are missing: ")
for f in missing_files:
print(f" {f}")
print()
print(" Please run AMSS_NCKU_Program.py first to generate the simulation data. ")
print(" Exiting. ")
sys.exit(1)
print(" Found existing AMSS_NCKU_Program.py output data. " )
print(" Skipping all computation and going directly to plotting. " )
print()
## Ensure the figure directory exists (it should, but be safe)
os.makedirs(figure_directory, exist_ok=True)
##################################################################
## Plot the AMSS-NCKU program results
import plot_xiaoqu
import plot_GW_strain_amplitude_xiaoqu
from parallel_plot_helper import run_plot_tasks_parallel
plot_tasks = []
## Plot black hole trajectory
plot_tasks.append((plot_xiaoqu.generate_puncture_orbit_plot, (binary_results_directory, figure_directory)))
plot_tasks.append((plot_xiaoqu.generate_puncture_orbit_plot3D, (binary_results_directory, figure_directory)))
## Plot black hole separation vs. time
plot_tasks.append((plot_xiaoqu.generate_puncture_distence_plot, (binary_results_directory, figure_directory)))
## Plot gravitational waveforms (psi4 and strain amplitude)
for i in range(input_data.Detector_Number):
plot_tasks.append((plot_xiaoqu.generate_gravitational_wave_psi4_plot, (binary_results_directory, figure_directory, i)))
plot_tasks.append((plot_GW_strain_amplitude_xiaoqu.generate_gravitational_wave_amplitude_plot, (binary_results_directory, figure_directory, i)))
## Plot ADM mass evolution
for i in range(input_data.Detector_Number):
plot_tasks.append((plot_xiaoqu.generate_ADMmass_plot, (binary_results_directory, figure_directory, i)))
## Plot Hamiltonian constraint violation over time
for i in range(input_data.grid_level):
plot_tasks.append((plot_xiaoqu.generate_constraint_check_plot, (binary_results_directory, figure_directory, i)))
run_plot_tasks_parallel(plot_tasks)
## Plot stored binary data (runs serially, not in the parallel pool)
plot_xiaoqu.generate_binary_data_plot(binary_results_directory, figure_directory)
print()
print(" Plotting completed successfully. ")
print()

File diff suppressed because it is too large Load Diff

View File

@@ -102,16 +102,6 @@ public:
//-1: means no dumy dimension at all; 0: means rho; 1: means sigma
};
// Thread-safe search result (no pointers to shared mutable state)
struct PointSearchResult
{
bool found;
Block *Bg;
double gx, gy, gz; // global Cartesian coordinates
double lx, ly, lz; // local coordinates within the found block
int ssst; // source shell-patch type (-1 = Cartesian)
};
int myrank;
int shape[dim]; // for (rho, sigma, R), for rho and sigma means number of points for every pi/2
double Rrange[2]; // for Rmin and Rmax
@@ -185,12 +175,6 @@ public:
MyList<Patch> *Pp, double CDH[dim], MyList<pointstru> *pss);
bool prolongpointstru(MyList<pointstru> *&psul, bool ssyn, int tsst, MyList<ss_patch> *sPp, double DH[dim],
MyList<Patch> *Pp, double CDH[dim], double x, double y, double z, int Symmetry, int rank_in);
// Read-only point search — thread-safe (no shared mutable state modified)
PointSearchResult prolongpointstru_search(bool ssyn, int tsst, MyList<ss_patch> *sPp, double DH[dim],
MyList<Patch> *Pp, double CDH[dim], double x, double y, double z,
int Symmetry, int rank_in);
// Append a search result to a linked list — use inside omp critical section
void prolongpointstru_append(MyList<pointstru> *&psul, const PointSearchResult &sr, int tsst);
void setupintintstuff(int cpusize, MyList<Patch> *CPatL, int Symmetry);
void intertransfer(MyList<pointstru> **src, MyList<pointstru> **dst,
MyList<var> *VarList1 /* source */, MyList<var> *VarList2 /*target */,
@@ -211,11 +195,11 @@ public:
bool Interp_One_Point(MyList<var> *VarList,
double *XX, /*input global Cartesian coordinate*/
double *Shellf, int Symmetry);
void write_Pablo_file_ss(int *ext, double xmin, double xmax, double ymin, double ymax, double zmin, double zmax,
char *filename, int sst);
double L2Norm(var *vf);
void L2Norm7(var **vf, double *norms);
void Find_Maximum(MyList<var> *VarList, double *XX, double *Shellf);
};
void write_Pablo_file_ss(int *ext, double xmin, double xmax, double ymin, double ymax, double zmin, double zmax,
char *filename, int sst);
double L2Norm(var *vf);
void L2Norm7(var **vf, double *norms);
void Find_Maximum(MyList<var> *VarList, double *XX, double *Shellf);
};
#endif /* SHELLPATCH_H */

View File

@@ -27,7 +27,7 @@ using namespace std;
#endif
#include "TwoPunctures.h"
#include <mkl_cblas.h>
#include <cblas.h>
TwoPunctures::TwoPunctures(double mp, double mm, double b,
double P_plusx, double P_plusy, double P_plusz,

View File

@@ -94,31 +94,29 @@
Hcon,Mxcon,Mycon,Mzcon,Gmxcon,Gmycon,Gmzcon, &
Symmetry,Lev,eps,co)
if (co == 0) then
#if (ABV == 0)
call ricci_gamma(ex, X, Y, Z, &
chi, &
dxx , gxy , gxz , dyy , gyz , dzz,&
Gamx , Gamy , Gamz , &
Gamxxx,Gamxxy,Gamxxz,Gamxyy,Gamxyz,Gamxzz,&
Gamyxx,Gamyxy,Gamyxz,Gamyyy,Gamyyz,Gamyzz,&
Gamzxx,Gamzxy,Gamzxz,Gamzyy,Gamzyz,Gamzzz,&
Rxx,Rxy,Rxz,Ryy,Ryz,Rzz,&
Symmetry)
#endif
call constraint_bssn(ex, X, Y, Z,&
chi,trK, &
dxx,gxy,gxz,dyy,gyz,dzz, &
Axx,Axy,Axz,Ayy,Ayz,Azz, &
Gamx,Gamy,Gamz,&
Lap,betax,betay,betaz,rho,Sx,Sy,Sz,&
Gamxxx, Gamxxy, Gamxxz,Gamxyy, Gamxyz, Gamxzz, &
Gamyxx, Gamyxy, Gamyxz,Gamyyy, Gamyyz, Gamyzz, &
Gamzxx, Gamzxy, Gamzxz,Gamzyy, Gamzyz, Gamzzz, &
Rxx,Rxy,Rxz,Ryy,Ryz,Rzz, &
Hcon,Mxcon,Mycon,Mzcon,Gmxcon,Gmycon,Gmzcon, &
Symmetry)
endif
#if (ABV == 0)
call ricci_gamma(ex, X, Y, Z, &
chi, &
dxx , gxy , gxz , dyy , gyz , dzz,&
Gamx , Gamy , Gamz , &
Gamxxx,Gamxxy,Gamxxz,Gamxyy,Gamxyz,Gamxzz,&
Gamyxx,Gamyxy,Gamyxz,Gamyyy,Gamyyz,Gamyzz,&
Gamzxx,Gamzxy,Gamzxz,Gamzyy,Gamzyz,Gamzzz,&
Rxx,Rxy,Rxz,Ryy,Ryz,Rzz,&
Symmetry)
#endif
call constraint_bssn(ex, X, Y, Z,&
chi,trK, &
dxx,gxy,gxz,dyy,gyz,dzz, &
Axx,Axy,Axz,Ayy,Ayz,Azz, &
Gamx,Gamy,Gamz,&
Lap,betax,betay,betaz,rho,Sx,Sy,Sz,&
Gamxxx, Gamxxy, Gamxxz,Gamxyy, Gamxyz, Gamxzz, &
Gamyxx, Gamyxy, Gamyxz,Gamyyy, Gamyyz, Gamyzz, &
Gamzxx, Gamzxy, Gamzxz,Gamzyy, Gamzyz, Gamzzz, &
Rxx,Rxy,Rxz,Ryy,Ryz,Rzz, &
Hcon,Mxcon,Mycon,Mzcon,Gmxcon,Gmycon,Gmzcon, &
Symmetry)
return
@@ -228,12 +226,11 @@
call get_Z4cparameters(kappa1,kappa2,kappa3,FF,eta)
!!! sanity check
#ifdef DEBUG
dX = sum(chi)+sum(trK)+sum(dxx)+sum(gxy)+sum(gxz)+sum(dyy)+sum(gyz)+sum(dzz) &
+sum(Axx)+sum(Axy)+sum(Axz)+sum(Ayy)+sum(Ayz)+sum(Azz) &
+sum(Gamx)+sum(Gamy)+sum(Gamz) &
+sum(Lap)+sum(betax)+sum(betay)+sum(betaz)+sum(dtSfx)+sum(dtSfy)+sum(dtSfz) &
!!! sanity check
dX = sum(chi)+sum(trK)+sum(dxx)+sum(gxy)+sum(gxz)+sum(dyy)+sum(gyz)+sum(dzz) &
+sum(Axx)+sum(Axy)+sum(Axz)+sum(Ayy)+sum(Ayz)+sum(Azz) &
+sum(Gamx)+sum(Gamy)+sum(Gamz) &
+sum(Lap)+sum(betax)+sum(betay)+sum(betaz)+sum(dtSfx)+sum(dtSfy)+sum(dtSfz) &
+sum(TZ)
if(dX.ne.dX) then
if(sum(chi).ne.sum(chi))write(*,*)"Z4c_rhs.f90: find NaN in chi"
@@ -260,11 +257,10 @@
if(sum(dtSfx).ne.sum(dtSfx))write(*,*)"Z4c_rhs.f90: find NaN in dtSfx"
if(sum(dtSfy).ne.sum(dtSfy))write(*,*)"Z4c_rhs.f90: find NaN in dtSfy"
if(sum(dtSfz).ne.sum(dtSfz))write(*,*)"Z4c_rhs.f90: find NaN in dtSfz"
if(sum(TZ).ne.sum(Tz))write(*,*)"Z4c_rhs.f90: find NaN in TZ"
gont = 1
return
endif
#endif
if(sum(TZ).ne.sum(Tz))write(*,*)"Z4c_rhs.f90: find NaN in TZ"
gont = 1
return
endif
PI = dacos(-ONE)
@@ -1267,32 +1263,30 @@
endif
if (co == 0) then
#if (ABV == 0)
call ricci_gamma(ex, X, Y, Z, &
chi, &
dxx , gxy , gxz , dyy , gyz , dzz,&
Gamx , Gamy , Gamz , &
Gamxxx,Gamxxy,Gamxxz,Gamxyy,Gamxyz,Gamxzz,&
Gamyxx,Gamyxy,Gamyxz,Gamyyy,Gamyyz,Gamyzz,&
Gamzxx,Gamzxy,Gamzxz,Gamzyy,Gamzyz,Gamzzz,&
Rxx,Rxy,Rxz,Ryy,Ryz,Rzz,&
Symmetry)
#endif
call constraint_bssn(ex, X, Y, Z,&
chi,trK, &
dxx,gxy,gxz,dyy,gyz,dzz, &
Axx,Axy,Axz,Ayy,Ayz,Azz, &
Gamx,Gamy,Gamz,&
Lap,betax,betay,betaz,rho,Sx,Sy,Sz,&
Gamxxx, Gamxxy, Gamxxz,Gamxyy, Gamxyz, Gamxzz, &
Gamyxx, Gamyxy, Gamyxz,Gamyyy, Gamyyz, Gamyzz, &
Gamzxx, Gamzxy, Gamzxz,Gamzyy, Gamzyz, Gamzzz, &
Rxx,Rxy,Rxz,Ryy,Ryz,Rzz, &
Hcon,Mxcon,Mycon,Mzcon,Gmxcon,Gmycon,Gmzcon, &
Symmetry)
endif
#if (ABV == 0)
call ricci_gamma(ex, X, Y, Z, &
chi, &
dxx , gxy , gxz , dyy , gyz , dzz,&
Gamx , Gamy , Gamz , &
Gamxxx,Gamxxy,Gamxxz,Gamxyy,Gamxyz,Gamxzz,&
Gamyxx,Gamyxy,Gamyxz,Gamyyy,Gamyyz,Gamyzz,&
Gamzxx,Gamzxy,Gamzxz,Gamzyy,Gamzyz,Gamzzz,&
Rxx,Rxy,Rxz,Ryy,Ryz,Rzz,&
Symmetry)
#endif
call constraint_bssn(ex, X, Y, Z,&
chi,trK, &
dxx,gxy,gxz,dyy,gyz,dzz, &
Axx,Axy,Axz,Ayy,Ayz,Azz, &
Gamx,Gamy,Gamz,&
Lap,betax,betay,betaz,rho,Sx,Sy,Sz,&
Gamxxx, Gamxxy, Gamxxz,Gamxyy, Gamxyz, Gamxzz, &
Gamyxx, Gamyxy, Gamyxz,Gamyyy, Gamyyz, Gamyzz, &
Gamzxx, Gamzxy, Gamzxz,Gamzyy, Gamzyz, Gamzzz, &
Rxx,Rxy,Rxz,Ryy,Ryz,Rzz, &
Hcon,Mxcon,Mycon,Mzcon,Gmxcon,Gmycon,Gmzcon, &
Symmetry)
gont = 0

View File

@@ -121,12 +121,11 @@
call get_Z4cparameters(kappa1,kappa2,kappa3,FF,eta)
!!! sanity check
#ifdef DEBUG
dX = sum(chi)+sum(trK)+sum(dxx)+sum(gxy)+sum(gxz)+sum(dyy)+sum(gyz)+sum(dzz) &
+sum(Axx)+sum(Axy)+sum(Axz)+sum(Ayy)+sum(Ayz)+sum(Azz) &
+sum(Gamx)+sum(Gamy)+sum(Gamz) &
+sum(Lap)+sum(betax)+sum(betay)+sum(betaz)+sum(dtSfx)+sum(dtSfy)+sum(dtSfz) &
!!! sanity check
dX = sum(chi)+sum(trK)+sum(dxx)+sum(gxy)+sum(gxz)+sum(dyy)+sum(gyz)+sum(dzz) &
+sum(Axx)+sum(Axy)+sum(Axz)+sum(Ayy)+sum(Ayz)+sum(Azz) &
+sum(Gamx)+sum(Gamy)+sum(Gamz) &
+sum(Lap)+sum(betax)+sum(betay)+sum(betaz)+sum(dtSfx)+sum(dtSfy)+sum(dtSfz) &
+sum(TZ)
if(dX.ne.dX) then
if(sum(chi).ne.sum(chi))write(*,*)"Z4c_rhs_ss.f90: find NaN in chi"
@@ -153,11 +152,10 @@
if(sum(dtSfx).ne.sum(dtSfx))write(*,*)"Z4c_rhs_ss.f90: find NaN in dtSfx"
if(sum(dtSfy).ne.sum(dtSfy))write(*,*)"Z4c_rhs_ss.f90: find NaN in dtSfy"
if(sum(dtSfz).ne.sum(dtSfz))write(*,*)"Z4c_rhs_ss.f90: find NaN in dtSfz"
if(sum(TZ).ne.sum(Tz))write(*,*)"Z4c_rhs_ss.f90: find NaN in TZ"
gont = 1
return
endif
#endif
if(sum(TZ).ne.sum(Tz))write(*,*)"Z4c_rhs_ss.f90: find NaN in TZ"
gont = 1
return
endif
PI = dacos(-ONE)
@@ -1390,43 +1388,41 @@
call kodis_sh(ex,crho,sigma,R,TZ,TZ_rhs,SSS,Symmetry,eps,sst)
endif
if (co == 0) then
#if (ABV == 1)
call ricci_gamma_ss(ex,crho,sigma,R,X, Y, Z, &
drhodx, drhody, drhodz, &
dsigmadx,dsigmady,dsigmadz, &
dRdx,dRdy,dRdz, &
drhodxx,drhodxy,drhodxz,drhodyy,drhodyz,drhodzz, &
dsigmadxx,dsigmadxy,dsigmadxz,dsigmadyy,dsigmadyz,dsigmadzz, &
dRdxx,dRdxy,dRdxz,dRdyy,dRdyz,dRdzz, &
chi, &
dxx , gxy , gxz , dyy , gyz , dzz,&
Gamx , Gamy , Gamz , &
Gamxxx,Gamxxy,Gamxxz,Gamxyy,Gamxyz,Gamxzz,&
Gamyxx,Gamyxy,Gamyxz,Gamyyy,Gamyyz,Gamyzz,&
Gamzxx,Gamzxy,Gamzxz,Gamzyy,Gamzyz,Gamzzz,&
Rxx,Rxy,Rxz,Ryy,Ryz,Rzz,&
Symmetry,Lev,sst)
#endif
call constraint_bssn_ss(ex,crho,sigma,R,X, Y, Z, &
drhodx, drhody, drhodz, &
dsigmadx,dsigmady,dsigmadz, &
dRdx,dRdy,dRdz, &
drhodxx,drhodxy,drhodxz,drhodyy,drhodyz,drhodzz, &
dsigmadxx,dsigmadxy,dsigmadxz,dsigmadyy,dsigmadyz,dsigmadzz, &
dRdxx,dRdxy,dRdxz,dRdyy,dRdyz,dRdzz, &
chi,trK, &
dxx,gxy,gxz,dyy,gyz,dzz, &
Axx,Axy,Axz,Ayy,Ayz,Azz, &
Gamx,Gamy,Gamz,&
Lap,betax,betay,betaz,rho,Sx,Sy,Sz,&
Gamxxx, Gamxxy, Gamxxz,Gamxyy, Gamxyz, Gamxzz, &
Gamyxx, Gamyxy, Gamyxz,Gamyyy, Gamyyz, Gamyzz, &
Gamzxx, Gamzxy, Gamzxz,Gamzyy, Gamzyz, Gamzzz, &
Rxx,Rxy,Rxz,Ryy,Ryz,Rzz, &
Hcon,Mxcon,Mycon,Mzcon,Gmxcon,Gmycon,Gmzcon, &
Symmetry,Lev,sst)
endif
#if (ABV == 1)
call ricci_gamma_ss(ex,crho,sigma,R,X, Y, Z, &
drhodx, drhody, drhodz, &
dsigmadx,dsigmady,dsigmadz, &
dRdx,dRdy,dRdz, &
drhodxx,drhodxy,drhodxz,drhodyy,drhodyz,drhodzz, &
dsigmadxx,dsigmadxy,dsigmadxz,dsigmadyy,dsigmadyz,dsigmadzz, &
dRdxx,dRdxy,dRdxz,dRdyy,dRdyz,dRdzz, &
chi, &
dxx , gxy , gxz , dyy , gyz , dzz,&
Gamx , Gamy , Gamz , &
Gamxxx,Gamxxy,Gamxxz,Gamxyy,Gamxyz,Gamxzz,&
Gamyxx,Gamyxy,Gamyxz,Gamyyy,Gamyyz,Gamyzz,&
Gamzxx,Gamzxy,Gamzxz,Gamzyy,Gamzyz,Gamzzz,&
Rxx,Rxy,Rxz,Ryy,Ryz,Rzz,&
Symmetry,Lev,sst)
call constraint_bssn_ss(ex,crho,sigma,R,X, Y, Z, &
drhodx, drhody, drhodz, &
dsigmadx,dsigmady,dsigmadz, &
dRdx,dRdy,dRdz, &
drhodxx,drhodxy,drhodxz,drhodyy,drhodyz,drhodzz, &
dsigmadxx,dsigmadxy,dsigmadxz,dsigmadyy,dsigmadyz,dsigmadzz, &
dRdxx,dRdxy,dRdxz,dRdyy,dRdyz,dRdzz, &
chi,trK, &
dxx,gxy,gxz,dyy,gyz,dzz, &
Axx,Axy,Axz,Ayy,Ayz,Azz, &
Gamx,Gamy,Gamz,&
Lap,betax,betay,betaz,rho,Sx,Sy,Sz,&
Gamxxx, Gamxxy, Gamxxz,Gamxyy, Gamxyz, Gamxzz, &
Gamyxx, Gamyxy, Gamyxz,Gamyyy, Gamyyz, Gamyzz, &
Gamzxx, Gamzxy, Gamzxz,Gamzyy, Gamzyz, Gamzzz, &
Rxx,Rxy,Rxz,Ryy,Ryz,Rzz, &
Hcon,Mxcon,Mycon,Mzcon,Gmxcon,Gmycon,Gmzcon, &
Symmetry,Lev,sst)
#endif
gont = 0

View File

@@ -280,9 +280,9 @@ namespace rhs_kernel_timing_report
//================================================================================================
bssn_class::bssn_class(double Couranti, double StartTimei, double TotalTimei,
bssn_class::bssn_class(double Couranti, double StartTimei, double TotalTimei,
double DumpTimei, double d2DumpTimei, double CheckTimei, double AnasTimei,
int Symmetryi, int checkruni, char *checkfilenamei,
int Symmetryi, int checkruni, char *checkfilenamei,
double numepssi, double numepsbi, double numepshi,
int a_levi, int maxli, int decni, double maxrexi, double drexi)
: Courant(Couranti), StartTime(StartTimei), TotalTime(TotalTimei),
@@ -6074,7 +6074,7 @@ void bssn_class::RestrictProlong(int lev, int YN, bool BB,
//
// SynchList_cor old -----------
{
#if (ABEtype == 1 || ABEtype == 2)
#if (ABEtype == 1)
#if (PSTR == 1 || PSTR == 2)
// stringstream a_stream;
// a_stream.setf(ios::left);
@@ -6294,7 +6294,7 @@ void bssn_class::RestrictProlong(int lev, int YN, bool BB,
#endif
}
sync_evolution(lev, SL, sync_cache_rp_fine);
sync_evolution(lev, SL, sync_cache_rp_fine);
#if (PSTR == 1 || PSTR == 2)
// a_stream.clear();
@@ -6323,7 +6323,7 @@ void bssn_class::RestrictProlong_aux(int lev, int YN, bool BB,
//
// SynchList_cor old -----------
{
#if (ABEtype == 1 || ABEtype == 2)
#if (ABEtype == 1)
if (lev >= GH->levels - 1)
return;
lev = lev + 1;
@@ -6433,17 +6433,17 @@ void bssn_class::RestrictProlong_aux(int lev, int YN, bool BB,
}
#if (RPB == 0)
restrict_evolution(lev, SL, SynchList_pre);
restrict_evolution(lev, SL, SynchList_pre);
#elif (RPB == 1)
// Parallel::Restrict_bam(GH->PatL[lev-1],GH->PatL[lev],SL,SynchList_pre,Symmetry);
Parallel::Restrict_bam(GH->PatL[lev - 1], GH->PatL[lev], SL, SynchList_pre, GH->rsul[lev], Symmetry);
#endif
sync_evolution(lev - 1, SynchList_pre, sync_cache_rp_coarse);
sync_evolution(lev - 1, SynchList_pre, sync_cache_rp_coarse);
#if (RPB == 0)
#if (MIXOUTB == 0)
outbdlow2hi_evolution(lev, SynchList_pre, SL);
outbdlow2hi_evolution(lev, SynchList_pre, SL);
#elif (MIXOUTB == 1)
Parallel::OutBdLow2Himix(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SL, Symmetry);
#endif
@@ -6455,17 +6455,17 @@ void bssn_class::RestrictProlong_aux(int lev, int YN, bool BB,
else // no time refinement levels and for all same time levels
{
#if (RPB == 0)
restrict_evolution(lev, SL, SL);
restrict_evolution(lev, SL, SL);
#elif (RPB == 1)
// Parallel::Restrict_bam(GH->PatL[lev-1],GH->PatL[lev],SL,SL,Symmetry);
Parallel::Restrict_bam(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, GH->rsul[lev], Symmetry);
#endif
sync_evolution(lev - 1, SL, sync_cache_rp_coarse);
sync_evolution(lev - 1, SL, sync_cache_rp_coarse);
#if (RPB == 0)
#if (MIXOUTB == 0)
outbdlow2hi_evolution(lev, SL, SL);
outbdlow2hi_evolution(lev, SL, SL);
#elif (MIXOUTB == 1)
Parallel::OutBdLow2Himix(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, Symmetry);
#endif
@@ -6475,7 +6475,7 @@ void bssn_class::RestrictProlong_aux(int lev, int YN, bool BB,
#endif
}
#if (ABEtype == 1 || ABEtype == 2)
#if (ABEtype == 1)
Parallel::Sync(GH->PatL[lev], SL, Symmetry);
#else
sync_evolution(lev, SL, sync_cache_rp_fine);
@@ -6493,7 +6493,7 @@ void bssn_class::RestrictProlong_aux(int lev, int YN, bool BB,
void bssn_class::RestrictProlong(int lev, int YN, bool BB)
{
double dT_lev = dT * pow(0.5, Mymax(lev, trfls));
#if (ABEtype == 1 || ABEtype == 2)
#if (ABEtype == 1)
if (lev > 0)
{
MyList<Patch> *Pp, *Ppc;
@@ -8283,7 +8283,7 @@ void bssn_class::AH_Prepare_derivatives()
bool bssn_class::AH_Interp_Points(MyList<var> *VarList,
int NN, double **XX,
double *Shellf, int Symmetryi)
double *Shellf, int Symmetryi)
{
MyList<var> *varl;
int num_var = 0;

View File

@@ -1,323 +0,0 @@
#include "macrodef.h"
#include "bssn_rhs.h"
#include "share_func.h"
#include "tool.h"
#include <cstddef>
/*
* C 版 BSSN-EM RHS kernel — replaces empart.f90 + bssn_rhs.f90 for BSSN+Maxwell.
*
* Computes:
* 1. All metric and EM field derivatives
* 2. Physical metric, Christoffel-like terms
* 3. EM field RHS (E, B, Kpsi, Kphi)
* 4. Stress-energy tensor (rho, Si, Sij)
* 5. Calls f_compute_rhs_bssn (C BSSN RHS) with stress-energy
* 6. Advection + KO dissipation for EM fields
* 7. NaN check
*/
int f_compute_rhs_bssn_em_c(int *ex, double &T,
double *X, double *Y, double *Z,
double *chi, double *trK,
double *dxx, double *gxy, double *gxz, double *dyy, double *gyz, double *dzz,
double *Axx, double *Axy, double *Axz, double *Ayy, double *Ayz, double *Azz,
double *Gamx, double *Gamy, double *Gamz,
double *Lap, double *betax, double *betay, double *betaz,
double *dtSfx, double *dtSfy, double *dtSfz,
double *Ex, double *Ey, double *Ez,
double *Bx, double *By, double *Bz,
double *Kpsi, double *Kphi,
double *Jx, double *Jy, double *Jz, double *qchar,
double *chi_rhs, double *trK_rhs,
double *gxx_rhs, double *gxy_rhs, double *gxz_rhs,
double *gyy_rhs, double *gyz_rhs, double *gzz_rhs,
double *Axx_rhs, double *Axy_rhs, double *Axz_rhs,
double *Ayy_rhs, double *Ayz_rhs, double *Azz_rhs,
double *Gamx_rhs, double *Gamy_rhs, double *Gamz_rhs,
double *Lap_rhs, double *betax_rhs, double *betay_rhs, double *betaz_rhs,
double *dtSfx_rhs, double *dtSfy_rhs, double *dtSfz_rhs,
double *Ex_rhs, double *Ey_rhs, double *Ez_rhs,
double *Bx_rhs, double *By_rhs, double *Bz_rhs,
double *Kpsi_rhs, double *Kphi_rhs,
double *rho, double *Sx, double *Sy, double *Sz,
double *Sxx, double *Sxy, double *Sxz,
double *Syy, double *Syz, double *Szz,
double *Gamxxx, double *Gamxxy, double *Gamxxz,
double *Gamxyy, double *Gamxyz, double *Gamxzz,
double *Gamyxx, double *Gamyxy, double *Gamyxz,
double *Gamyyy, double *Gamyyz, double *Gamyzz,
double *Gamzxx, double *Gamzxy, double *Gamzxz,
double *Gamzyy, double *Gamzyz, double *Gamzzz,
double *Rxx, double *Rxy, double *Rxz,
double *Ryy, double *Ryz, double *Rzz,
double *ham_Res, double *movx_Res, double *movy_Res, double *movz_Res,
double *Gmx_Res, double *Gmy_Res, double *Gmz_Res,
int &Symmetry, int &Lev, double &eps, int &co)
{
(void)T;
int gont = 0;
const int nx = ex[0], ny = ex[1], nz = ex[2];
const int all = nx * ny * nz;
const size_t n = (size_t)all;
const double ZEO = 0.0, ONE = 1.0, TWO = 2.0, FOUR = 4.0, EIT = 8.0;
const double HALF = 0.5, THR = 3.0, F3o2 = 1.5, PI = 3.14159265358979323846;
const double SYM = 1.0, ANTI = -1.0;
const double kappa = 1.0;
const double SSS[3]={SYM,SYM,SYM}, AAS[3]={ANTI,ANTI,SYM};
const double ASA[3]={ANTI,SYM,ANTI}, SAA[3]={SYM,ANTI,ANTI};
const double ASS[3]={ANTI,SYM,SYM}, SAS[3]={SYM,ANTI,SYM};
const double SSA[3]={SYM,SYM,ANTI};
/* ---- allocate temporary arrays ---- */
double *chix = (double*)malloc(n*sizeof(double));
double *chiy = (double*)malloc(n*sizeof(double));
double *chiz = (double*)malloc(n*sizeof(double));
double *Exx=(double*)malloc(n*sizeof(double)),*Exy=(double*)malloc(n*sizeof(double)),*Exz=(double*)malloc(n*sizeof(double));
double *Eyx=(double*)malloc(n*sizeof(double)),*Eyy=(double*)malloc(n*sizeof(double)),*Eyz=(double*)malloc(n*sizeof(double));
double *Ezx=(double*)malloc(n*sizeof(double)),*Ezy=(double*)malloc(n*sizeof(double)),*Ezz=(double*)malloc(n*sizeof(double));
double *Bxx=(double*)malloc(n*sizeof(double)),*Bxy=(double*)malloc(n*sizeof(double)),*Bxz=(double*)malloc(n*sizeof(double));
double *Byx=(double*)malloc(n*sizeof(double)),*Byy=(double*)malloc(n*sizeof(double)),*Byz=(double*)malloc(n*sizeof(double));
double *Bzx=(double*)malloc(n*sizeof(double)),*Bzy=(double*)malloc(n*sizeof(double)),*Bzz=(double*)malloc(n*sizeof(double));
double *Kpsix=(double*)malloc(n*sizeof(double)),*Kpsiy=(double*)malloc(n*sizeof(double)),*Kpsiz=(double*)malloc(n*sizeof(double));
double *Kphix=(double*)malloc(n*sizeof(double)),*Kphiy=(double*)malloc(n*sizeof(double)),*Kphiz=(double*)malloc(n*sizeof(double));
double *Lapx=(double*)malloc(n*sizeof(double)),*Lapy=(double*)malloc(n*sizeof(double)),*Lapz=(double*)malloc(n*sizeof(double));
double *betaxx=(double*)malloc(n*sizeof(double)),*betaxy=(double*)malloc(n*sizeof(double)),*betaxz=(double*)malloc(n*sizeof(double));
double *betayx=(double*)malloc(n*sizeof(double)),*betayy=(double*)malloc(n*sizeof(double)),*betayz=(double*)malloc(n*sizeof(double));
double *betazx=(double*)malloc(n*sizeof(double)),*betazy=(double*)malloc(n*sizeof(double)),*betazz=(double*)malloc(n*sizeof(double));
double *gxxx=(double*)malloc(n*sizeof(double)),*gxxy=(double*)malloc(n*sizeof(double)),*gxxz=(double*)malloc(n*sizeof(double));
double *gxyx=(double*)malloc(n*sizeof(double)),*gxyy=(double*)malloc(n*sizeof(double)),*gxyz=(double*)malloc(n*sizeof(double));
double *gxzx=(double*)malloc(n*sizeof(double)),*gxzy=(double*)malloc(n*sizeof(double)),*gxzz=(double*)malloc(n*sizeof(double));
double *gyyx=(double*)malloc(n*sizeof(double)),*gyyy=(double*)malloc(n*sizeof(double)),*gyyz=(double*)malloc(n*sizeof(double));
double *gyzx=(double*)malloc(n*sizeof(double)),*gyzy=(double*)malloc(n*sizeof(double)),*gyzz=(double*)malloc(n*sizeof(double));
double *gzzx=(double*)malloc(n*sizeof(double)),*gzzy=(double*)malloc(n*sizeof(double)),*gzzz=(double*)malloc(n*sizeof(double));
double *gupxx=(double*)malloc(n*sizeof(double)),*gupxy=(double*)malloc(n*sizeof(double)),*gupxz=(double*)malloc(n*sizeof(double));
double *gupyy=(double*)malloc(n*sizeof(double)),*gupyz=(double*)malloc(n*sizeof(double)),*gupzz=(double*)malloc(n*sizeof(double));
if (!chix||!chiy||!chiz||!Exx||!Exy||!Exz||!Eyx||!Eyy||!Eyz||!Ezx||!Ezy||!Ezz||
!Bxx||!Bxy||!Bxz||!Byx||!Byy||!Byz||!Bzx||!Bzy||!Bzz||
!Kpsix||!Kpsiy||!Kpsiz||!Kphix||!Kphiy||!Kphiz||
!Lapx||!Lapy||!Lapz||
!betaxx||!betaxy||!betaxz||!betayx||!betayy||!betayz||!betazx||!betazy||!betazz||
!gxxx||!gxxy||!gxxz||!gxyx||!gxyy||!gxyz||!gxzx||!gxzy||!gxzz||
!gyyx||!gyyy||!gyyz||!gyzx||!gyzy||!gyzz||!gzzx||!gzzy||!gzzz||
!gupxx||!gupxy||!gupxz||!gupyy||!gupyz||!gupzz) {
gont = 1;
}
/* ==== 1. Compute all derivatives ==== */
if (!gont) {
/* metric derivatives */
fderivs(ex, Lap, Lapx, Lapy, Lapz, X, Y, Z, SYM, SYM, SYM, Symmetry, Lev);
fderivs(ex, betax, betaxx, betaxy, betaxz, X, Y, Z, ANTI, SYM, SYM, Symmetry, Lev);
fderivs(ex, betay, betayx, betayy, betayz, X, Y, Z, SYM, ANTI, SYM, Symmetry, Lev);
fderivs(ex, betaz, betazx, betazy, betazz, X, Y, Z, SYM, SYM, ANTI, Symmetry, Lev);
fderivs(ex, chi, chix, chiy, chiz, X, Y, Z, SYM, SYM, SYM, Symmetry, Lev);
fderivs(ex, dxx, gxxx, gxxy, gxxz, X, Y, Z, SYM, SYM, SYM, Symmetry, Lev);
fderivs(ex, gxy, gxyx, gxyy, gxyz, X, Y, Z, ANTI, ANTI, SYM, Symmetry, Lev);
fderivs(ex, gxz, gxzx, gxzy, gxzz, X, Y, Z, ANTI, SYM, ANTI, Symmetry, Lev);
fderivs(ex, dyy, gyyx, gyyy, gyyz, X, Y, Z, SYM, SYM, SYM, Symmetry, Lev);
fderivs(ex, gyz, gyzx, gyzy, gyzz, X, Y, Z, SYM, ANTI, ANTI, Symmetry, Lev);
fderivs(ex, dzz, gzzx, gzzy, gzzz, X, Y, Z, SYM, SYM, SYM, Symmetry, Lev);
/* EM field derivatives */
fderivs(ex, Kpsi, Kpsix, Kpsiy, Kpsiz, X, Y, Z, SYM, SYM, SYM, Symmetry, Lev);
fderivs(ex, Kphi, Kphix, Kphiy, Kphiz, X, Y, Z, SYM, SYM, SYM, Symmetry, Lev);
fderivs(ex, Ex, Exx, Exy, Exz, X, Y, Z, ANTI, SYM, SYM, Symmetry, Lev);
fderivs(ex, Ey, Eyx, Eyy, Eyz, X, Y, Z, SYM, ANTI, SYM, Symmetry, Lev);
fderivs(ex, Ez, Ezx, Ezy, Ezz, X, Y, Z, SYM, SYM, ANTI, Symmetry, Lev);
fderivs(ex, Bx, Bxx, Bxy, Bxz, X, Y, Z, SYM, ANTI, ANTI, Symmetry, Lev);
fderivs(ex, By, Byx, Byy, Byz, X, Y, Z, ANTI, SYM, ANTI, Symmetry, Lev);
fderivs(ex, Bz, Bzx, Bzy, Bzz, X, Y, Z, ANTI, ANTI, SYM, Symmetry, Lev);
/* ==== 2. Compute EM RHS and stress-energy ==== */
const double F1o4PI = ONE / (FOUR * PI);
for (size_t i = 0; i < n; ++i) {
const double alpn1 = Lap[i] + ONE;
const double chin1 = chi[i] + ONE;
const double chi3o2 = sqrt(chin1) * chin1; // chi^{3/2}
const double ichi = ONE / chin1;
/* physical metric */
const double pgxx = (dxx[i] + ONE) * ichi;
const double pgyy = (dyy[i] + ONE) * ichi;
const double pgzz = (dzz[i] + ONE) * ichi;
const double pgxy = gxy[i] * ichi;
const double pgxz = gxz[i] * ichi;
const double pgyz = gyz[i] * ichi;
/* inverse physical metric */
const double det = pgxx * pgyy * pgzz + pgxy * pgyz * pgxz + pgxz * pgxy * pgyz
- pgxz * pgyy * pgxz - pgxy * pgxy * pgzz - pgxx * pgyz * pgyz;
const double idet = ONE / det;
const double upxx = (pgyy * pgzz - pgyz * pgyz) * idet;
const double upxy = -(pgxy * pgzz - pgyz * pgxz) * idet;
const double upxz = (pgxy * pgyz - pgyy * pgxz) * idet;
const double upyy = (pgxx * pgzz - pgxz * pgxz) * idet;
const double upyz = -(pgxx * pgyz - pgxy * pgxz) * idet;
const double upzz = (pgxx * pgyy - pgxy * pgxy) * idet;
gupxx[i]=upxx; gupxy[i]=upxy; gupxz[i]=upxz;
gupyy[i]=upyy; gupyz[i]=upyz; gupzz[i]=upzz;
/* E-field RHS */
/* curl(B) part: epsilon^{ijk} ∂_j (alpha * B_k) in coordinate basis */
/* Using lower-index B fields: B_i_lower = pg_{ij} * B^j */
const double BxL = pgxx*Bx[i] + pgxy*By[i] + pgxz*Bz[i];
const double ByL = pgxy*Bx[i] + pgyy*By[i] + pgyz*Bz[i];
const double BzL = pgxz*Bx[i] + pgyz*By[i] + pgzz*Bz[i];
/* Physical metric derivatives (chain rule from conformal) */
const double pgxx_x = (gxxx[i] - pgxx*chix[i]) * ichi;
/* const double pgxx_y = (gxxy[i] - pgxx*chiy[i]) * ichi; */
const double pgxy_x = (gxyx[i] - pgxy*chix[i]) * ichi;
const double pgxy_y = (gxyy[i] - pgxy*chiy[i]) * ichi;
const double pgxz_x = (gxzx[i] - pgxz*chix[i]) * ichi;
const double pgxz_z = (gxzz[i] - pgxz*chiz[i]) * ichi;
const double pgyy_y = (gyyy[i] - pgyy*chiy[i]) * ichi;
const double pgyz_y = (gyzy[i] - pgyz*chiy[i]) * ichi;
const double pgyz_z = (gyzz[i] - pgyz*chiz[i]) * ichi;
const double pgzz_z = (gzzz[i] - pgzz*chiz[i]) * ichi;
/* Curl_x(B) = ∂_y (alpha*BzL) - ∂_z (alpha*ByL) */
const double aBx = alpn1*BxL, aBy = alpn1*ByL, aBz = alpn1*BzL;
const double curlBx = (aBz*Lapy[i] + alpn1*(pgxz*Bxy[i]+pgyz*Byy[i]+pgzz*Bzy[i]) + alpn1*(Bx[i]*gxzy[i]+By[i]*gyzy[i]+Bz[i]*gzzy[i]))
- (aBy*Lapz[i] + alpn1*(pgxy*Bxz[i]+pgyy*Byz[i]+pgyz*Bzz[i]) + alpn1*(Bx[i]*gxyz[i]+By[i]*gyyz[i]+Bz[i]*gyzz[i]));
double curlBy = (aBx*Lapz[i] + alpn1*(pgxx*Bxz[i]+pgxy*Byz[i]+pgxz*Bzz[i]) + alpn1*(Bx[i]*gxxz[i]+By[i]*gxyz[i]+Bz[i]*gxzz[i]))
- (aBz*Lapx[i] + alpn1*(pgxz*Bxx[i]+pgyz*Byx[i]+pgzz*Bzx[i]) + alpn1*(Bx[i]*gxzx[i]+By[i]*gyzx[i]+Bz[i]*gzzx[i]));
double curlBz = (aBy*Lapx[i] + alpn1*(pgxy*Bxx[i]+pgyy*Byx[i]+pgyz*Bzx[i]) + alpn1*(Bx[i]*gxyx[i]+By[i]*gyyx[i]+Bz[i]*gyzx[i]))
- (aBx*Lapy[i] + alpn1*(pgxx*Bxy[i]+pgxy*Byy[i]+pgxz*Bzy[i]) + alpn1*(Bx[i]*gxxy[i]+By[i]*gxyy[i]+Bz[i]*gxzy[i]));
/* Advection part: -beta^j * ∂_j E^i */
const double advEx = Ex[i]*betaxx[i] + Ey[i]*betaxy[i] + Ez[i]*betaxz[i];
const double advEy = Ex[i]*betayx[i] + Ey[i]*betayy[i] + Ez[i]*betayz[i];
const double advEz = Ex[i]*betazx[i] + Ey[i]*betazy[i] + Ez[i]*betazz[i];
/* grad(Kpsi) contracted with inverse metric */
const double gupKx = upxx*Kpsix[i] + upxy*Kpsiy[i] + upxz*Kpsiz[i];
const double gupKy = upxy*Kpsix[i] + upyy*Kpsiy[i] + upyz*Kpsiz[i];
const double gupKz = upxz*Kpsix[i] + upyz*Kpsiy[i] + upzz*Kpsiz[i];
Ex_rhs[i] = alpn1*trK[i]*Ex[i] - advEx - FOUR*PI*alpn1*Jx[i] - alpn1*gupKx + chi3o2*curlBx;
Ey_rhs[i] = alpn1*trK[i]*Ey[i] - advEy - FOUR*PI*alpn1*Jy[i] - alpn1*gupKy + chi3o2*curlBy;
Ez_rhs[i] = alpn1*trK[i]*Ez[i] - advEz - FOUR*PI*alpn1*Jz[i] - alpn1*gupKz + chi3o2*curlBz;
/* B-field RHS: similar but with -chi^{3/2} * curl(E) and grad(Kphi) */
const double ExL = pgxx*Ex[i] + pgxy*Ey[i] + pgxz*Ez[i];
const double EyL = pgxy*Ex[i] + pgyy*Ey[i] + pgyz*Ez[i];
const double EzL = pgxz*Ex[i] + pgyz*Ey[i] + pgzz*Ez[i];
const double aEx = alpn1*ExL, aEy = alpn1*EyL, aEz = alpn1*EzL;
const double curlEx = (aEz*Lapy[i] + alpn1*(pgxz*Exy[i]+pgyz*Eyy[i]+pgzz*Ezy[i]) + alpn1*(Ex[i]*gxzy[i]+Ey[i]*gyzy[i]+Ez[i]*gzzy[i]))
- (aEy*Lapz[i] + alpn1*(pgxy*Exz[i]+pgyy*Eyz[i]+pgyz*Ezz[i]) + alpn1*(Ex[i]*gxyz[i]+Ey[i]*gyyz[i]+Ez[i]*gyzz[i]));
double curlEy = (aEx*Lapz[i] + alpn1*(pgxx*Exz[i]+pgxy*Eyz[i]+pgxz*Ezz[i]) + alpn1*(Ex[i]*gxxz[i]+Ey[i]*gxyz[i]+Ez[i]*gxzz[i]))
- (aEz*Lapx[i] + alpn1*(pgxz*Exx[i]+pgyz*Eyx[i]+pgzz*Ezx[i]) + alpn1*(Ex[i]*gxzx[i]+Ey[i]*gyzx[i]+Ez[i]*gzzx[i]));
double curlEz = (aEy*Lapx[i] + alpn1*(pgxy*Exx[i]+pgyy*Eyx[i]+pgyz*Ezx[i]) + alpn1*(Ex[i]*gxyx[i]+Ey[i]*gyyx[i]+Ez[i]*gyzx[i]))
- (aEx*Lapy[i] + alpn1*(pgxx*Exy[i]+pgxy*Eyy[i]+pgxz*Ezy[i]) + alpn1*(Ex[i]*gxxy[i]+Ey[i]*gxyy[i]+Ez[i]*gxzy[i]));
const double advBx = Bx[i]*betaxx[i] + By[i]*betaxy[i] + Bz[i]*betaxz[i];
const double advBy = Bx[i]*betayx[i] + By[i]*betayy[i] + Bz[i]*betayz[i];
const double advBz = Bx[i]*betazx[i] + By[i]*betazy[i] + Bz[i]*betazz[i];
const double gupKphix = upxx*Kphix[i] + upxy*Kphiy[i] + upxz*Kphiz[i];
const double gupKphiy = upxy*Kphix[i] + upyy*Kphiy[i] + upyz*Kphiz[i];
const double gupKphiz = upxz*Kphix[i] + upyz*Kphiy[i] + upzz*Kphiz[i];
Bx_rhs[i] = alpn1*trK[i]*Bx[i] - advBx - alpn1*gupKphix - chi3o2*curlEx;
By_rhs[i] = alpn1*trK[i]*By[i] - advBy - alpn1*gupKphiy - chi3o2*curlEy;
Bz_rhs[i] = alpn1*trK[i]*Bz[i] - advBz - alpn1*gupKphiz - chi3o2*curlEz;
/* Scalar potential RHS */
const double divE = Exx[i] + Eyy[i] + Ezz[i];
const double divB = Bxx[i] + Byy[i] + Bzz[i];
const double chiCont = F3o2 * ichi * (chix[i]*Ex[i] + chiy[i]*Ey[i] + chiz[i]*Ez[i]);
Kpsi_rhs[i] = FOUR*PI*alpn1*qchar[i] - alpn1*kappa*Kpsi[i] - alpn1*(divE - chiCont);
Kphi_rhs[i] = -alpn1*kappa*Kphi[i] - alpn1*(divB - F3o2*ichi*(chix[i]*Bx[i] + chiy[i]*By[i] + chiz[i]*Bz[i]));
/* Stress-energy tensor */
const double E2 = pgxx*Ex[i]*Ex[i] + pgyy*Ey[i]*Ey[i] + pgzz*Ez[i]*Ez[i]
+ TWO*(pgxy*Ex[i]*Ey[i] + pgxz*Ex[i]*Ez[i] + pgyz*Ey[i]*Ez[i]);
const double B2 = pgxx*Bx[i]*Bx[i] + pgyy*By[i]*By[i] + pgzz*Bz[i]*Bz[i]
+ TWO*(pgxy*Bx[i]*By[i] + pgxz*Bx[i]*Bz[i] + pgyz*By[i]*Bz[i]);
rho[i] = (E2 + B2) / (EIT * PI);
const double ichi3o2 = ONE / chi3o2;
Sx[i] = (Ey[i]*Bz[i] - Ez[i]*By[i]) * F1o4PI * ichi3o2;
Sy[i] = (Ez[i]*Bx[i] - Ex[i]*Bz[i]) * F1o4PI * ichi3o2;
Sz[i] = (Ex[i]*By[i] - Ey[i]*Bx[i]) * F1o4PI * ichi3o2;
const double lExi = pgxx*Ex[i] + pgxy*Ey[i] + pgxz*Ez[i];
const double lEyi = pgxy*Ex[i] + pgyy*Ey[i] + pgyz*Ez[i];
const double lEzi = pgxz*Ex[i] + pgyz*Ey[i] + pgzz*Ez[i];
const double lBxi = pgxx*Bx[i] + pgxy*By[i] + pgxz*Bz[i];
const double lByi = pgxy*Bx[i] + pgyy*By[i] + pgyz*Bz[i];
const double lBzi = pgxz*Bx[i] + pgyz*By[i] + pgzz*Bz[i];
Sxx[i] = rho[i]*pgxx - (lExi*lExi + lBxi*lBxi) * F1o4PI;
Sxy[i] = rho[i]*pgxy - (lExi*lEyi + lBxi*lByi) * F1o4PI;
Sxz[i] = rho[i]*pgxz - (lExi*lEzi + lBxi*lBzi) * F1o4PI;
Syy[i] = rho[i]*pgyy - (lEyi*lEyi + lByi*lByi) * F1o4PI;
Syz[i] = rho[i]*pgyz - (lEyi*lEzi + lByi*lBzi) * F1o4PI;
Szz[i] = rho[i]*pgzz - (lEzi*lEzi + lBzi*lBzi) * F1o4PI;
}
/* ==== 3. Call BSSN RHS with EM stress-energy ==== */
gont = f_compute_rhs_bssn(ex, T, X, Y, Z,
chi, trK, dxx, gxy, gxz, dyy, gyz, dzz,
Axx, Axy, Axz, Ayy, Ayz, Azz,
Gamx, Gamy, Gamz, Lap, betax, betay, betaz, dtSfx, dtSfy, dtSfz,
chi_rhs, trK_rhs,
gxx_rhs, gxy_rhs, gxz_rhs, gyy_rhs, gyz_rhs, gzz_rhs,
Axx_rhs, Axy_rhs, Axz_rhs, Ayy_rhs, Ayz_rhs, Azz_rhs,
Gamx_rhs, Gamy_rhs, Gamz_rhs, Lap_rhs, betax_rhs, betay_rhs, betaz_rhs,
dtSfx_rhs, dtSfy_rhs, dtSfz_rhs,
rho, Sx, Sy, Sz, Sxx, Sxy, Sxz, Syy, Syz, Szz,
Gamxxx, Gamxxy, Gamxxz, Gamxyy, Gamxyz, Gamxzz,
Gamyxx, Gamyxy, Gamyxz, Gamyyy, Gamyyz, Gamyzz,
Gamzxx, Gamzxy, Gamzxz, Gamzyy, Gamzyz, Gamzzz,
Rxx, Rxy, Rxz, Ryy, Ryz, Rzz,
ham_Res, movx_Res, movy_Res, movz_Res,
Gmx_Res, Gmy_Res, Gmz_Res,
Symmetry, Lev, eps, co);
if (!gont) {
/* ==== 4. Advection terms for EM fields ==== */
lopsided(ex, X, Y, Z, Kpsi, Kpsi_rhs, betax, betay, betaz, Symmetry, SSS);
lopsided(ex, X, Y, Z, Kphi, Kphi_rhs, betax, betay, betaz, Symmetry, SSS);
lopsided(ex, X, Y, Z, Ex, Ex_rhs, betax, betay, betaz, Symmetry, ASS);
lopsided(ex, X, Y, Z, Ey, Ey_rhs, betax, betay, betaz, Symmetry, SAS);
lopsided(ex, X, Y, Z, Ez, Ez_rhs, betax, betay, betaz, Symmetry, SSA);
lopsided(ex, X, Y, Z, Bx, Bx_rhs, betax, betay, betaz, Symmetry, SAA);
lopsided(ex, X, Y, Z, By, By_rhs, betax, betay, betaz, Symmetry, ASA);
lopsided(ex, X, Y, Z, Bz, Bz_rhs, betax, betay, betaz, Symmetry, AAS);
/* ==== 5. KO dissipation for EM fields ==== */
if (eps > ZEO) {
kodis(ex, X, Y, Z, Kpsi, Kpsi_rhs, SSS, Symmetry, eps);
kodis(ex, X, Y, Z, Kphi, Kphi_rhs, SSS, Symmetry, eps);
kodis(ex, X, Y, Z, Ex, Ex_rhs, ASS, Symmetry, eps);
kodis(ex, X, Y, Z, Ey, Ey_rhs, SAS, Symmetry, eps);
kodis(ex, X, Y, Z, Ez, Ez_rhs, SSA, Symmetry, eps);
kodis(ex, X, Y, Z, Bx, Bx_rhs, SAA, Symmetry, eps);
kodis(ex, X, Y, Z, By, By_rhs, ASA, Symmetry, eps);
kodis(ex, X, Y, Z, Bz, Bz_rhs, AAS, Symmetry, eps);
}
/* ==== 6. NaN check ==== */
for (int i = 0; i < all; ++i) {
if (!isfinite(Ex_rhs[i]+Ey_rhs[i]+Ez_rhs[i]+Bx_rhs[i]+By_rhs[i]+Bz_rhs[i]+Kpsi_rhs[i]+Kphi_rhs[i])) {
gont = 1; break;
}
}
} /* inner if (!gont) */
} /* outer if (!gont) */
free(chix);free(chiy);free(chiz);
free(Exx);free(Exy);free(Exz);free(Eyx);free(Eyy);free(Eyz);free(Ezx);free(Ezy);free(Ezz);
free(Bxx);free(Bxy);free(Bxz);free(Byx);free(Byy);free(Byz);free(Bzx);free(Bzy);free(Bzz);
free(Kpsix);free(Kpsiy);free(Kpsiz);
free(Kphix);free(Kphiy);free(Kphiz);
free(Lapx);free(Lapy);free(Lapz);
free(betaxx);free(betaxy);free(betaxz);free(betayx);free(betayy);free(betayz);free(betazx);free(betazy);free(betazz);
free(gxxx);free(gxxy);free(gxxz);free(gxyx);free(gxyy);free(gxyz);free(gxzx);free(gxzy);free(gxzz);
free(gyyx);free(gyyy);free(gyyz);free(gyzx);free(gyzy);free(gyzz);free(gzzx);free(gzzy);free(gzzz);
free(gupxx);free(gupxy);free(gupxz);free(gupyy);free(gupyz);free(gupzz);
return gont;
}

View File

@@ -22,32 +22,32 @@
#define f_compute_rhs_Z4c_ss COMPUTE_RHS_Z4C_SS
#define f_compute_constraint_fr COMPUTE_CONSTRAINT_FR
#endif
#ifdef fortran3
#define f_compute_rhs_bssn compute_rhs_bssn_
#ifdef fortran3
#define f_compute_rhs_bssn compute_rhs_bssn_
#define f_compute_rhs_bssn_ss compute_rhs_bssn_ss_
#define f_compute_rhs_bssn_escalar compute_rhs_bssn_escalar_
#define f_compute_rhs_bssn_escalar_ss compute_rhs_bssn_escalar_ss_
#define f_compute_rhs_Z4c compute_rhs_z4c_
#define f_compute_rhs_Z4cnot compute_rhs_z4cnot_
#define f_compute_rhs_Z4c_ss compute_rhs_z4c_ss_
#define f_compute_constraint_fr compute_constraint_fr_
#endif
#ifdef __cplusplus
extern "C"
{
#endif
void f_bssn_rhs_kernel_timing_reset();
int f_bssn_rhs_kernel_timing_bucket_count();
const double *f_bssn_rhs_kernel_timing_local_seconds();
const char *f_bssn_rhs_kernel_timing_label(int);
#ifdef __cplusplus
}
#endif
extern "C"
{
int f_compute_rhs_bssn(int *, double &, double *, double *, double *, // ex,T,X,Y,Z
#define f_compute_constraint_fr compute_constraint_fr_
#endif
#ifdef __cplusplus
extern "C"
{
#endif
void f_bssn_rhs_kernel_timing_reset();
int f_bssn_rhs_kernel_timing_bucket_count();
const double *f_bssn_rhs_kernel_timing_local_seconds();
const char *f_bssn_rhs_kernel_timing_label(int);
#ifdef __cplusplus
}
#endif
extern "C"
{
int f_compute_rhs_bssn(int *, double &, double *, double *, double *, // ex,T,X,Y,Z
double *, double *, // chi, trK
double *, double *, double *, double *, double *, double *, // gij
double *, double *, double *, double *, double *, double *, // Aij
@@ -63,34 +63,34 @@ extern "C"
double *, double *, double *, double *, double *, double *, // Christoffel
double *, double *, double *, double *, double *, double *, // Christoffel
double *, double *, double *, double *, double *, double *, // Ricci
double *, double *, double *, double *, double *, double *, double *, // constraint violation
int &, int &, double &, int &);
}
int f_compute_rhs_bssn_escalar_c(int *, double &, double *, double *, double *, // ex,T,X,Y,Z
double *, double *, // chi, trK
double *, double *, double *, double *, double *, double *, // gij
double *, double *, double *, double *, double *, double *, // Aij
double *, double *, double *, // Gam
double *, double *, double *, double *, double *, double *, double *, // Gauge
double *, double *, // Sphi, Spi
double *, double *, // chi, trK
double *, double *, double *, double *, double *, double *, // gij
double *, double *, double *, double *, double *, double *, // Aij
double *, double *, double *, // Gam
double *, double *, double *, double *, double *, double *, double *, // Gauge
double *, double *, // Sphi, Spi
double *, double *, double *, double *, double *, double *, double *, double *, double *, double *, // stress-energy
double *, double *, double *, double *, double *, double *, // Christoffel
double *, double *, double *, double *, double *, double *, // Christoffel
double *, double *, double *, double *, double *, double *, // Christoffel
double *, double *, double *, double *, double *, double *, // Ricci
double *, double *, double *, double *, double *, double *, double *, // constraint violation
int &, int &, double &, int &);
extern "C"
{
int f_compute_rhs_bssn_ss(int *, double &, double *, double *, double *, // ex,T,rho,sigma,R
double *, double *, double *, double *, double *, double *, double *, // constraint violation
int &, int &, double &, int &);
}
int f_compute_rhs_bssn_escalar_c(int *, double &, double *, double *, double *, // ex,T,X,Y,Z
double *, double *, // chi, trK
double *, double *, double *, double *, double *, double *, // gij
double *, double *, double *, double *, double *, double *, // Aij
double *, double *, double *, // Gam
double *, double *, double *, double *, double *, double *, double *, // Gauge
double *, double *, // Sphi, Spi
double *, double *, // chi, trK
double *, double *, double *, double *, double *, double *, // gij
double *, double *, double *, double *, double *, double *, // Aij
double *, double *, double *, // Gam
double *, double *, double *, double *, double *, double *, double *, // Gauge
double *, double *, // Sphi, Spi
double *, double *, double *, double *, double *, double *, double *, double *, double *, double *, // stress-energy
double *, double *, double *, double *, double *, double *, // Christoffel
double *, double *, double *, double *, double *, double *, // Christoffel
double *, double *, double *, double *, double *, double *, // Christoffel
double *, double *, double *, double *, double *, double *, // Ricci
double *, double *, double *, double *, double *, double *, double *, // constraint violation
int &, int &, double &, int &);
extern "C"
{
int f_compute_rhs_bssn_ss(int *, double &, double *, double *, double *, // ex,T,rho,sigma,R
double *, double *, double *, // X,Y,Z
double *, double *, double *, // drhodx,drhody,drhodz
double *, double *, double *, // dsigmadx,dsigmady,dsigmadz
@@ -117,10 +117,10 @@ extern "C"
int &, int &, double &, int &, int &);
}
extern "C"
{
int f_compute_rhs_bssn_escalar(int *, double &, double *, double *, double *, // ex,T,X,Y,Z
double *, double *, // chi, trK
extern "C"
{
int f_compute_rhs_bssn_escalar(int *, double &, double *, double *, double *, // ex,T,X,Y,Z
double *, double *, // chi, trK
double *, double *, double *, double *, double *, double *, // gij
double *, double *, double *, double *, double *, double *, // Aij
double *, double *, double *, // Gam
@@ -137,14 +137,14 @@ extern "C"
double *, double *, double *, double *, double *, double *, // Christoffel
double *, double *, double *, double *, double *, double *, // Christoffel
double *, double *, double *, double *, double *, double *, // Ricci
double *, double *, double *, double *, double *, double *, double *, // constraint violation
int &, int &, double &, int &);
}
extern "C"
{
int f_compute_rhs_bssn_escalar_ss(int *, double &, double *, double *, double *, // ex,T,rho,sigma,R
double *, double *, double *, // X,Y,Z
double *, double *, double *, double *, double *, double *, double *, // constraint violation
int &, int &, double &, int &);
}
extern "C"
{
int f_compute_rhs_bssn_escalar_ss(int *, double &, double *, double *, double *, // ex,T,rho,sigma,R
double *, double *, double *, // X,Y,Z
double *, double *, double *, // drhodx,drhody,drhodz
double *, double *, double *, // dsigmadx,dsigmady,dsigmadz
double *, double *, double *, // dRdx,dRdy,dRdz
@@ -262,31 +262,4 @@ extern "C"
double *);
} // FR_cons
// BSSN-EM C kernel (replaces empart.f90 + bssn_rhs.f90 for BSSN+Maxwell)
int f_compute_rhs_bssn_em_c(int *, double &, double *, double *, double *,
double *, double *,
double *, double *, double *, double *, double *, double *,
double *, double *, double *, double *, double *, double *,
double *, double *, double *,
double *, double *, double *, double *, double *, double *, double *,
double *, double *, double *,
double *, double *, double *, double *, double *, double *, double *, double *,
double *, double *, double *,
double *, double *,
double *, double *,
double *, double *, double *, double *, double *, double *,
double *, double *, double *, double *, double *, double *,
double *, double *, double *,
double *, double *, double *, double *, double *, double *, double *,
double *, double *, double *, double *, double *, double *, double *, double *,
double *, double *, double *,
double *, double *, double *, double *,
double *, double *, double *, double *, double *, double *,
double *, double *, double *, double *, double *, double *,
double *, double *, double *, double *, double *, double *,
double *, double *, double *, double *, double *, double *,
double *, double *, double *, double *, double *, double *,
double *, double *, double *, double *, double *, double *,
int &, int &, double &, int &);
#endif /* BSSN_H */

View File

@@ -1075,10 +1075,6 @@ int f_compute_rhs_bssn(int *ex, double &T,
}
#endif
#if (GAUGE == 2 || GAUGE == 3 || GAUGE == 4 || GAUGE == 5)
fderivs(ex,chi,dtSfx_rhs,dtSfy_rhs,dtSfz_rhs,X,Y,Z,SYM,SYM,SYM,Symmetry,Lev);
#endif
for (int i = 0; i < all; i += 1) {
#if (GAUGE == 0)
betax_rhs[i] = FF * dtSfx[i];
@@ -1164,17 +1160,11 @@ int f_compute_rhs_bssn(int *ex, double &T,
lopsided_kodis(ex,X,Y,Z,gyz,gyz_rhs,betax,betay,betaz,Symmetry,SAA,eps);
lopsided_kodis(ex,X,Y,Z,betaz,betaz_rhs,betax,betay,betaz,Symmetry,SSA,eps);
lopsided_kodis(ex,X,Y,Z,dzz,gzz_rhs,betax,betay,betaz,Symmetry,SSS,eps);
#if (GAUGE == 0 || GAUGE == 2 || GAUGE == 3 || GAUGE == 6 || GAUGE == 7)
lopsided_kodis(ex,X,Y,Z,dtSfx,dtSfx_rhs,betax,betay,betaz,Symmetry,ASS,eps);
#endif
lopsided_kodis(ex,X,Y,Z,Axx,Axx_rhs,betax,betay,betaz,Symmetry,SSS,eps);
#if (GAUGE == 0 || GAUGE == 2 || GAUGE == 3 || GAUGE == 6 || GAUGE == 7)
lopsided_kodis(ex,X,Y,Z,dtSfy,dtSfy_rhs,betax,betay,betaz,Symmetry,SAS,eps);
#endif
lopsided_kodis(ex,X,Y,Z,Axy,Axy_rhs,betax,betay,betaz,Symmetry,AAS,eps);
#if (GAUGE == 0 || GAUGE == 2 || GAUGE == 3 || GAUGE == 6 || GAUGE == 7)
lopsided_kodis(ex,X,Y,Z,dtSfz,dtSfz_rhs,betax,betay,betaz,Symmetry,SSA,eps);
#endif
lopsided_kodis(ex,X,Y,Z,Axz,Axz_rhs,betax,betay,betaz,Symmetry,ASA,eps);
lopsided_kodis(ex,X,Y,Z,Ayy,Ayy_rhs,betax,betay,betaz,Symmetry,SSS,eps);
lopsided_kodis(ex,X,Y,Z,Ayz,Ayz_rhs,betax,betay,betaz,Symmetry,SAA,eps);

File diff suppressed because it is too large Load Diff

View File

@@ -1,321 +0,0 @@
#include "macrodef.h"
#include "share_func.h"
/*
* fdderivs_sh — second derivatives on shell patch in (rho, sigma, R) coords.
* Same stencil coefficients as Cartesian fdderivs. Uses symmetry_stbd.
*/
extern "C" void fdderivs_sh_(const int ex[3],
const double *f,
double *fxx, double *fxy, double *fxz,
double *fyy, double *fyz, double *fzz,
const double *X, const double *Y, const double *Z,
double SYM1, double SYM2, double SYM3,
int Symmetry, int onoff, int sst)
{
(void)SYM3; (void)onoff; (void)sst;
const int NO_SYMM=0, EQ_SYMM=1, OCTANT=2;
const double ZEO=0.0, ONE=1.0, TWO=2.0, F1o4=2.5e-1;
const double F8=8.0, F16=16.0, F30=30.0, F1o12=ONE/12.0, F1o144=ONE/144.0;
const double F9=9.0, F45=45.0, F60=60.0, F27=27.0, F270=270.0, F490=490.0;
const double F1o180=ONE/180.0, F1o3600=ONE/3600.0;
const double F32=32.0, F128=128.0, F168=168.0, F672=672.0, F840=840.0;
const double F1008=1008.0, F8064=8064.0, F14350=14350.0;
const double F1o5040=ONE/5040.0, F1o705600=ONE/705600.0;
const int ex1=ex[0], ex2=ex[1], ex3=ex[2];
const double dX=X[1]-X[0], dY=Y[1]-Y[0], dZ=Z[1]-Z[0];
const int imaxF=ex1, jmaxF=ex2, kmaxF=ex3;
const double SoA[2]={SYM1,SYM2};
#if (ghost_width == 2)
{
const int ord=1;
int iminF=1,jminF=1,kminF=1;
if(Symmetry==OCTANT&&fabs(X[0])<dX)iminF=0;
if(Symmetry==OCTANT&&fabs(Y[0])<dY)jminF=0;
if((sst==2||sst==4)&&fabs(Y[0])<dY)jminF=0;
const size_t nx=(size_t)ex1+2*ord,ny=(size_t)ex2+2*ord,nz=(size_t)ex3,fh_size=nx*ny*nz;
static double *fh_buf=NULL;static size_t cap=0;
if(fh_size>cap){free(fh_buf);fh_buf=(double*)aligned_alloc(64,fh_size*sizeof(double));cap=fh_size;}
double *fh=fh_buf;if(!fh)return;
symmetry_stbd(ord,ex,f,fh,SoA);
const double Sdxdx=ONE/(dX*dX),Sdydy=ONE/(dY*dY),Sdzdz=ONE/(dZ*dZ);
const double Sdxdy=F1o4/(dX*dY),Sdxdz=F1o4/(dX*dZ),Sdydz=F1o4/(dY*dZ);
const size_t all=(size_t)ex1*ex2*ex3;
for(size_t p=0;p<all;++p){fxx[p]=fyy[p]=fzz[p]=ZEO;fxy[p]=fxz[p]=fyz[p]=ZEO;}
const int i2_lo=(iminF>0)?iminF:0,j2_lo=(jminF>0)?jminF:0,k2_lo=1,i2_hi=ex1-2,j2_hi=ex2-2,k2_hi=ex3-2;
#define FH(iF,jF,kF) fh[idx_fh_stbd(iF,jF,kF,ord,ex)]
if(i2_lo<=i2_hi&&j2_lo<=j2_hi&&k2_lo<=k2_hi){
for(int k0=k2_lo;k0<=k2_hi;++k0){const int kF=k0+1;
for(int j0=j2_lo;j0<=j2_hi;++j0){const int jF=j0+1;
for(int i0=i2_lo;i0<=i2_hi;++i0){const int iF=i0+1;const size_t p=idx_ex(i0,j0,k0,ex);
fxx[p]=Sdxdx*(FH(iF-1,jF,kF)-TWO*FH(iF,jF,kF)+FH(iF+1,jF,kF));
fyy[p]=Sdydy*(FH(iF,jF-1,kF)-TWO*FH(iF,jF,kF)+FH(iF,jF+1,kF));
fzz[p]=Sdzdz*(FH(iF,jF,kF-1)-TWO*FH(iF,jF,kF)+FH(iF,jF,kF+1));
fxy[p]=Sdxdy*(FH(iF-1,jF-1,kF)-FH(iF+1,jF-1,kF)-FH(iF-1,jF+1,kF)+FH(iF+1,jF+1,kF));
fxz[p]=Sdxdz*(FH(iF-1,jF,kF-1)-FH(iF+1,jF,kF-1)-FH(iF-1,jF,kF+1)+FH(iF+1,jF,kF+1));
fyz[p]=Sdydz*(FH(iF,jF-1,kF-1)-FH(iF,jF+1,kF-1)-FH(iF,jF-1,kF+1)+FH(iF,jF+1,kF+1));
}}}
}
#undef FH
return;
}
#elif (ghost_width == 3)
{
const int ord=2;
int iminF=1,jminF=1,kminF=1;
if(Symmetry==OCTANT&&fabs(X[0])<dX)iminF=-1;
if(Symmetry==OCTANT&&fabs(Y[0])<dY)jminF=-1;
if((sst==2||sst==4)&&fabs(Y[0])<dY)jminF=-1;
const size_t nx=(size_t)ex1+2*ord,ny=(size_t)ex2+2*ord,nz=(size_t)ex3,fh_size=nx*ny*nz;
static double *fh_buf=NULL;static size_t cap=0;
if(fh_size>cap){free(fh_buf);fh_buf=(double*)aligned_alloc(64,fh_size*sizeof(double));cap=fh_size;}
double *fh=fh_buf;if(!fh)return;
symmetry_stbd(ord,ex,f,fh,SoA);
const double Sdxdx=ONE/(dX*dX),Sdydy=ONE/(dY*dY),Sdzdz=ONE/(dZ*dZ);
const double Fdxdx=F1o12/(dX*dX),Fdydy=F1o12/(dY*dY),Fdzdz=F1o12/(dZ*dZ);
const double Sdxdy=F1o4/(dX*dY),Sdxdz=F1o4/(dX*dZ),Sdydz=F1o4/(dY*dZ);
const double Fdxdy=F1o144/(dX*dY),Fdxdz=F1o144/(dX*dZ),Fdydz=F1o144/(dY*dZ);
const size_t all=(size_t)ex1*ex2*ex3;
for(size_t p=0;p<all;++p){fxx[p]=fyy[p]=fzz[p]=fxy[p]=fxz[p]=fyz[p]=ZEO;}
const int i2_lo=(iminF>0)?iminF:0,j2_lo=(jminF>0)?jminF:0,k2_lo=1,i2_hi=ex1-2,j2_hi=ex2-2,k2_hi=ex3-2;
const int i4_lo=(iminF+1>0)?iminF+1:0,j4_lo=(jminF+1>0)?jminF+1:0,k4_lo=2,i4_hi=ex1-3,j4_hi=ex2-3,k4_hi=ex3-3;
const int has4=(i4_lo<=i4_hi&&j4_lo<=j4_hi&&k4_lo<=k4_hi);
#define FH(iF,jF,kF) fh[idx_fh_stbd(iF,jF,kF,ord,ex)]
if(i2_lo<=i2_hi&&j2_lo<=j2_hi&&k2_lo<=k2_hi){
for(int k0=k2_lo;k0<=k2_hi;++k0){const int kF=k0+1;
for(int j0=j2_lo;j0<=j2_hi;++j0){const int jF=j0+1;
for(int i0=i2_lo;i0<=i2_hi;++i0){
if(has4&&i0>=i4_lo&&i0<=i4_hi&&j0>=j4_lo&&j0<=j4_hi&&k0>=k4_lo&&k0<=k4_hi)continue;
const int iF=i0+1;const size_t p=idx_ex(i0,j0,k0,ex);
fxx[p]=Sdxdx*(FH(iF-1,jF,kF)-TWO*FH(iF,jF,kF)+FH(iF+1,jF,kF));
fyy[p]=Sdydy*(FH(iF,jF-1,kF)-TWO*FH(iF,jF,kF)+FH(iF,jF+1,kF));
fzz[p]=Sdzdz*(FH(iF,jF,kF-1)-TWO*FH(iF,jF,kF)+FH(iF,jF,kF+1));
fxy[p]=Sdxdy*(FH(iF-1,jF-1,kF)-FH(iF+1,jF-1,kF)-FH(iF-1,jF+1,kF)+FH(iF+1,jF+1,kF));
fxz[p]=Sdxdz*(FH(iF-1,jF,kF-1)-FH(iF+1,jF,kF-1)-FH(iF-1,jF,kF+1)+FH(iF+1,jF,kF+1));
fyz[p]=Sdydz*(FH(iF,jF-1,kF-1)-FH(iF,jF+1,kF-1)-FH(iF,jF-1,kF+1)+FH(iF,jF+1,kF+1));
}}}
}
if(has4){
for(int k0=k4_lo;k0<=k4_hi;++k0){const int kF=k0+1;
for(int j0=j4_lo;j0<=j4_hi;++j0){const int jF=j0+1;
for(int i0=i4_lo;i0<=i4_hi;++i0){const int iF=i0+1;const size_t p=idx_ex(i0,j0,k0,ex);
fxx[p]=Fdxdx*(-FH(iF-2,jF,kF)+F16*FH(iF-1,jF,kF)-F30*FH(iF,jF,kF)-FH(iF+2,jF,kF)+F16*FH(iF+1,jF,kF));
fyy[p]=Fdydy*(-FH(iF,jF-2,kF)+F16*FH(iF,jF-1,kF)-F30*FH(iF,jF,kF)-FH(iF,jF+2,kF)+F16*FH(iF,jF+1,kF));
fzz[p]=Fdzdz*(-FH(iF,jF,kF-2)+F16*FH(iF,jF,kF-1)-F30*FH(iF,jF,kF)-FH(iF,jF,kF+2)+F16*FH(iF,jF,kF+1));
{const double t_jm2=(FH(iF-2,jF-2,kF)-F8*FH(iF-1,jF-2,kF)+F8*FH(iF+1,jF-2,kF)-FH(iF+2,jF-2,kF));
const double t_jm1=(FH(iF-2,jF-1,kF)-F8*FH(iF-1,jF-1,kF)+F8*FH(iF+1,jF-1,kF)-FH(iF+2,jF-1,kF));
const double t_jp1=(FH(iF-2,jF+1,kF)-F8*FH(iF-1,jF+1,kF)+F8*FH(iF+1,jF+1,kF)-FH(iF+2,jF+1,kF));
const double t_jp2=(FH(iF-2,jF+2,kF)-F8*FH(iF-1,jF+2,kF)+F8*FH(iF+1,jF+2,kF)-FH(iF+2,jF+2,kF));
fxy[p]=Fdxdy*(t_jm2-F8*t_jm1+F8*t_jp1-t_jp2);}
{const double t_km2=(FH(iF-2,jF,kF-2)-F8*FH(iF-1,jF,kF-2)+F8*FH(iF+1,jF,kF-2)-FH(iF+2,jF,kF-2));
const double t_km1=(FH(iF-2,jF,kF-1)-F8*FH(iF-1,jF,kF-1)+F8*FH(iF+1,jF,kF-1)-FH(iF+2,jF,kF-1));
const double t_kp1=(FH(iF-2,jF,kF+1)-F8*FH(iF-1,jF,kF+1)+F8*FH(iF+1,jF,kF+1)-FH(iF+2,jF,kF+1));
const double t_kp2=(FH(iF-2,jF,kF+2)-F8*FH(iF-1,jF,kF+2)+F8*FH(iF+1,jF,kF+2)-FH(iF+2,jF,kF+2));
fxz[p]=Fdxdz*(t_km2-F8*t_km1+F8*t_kp1-t_kp2);}
{const double t_km2=(FH(iF,jF-2,kF-2)-F8*FH(iF,jF-1,kF-2)+F8*FH(iF,jF+1,kF-2)-FH(iF,jF+2,kF-2));
const double t_km1=(FH(iF,jF-2,kF-1)-F8*FH(iF,jF-1,kF-1)+F8*FH(iF,jF+1,kF-1)-FH(iF,jF+2,kF-1));
const double t_kp1=(FH(iF,jF-2,kF+1)-F8*FH(iF,jF-1,kF+1)+F8*FH(iF,jF+1,kF+1)-FH(iF,jF+2,kF+1));
const double t_kp2=(FH(iF,jF-2,kF+2)-F8*FH(iF,jF-1,kF+2)+F8*FH(iF,jF+1,kF+2)-FH(iF,jF+2,kF+2));
fyz[p]=Fdydz*(t_km2-F8*t_km1+F8*t_kp1-t_kp2);}
}}}
}
#undef FH
return;
}
#elif (ghost_width == 4)
{
const int ord=3;
int iminF=1,jminF=1,kminF=1;
if(Symmetry==OCTANT&&fabs(X[0])<dX)iminF=-2;
if(Symmetry==OCTANT&&fabs(Y[0])<dY)jminF=-2;
if((sst==2||sst==4)&&fabs(Y[0])<dY)jminF=-2;
const size_t nx=(size_t)ex1+2*ord,ny=(size_t)ex2+2*ord,nz=(size_t)ex3,fh_size=nx*ny*nz;
static double *fh_buf=NULL;static size_t cap=0;
if(fh_size>cap){free(fh_buf);fh_buf=(double*)aligned_alloc(64,fh_size*sizeof(double));cap=fh_size;}
double *fh=fh_buf;if(!fh)return;
symmetry_stbd(ord,ex,f,fh,SoA);
const double Sdxdx=ONE/(dX*dX),Sdydy=ONE/(dY*dY),Sdzdz=ONE/(dZ*dZ);
const double Fdxdx=F1o12/(dX*dX),Fdydy=F1o12/(dY*dY),Fdzdz=F1o12/(dZ*dZ);
const double Xdxdx=F1o180/(dX*dX),Xdydy=F1o180/(dY*dY),Xdzdz=F1o180/(dZ*dZ);
const double Sdxdy=F1o4/(dX*dY),Sdxdz=F1o4/(dX*dZ),Sdydz=F1o4/(dY*dZ);
const double Fdxdy=F1o144/(dX*dY),Fdxdz=F1o144/(dX*dZ),Fdydz=F1o144/(dY*dZ);
const double Xdxdy=F1o3600/(dX*dY),Xdxdz=F1o3600/(dX*dZ),Xdydz=F1o3600/(dY*dZ);
const size_t all=(size_t)ex1*ex2*ex3;
for(size_t p=0;p<all;++p){fxx[p]=fyy[p]=fzz[p]=fxy[p]=fxz[p]=fyz[p]=ZEO;}
const int i2_lo=(iminF>0)?iminF:0,j2_lo=(jminF>0)?jminF:0,k2_lo=1,i2_hi=ex1-2,j2_hi=ex2-2,k2_hi=ex3-2;
const int i4_lo=(iminF+1>0)?iminF+1:0,j4_lo=(jminF+1>0)?jminF+1:0,k4_lo=2,i4_hi=ex1-3,j4_hi=ex2-3,k4_hi=ex3-3;
const int i6_lo=(iminF+2>0)?iminF+2:0,j6_lo=(jminF+2>0)?jminF+2:0,k6_lo=3,i6_hi=ex1-4,j6_hi=ex2-4,k6_hi=ex3-4;
const int has4=(i4_lo<=i4_hi&&j4_lo<=j4_hi&&k4_lo<=k4_hi),has6=(i6_lo<=i6_hi&&j6_lo<=j6_hi&&k6_lo<=k6_hi);
#define FH(iF,jF,kF) fh[idx_fh_stbd(iF,jF,kF,ord,ex)]
if(i2_lo<=i2_hi&&j2_lo<=j2_hi&&k2_lo<=k2_hi){for(int k0=k2_lo;k0<=k2_hi;++k0){const int kF=k0+1;
for(int j0=j2_lo;j0<=j2_hi;++j0){const int jF=j0+1;
for(int i0=i2_lo;i0<=i2_hi;++i0){bool in4=has4&&i0>=i4_lo&&i0<=i4_hi&&j0>=j4_lo&&j0<=j4_hi&&k0>=k4_lo&&k0<=k4_hi;if(in4)continue;
const int iF=i0+1;const size_t p=idx_ex(i0,j0,k0,ex);
fxx[p]=Sdxdx*(FH(iF-1,jF,kF)-TWO*FH(iF,jF,kF)+FH(iF+1,jF,kF));
fyy[p]=Sdydy*(FH(iF,jF-1,kF)-TWO*FH(iF,jF,kF)+FH(iF,jF+1,kF));
fzz[p]=Sdzdz*(FH(iF,jF,kF-1)-TWO*FH(iF,jF,kF)+FH(iF,jF,kF+1));
fxy[p]=Sdxdy*(FH(iF-1,jF-1,kF)-FH(iF+1,jF-1,kF)-FH(iF-1,jF+1,kF)+FH(iF+1,jF+1,kF));
fxz[p]=Sdxdz*(FH(iF-1,jF,kF-1)-FH(iF+1,jF,kF-1)-FH(iF-1,jF,kF+1)+FH(iF+1,jF,kF+1));
fyz[p]=Sdydz*(FH(iF,jF-1,kF-1)-FH(iF,jF+1,kF-1)-FH(iF,jF-1,kF+1)+FH(iF,jF+1,kF+1));
}}}}
if(has4){for(int k0=k4_lo;k0<=k4_hi;++k0){const int kF=k0+1;
for(int j0=j4_lo;j0<=j4_hi;++j0){const int jF=j0+1;
for(int i0=i4_lo;i0<=i4_hi;++i0){if(has6&&i0>=i6_lo&&i0<=i6_hi&&j0>=j6_lo&&j0<=j6_hi&&k0>=k6_lo&&k0<=k6_hi)continue;
const int iF=i0+1;const size_t p=idx_ex(i0,j0,k0,ex);
fxx[p]=Fdxdx*(-FH(iF-2,jF,kF)+F16*FH(iF-1,jF,kF)-F30*FH(iF,jF,kF)-FH(iF+2,jF,kF)+F16*FH(iF+1,jF,kF));
fyy[p]=Fdydy*(-FH(iF,jF-2,kF)+F16*FH(iF,jF-1,kF)-F30*FH(iF,jF,kF)-FH(iF,jF+2,kF)+F16*FH(iF,jF+1,kF));
fzz[p]=Fdzdz*(-FH(iF,jF,kF-2)+F16*FH(iF,jF,kF-1)-F30*FH(iF,jF,kF)-FH(iF,jF,kF+2)+F16*FH(iF,jF,kF+1));
{const double t_jm2=(FH(iF-2,jF-2,kF)-F8*FH(iF-1,jF-2,kF)+F8*FH(iF+1,jF-2,kF)-FH(iF+2,jF-2,kF));
const double t_jm1=(FH(iF-2,jF-1,kF)-F8*FH(iF-1,jF-1,kF)+F8*FH(iF+1,jF-1,kF)-FH(iF+2,jF-1,kF));
const double t_jp1=(FH(iF-2,jF+1,kF)-F8*FH(iF-1,jF+1,kF)+F8*FH(iF+1,jF+1,kF)-FH(iF+2,jF+1,kF));
const double t_jp2=(FH(iF-2,jF+2,kF)-F8*FH(iF-1,jF+2,kF)+F8*FH(iF+1,jF+2,kF)-FH(iF+2,jF+2,kF));
fxy[p]=Fdxdy*(t_jm2-F8*t_jm1+F8*t_jp1-t_jp2);}
{const double t_km2=(FH(iF-2,jF,kF-2)-F8*FH(iF-1,jF,kF-2)+F8*FH(iF+1,jF,kF-2)-FH(iF+2,jF,kF-2));
const double t_km1=(FH(iF-2,jF,kF-1)-F8*FH(iF-1,jF,kF-1)+F8*FH(iF+1,jF,kF-1)-FH(iF+2,jF,kF-1));
const double t_kp1=(FH(iF-2,jF,kF+1)-F8*FH(iF-1,jF,kF+1)+F8*FH(iF+1,jF,kF+1)-FH(iF+2,jF,kF+1));
const double t_kp2=(FH(iF-2,jF,kF+2)-F8*FH(iF-1,jF,kF+2)+F8*FH(iF+1,jF,kF+2)-FH(iF+2,jF,kF+2));
fxz[p]=Fdxdz*(t_km2-F8*t_km1+F8*t_kp1-t_kp2);}
{const double t_km2=(FH(iF,jF-2,kF-2)-F8*FH(iF,jF-1,kF-2)+F8*FH(iF,jF+1,kF-2)-FH(iF,jF+2,kF-2));
const double t_km1=(FH(iF,jF-2,kF-1)-F8*FH(iF,jF-1,kF-1)+F8*FH(iF,jF+1,kF-1)-FH(iF,jF+2,kF-1));
const double t_kp1=(FH(iF,jF-2,kF+1)-F8*FH(iF,jF-1,kF+1)+F8*FH(iF,jF+1,kF+1)-FH(iF,jF+2,kF+1));
const double t_kp2=(FH(iF,jF-2,kF+2)-F8*FH(iF,jF-1,kF+2)+F8*FH(iF,jF+1,kF+2)-FH(iF,jF+2,kF+2));
fyz[p]=Fdydz*(t_km2-F8*t_km1+F8*t_kp1-t_kp2);}
}}}}
if(has6){for(int k0=k6_lo;k0<=k6_hi;++k0){const int kF=k0+1;
for(int j0=j6_lo;j0<=j6_hi;++j0){const int jF=j0+1;
for(int i0=i6_lo;i0<=i6_hi;++i0){const int iF=i0+1;const size_t p=idx_ex(i0,j0,k0,ex);
fxx[p]=Xdxdx*(TWO*FH(iF-3,jF,kF)-F27*FH(iF-2,jF,kF)+F270*FH(iF-1,jF,kF)-F490*FH(iF,jF,kF)+F270*FH(iF+1,jF,kF)-F27*FH(iF+2,jF,kF)+TWO*FH(iF+3,jF,kF));
fyy[p]=Xdydy*(TWO*FH(iF,jF-3,kF)-F27*FH(iF,jF-2,kF)+F270*FH(iF,jF-1,kF)-F490*FH(iF,jF,kF)+F270*FH(iF,jF+1,kF)-F27*FH(iF,jF+2,kF)+TWO*FH(iF,jF+3,kF));
fzz[p]=Xdzdz*(TWO*FH(iF,jF,kF-3)-F27*FH(iF,jF,kF-2)+F270*FH(iF,jF,kF-1)-F490*FH(iF,jF,kF)+F270*FH(iF,jF,kF+1)-F27*FH(iF,jF,kF+2)+TWO*FH(iF,jF,kF+3));
#define XS6(JF,KFDUMMY) (-FH(iF-3,JF,KFDUMMY)+F9*FH(iF-2,JF,KFDUMMY)-F45*FH(iF-1,JF,KFDUMMY)+F45*FH(iF+1,JF,KFDUMMY)-F9*FH(iF+2,JF,KFDUMMY)+FH(iF+3,JF,KFDUMMY))
fxy[p]=Xdxdy*(-XS6(jF-3,kF)+F9*XS6(jF-2,kF)-F45*XS6(jF-1,kF)+F45*XS6(jF+1,kF)-F9*XS6(jF+2,kF)+XS6(jF+3,kF));
fxz[p]=Xdxdz*(-XS6(jF,kF-3)+F9*XS6(jF,kF-2)-F45*XS6(jF,kF-1)+F45*XS6(jF,kF+1)-F9*XS6(jF,kF+2)+XS6(jF,kF+3));
#undef XS6
#define YS6(JF,KFDUMMY) (-FH(iF,JF-3,KFDUMMY)+F9*FH(iF,JF-2,KFDUMMY)-F45*FH(iF,JF-1,KFDUMMY)+F45*FH(iF,JF+1,KFDUMMY)-F9*FH(iF,JF+2,KFDUMMY)+FH(iF,JF+3,KFDUMMY))
fyz[p]=Xdydz*(-YS6(jF,kF-3)+F9*YS6(jF,kF-2)-F45*YS6(jF,kF-1)+F45*YS6(jF,kF+1)-F9*YS6(jF,kF+2)+YS6(jF,kF+3));
#undef YS6
}}}}
#undef FH
return;
}
#elif (ghost_width == 5)
{
/* 8th-order shell second derivatives — inherits 8th-order stencil coeffs from Cartesian */
const int ord=4;
int iminF=1,jminF=1,kminF=1;
if(Symmetry==OCTANT&&fabs(X[0])<dX)iminF=-3;
if(Symmetry==OCTANT&&fabs(Y[0])<dY)jminF=-3;
if((sst==2||sst==4)&&fabs(Y[0])<dY)jminF=-3;
const size_t nx=(size_t)ex1+2*ord,ny=(size_t)ex2+2*ord,nz=(size_t)ex3,fh_size=nx*ny*nz;
static double *fh_buf=NULL;static size_t cap=0;
if(fh_size>cap){free(fh_buf);fh_buf=(double*)aligned_alloc(64,fh_size*sizeof(double));cap=fh_size;}
double *fh=fh_buf;if(!fh)return;
symmetry_stbd(ord,ex,f,fh,SoA);
const double Sdxdx=ONE/(dX*dX),Sdydy=ONE/(dY*dY),Sdzdz=ONE/(dZ*dZ);
const double Fdxdx=F1o12/(dX*dX),Fdydy=F1o12/(dY*dY),Fdzdz=F1o12/(dZ*dZ);
const double Xdxdx=F1o180/(dX*dX),Xdydy=F1o180/(dY*dY),Xdzdz=F1o180/(dZ*dZ);
const double Edxdx=F1o5040/(dX*dX),Edydy=F1o5040/(dY*dY),Edzdz=F1o5040/(dZ*dZ);
const double Sdxdy=F1o4/(dX*dY),Sdxdz=F1o4/(dX*dZ),Sdydz=F1o4/(dY*dZ);
const double Fdxdy=F1o144/(dX*dY),Fdxdz=F1o144/(dX*dZ),Fdydz=F1o144/(dY*dZ);
const double Xdxdy=F1o3600/(dX*dY),Xdxdz=F1o3600/(dX*dZ),Xdydz=F1o3600/(dY*dZ);
const double Edxdy=F1o705600/(dX*dY),Edxdz=F1o705600/(dX*dZ),Edydz=F1o705600/(dY*dZ);
const size_t all=(size_t)ex1*ex2*ex3;
for(size_t p=0;p<all;++p){fxx[p]=fyy[p]=fzz[p]=fxy[p]=fxz[p]=fyz[p]=ZEO;}
const int i2_lo=(iminF>0)?iminF:0,j2_lo=(jminF>0)?jminF:0,k2_lo=1,i2_hi=ex1-2,j2_hi=ex2-2,k2_hi=ex3-2;
const int i4_lo=(iminF+1>0)?iminF+1:0,j4_lo=(jminF+1>0)?jminF+1:0,k4_lo=2,i4_hi=ex1-3,j4_hi=ex2-3,k4_hi=ex3-3;
const int i6_lo=(iminF+2>0)?iminF+2:0,j6_lo=(jminF+2>0)?jminF+2:0,k6_lo=3,i6_hi=ex1-4,j6_hi=ex2-4,k6_hi=ex3-4;
const int i8_lo=(iminF+3>0)?iminF+3:0,j8_lo=(jminF+3>0)?jminF+3:0,k8_lo=4,i8_hi=ex1-5,j8_hi=ex2-5,k8_hi=ex3-5;
const int has4=(i4_lo<=i4_hi&&j4_lo<=j4_hi&&k4_lo<=k4_hi),has6=(i6_lo<=i6_hi&&j6_lo<=j6_hi&&k6_lo<=k6_hi),has8=(i8_lo<=i8_hi&&j8_lo<=j8_hi&&k8_lo<=k8_hi);
#define FH(iF,jF,kF) fh[idx_fh_stbd(iF,jF,kF,ord,ex)]
/* 2nd-order pass */
if(i2_lo<=i2_hi&&j2_lo<=j2_hi&&k2_lo<=k2_hi){for(int k0=k2_lo;k0<=k2_hi;++k0){const int kF=k0+1;
for(int j0=j2_lo;j0<=j2_hi;++j0){const int jF=j0+1;
for(int i0=i2_lo;i0<=i2_hi;++i0){bool in4=has4&&i0>=i4_lo&&i0<=i4_hi&&j0>=j4_lo&&j0<=j4_hi&&k0>=k4_lo&&k0<=k4_hi;if(in4)continue;
const int iF=i0+1;const size_t p=idx_ex(i0,j0,k0,ex);
fxx[p]=Sdxdx*(FH(iF-1,jF,kF)-TWO*FH(iF,jF,kF)+FH(iF+1,jF,kF));
fyy[p]=Sdydy*(FH(iF,jF-1,kF)-TWO*FH(iF,jF,kF)+FH(iF,jF+1,kF));
fzz[p]=Sdzdz*(FH(iF,jF,kF-1)-TWO*FH(iF,jF,kF)+FH(iF,jF,kF+1));
fxy[p]=Sdxdy*(FH(iF-1,jF-1,kF)-FH(iF+1,jF-1,kF)-FH(iF-1,jF+1,kF)+FH(iF+1,jF+1,kF));
fxz[p]=Sdxdz*(FH(iF-1,jF,kF-1)-FH(iF+1,jF,kF-1)-FH(iF-1,jF,kF+1)+FH(iF+1,jF,kF+1));
fyz[p]=Sdydz*(FH(iF,jF-1,kF-1)-FH(iF,jF+1,kF-1)-FH(iF,jF-1,kF+1)+FH(iF,jF+1,kF+1));
}}}}
/* 4th-order pass */
if(has4){for(int k0=k4_lo;k0<=k4_hi;++k0){const int kF=k0+1;
for(int j0=j4_lo;j0<=j4_hi;++j0){const int jF=j0+1;
for(int i0=i4_lo;i0<=i4_hi;++i0){bool in6=has6&&i0>=i6_lo&&i0<=i6_hi&&j0>=j6_lo&&j0<=j6_hi&&k0>=k6_lo&&k0<=k6_hi;if(in6)continue;
const int iF=i0+1;const size_t p=idx_ex(i0,j0,k0,ex);
fxx[p]=Fdxdx*(-FH(iF-2,jF,kF)+F16*FH(iF-1,jF,kF)-F30*FH(iF,jF,kF)-FH(iF+2,jF,kF)+F16*FH(iF+1,jF,kF));
fyy[p]=Fdydy*(-FH(iF,jF-2,kF)+F16*FH(iF,jF-1,kF)-F30*FH(iF,jF,kF)-FH(iF,jF+2,kF)+F16*FH(iF,jF+1,kF));
fzz[p]=Fdzdz*(-FH(iF,jF,kF-2)+F16*FH(iF,jF,kF-1)-F30*FH(iF,jF,kF)-FH(iF,jF,kF+2)+F16*FH(iF,jF,kF+1));
{const double t_jm2=(FH(iF-2,jF-2,kF)-F8*FH(iF-1,jF-2,kF)+F8*FH(iF+1,jF-2,kF)-FH(iF+2,jF-2,kF));
const double t_jm1=(FH(iF-2,jF-1,kF)-F8*FH(iF-1,jF-1,kF)+F8*FH(iF+1,jF-1,kF)-FH(iF+2,jF-1,kF));
const double t_jp1=(FH(iF-2,jF+1,kF)-F8*FH(iF-1,jF+1,kF)+F8*FH(iF+1,jF+1,kF)-FH(iF+2,jF+1,kF));
const double t_jp2=(FH(iF-2,jF+2,kF)-F8*FH(iF-1,jF+2,kF)+F8*FH(iF+1,jF+2,kF)-FH(iF+2,jF+2,kF));
fxy[p]=Fdxdy*(t_jm2-F8*t_jm1+F8*t_jp1-t_jp2);}
{const double t_km2=(FH(iF-2,jF,kF-2)-F8*FH(iF-1,jF,kF-2)+F8*FH(iF+1,jF,kF-2)-FH(iF+2,jF,kF-2));
const double t_km1=(FH(iF-2,jF,kF-1)-F8*FH(iF-1,jF,kF-1)+F8*FH(iF+1,jF,kF-1)-FH(iF+2,jF,kF-1));
const double t_kp1=(FH(iF-2,jF,kF+1)-F8*FH(iF-1,jF,kF+1)+F8*FH(iF+1,jF,kF+1)-FH(iF+2,jF,kF+1));
const double t_kp2=(FH(iF-2,jF,kF+2)-F8*FH(iF-1,jF,kF+2)+F8*FH(iF+1,jF,kF+2)-FH(iF+2,jF,kF+2));
fxz[p]=Fdxdz*(t_km2-F8*t_km1+F8*t_kp1-t_kp2);}
{const double t_km2=(FH(iF,jF-2,kF-2)-F8*FH(iF,jF-1,kF-2)+F8*FH(iF,jF+1,kF-2)-FH(iF,jF+2,kF-2));
const double t_km1=(FH(iF,jF-2,kF-1)-F8*FH(iF,jF-1,kF-1)+F8*FH(iF,jF+1,kF-1)-FH(iF,jF+2,kF-1));
const double t_kp1=(FH(iF,jF-2,kF+1)-F8*FH(iF,jF-1,kF+1)+F8*FH(iF,jF+1,kF+1)-FH(iF,jF+2,kF+1));
const double t_kp2=(FH(iF,jF-2,kF+2)-F8*FH(iF,jF-1,kF+2)+F8*FH(iF,jF+1,kF+2)-FH(iF,jF+2,kF+2));
fyz[p]=Fdydz*(t_km2-F8*t_km1+F8*t_kp1-t_kp2);}
}}}}
/* 6th-order pass */
if(has6){for(int k0=k6_lo;k0<=k6_hi;++k0){const int kF=k0+1;
for(int j0=j6_lo;j0<=j6_hi;++j0){const int jF=j0+1;
for(int i0=i6_lo;i0<=i6_hi;++i0){if(has8&&i0>=i8_lo&&i0<=i8_hi&&j0>=j8_lo&&j0<=j8_hi&&k0>=k8_lo&&k0<=k8_hi)continue;
const int iF=i0+1;const size_t p=idx_ex(i0,j0,k0,ex);
fxx[p]=Xdxdx*(TWO*FH(iF-3,jF,kF)-F27*FH(iF-2,jF,kF)+F270*FH(iF-1,jF,kF)-F490*FH(iF,jF,kF)+F270*FH(iF+1,jF,kF)-F27*FH(iF+2,jF,kF)+TWO*FH(iF+3,jF,kF));
fyy[p]=Xdydy*(TWO*FH(iF,jF-3,kF)-F27*FH(iF,jF-2,kF)+F270*FH(iF,jF-1,kF)-F490*FH(iF,jF,kF)+F270*FH(iF,jF+1,kF)-F27*FH(iF,jF+2,kF)+TWO*FH(iF,jF+3,kF));
fzz[p]=Xdzdz*(TWO*FH(iF,jF,kF-3)-F27*FH(iF,jF,kF-2)+F270*FH(iF,jF,kF-1)-F490*FH(iF,jF,kF)+F270*FH(iF,jF,kF+1)-F27*FH(iF,jF,kF+2)+TWO*FH(iF,jF,kF+3));
#define XS6_8(JF,KFDUMMY) (-FH(iF-3,JF,KFDUMMY)+F9*FH(iF-2,JF,KFDUMMY)-F45*FH(iF-1,JF,KFDUMMY)+F45*FH(iF+1,JF,KFDUMMY)-F9*FH(iF+2,JF,KFDUMMY)+FH(iF+3,JF,KFDUMMY))
fxy[p]=Xdxdy*(-XS6_8(jF-3,kF)+F9*XS6_8(jF-2,kF)-F45*XS6_8(jF-1,kF)+F45*XS6_8(jF+1,kF)-F9*XS6_8(jF+2,kF)+XS6_8(jF+3,kF));
fxz[p]=Xdxdz*(-XS6_8(jF,kF-3)+F9*XS6_8(jF,kF-2)-F45*XS6_8(jF,kF-1)+F45*XS6_8(jF,kF+1)-F9*XS6_8(jF,kF+2)+XS6_8(jF,kF+3));
#undef XS6_8
#define YS6_8(JF,KFDUMMY) (-FH(iF,JF-3,KFDUMMY)+F9*FH(iF,JF-2,KFDUMMY)-F45*FH(iF,JF-1,KFDUMMY)+F45*FH(iF,JF+1,KFDUMMY)-F9*FH(iF,JF+2,KFDUMMY)+FH(iF,JF+3,KFDUMMY))
fyz[p]=Xdydz*(-YS6_8(jF,kF-3)+F9*YS6_8(jF,kF-2)-F45*YS6_8(jF,kF-1)+F45*YS6_8(jF,kF+1)-F9*YS6_8(jF,kF+2)+YS6_8(jF,kF+3));
#undef YS6_8
}}}}
/* 8th-order pass */
if(has8){for(int k0=k8_lo;k0<=k8_hi;++k0){const int kF=k0+1;
for(int j0=j8_lo;j0<=j8_hi;++j0){const int jF=j0+1;
for(int i0=i8_lo;i0<=i8_hi;++i0){const int iF=i0+1;const size_t p=idx_ex(i0,j0,k0,ex);
fxx[p]=Edxdx*(-(double)9*FH(iF-4,jF,kF)+F128*FH(iF-3,jF,kF)-F1008*FH(iF-2,jF,kF)+F8064*FH(iF-1,jF,kF)-F14350*FH(iF,jF,kF)+F8064*FH(iF+1,jF,kF)-F1008*FH(iF+2,jF,kF)+F128*FH(iF+3,jF,kF)-(double)9*FH(iF+4,jF,kF));
fyy[p]=Edydy*(-(double)9*FH(iF,jF-4,kF)+F128*FH(iF,jF-3,kF)-F1008*FH(iF,jF-2,kF)+F8064*FH(iF,jF-1,kF)-F14350*FH(iF,jF,kF)+F8064*FH(iF,jF+1,kF)-F1008*FH(iF,jF+2,kF)+F128*FH(iF,jF+3,kF)-(double)9*FH(iF,jF+4,kF));
fzz[p]=Edzdz*(-(double)9*FH(iF,jF,kF-4)+F128*FH(iF,jF,kF-3)-F1008*FH(iF,jF,kF-2)+F8064*FH(iF,jF,kF-1)-F14350*FH(iF,jF,kF)+F8064*FH(iF,jF,kF+1)-F1008*FH(iF,jF,kF+2)+F128*FH(iF,jF,kF+3)-(double)9*FH(iF,jF,kF+4));
#define XS8(JF,KFDUMMY) (+(double)3*FH(iF-4,JF,KFDUMMY)-F32*FH(iF-3,JF,KFDUMMY)+F168*FH(iF-2,JF,KFDUMMY)-F672*FH(iF-1,JF,KFDUMMY)+F672*FH(iF+1,JF,KFDUMMY)-F168*FH(iF+2,JF,KFDUMMY)+F32*FH(iF+3,JF,KFDUMMY)-(double)3*FH(iF+4,JF,KFDUMMY))
fxy[p]=Edxdy*(+(double)3*XS8(jF-4,kF)-F32*XS8(jF-3,kF)+F168*XS8(jF-2,kF)-F672*XS8(jF-1,kF)+F672*XS8(jF+1,kF)-F168*XS8(jF+2,kF)+F32*XS8(jF+3,kF)-(double)3*XS8(jF+4,kF));
fxz[p]=Edxdz*(+(double)3*XS8(jF,kF-4)-F32*XS8(jF,kF-3)+F168*XS8(jF,kF-2)-F672*XS8(jF,kF-1)+F672*XS8(jF,kF+1)-F168*XS8(jF,kF+2)+F32*XS8(jF,kF+3)-(double)3*XS8(jF,kF+4));
#undef XS8
#define YS8(JF,KFDUMMY) (+(double)3*FH(iF,JF-4,KFDUMMY)-F32*FH(iF,JF-3,KFDUMMY)+F168*FH(iF,JF-2,KFDUMMY)-F672*FH(iF,JF-1,KFDUMMY)+F672*FH(iF,JF+1,KFDUMMY)-F168*FH(iF,JF+2,KFDUMMY)+F32*FH(iF,JF+3,KFDUMMY)-(double)3*FH(iF,JF+4,KFDUMMY))
fyz[p]=Edydz*(+(double)3*YS8(jF,kF-4)-F32*YS8(jF,kF-3)+F168*YS8(jF,kF-2)-F672*YS8(jF,kF-1)+F672*YS8(jF,kF+1)-F168*YS8(jF,kF+2)+F32*YS8(jF,kF+3)-(double)3*YS8(jF,kF+4));
#undef YS8
}}}}
#undef FH
return;
}
#else
#error "fdderivs_sh_c.C: unsupported ghost_width"
#endif
}

View File

@@ -1,107 +0,0 @@
#include "macrodef.h"
#include "share_func.h"
#include <cstddef>
/* Forward declarations — Fortran-mangled names from shell C kernels */
extern "C" {
void fderivs_sh_(const int ex[3], const double *f,
double *fx, double *fy, double *fz,
const double *X, const double *Y, const double *Z,
double SYM1, double SYM2, double SYM3,
int Symmetry, int onoff, int sst);
void fdderivs_sh_(const int ex[3], const double *f,
double *fxx, double *fxy, double *fxz,
double *fyy, double *fyz, double *fzz,
const double *X, const double *Y, const double *Z,
double SYM1, double SYM2, double SYM3,
int Symmetry, int onoff, int sst);
void fdderivs_shc_(int *ex,
double *f,
double *fxx, double *fxy, double *fxz,
double *fyy, double *fyz, double *fzz,
double *crho, double *sigma, double *R,
double &SYM1, double &SYM2, double &SYM3,
int &Symmetry, int &Lev, int &sst,
double *drhodx, double *drhody, double *drhodz,
double *dsigmadx, double *dsigmady, double *dsigmadz,
double *dRdx, double *dRdy, double *dRdz,
double *drhodxx, double *drhodxy, double *drhodxz,
double *drhodyy, double *drhodyz, double *drhodzz,
double *dsigmadxx, double *dsigmadxy, double *dsigmadxz,
double *dsigmadyy, double *dsigmadyz, double *dsigmadzz,
double *dRdxx, double *dRdxy, double *dRdxz,
double *dRdyy, double *dRdyz, double *dRdzz)
{
const int ex3[3] = { ex[0], ex[1], ex[2] };
const size_t n = (size_t)ex[0] * (size_t)ex[1] * (size_t)ex[2];
double *gx = (double*)malloc(n * sizeof(double));
double *gy = (double*)malloc(n * sizeof(double));
double *gz = (double*)malloc(n * sizeof(double));
double *gxx = (double*)malloc(n * sizeof(double));
double *gxy = (double*)malloc(n * sizeof(double));
double *gxz = (double*)malloc(n * sizeof(double));
double *gyy = (double*)malloc(n * sizeof(double));
double *gyz = (double*)malloc(n * sizeof(double));
double *gzz = (double*)malloc(n * sizeof(double));
if (!gx||!gy||!gz||!gxx||!gxy||!gxz||!gyy||!gyz||!gzz) {
free(gx);free(gy);free(gz);free(gxx);free(gxy);free(gxz);free(gyy);free(gyz);free(gzz);
return;
}
fderivs_sh_(ex3, f, gx, gy, gz, crho, sigma, R, SYM1, SYM2, SYM3, Symmetry, Lev, sst);
fdderivs_sh_(ex3, f, gxx, gxy, gxz, gyy, gyz, gzz, crho, sigma, R, SYM1, SYM2, SYM3, Symmetry, Lev, sst);
for (size_t i = 0; i < n; ++i) {
const double rx=drhodx[i], ry=drhody[i], rz=drhodz[i];
const double sx=dsigmadx[i], sy=dsigmady[i], sz=dsigmadz[i];
const double Rx=dRdx[i], Ry=dRdy[i], Rz=dRdz[i];
const double rxx=drhodxx[i], rxy=drhodxy[i], rxz=drhodxz[i];
const double ryy=drhodyy[i], ryz=drhodyz[i], rzz=drhodzz[i];
const double sxx=dsigmadxx[i], sxy=dsigmadxy[i], sxz=dsigmadxz[i];
const double syy=dsigmadyy[i], syz=dsigmadyz[i], szz=dsigmadzz[i];
const double Rxx=dRdxx[i], Rxy=dRdxy[i], Rxz=dRdxz[i];
const double Ryy=dRdyy[i], Ryz=dRdyz[i], Rzz=dRdzz[i];
const double Gr=gx[i], Gs=gy[i], GR=gz[i];
const double Grr=gxx[i], Grs=gxy[i], GrR=gxz[i];
const double Gss=gyy[i], GsR=gyz[i], GRR=gzz[i];
/* fxx */
fxx[i] = rx*rx*Grr + sx*sx*Gss + Rx*Rx*GRR
+ 2.0*(rx*sx*Grs + rx*Rx*GrR + sx*Rx*GsR)
+ rxx*Gr + sxx*Gs + Rxx*GR;
/* fxy */
fxy[i] = rx*ry*Grr + sx*sy*Gss + Rx*Ry*GRR
+ rx*sy*Grs + ry*sx*Grs + rx*Ry*GrR + ry*Rx*GrR + sx*Ry*GsR + sy*Rx*GsR
+ rxy*Gr + sxy*Gs + Rxy*GR;
/* fxz */
fxz[i] = rx*rz*Grr + sx*sz*Gss + Rx*Rz*GRR
+ rx*sz*Grs + rz*sx*Grs + rx*Rz*GrR + rz*Rx*GrR + sx*Rz*GsR + sz*Rx*GsR
+ rxz*Gr + sxz*Gs + Rxz*GR;
/* fyy */
fyy[i] = ry*ry*Grr + sy*sy*Gss + Ry*Ry*GRR
+ 2.0*(ry*sy*Grs + ry*Ry*GrR + sy*Ry*GsR)
+ ryy*Gr + syy*Gs + Ryy*GR;
/* fyz */
fyz[i] = ry*rz*Grr + sy*sz*Gss + Ry*Rz*GRR
+ ry*sz*Grs + rz*sy*Grs + ry*Rz*GrR + rz*Ry*GrR + sy*Rz*GsR + sz*Ry*GsR
+ ryz*Gr + syz*Gs + Ryz*GR;
/* fzz */
fzz[i] = rz*rz*Grr + sz*sz*Gss + Rz*Rz*GRR
+ 2.0*(rz*sz*Grs + rz*Rz*GrR + sz*Rz*GsR)
+ rzz*Gr + szz*Gs + Rzz*GR;
}
free(gx);free(gy);free(gz);free(gxx);free(gxy);free(gxz);free(gyy);free(gyz);free(gzz);
}
} // extern "C"

View File

@@ -1,18 +1,14 @@
#include "macrodef.h"
#include "tool.h"
/*
* C 版 fderivs — first derivatives df/dx, df/dy, df/dz.
* C 版 fderivs
*
* Finite difference order is selected at compile time via the ghost_width macro
* (defined in macrodef.fh):
* ghost_width = 2 → 2nd-order
* ghost_width = 3 → 4th-order
* ghost_width = 4 → 6th-order
* ghost_width = 5 → 8th-order
* Fortran:
* subroutine fderivs(ex,f,fx,fy,fz,X,Y,Z,SYM1,SYM2,SYM3,symmetry,onoff)
*
* Multi-pass overwrite strategy: compute the widest (lowest-order) stencil first,
* then overwrite interior regions with progressively higher-order stencils.
* 约定:
* f, fx, fy, fz: ex1*ex2*ex3按 idx_ex 布局
* X: ex1, Y: ex2, Z: ex3
*/
void fderivs(const int ex[3],
const double *f,
@@ -21,596 +17,151 @@ void fderivs(const int ex[3],
double SYM1, double SYM2, double SYM3,
int Symmetry, int onoff)
{
(void)onoff;
(void)onoff; // Fortran 里没用到
const double ZEO = 0.0, ONE = 1.0, TWO = 2.0, EIT = 8.0;
const double F9 = 9.0, F12 = 12.0, F45 = 45.0, F60 = 60.0;
const double F32 = 32.0, F168 = 168.0, F672 = 672.0, F840 = 840.0;
const double ZEO = 0.0, ONE = 1.0;
const double TWO = 2.0, EIT = 8.0;
const double F12 = 12.0;
const int NO_SYMM = 0, EQ_SYMM = 1;
const int NO_SYMM = 0, EQ_SYMM = 1; // OCTANT=2 在本子程序里不直接用
const int ex1 = ex[0], ex2 = ex[1], ex3 = ex[2];
// dX = X(2)-X(1) -> C: X[1]-X[0]
const double dX = X[1] - X[0];
const double dY = Y[1] - Y[0];
const double dZ = Z[1] - Z[0];
const int imaxF = ex1, jmaxF = ex2, kmaxF = ex3;
// Fortran 1-based bounds
const int imaxF = ex1;
const int jmaxF = ex2;
const int kmaxF = ex3;
const int gw = ghost_width; // compile-time constant
int iminF = 1, jminF = 1, kminF = 1;
if (Symmetry > NO_SYMM && fabs(Z[0]) < dZ) kminF = -1;
if (Symmetry > EQ_SYMM && fabs(X[0]) < dX) iminF = -1;
if (Symmetry > EQ_SYMM && fabs(Y[0]) < dY) jminF = -1;
#if (ghost_width == 2)
/* ---- 2nd-order ------------------------------------------------------ */
{
const int ord = 1; // symmetry_bd ord = ghost_width - 1
// SoA(1:3) = SYM1,SYM2,SYM3
const double SoA[3] = { SYM1, SYM2, SYM3 };
int iminF = 1, jminF = 1, kminF = 1;
if (Symmetry > NO_SYMM && fabs(Z[0]) < dZ) kminF = 0;
if (Symmetry > EQ_SYMM && fabs(X[0]) < dX) iminF = 0;
if (Symmetry > EQ_SYMM && fabs(Y[0]) < dY) jminF = 0;
// fh: (ex1+2)*(ex2+2)*(ex3+2) because ord=2
const size_t nx = (size_t)ex1 + 2;
const size_t ny = (size_t)ex2 + 2;
const size_t nz = (size_t)ex3 + 2;
const size_t fh_size = nx * ny * nz;
static double *fh = NULL;
static size_t cap = 0;
const double SoA[3] = { SYM1, SYM2, SYM3 };
const size_t nx = (size_t)ex1 + ord;
const size_t ny = (size_t)ex2 + ord;
const size_t nz = (size_t)ex3 + ord;
const size_t fh_size = nx * ny * nz;
static double *fh_buf = NULL;
static size_t cap = 0;
if (fh_size > cap) {
free(fh_buf);
fh_buf = (double*)aligned_alloc(64, fh_size * sizeof(double));
cap = fh_size;
}
double *fh = fh_buf;
if (!fh) return;
symmetry_bd(ord, ex, f, fh, SoA);
const double d2dx = ONE / TWO / dX;
const double d2dy = ONE / TWO / dY;
const double d2dz = ONE / TWO / dZ;
const size_t all = (size_t)ex1 * (size_t)ex2 * (size_t)ex3;
for (size_t p = 0; p < all; ++p) {
fx[p] = ZEO; fy[p] = ZEO; fz[p] = ZEO;
}
/* 2nd-order pass: [-1, 0, +1] / (2*dx) */
const int i2_lo = (iminF > 0) ? iminF : 0;
const int j2_lo = (jminF > 0) ? jminF : 0;
const int k2_lo = (kminF > 0) ? kminF : 0;
const int i2_hi = ex1 - 2;
const int j2_hi = ex2 - 2;
const int k2_hi = ex3 - 2;
if (i2_lo <= i2_hi && j2_lo <= j2_hi && k2_lo <= k2_hi) {
for (int k0 = k2_lo; k0 <= k2_hi; ++k0) {
const int kF = k0 + 1;
for (int j0 = j2_lo; j0 <= j2_hi; ++j0) {
const int jF = j0 + 1;
for (int i0 = i2_lo; i0 <= i2_hi; ++i0) {
const int iF = i0 + 1;
const size_t p = idx_ex(i0, j0, k0, ex);
fx[p] = d2dx * (
-fh[idx_fh_F_ord1(iF - 1, jF, kF, ex)] +
fh[idx_fh_F_ord1(iF + 1, jF, kF, ex)]
);
fy[p] = d2dy * (
-fh[idx_fh_F_ord1(iF, jF - 1, kF, ex)] +
fh[idx_fh_F_ord1(iF, jF + 1, kF, ex)]
);
fz[p] = d2dz * (
-fh[idx_fh_F_ord1(iF, jF, kF - 1, ex)] +
fh[idx_fh_F_ord1(iF, jF, kF + 1, ex)]
);
}
}
}
}
return;
if (fh_size > cap) {
free(fh);
fh = (double*)aligned_alloc(64, fh_size * sizeof(double));
cap = fh_size;
}
#elif (ghost_width == 3)
/* ---- 4th-order (original code) ------------------------------------ */
{
const int ord = 2; // symmetry_bd ord
// double *fh = (double*)malloc(fh_size * sizeof(double));
if (!fh) return;
int iminF = 1, jminF = 1, kminF = 1;
if (Symmetry > NO_SYMM && fabs(Z[0]) < dZ) kminF = -1;
if (Symmetry > EQ_SYMM && fabs(X[0]) < dX) iminF = -1;
if (Symmetry > EQ_SYMM && fabs(Y[0]) < dY) jminF = -1;
// call symmetry_bd(2,ex,f,fh,SoA)
symmetry_bd(2, ex, f, fh, SoA);
const double SoA[3] = { SYM1, SYM2, SYM3 };
const double d12dx = ONE / F12 / dX;
const double d12dy = ONE / F12 / dY;
const double d12dz = ONE / F12 / dZ;
const size_t nx = (size_t)ex1 + ord;
const size_t ny = (size_t)ex2 + ord;
const size_t nz = (size_t)ex3 + ord;
const size_t fh_size = nx * ny * nz;
const double d2dx = ONE / TWO / dX;
const double d2dy = ONE / TWO / dY;
const double d2dz = ONE / TWO / dZ;
static double *fh_buf = NULL;
static size_t cap = 0;
if (fh_size > cap) {
free(fh_buf);
fh_buf = (double*)aligned_alloc(64, fh_size * sizeof(double));
cap = fh_size;
}
double *fh = fh_buf;
if (!fh) return;
symmetry_bd(ord, ex, f, fh, SoA);
const double d12dx = ONE / F12 / dX;
const double d12dy = ONE / F12 / dY;
const double d12dz = ONE / F12 / dZ;
const double d2dx = ONE / TWO / dX;
const double d2dy = ONE / TWO / dY;
const double d2dz = ONE / TWO / dZ;
const size_t all = (size_t)ex1 * (size_t)ex2 * (size_t)ex3;
for (size_t p = 0; p < all; ++p) {
fx[p] = ZEO; fy[p] = ZEO; fz[p] = ZEO;
}
const int i2_lo = (iminF > 0) ? iminF : 0;
const int j2_lo = (jminF > 0) ? jminF : 0;
const int k2_lo = (kminF > 0) ? kminF : 0;
const int i2_hi = ex1 - 2;
const int j2_hi = ex2 - 2;
const int k2_hi = ex3 - 2;
const int i4_lo = (iminF + 1 > 0) ? (iminF + 1) : 0;
const int j4_lo = (jminF + 1 > 0) ? (jminF + 1) : 0;
const int k4_lo = (kminF + 1 > 0) ? (kminF + 1) : 0;
const int i4_hi = ex1 - 3;
const int j4_hi = ex2 - 3;
const int k4_hi = ex3 - 3;
if (i2_lo <= i2_hi && j2_lo <= j2_hi && k2_lo <= k2_hi) {
for (int k0 = k2_lo; k0 <= k2_hi; ++k0) {
const int kF = k0 + 1;
for (int j0 = j2_lo; j0 <= j2_hi; ++j0) {
const int jF = j0 + 1;
for (int i0 = i2_lo; i0 <= i2_hi; ++i0) {
const int iF = i0 + 1;
const size_t p = idx_ex(i0, j0, k0, ex);
fx[p] = d2dx * (
-fh[idx_fh_F_ord2(iF - 1, jF, kF, ex)] +
fh[idx_fh_F_ord2(iF + 1, jF, kF, ex)]
);
fy[p] = d2dy * (
-fh[idx_fh_F_ord2(iF, jF - 1, kF, ex)] +
fh[idx_fh_F_ord2(iF, jF + 1, kF, ex)]
);
fz[p] = d2dz * (
-fh[idx_fh_F_ord2(iF, jF, kF - 1, ex)] +
fh[idx_fh_F_ord2(iF, jF, kF + 1, ex)]
);
}
}
}
}
if (i4_lo <= i4_hi && j4_lo <= j4_hi && k4_lo <= k4_hi) {
for (int k0 = k4_lo; k0 <= k4_hi; ++k0) {
const int kF = k0 + 1;
for (int j0 = j4_lo; j0 <= j4_hi; ++j0) {
const int jF = j0 + 1;
for (int i0 = i4_lo; i0 <= i4_hi; ++i0) {
const int iF = i0 + 1;
const size_t p = idx_ex(i0, j0, k0, ex);
fx[p] = d12dx * (
fh[idx_fh_F_ord2(iF - 2, jF, kF, ex)] -
EIT * fh[idx_fh_F_ord2(iF - 1, jF, kF, ex)] +
EIT * fh[idx_fh_F_ord2(iF + 1, jF, kF, ex)] -
fh[idx_fh_F_ord2(iF + 2, jF, kF, ex)]
);
fy[p] = d12dy * (
fh[idx_fh_F_ord2(iF, jF - 2, kF, ex)] -
EIT * fh[idx_fh_F_ord2(iF, jF - 1, kF, ex)] +
EIT * fh[idx_fh_F_ord2(iF, jF + 1, kF, ex)] -
fh[idx_fh_F_ord2(iF, jF + 2, kF, ex)]
);
fz[p] = d12dz * (
fh[idx_fh_F_ord2(iF, jF, kF - 2, ex)] -
EIT * fh[idx_fh_F_ord2(iF, jF, kF - 1, ex)] +
EIT * fh[idx_fh_F_ord2(iF, jF, kF + 1, ex)] -
fh[idx_fh_F_ord2(iF, jF, kF + 2, ex)]
);
}
}
}
}
return;
// fx = fy = fz = 0
const size_t all = (size_t)ex1 * (size_t)ex2 * (size_t)ex3;
for (size_t p = 0; p < all; ++p) {
fx[p] = ZEO;
fy[p] = ZEO;
fz[p] = ZEO;
}
#elif (ghost_width == 4)
/* ---- 6th-order ----------------------------------------------------- */
{
const int ord = 3;
int iminF = 1, jminF = 1, kminF = 1;
if (Symmetry > NO_SYMM && fabs(Z[0]) < dZ) kminF = -2;
if (Symmetry > EQ_SYMM && fabs(X[0]) < dX) iminF = -2;
if (Symmetry > EQ_SYMM && fabs(Y[0]) < dY) jminF = -2;
/*
* 两段式:
* 1) 先在二阶可用区域计算二阶模板
* 2) 再在高阶可用区域覆盖为四阶模板
*
* 与原 if/elseif 逻辑等价,但减少逐点分支判断。
*/
const int i2_lo = (iminF > 0) ? iminF : 0;
const int j2_lo = (jminF > 0) ? jminF : 0;
const int k2_lo = (kminF > 0) ? kminF : 0;
const int i2_hi = ex1 - 2;
const int j2_hi = ex2 - 2;
const int k2_hi = ex3 - 2;
const double SoA[3] = { SYM1, SYM2, SYM3 };
const int i4_lo = (iminF + 1 > 0) ? (iminF + 1) : 0;
const int j4_lo = (jminF + 1 > 0) ? (jminF + 1) : 0;
const int k4_lo = (kminF + 1 > 0) ? (kminF + 1) : 0;
const int i4_hi = ex1 - 3;
const int j4_hi = ex2 - 3;
const int k4_hi = ex3 - 3;
const size_t nx = (size_t)ex1 + ord;
const size_t ny = (size_t)ex2 + ord;
const size_t nz = (size_t)ex3 + ord;
const size_t fh_size = nx * ny * nz;
if (i2_lo <= i2_hi && j2_lo <= j2_hi && k2_lo <= k2_hi) {
for (int k0 = k2_lo; k0 <= k2_hi; ++k0) {
const int kF = k0 + 1;
for (int j0 = j2_lo; j0 <= j2_hi; ++j0) {
const int jF = j0 + 1;
for (int i0 = i2_lo; i0 <= i2_hi; ++i0) {
const int iF = i0 + 1;
const size_t p = idx_ex(i0, j0, k0, ex);
static double *fh_buf = NULL;
static size_t cap = 0;
if (fh_size > cap) {
free(fh_buf);
fh_buf = (double*)aligned_alloc(64, fh_size * sizeof(double));
cap = fh_size;
}
double *fh = fh_buf;
if (!fh) return;
fx[p] = d2dx * (
-fh[idx_fh_F_ord2(iF - 1, jF, kF, ex)] +
fh[idx_fh_F_ord2(iF + 1, jF, kF, ex)]
);
symmetry_bd(ord, ex, f, fh, SoA);
fy[p] = d2dy * (
-fh[idx_fh_F_ord2(iF, jF - 1, kF, ex)] +
fh[idx_fh_F_ord2(iF, jF + 1, kF, ex)]
);
/* Denominators */
const double d60dx = ONE / F60 / dX;
const double d60dy = ONE / F60 / dY;
const double d60dz = ONE / F60 / dZ;
const double d12dx = ONE / F12 / dX;
const double d12dy = ONE / F12 / dY;
const double d12dz = ONE / F12 / dZ;
const double d2dx = ONE / TWO / dX;
const double d2dy = ONE / TWO / dY;
const double d2dz = ONE / TWO / dZ;
const size_t all = (size_t)ex1 * (size_t)ex2 * (size_t)ex3;
for (size_t p = 0; p < all; ++p) {
fx[p] = ZEO; fy[p] = ZEO; fz[p] = ZEO;
}
/* 2nd-order pass: 3pt, widest */
const int i2_lo = (iminF > 0) ? iminF : 0;
const int j2_lo = (jminF > 0) ? jminF : 0;
const int k2_lo = (kminF > 0) ? kminF : 0;
const int i2_hi = ex1 - 2;
const int j2_hi = ex2 - 2;
const int k2_hi = ex3 - 2;
/* 4th-order pass: 5pt */
const int i4_lo = (iminF + 1 > 0) ? (iminF + 1) : 0;
const int j4_lo = (jminF + 1 > 0) ? (jminF + 1) : 0;
const int k4_lo = (kminF + 1 > 0) ? (kminF + 1) : 0;
const int i4_hi = ex1 - 3;
const int j4_hi = ex2 - 3;
const int k4_hi = ex3 - 3;
/* 6th-order pass: 7pt, narrowest interior */
const int i6_lo = (iminF + 2 > 0) ? (iminF + 2) : 0;
const int j6_lo = (jminF + 2 > 0) ? (jminF + 2) : 0;
const int k6_lo = (kminF + 2 > 0) ? (kminF + 2) : 0;
const int i6_hi = ex1 - 4;
const int j6_hi = ex2 - 4;
const int k6_hi = ex3 - 4;
/* 2nd-order */
if (i2_lo <= i2_hi && j2_lo <= j2_hi && k2_lo <= k2_hi) {
for (int k0 = k2_lo; k0 <= k2_hi; ++k0) {
const int kF = k0 + 1;
for (int j0 = j2_lo; j0 <= j2_hi; ++j0) {
const int jF = j0 + 1;
for (int i0 = i2_lo; i0 <= i2_hi; ++i0) {
const int iF = i0 + 1;
const size_t p = idx_ex(i0, j0, k0, ex);
fx[p] = d2dx * (
-fh[idx_fh_F(iF - 1, jF, kF, ex)] +
fh[idx_fh_F(iF + 1, jF, kF, ex)]);
fy[p] = d2dy * (
-fh[idx_fh_F(iF, jF - 1, kF, ex)] +
fh[idx_fh_F(iF, jF + 1, kF, ex)]);
fz[p] = d2dz * (
-fh[idx_fh_F(iF, jF, kF - 1, ex)] +
fh[idx_fh_F(iF, jF, kF + 1, ex)]);
}
fz[p] = d2dz * (
-fh[idx_fh_F_ord2(iF, jF, kF - 1, ex)] +
fh[idx_fh_F_ord2(iF, jF, kF + 1, ex)]
);
}
}
}
/* 4th-order overwrite */
if (i4_lo <= i4_hi && j4_lo <= j4_hi && k4_lo <= k4_hi) {
for (int k0 = k4_lo; k0 <= k4_hi; ++k0) {
const int kF = k0 + 1;
for (int j0 = j4_lo; j0 <= j4_hi; ++j0) {
const int jF = j0 + 1;
for (int i0 = i4_lo; i0 <= i4_hi; ++i0) {
const int iF = i0 + 1;
const size_t p = idx_ex(i0, j0, k0, ex);
fx[p] = d12dx * (
fh[idx_fh_F(iF - 2, jF, kF, ex)] -
EIT * fh[idx_fh_F(iF - 1, jF, kF, ex)] +
EIT * fh[idx_fh_F(iF + 1, jF, kF, ex)] -
fh[idx_fh_F(iF + 2, jF, kF, ex)]);
fy[p] = d12dy * (
fh[idx_fh_F(iF, jF - 2, kF, ex)] -
EIT * fh[idx_fh_F(iF, jF - 1, kF, ex)] +
EIT * fh[idx_fh_F(iF, jF + 1, kF, ex)] -
fh[idx_fh_F(iF, jF + 2, kF, ex)]);
fz[p] = d12dz * (
fh[idx_fh_F(iF, jF, kF - 2, ex)] -
EIT * fh[idx_fh_F(iF, jF, kF - 1, ex)] +
EIT * fh[idx_fh_F(iF, jF, kF + 1, ex)] -
fh[idx_fh_F(iF, jF, kF + 2, ex)]);
}
}
}
}
/* 6th-order overwrite: [-1,+9,-45,0,+45,-9,+1] / (60*dx) */
if (i6_lo <= i6_hi && j6_lo <= j6_hi && k6_lo <= k6_hi) {
for (int k0 = k6_lo; k0 <= k6_hi; ++k0) {
const int kF = k0 + 1;
for (int j0 = j6_lo; j0 <= j6_hi; ++j0) {
const int jF = j0 + 1;
for (int i0 = i6_lo; i0 <= i6_hi; ++i0) {
const int iF = i0 + 1;
const size_t p = idx_ex(i0, j0, k0, ex);
fx[p] = d60dx * (
-fh[idx_fh_F(iF - 3, jF, kF, ex)] +
F9 * fh[idx_fh_F(iF - 2, jF, kF, ex)] -
F45 * fh[idx_fh_F(iF - 1, jF, kF, ex)] +
F45 * fh[idx_fh_F(iF + 1, jF, kF, ex)] -
F9 * fh[idx_fh_F(iF + 2, jF, kF, ex)] +
fh[idx_fh_F(iF + 3, jF, kF, ex)]);
fy[p] = d60dy * (
-fh[idx_fh_F(iF, jF - 3, kF, ex)] +
F9 * fh[idx_fh_F(iF, jF - 2, kF, ex)] -
F45 * fh[idx_fh_F(iF, jF - 1, kF, ex)] +
F45 * fh[idx_fh_F(iF, jF + 1, kF, ex)] -
F9 * fh[idx_fh_F(iF, jF + 2, kF, ex)] +
fh[idx_fh_F(iF, jF + 3, kF, ex)]);
fz[p] = d60dz * (
-fh[idx_fh_F(iF, jF, kF - 3, ex)] +
F9 * fh[idx_fh_F(iF, jF, kF - 2, ex)] -
F45 * fh[idx_fh_F(iF, jF, kF - 1, ex)] +
F45 * fh[idx_fh_F(iF, jF, kF + 1, ex)] -
F9 * fh[idx_fh_F(iF, jF, kF + 2, ex)] +
fh[idx_fh_F(iF, jF, kF + 3, ex)]);
}
}
}
}
return;
}
#elif (ghost_width == 5)
/* ---- 8th-order ----------------------------------------------------- */
{
const int ord = 5;
int iminF = 1, jminF = 1, kminF = 1;
if (Symmetry > NO_SYMM && fabs(Z[0]) < dZ) kminF = -3;
if (Symmetry > EQ_SYMM && fabs(X[0]) < dX) iminF = -3;
if (Symmetry > EQ_SYMM && fabs(Y[0]) < dY) jminF = -3;
if (i4_lo <= i4_hi && j4_lo <= j4_hi && k4_lo <= k4_hi) {
for (int k0 = k4_lo; k0 <= k4_hi; ++k0) {
const int kF = k0 + 1;
for (int j0 = j4_lo; j0 <= j4_hi; ++j0) {
const int jF = j0 + 1;
for (int i0 = i4_lo; i0 <= i4_hi; ++i0) {
const int iF = i0 + 1;
const size_t p = idx_ex(i0, j0, k0, ex);
const double SoA[3] = { SYM1, SYM2, SYM3 };
fx[p] = d12dx * (
fh[idx_fh_F_ord2(iF - 2, jF, kF, ex)] -
EIT * fh[idx_fh_F_ord2(iF - 1, jF, kF, ex)] +
EIT * fh[idx_fh_F_ord2(iF + 1, jF, kF, ex)] -
fh[idx_fh_F_ord2(iF + 2, jF, kF, ex)]
);
const size_t nx = (size_t)ex1 + ord;
const size_t ny = (size_t)ex2 + ord;
const size_t nz = (size_t)ex3 + ord;
const size_t fh_size = nx * ny * nz;
fy[p] = d12dy * (
fh[idx_fh_F_ord2(iF, jF - 2, kF, ex)] -
EIT * fh[idx_fh_F_ord2(iF, jF - 1, kF, ex)] +
EIT * fh[idx_fh_F_ord2(iF, jF + 1, kF, ex)] -
fh[idx_fh_F_ord2(iF, jF + 2, kF, ex)]
);
static double *fh_buf = NULL;
static size_t cap = 0;
if (fh_size > cap) {
free(fh_buf);
fh_buf = (double*)aligned_alloc(64, fh_size * sizeof(double));
cap = fh_size;
}
double *fh = fh_buf;
if (!fh) return;
symmetry_bd(ord, ex, f, fh, SoA);
const double d840dx = ONE / F840 / dX;
const double d840dy = ONE / F840 / dY;
const double d840dz = ONE / F840 / dZ;
const double d60dx = ONE / F60 / dX;
const double d60dy = ONE / F60 / dY;
const double d60dz = ONE / F60 / dZ;
const double d12dx = ONE / F12 / dX;
const double d12dy = ONE / F12 / dY;
const double d12dz = ONE / F12 / dZ;
const double d2dx = ONE / TWO / dX;
const double d2dy = ONE / TWO / dY;
const double d2dz = ONE / TWO / dZ;
const size_t all = (size_t)ex1 * (size_t)ex2 * (size_t)ex3;
for (size_t p = 0; p < all; ++p) {
fx[p] = ZEO; fy[p] = ZEO; fz[p] = ZEO;
}
/* 2nd: 3pt, widest */
const int i2_lo = (iminF > 0) ? iminF : 0;
const int j2_lo = (jminF > 0) ? jminF : 0;
const int k2_lo = (kminF > 0) ? kminF : 0;
const int i2_hi = ex1 - 2;
const int j2_hi = ex2 - 2;
const int k2_hi = ex3 - 2;
/* 4th: 5pt */
const int i4_lo = (iminF + 1 > 0) ? (iminF + 1) : 0;
const int j4_lo = (jminF + 1 > 0) ? (jminF + 1) : 0;
const int k4_lo = (kminF + 1 > 0) ? (kminF + 1) : 0;
const int i4_hi = ex1 - 3;
const int j4_hi = ex2 - 3;
const int k4_hi = ex3 - 3;
/* 6th: 7pt */
const int i6_lo = (iminF + 2 > 0) ? (iminF + 2) : 0;
const int j6_lo = (jminF + 2 > 0) ? (jminF + 2) : 0;
const int k6_lo = (kminF + 2 > 0) ? (kminF + 2) : 0;
const int i6_hi = ex1 - 4;
const int j6_hi = ex2 - 4;
const int k6_hi = ex3 - 4;
/* 8th: 9pt, narrowest */
const int i8_lo = (iminF + 3 > 0) ? (iminF + 3) : 0;
const int j8_lo = (jminF + 3 > 0) ? (jminF + 3) : 0;
const int k8_lo = (kminF + 3 > 0) ? (kminF + 3) : 0;
const int i8_hi = ex1 - 5;
const int j8_hi = ex2 - 5;
const int k8_hi = ex3 - 5;
if (i2_lo <= i2_hi && j2_lo <= j2_hi && k2_lo <= k2_hi) {
for (int k0 = k2_lo; k0 <= k2_hi; ++k0) {
const int kF = k0 + 1;
for (int j0 = j2_lo; j0 <= j2_hi; ++j0) {
const int jF = j0 + 1;
for (int i0 = i2_lo; i0 <= i2_hi; ++i0) {
const int iF = i0 + 1;
const size_t p = idx_ex(i0, j0, k0, ex);
fx[p] = d2dx * (
-fh[idx_fh_F_ord5(iF - 1, jF, kF, ex)] +
fh[idx_fh_F_ord5(iF + 1, jF, kF, ex)]);
fy[p] = d2dy * (
-fh[idx_fh_F_ord5(iF, jF - 1, kF, ex)] +
fh[idx_fh_F_ord5(iF, jF + 1, kF, ex)]);
fz[p] = d2dz * (
-fh[idx_fh_F_ord5(iF, jF, kF - 1, ex)] +
fh[idx_fh_F_ord5(iF, jF, kF + 1, ex)]);
}
fz[p] = d12dz * (
fh[idx_fh_F_ord2(iF, jF, kF - 2, ex)] -
EIT * fh[idx_fh_F_ord2(iF, jF, kF - 1, ex)] +
EIT * fh[idx_fh_F_ord2(iF, jF, kF + 1, ex)] -
fh[idx_fh_F_ord2(iF, jF, kF + 2, ex)]
);
}
}
}
if (i4_lo <= i4_hi && j4_lo <= j4_hi && k4_lo <= k4_hi) {
for (int k0 = k4_lo; k0 <= k4_hi; ++k0) {
const int kF = k0 + 1;
for (int j0 = j4_lo; j0 <= j4_hi; ++j0) {
const int jF = j0 + 1;
for (int i0 = i4_lo; i0 <= i4_hi; ++i0) {
const int iF = i0 + 1;
const size_t p = idx_ex(i0, j0, k0, ex);
fx[p] = d12dx * (
fh[idx_fh_F_ord5(iF - 2, jF, kF, ex)] -
EIT * fh[idx_fh_F_ord5(iF - 1, jF, kF, ex)] +
EIT * fh[idx_fh_F_ord5(iF + 1, jF, kF, ex)] -
fh[idx_fh_F_ord5(iF + 2, jF, kF, ex)]);
fy[p] = d12dy * (
fh[idx_fh_F_ord5(iF, jF - 2, kF, ex)] -
EIT * fh[idx_fh_F_ord5(iF, jF - 1, kF, ex)] +
EIT * fh[idx_fh_F_ord5(iF, jF + 1, kF, ex)] -
fh[idx_fh_F_ord5(iF, jF + 2, kF, ex)]);
fz[p] = d12dz * (
fh[idx_fh_F_ord5(iF, jF, kF - 2, ex)] -
EIT * fh[idx_fh_F_ord5(iF, jF, kF - 1, ex)] +
EIT * fh[idx_fh_F_ord5(iF, jF, kF + 1, ex)] -
fh[idx_fh_F_ord5(iF, jF, kF + 2, ex)]);
}
}
}
}
if (i6_lo <= i6_hi && j6_lo <= j6_hi && k6_lo <= k6_hi) {
for (int k0 = k6_lo; k0 <= k6_hi; ++k0) {
const int kF = k0 + 1;
for (int j0 = j6_lo; j0 <= j6_hi; ++j0) {
const int jF = j0 + 1;
for (int i0 = i6_lo; i0 <= i6_hi; ++i0) {
const int iF = i0 + 1;
const size_t p = idx_ex(i0, j0, k0, ex);
fx[p] = d60dx * (
-fh[idx_fh_F_ord5(iF - 3, jF, kF, ex)] +
F9 * fh[idx_fh_F_ord5(iF - 2, jF, kF, ex)] -
F45 * fh[idx_fh_F_ord5(iF - 1, jF, kF, ex)] +
F45 * fh[idx_fh_F_ord5(iF + 1, jF, kF, ex)] -
F9 * fh[idx_fh_F_ord5(iF + 2, jF, kF, ex)] +
fh[idx_fh_F_ord5(iF + 3, jF, kF, ex)]);
fy[p] = d60dy * (
-fh[idx_fh_F_ord5(iF, jF - 3, kF, ex)] +
F9 * fh[idx_fh_F_ord5(iF, jF - 2, kF, ex)] -
F45 * fh[idx_fh_F_ord5(iF, jF - 1, kF, ex)] +
F45 * fh[idx_fh_F_ord5(iF, jF + 1, kF, ex)] -
F9 * fh[idx_fh_F_ord5(iF, jF + 2, kF, ex)] +
fh[idx_fh_F_ord5(iF, jF + 3, kF, ex)]);
fz[p] = d60dz * (
-fh[idx_fh_F_ord5(iF, jF, kF - 3, ex)] +
F9 * fh[idx_fh_F_ord5(iF, jF, kF - 2, ex)] -
F45 * fh[idx_fh_F_ord5(iF, jF, kF - 1, ex)] +
F45 * fh[idx_fh_F_ord5(iF, jF, kF + 1, ex)] -
F9 * fh[idx_fh_F_ord5(iF, jF, kF + 2, ex)] +
fh[idx_fh_F_ord5(iF, jF, kF + 3, ex)]);
}
}
}
}
/* 8th-order overwrite: [+3,-32,+168,-672,0,+672,-168,+32,-3] / (840*dx) */
if (i8_lo <= i8_hi && j8_lo <= j8_hi && k8_lo <= k8_hi) {
for (int k0 = k8_lo; k0 <= k8_hi; ++k0) {
const int kF = k0 + 1;
for (int j0 = j8_lo; j0 <= j8_hi; ++j0) {
const int jF = j0 + 1;
for (int i0 = i8_lo; i0 <= i8_hi; ++i0) {
const int iF = i0 + 1;
const size_t p = idx_ex(i0, j0, k0, ex);
fx[p] = d840dx * (
+(double)3 * fh[idx_fh_F_ord5(iF - 4, jF, kF, ex)] -
F32 * fh[idx_fh_F_ord5(iF - 3, jF, kF, ex)] +
F168 * fh[idx_fh_F_ord5(iF - 2, jF, kF, ex)] -
F672 * fh[idx_fh_F_ord5(iF - 1, jF, kF, ex)] +
F672 * fh[idx_fh_F_ord5(iF + 1, jF, kF, ex)] -
F168 * fh[idx_fh_F_ord5(iF + 2, jF, kF, ex)] +
F32 * fh[idx_fh_F_ord5(iF + 3, jF, kF, ex)] -
(double)3 * fh[idx_fh_F_ord5(iF + 4, jF, kF, ex)]);
fy[p] = d840dy * (
+(double)3 * fh[idx_fh_F_ord5(iF, jF - 4, kF, ex)] -
F32 * fh[idx_fh_F_ord5(iF, jF - 3, kF, ex)] +
F168 * fh[idx_fh_F_ord5(iF, jF - 2, kF, ex)] -
F672 * fh[idx_fh_F_ord5(iF, jF - 1, kF, ex)] +
F672 * fh[idx_fh_F_ord5(iF, jF + 1, kF, ex)] -
F168 * fh[idx_fh_F_ord5(iF, jF + 2, kF, ex)] +
F32 * fh[idx_fh_F_ord5(iF, jF + 3, kF, ex)] -
(double)3 * fh[idx_fh_F_ord5(iF, jF + 4, kF, ex)]);
fz[p] = d840dz * (
+(double)3 * fh[idx_fh_F_ord5(iF, jF, kF - 4, ex)] -
F32 * fh[idx_fh_F_ord5(iF, jF, kF - 3, ex)] +
F168 * fh[idx_fh_F_ord5(iF, jF, kF - 2, ex)] -
F672 * fh[idx_fh_F_ord5(iF, jF, kF - 1, ex)] +
F672 * fh[idx_fh_F_ord5(iF, jF, kF + 1, ex)] -
F168 * fh[idx_fh_F_ord5(iF, jF, kF + 2, ex)] +
F32 * fh[idx_fh_F_ord5(iF, jF, kF + 3, ex)] -
(double)3 * fh[idx_fh_F_ord5(iF, jF, kF + 4, ex)]);
}
}
}
}
return;
}
#else
#error "fderivs_c.C: unsupported ghost_width (must be 2, 3, 4, or 5)"
#endif
// free(fh);
}

View File

@@ -1,234 +0,0 @@
#include "macrodef.h"
#include "share_func.h"
/*
* C 版 fderivs_sh — first derivatives on shell patch in (rho, sigma, R) coords.
*
* Same stencil coefficients as Cartesian fderivs, but:
* - Uses symmetry_stbd (ghost on BOTH sides of x/y, none in z)
* - fh buffer: (-ord+1:ex+ord) in x/y, (1:ex) in z
* - SoA is 2-element only (x/y), no z-symmetry
* - sst parameter (shell surface type, not used in stencil computation)
*/
extern "C" void fderivs_sh_(const int ex[3],
const double *f,
double *fx, double *fy, double *fz,
const double *X, const double *Y, const double *Z,
double SYM1, double SYM2, double SYM3,
int Symmetry, int onoff, int sst)
{
(void)SYM3; (void)onoff; (void)sst;
const double ZEO = 0.0, ONE = 1.0, TWO = 2.0, EIT = 8.0;
const double F9 = 9.0, F12 = 12.0, F45 = 45.0, F60 = 60.0;
const double F32 = 32.0, F168 = 168.0, F672 = 672.0, F840 = 840.0;
const int NO_SYMM = 0, EQ_SYMM = 1, OCTANT = 2;
const int ex1 = ex[0], ex2 = ex[1], ex3 = ex[2];
const double dX = X[1] - X[0];
const double dY = Y[1] - Y[0];
const double dZ = Z[1] - Z[0];
const int imaxF = ex1, jmaxF = ex2, kmaxF = ex3;
const double SoA[2] = { SYM1, SYM2 };
#if (ghost_width == 2)
{
const int ord = 1;
int iminF = 1, jminF = 1, kminF = 1;
if (Symmetry == OCTANT && fabs(X[0]) < dX) iminF = 0;
if (Symmetry == OCTANT && fabs(Y[0]) < dY) jminF = 0;
if ((sst==2||sst==4) && fabs(Y[0]) < dY) jminF = 0; // EQ reflection
const size_t nx = (size_t)ex1 + 2 * ord;
const size_t ny = (size_t)ex2 + 2 * ord;
const size_t nz = (size_t)ex3;
const size_t fh_size = nx * ny * nz;
static double *fh_buf = NULL; static size_t cap = 0;
if (fh_size > cap) { free(fh_buf); fh_buf = (double*)aligned_alloc(64, fh_size*sizeof(double)); cap = fh_size; }
double *fh = fh_buf; if (!fh) return;
symmetry_stbd(ord, ex, f, fh, SoA);
const double d2dx = ONE/TWO/dX, d2dy = ONE/TWO/dY, d2dz = ONE/TWO/dZ;
const size_t all = (size_t)ex1*ex2*ex3;
for (size_t p=0;p<all;++p) { fx[p]=ZEO; fy[p]=ZEO; fz[p]=ZEO; }
const int i2_lo=(iminF>0)?iminF:0, j2_lo=(jminF>0)?jminF:0, k2_lo=1;
const int i2_hi=ex1-2, j2_hi=ex2-2, k2_hi=ex3-2;
if (i2_lo<=i2_hi&&j2_lo<=j2_hi&&k2_lo<=k2_hi) {
for (int k0=k2_lo;k0<=k2_hi;++k0) { const int kF=k0+1;
for (int j0=j2_lo;j0<=j2_hi;++j0) { const int jF=j0+1;
for (int i0=i2_lo;i0<=i2_hi;++i0) { const int iF=i0+1;
const size_t p=idx_ex(i0,j0,k0,ex);
fx[p]=d2dx*(-fh[idx_fh_stbd(iF-1,jF,kF,ord,ex)]+fh[idx_fh_stbd(iF+1,jF,kF,ord,ex)]);
fy[p]=d2dy*(-fh[idx_fh_stbd(iF,jF-1,kF,ord,ex)]+fh[idx_fh_stbd(iF,jF+1,kF,ord,ex)]);
fz[p]=d2dz*(-fh[idx_fh_stbd(iF,jF,kF-1,ord,ex)]+fh[idx_fh_stbd(iF,jF,kF+1,ord,ex)]);
}}}
}
return;
}
#elif (ghost_width == 3)
{
const int ord = 2;
int iminF = 1, jminF = 1, kminF = 1;
if (Symmetry == OCTANT && fabs(X[0]) < dX) iminF = -1;
if (Symmetry == OCTANT && fabs(Y[0]) < dY) jminF = -1;
if ((sst==2||sst==4) && fabs(Y[0]) < dY) jminF = -1;
const size_t nx=(size_t)ex1+2*ord, ny=(size_t)ex2+2*ord, nz=(size_t)ex3;
const size_t fh_size=nx*ny*nz;
static double *fh_buf=NULL; static size_t cap=0;
if (fh_size>cap){free(fh_buf);fh_buf=(double*)aligned_alloc(64,fh_size*sizeof(double));cap=fh_size;}
double *fh=fh_buf; if(!fh)return;
symmetry_stbd(ord,ex,f,fh,SoA);
const double d12dx=ONE/F12/dX, d12dy=ONE/F12/dY, d12dz=ONE/F12/dZ;
const double d2dx=ONE/TWO/dX, d2dy=ONE/TWO/dY, d2dz=ONE/TWO/dZ;
const size_t all=(size_t)ex1*ex2*ex3;
for(size_t p=0;p<all;++p){fx[p]=ZEO;fy[p]=ZEO;fz[p]=ZEO;}
const int i2_lo=(iminF>0)?iminF:0, j2_lo=(jminF>0)?jminF:0, k2_lo=1;
const int i2_hi=ex1-2, j2_hi=ex2-2, k2_hi=ex3-2;
const int i4_lo=(iminF+1>0)?iminF+1:0, j4_lo=(jminF+1>0)?jminF+1:0, k4_lo=2;
const int i4_hi=ex1-3, j4_hi=ex2-3, k4_hi=ex3-3;
if (i2_lo<=i2_hi&&j2_lo<=j2_hi&&k2_lo<=k2_hi) {
for(int k0=k2_lo;k0<=k2_hi;++k0){const int kF=k0+1;
for(int j0=j2_lo;j0<=j2_hi;++j0){const int jF=j0+1;
for(int i0=i2_lo;i0<=i2_hi;++i0){const int iF=i0+1;
const size_t p=idx_ex(i0,j0,k0,ex);
fx[p]=d2dx*(-fh[idx_fh_stbd(iF-1,jF,kF,ord,ex)]+fh[idx_fh_stbd(iF+1,jF,kF,ord,ex)]);
fy[p]=d2dy*(-fh[idx_fh_stbd(iF,jF-1,kF,ord,ex)]+fh[idx_fh_stbd(iF,jF+1,kF,ord,ex)]);
fz[p]=d2dz*(-fh[idx_fh_stbd(iF,jF,kF-1,ord,ex)]+fh[idx_fh_stbd(iF,jF,kF+1,ord,ex)]);
}}}
}
if (i4_lo<=i4_hi&&j4_lo<=j4_hi&&k4_lo<=k4_hi) {
for(int k0=k4_lo;k0<=k4_hi;++k0){const int kF=k0+1;
for(int j0=j4_lo;j0<=j4_hi;++j0){const int jF=j0+1;
for(int i0=i4_lo;i0<=i4_hi;++i0){const int iF=i0+1;
const size_t p=idx_ex(i0,j0,k0,ex);
fx[p]=d12dx*(fh[idx_fh_stbd(iF-2,jF,kF,ord,ex)]-EIT*fh[idx_fh_stbd(iF-1,jF,kF,ord,ex)]+EIT*fh[idx_fh_stbd(iF+1,jF,kF,ord,ex)]-fh[idx_fh_stbd(iF+2,jF,kF,ord,ex)]);
fy[p]=d12dy*(fh[idx_fh_stbd(iF,jF-2,kF,ord,ex)]-EIT*fh[idx_fh_stbd(iF,jF-1,kF,ord,ex)]+EIT*fh[idx_fh_stbd(iF,jF+1,kF,ord,ex)]-fh[idx_fh_stbd(iF,jF+2,kF,ord,ex)]);
fz[p]=d12dz*(fh[idx_fh_stbd(iF,jF,kF-2,ord,ex)]-EIT*fh[idx_fh_stbd(iF,jF,kF-1,ord,ex)]+EIT*fh[idx_fh_stbd(iF,jF,kF+1,ord,ex)]-fh[idx_fh_stbd(iF,jF,kF+2,ord,ex)]);
}}}
}
return;
}
#elif (ghost_width == 4)
{
const int ord = 3;
int iminF=1,jminF=1,kminF=1;
if(Symmetry==OCTANT&&fabs(X[0])<dX)iminF=-2;
if(Symmetry==OCTANT&&fabs(Y[0])<dY)jminF=-2;
if((sst==2||sst==4)&&fabs(Y[0])<dY)jminF=-2;
const size_t nx=(size_t)ex1+2*ord,ny=(size_t)ex2+2*ord,nz=(size_t)ex3;
const size_t fh_size=nx*ny*nz;
static double *fh_buf=NULL;static size_t cap=0;
if(fh_size>cap){free(fh_buf);fh_buf=(double*)aligned_alloc(64,fh_size*sizeof(double));cap=fh_size;}
double *fh=fh_buf;if(!fh)return;
symmetry_stbd(ord,ex,f,fh,SoA);
const double d60dx=ONE/F60/dX,d60dy=ONE/F60/dY,d60dz=ONE/F60/dZ;
const double d12dx=ONE/F12/dX,d12dy=ONE/F12/dY,d12dz=ONE/F12/dZ;
const double d2dx=ONE/TWO/dX,d2dy=ONE/TWO/dY,d2dz=ONE/TWO/dZ;
const size_t all=(size_t)ex1*ex2*ex3;
for(size_t p=0;p<all;++p){fx[p]=ZEO;fy[p]=ZEO;fz[p]=ZEO;}
const int i2_lo=(iminF>0)?iminF:0,j2_lo=(jminF>0)?jminF:0,k2_lo=1,i2_hi=ex1-2,j2_hi=ex2-2,k2_hi=ex3-2;
const int i4_lo=(iminF+1>0)?iminF+1:0,j4_lo=(jminF+1>0)?jminF+1:0,k4_lo=2,i4_hi=ex1-3,j4_hi=ex2-3,k4_hi=ex3-3;
const int i6_lo=(iminF+2>0)?iminF+2:0,j6_lo=(jminF+2>0)?jminF+2:0,k6_lo=3,i6_hi=ex1-4,j6_hi=ex2-4,k6_hi=ex3-4;
if(i2_lo<=i2_hi&&j2_lo<=j2_hi&&k2_lo<=k2_hi){
for(int k0=k2_lo;k0<=k2_hi;++k0){const int kF=k0+1;
for(int j0=j2_lo;j0<=j2_hi;++j0){const int jF=j0+1;
for(int i0=i2_lo;i0<=i2_hi;++i0){const int iF=i0+1;
const size_t p=idx_ex(i0,j0,k0,ex);
fx[p]=d2dx*(-fh[idx_fh_stbd(iF-1,jF,kF,ord,ex)]+fh[idx_fh_stbd(iF+1,jF,kF,ord,ex)]);
fy[p]=d2dy*(-fh[idx_fh_stbd(iF,jF-1,kF,ord,ex)]+fh[idx_fh_stbd(iF,jF+1,kF,ord,ex)]);
fz[p]=d2dz*(-fh[idx_fh_stbd(iF,jF,kF-1,ord,ex)]+fh[idx_fh_stbd(iF,jF,kF+1,ord,ex)]);
}}}
}
if(i4_lo<=i4_hi&&j4_lo<=j4_hi&&k4_lo<=k4_hi){
for(int k0=k4_lo;k0<=k4_hi;++k0){const int kF=k0+1;
for(int j0=j4_lo;j0<=j4_hi;++j0){const int jF=j0+1;
for(int i0=i4_lo;i0<=i4_hi;++i0){const int iF=i0+1;
const size_t p=idx_ex(i0,j0,k0,ex);
fx[p]=d12dx*(fh[idx_fh_stbd(iF-2,jF,kF,ord,ex)]-EIT*fh[idx_fh_stbd(iF-1,jF,kF,ord,ex)]+EIT*fh[idx_fh_stbd(iF+1,jF,kF,ord,ex)]-fh[idx_fh_stbd(iF+2,jF,kF,ord,ex)]);
fy[p]=d12dy*(fh[idx_fh_stbd(iF,jF-2,kF,ord,ex)]-EIT*fh[idx_fh_stbd(iF,jF-1,kF,ord,ex)]+EIT*fh[idx_fh_stbd(iF,jF+1,kF,ord,ex)]-fh[idx_fh_stbd(iF,jF+2,kF,ord,ex)]);
fz[p]=d12dz*(fh[idx_fh_stbd(iF,jF,kF-2,ord,ex)]-EIT*fh[idx_fh_stbd(iF,jF,kF-1,ord,ex)]+EIT*fh[idx_fh_stbd(iF,jF,kF+1,ord,ex)]-fh[idx_fh_stbd(iF,jF,kF+2,ord,ex)]);
}}}
}
if(i6_lo<=i6_hi&&j6_lo<=j6_hi&&k6_lo<=k6_hi){
for(int k0=k6_lo;k0<=k6_hi;++k0){const int kF=k0+1;
for(int j0=j6_lo;j0<=j6_hi;++j0){const int jF=j0+1;
for(int i0=i6_lo;i0<=i6_hi;++i0){const int iF=i0+1;
const size_t p=idx_ex(i0,j0,k0,ex);
fx[p]=d60dx*(-fh[idx_fh_stbd(iF-3,jF,kF,ord,ex)]+F9*fh[idx_fh_stbd(iF-2,jF,kF,ord,ex)]-F45*fh[idx_fh_stbd(iF-1,jF,kF,ord,ex)]+F45*fh[idx_fh_stbd(iF+1,jF,kF,ord,ex)]-F9*fh[idx_fh_stbd(iF+2,jF,kF,ord,ex)]+fh[idx_fh_stbd(iF+3,jF,kF,ord,ex)]);
fy[p]=d60dy*(-fh[idx_fh_stbd(iF,jF-3,kF,ord,ex)]+F9*fh[idx_fh_stbd(iF,jF-2,kF,ord,ex)]-F45*fh[idx_fh_stbd(iF,jF-1,kF,ord,ex)]+F45*fh[idx_fh_stbd(iF,jF+1,kF,ord,ex)]-F9*fh[idx_fh_stbd(iF,jF+2,kF,ord,ex)]+fh[idx_fh_stbd(iF,jF+3,kF,ord,ex)]);
fz[p]=d60dz*(-fh[idx_fh_stbd(iF,jF,kF-3,ord,ex)]+F9*fh[idx_fh_stbd(iF,jF,kF-2,ord,ex)]-F45*fh[idx_fh_stbd(iF,jF,kF-1,ord,ex)]+F45*fh[idx_fh_stbd(iF,jF,kF+1,ord,ex)]-F9*fh[idx_fh_stbd(iF,jF,kF+2,ord,ex)]+fh[idx_fh_stbd(iF,jF,kF+3,ord,ex)]);
}}}
}
return;
}
#elif (ghost_width == 5)
{
const int ord = 4;
int iminF=1,jminF=1,kminF=1;
if(Symmetry==OCTANT&&fabs(X[0])<dX)iminF=-3;
if(Symmetry==OCTANT&&fabs(Y[0])<dY)jminF=-3;
if((sst==2||sst==4)&&fabs(Y[0])<dY)jminF=-3;
const size_t nx=(size_t)ex1+2*ord,ny=(size_t)ex2+2*ord,nz=(size_t)ex3;
const size_t fh_size=nx*ny*nz;
static double *fh_buf=NULL;static size_t cap=0;
if(fh_size>cap){free(fh_buf);fh_buf=(double*)aligned_alloc(64,fh_size*sizeof(double));cap=fh_size;}
double *fh=fh_buf;if(!fh)return;
symmetry_stbd(ord,ex,f,fh,SoA);
const double d840dx=ONE/F840/dX,d840dy=ONE/F840/dY,d840dz=ONE/F840/dZ;
const double d60dx=ONE/F60/dX,d60dy=ONE/F60/dY,d60dz=ONE/F60/dZ;
const double d12dx=ONE/F12/dX,d12dy=ONE/F12/dY,d12dz=ONE/F12/dZ;
const double d2dx=ONE/TWO/dX,d2dy=ONE/TWO/dY,d2dz=ONE/TWO/dZ;
const size_t all=(size_t)ex1*ex2*ex3;
for(size_t p=0;p<all;++p){fx[p]=ZEO;fy[p]=ZEO;fz[p]=ZEO;}
const int i2_lo=(iminF>0)?iminF:0,j2_lo=(jminF>0)?jminF:0,k2_lo=1,i2_hi=ex1-2,j2_hi=ex2-2,k2_hi=ex3-2;
const int i4_lo=(iminF+1>0)?iminF+1:0,j4_lo=(jminF+1>0)?jminF+1:0,k4_lo=2,i4_hi=ex1-3,j4_hi=ex2-3,k4_hi=ex3-3;
const int i6_lo=(iminF+2>0)?iminF+2:0,j6_lo=(jminF+2>0)?jminF+2:0,k6_lo=3,i6_hi=ex1-4,j6_hi=ex2-4,k6_hi=ex3-4;
const int i8_lo=(iminF+3>0)?iminF+3:0,j8_lo=(jminF+3>0)?jminF+3:0,k8_lo=4,i8_hi=ex1-5,j8_hi=ex2-5,k8_hi=ex3-5;
#define FH_S(iF,jF,kF) fh[idx_fh_stbd(iF,jF,kF,ord,ex)]
if(i2_lo<=i2_hi&&j2_lo<=j2_hi&&k2_lo<=k2_hi){for(int k0=k2_lo;k0<=k2_hi;++k0){const int kF=k0+1;
for(int j0=j2_lo;j0<=j2_hi;++j0){const int jF=j0+1;
for(int i0=i2_lo;i0<=i2_hi;++i0){const int iF=i0+1;const size_t p=idx_ex(i0,j0,k0,ex);
fx[p]=d2dx*(-FH_S(iF-1,jF,kF)+FH_S(iF+1,jF,kF));
fy[p]=d2dy*(-FH_S(iF,jF-1,kF)+FH_S(iF,jF+1,kF));
fz[p]=d2dz*(-FH_S(iF,jF,kF-1)+FH_S(iF,jF,kF+1));}}}}
if(i4_lo<=i4_hi&&j4_lo<=j4_hi&&k4_lo<=k4_hi){for(int k0=k4_lo;k0<=k4_hi;++k0){const int kF=k0+1;
for(int j0=j4_lo;j0<=j4_hi;++j0){const int jF=j0+1;
for(int i0=i4_lo;i0<=i4_hi;++i0){const int iF=i0+1;const size_t p=idx_ex(i0,j0,k0,ex);
fx[p]=d12dx*(FH_S(iF-2,jF,kF)-EIT*FH_S(iF-1,jF,kF)+EIT*FH_S(iF+1,jF,kF)-FH_S(iF+2,jF,kF));
fy[p]=d12dy*(FH_S(iF,jF-2,kF)-EIT*FH_S(iF,jF-1,kF)+EIT*FH_S(iF,jF+1,kF)-FH_S(iF,jF+2,kF));
fz[p]=d12dz*(FH_S(iF,jF,kF-2)-EIT*FH_S(iF,jF,kF-1)+EIT*FH_S(iF,jF,kF+1)-FH_S(iF,jF,kF+2));}}}}
if(i6_lo<=i6_hi&&j6_lo<=j6_hi&&k6_lo<=k6_hi){for(int k0=k6_lo;k0<=k6_hi;++k0){const int kF=k0+1;
for(int j0=j6_lo;j0<=j6_hi;++j0){const int jF=j0+1;
for(int i0=i6_lo;i0<=i6_hi;++i0){const int iF=i0+1;const size_t p=idx_ex(i0,j0,k0,ex);
fx[p]=d60dx*(-FH_S(iF-3,jF,kF)+F9*FH_S(iF-2,jF,kF)-F45*FH_S(iF-1,jF,kF)+F45*FH_S(iF+1,jF,kF)-F9*FH_S(iF+2,jF,kF)+FH_S(iF+3,jF,kF));
fy[p]=d60dy*(-FH_S(iF,jF-3,kF)+F9*FH_S(iF,jF-2,kF)-F45*FH_S(iF,jF-1,kF)+F45*FH_S(iF,jF+1,kF)-F9*FH_S(iF,jF+2,kF)+FH_S(iF,jF+3,kF));
fz[p]=d60dz*(-FH_S(iF,jF,kF-3)+F9*FH_S(iF,jF,kF-2)-F45*FH_S(iF,jF,kF-1)+F45*FH_S(iF,jF,kF+1)-F9*FH_S(iF,jF,kF+2)+FH_S(iF,jF,kF+3));}}}}
if(i8_lo<=i8_hi&&j8_lo<=j8_hi&&k8_lo<=k8_hi){for(int k0=k8_lo;k0<=k8_hi;++k0){const int kF=k0+1;
for(int j0=j8_lo;j0<=j8_hi;++j0){const int jF=j0+1;
for(int i0=i8_lo;i0<=i8_hi;++i0){const int iF=i0+1;const size_t p=idx_ex(i0,j0,k0,ex);
fx[p]=d840dx*(+(double)3*FH_S(iF-4,jF,kF)-F32*FH_S(iF-3,jF,kF)+F168*FH_S(iF-2,jF,kF)-F672*FH_S(iF-1,jF,kF)+F672*FH_S(iF+1,jF,kF)-F168*FH_S(iF+2,jF,kF)+F32*FH_S(iF+3,jF,kF)-(double)3*FH_S(iF+4,jF,kF));
fy[p]=d840dy*(+(double)3*FH_S(iF,jF-4,kF)-F32*FH_S(iF,jF-3,kF)+F168*FH_S(iF,jF-2,kF)-F672*FH_S(iF,jF-1,kF)+F672*FH_S(iF,jF+1,kF)-F168*FH_S(iF,jF+2,kF)+F32*FH_S(iF,jF+3,kF)-(double)3*FH_S(iF,jF+4,kF));
fz[p]=d840dz*(+(double)3*FH_S(iF,jF,kF-4)-F32*FH_S(iF,jF,kF-3)+F168*FH_S(iF,jF,kF-2)-F672*FH_S(iF,jF,kF-1)+F672*FH_S(iF,jF,kF+1)-F168*FH_S(iF,jF,kF+2)+F32*FH_S(iF,jF,kF+3)-(double)3*FH_S(iF,jF,kF+4));}}}}
#undef FH_S
return;
}
#else
#error "fderivs_sh_c.C: unsupported ghost_width"
#endif
}

View File

@@ -1,54 +0,0 @@
#include "macrodef.h"
#include "share_func.h"
#include <cstddef>
/*
* fderivs_shc — shell first derivatives converted to Cartesian via chain rule.
*
* Calls fderivs_sh internally, then:
* fx = drhodx * df/drho + dsigmadx * df/dsigma + dRdx * df/dR
* fy = drhody * df/drho + dsigmady * df/dsigma + dRdy * df/dR
* fz = drhodz * df/drho + dsigmadz * df/dsigma + dRdz * df/dR
*/
// Forward declaration (defined in fderivs_sh_c.C with extern "C" name fderivs_sh_)
extern "C" {
void fderivs_sh_(const int ex[3], const double *f,
double *fx, double *fy, double *fz,
const double *X, const double *Y, const double *Z,
double SYM1, double SYM2, double SYM3,
int Symmetry, int onoff, int sst);
void fderivs_shc_(int *ex,
double *f,
double *fx, double *fy, double *fz,
double *crho, double *sigma, double *R,
double &SYM1, double &SYM2, double &SYM3,
int &Symmetry, int &Lev, int &sst,
double *drhodx, double *drhody, double *drhodz,
double *dsigmadx, double *dsigmady, double *dsigmadz,
double *dRdx, double *dRdy, double *dRdz)
{
const int ex3[3] = { ex[0], ex[1], ex[2] };
const size_t n = (size_t)ex[0] * (size_t)ex[1] * (size_t)ex[2];
// Temporary shell-coordinate derivatives
double *gx = (double*)malloc(n * sizeof(double));
double *gy = (double*)malloc(n * sizeof(double));
double *gz = (double*)malloc(n * sizeof(double));
if (!gx || !gy || !gz) { free(gx); free(gy); free(gz); return; }
// Compute shell-coordinate derivatives
fderivs_sh_(ex3, f, gx, gy, gz, crho, sigma, R, SYM1, SYM2, SYM3, Symmetry, Lev, sst);
// Chain rule to Cartesian
for (size_t i = 0; i < n; ++i) {
fx[i] = drhodx[i] * gx[i] + dsigmadx[i] * gy[i] + dRdx[i] * gz[i];
fy[i] = drhody[i] * gx[i] + dsigmady[i] * gy[i] + dRdy[i] * gz[i];
fz[i] = drhodz[i] * gx[i] + dsigmadz[i] * gy[i] + dRdz[i] * gz[i];
}
free(gx); free(gy); free(gz);
}
} // extern "C"

View File

@@ -18,7 +18,7 @@ using namespace std;
#endif
// Intel oneMKL LAPACK interface
#include <mkl_lapacke.h>
#include <lapacke.h>
/* Linear equation solution using Intel oneMKL LAPACK.
a[0..n-1][0..n-1] is the input matrix. b[0..n-1] is input
containing the right-hand side vectors. On output a is

View File

@@ -1,16 +1,16 @@
#include "macrodef.h"
#include "tool.h"
/*
* C 版 kodis — Kreiss-Oliger numerical dissipation (Cartesian patches).
* C 版 kodis
*
* The KO operator is (D₊D₋)^r applied to f_rhs with alternating sign (-1)^(r-1).
* Fortran signature:
* subroutine kodis(ex,X,Y,Z,f,f_rhs,SoA,Symmetry,eps)
*
* FD order → r → cof=2^(2r) mapping:
* ghost_width=2 (2nd) → r=2, cof=16, sign=-
* ghost_width=3 (4th) → r=3, cof=64, sign=+
* ghost_width=4 (6th) → r=4, cof=256, sign=-
* ghost_width=5 (8th) → r=5, cof=1024,sign=+
* 约定:
* X: ex1, Y: ex2, Z: ex3
* f, f_rhs: ex1*ex2*ex3 按 idx_ex 布局
* SoA[3]
* eps: double
*/
void kodis(const int ex[3],
const double *X, const double *Y, const double *Z,
@@ -18,304 +18,100 @@ void kodis(const int ex[3],
const double SoA[3],
int Symmetry, double eps)
{
const double ZEO = 0.0;
const double ONE = 1.0, SIX = 6.0, FIT = 15.0, TWT = 20.0;
const double cof = 64.0; // 2^6
const int NO_SYMM = 0, OCTANT = 2;
const int ex1 = ex[0], ex2 = ex[1], ex3 = ex[2];
// Fortran: dX = X(2)-X(1) -> C: X[1]-X[0]
const double dX = X[1] - X[0];
const double dY = Y[1] - Y[0];
const double dZ = Z[1] - Z[0];
(void)ONE; // ONE 在原 Fortran 里只是参数,这里不一定用得上
const int imaxF = ex1, jmaxF = ex2, kmaxF = ex3;
// Fortran: imax=ex(1) 等是 1-based 上界
const int imaxF = ex1;
const int jmaxF = ex2;
const int kmaxF = ex3;
#if (ghost_width == 2)
/* ---- r=2, cof=16, sign=-, 5pt stencil ----------------------------- */
{
const int ord = 2;
const int r = 2;
const double cof = 16.0;
const double F4 = 4.0, F6 = 6.0;
const int NO_SYMM = 0, EQ_SYMM = 1;
// Fortran: imin=jmin=kmin=1某些对称情况变 -2
int iminF = 1, jminF = 1, kminF = 1;
int iminF = 1, jminF = 1, kminF = 1;
if (Symmetry > NO_SYMM && fabs(Z[0]) < dZ) kminF = -1;
if (Symmetry > EQ_SYMM && fabs(X[0]) < dX) iminF = -1;
if (Symmetry > EQ_SYMM && fabs(Y[0]) < dY) jminF = -1;
if (Symmetry > NO_SYMM && fabs(Z[0]) < dZ) kminF = -2;
if (Symmetry == OCTANT && fabs(X[0]) < dX) iminF = -2;
if (Symmetry == OCTANT && fabs(Y[0]) < dY) jminF = -2;
const size_t nx = (size_t)ex1 + ord;
const size_t ny = (size_t)ex2 + ord;
const size_t nz = (size_t)ex3 + ord;
const size_t fh_size = nx * ny * nz;
// 分配 fh大小 (ex1+3)*(ex2+3)*(ex3+3),对应 ord=3
const size_t nx = (size_t)ex1 + 3;
const size_t ny = (size_t)ex2 + 3;
const size_t nz = (size_t)ex3 + 3;
const size_t fh_size = nx * ny * nz;
double *fh = (double*)malloc(fh_size * sizeof(double));
if (!fh) return;
double *fh = (double*)malloc(fh_size * sizeof(double));
if (!fh) return;
symmetry_bd(ord, ex, f, fh, SoA);
// Fortran: call symmetry_bd(3,ex,f,fh,SoA)
symmetry_bd(3, ex, f, fh, SoA);
/* i±2 must be valid: i-2 >= iminF && i+2 <= imaxF
C 0-based: i0 >= iminF+1, i0 <= ex1-3 */
const int i0_lo = (iminF + 1 > 0) ? (iminF + 1) : 0;
const int j0_lo = (jminF + 1 > 0) ? (jminF + 1) : 0;
const int k0_lo = (kminF + 1 > 0) ? (kminF + 1) : 0;
const int i0_hi = imaxF - 3;
const int j0_hi = jmaxF - 3;
const int k0_hi = kmaxF - 3;
/*
* Fortran loops:
* do k=1,ex3
* do j=1,ex2
* do i=1,ex1
*
* C: k0=0..ex3-1, j0=0..ex2-1, i0=0..ex1-1
* 并定义 Fortran index: iF=i0+1, ...
*/
// 收紧循环范围:只遍历满足 iF±3/jF±3/kF±3 条件的内部点
// iF-3 >= iminF => iF >= iminF+3 => i0 >= iminF+2 (因为 iF=i0+1)
// iF+3 <= imaxF => iF <= imaxF-3 => i0 <= imaxF-4
const int i0_lo = (iminF + 2 > 0) ? iminF + 2 : 0;
const int j0_lo = (jminF + 2 > 0) ? jminF + 2 : 0;
const int k0_lo = (kminF + 2 > 0) ? kminF + 2 : 0;
const int i0_hi = imaxF - 4; // inclusive
const int j0_hi = jmaxF - 4;
const int k0_hi = kmaxF - 4;
if (!(i0_lo > i0_hi || j0_lo > j0_hi || k0_lo > k0_hi)) {
for (int k0 = k0_lo; k0 <= k0_hi; ++k0) {
const int kF = k0 + 1;
for (int j0 = j0_lo; j0 <= j0_hi; ++j0) {
const int jF = j0 + 1;
for (int i0 = i0_lo; i0 <= i0_hi; ++i0) {
const int iF = i0 + 1;
const size_t p = idx_ex(i0, j0, k0, ex);
const double Dx = (
(fh[idx_fh_F_ord2(iF - 2, jF, kF, ex)] + fh[idx_fh_F_ord2(iF + 2, jF, kF, ex)]) -
F4 * (fh[idx_fh_F_ord2(iF - 1, jF, kF, ex)] + fh[idx_fh_F_ord2(iF + 1, jF, kF, ex)]) +
F6 * fh[idx_fh_F_ord2(iF, jF, kF, ex)]
) / dX;
const double Dy = (
(fh[idx_fh_F_ord2(iF, jF - 2, kF, ex)] + fh[idx_fh_F_ord2(iF, jF + 2, kF, ex)]) -
F4 * (fh[idx_fh_F_ord2(iF, jF - 1, kF, ex)] + fh[idx_fh_F_ord2(iF, jF + 1, kF, ex)]) +
F6 * fh[idx_fh_F_ord2(iF, jF, kF, ex)]
) / dY;
const double Dz = (
(fh[idx_fh_F_ord2(iF, jF, kF - 2, ex)] + fh[idx_fh_F_ord2(iF, jF, kF + 2, ex)]) -
F4 * (fh[idx_fh_F_ord2(iF, jF, kF - 1, ex)] + fh[idx_fh_F_ord2(iF, jF, kF + 1, ex)]) +
F6 * fh[idx_fh_F_ord2(iF, jF, kF, ex)]
) / dZ;
f_rhs[p] -= (eps / cof) * (Dx + Dy + Dz); /* sign=- */
}
}
}
}
if (i0_lo > i0_hi || j0_lo > j0_hi || k0_lo > k0_hi) {
free(fh);
return;
}
#elif (ghost_width == 3)
/* ---- r=3, cof=64, sign=+, 7pt stencil (current default) ---------- */
{
const int ord = 3;
const int r = 3;
const double cof = 64.0;
const double SIX = 6.0, FIT = 15.0, TWT = 20.0;
const int NO_SYMM = 0, OCTANT = 2;
int iminF = 1, jminF = 1, kminF = 1;
if (Symmetry > NO_SYMM && fabs(Z[0]) < dZ) kminF = -2;
if (Symmetry == OCTANT && fabs(X[0]) < dX) iminF = -2;
if (Symmetry == OCTANT && fabs(Y[0]) < dY) jminF = -2;
for (int k0 = k0_lo; k0 <= k0_hi; ++k0) {
const int kF = k0 + 1;
for (int j0 = j0_lo; j0 <= j0_hi; ++j0) {
const int jF = j0 + 1;
for (int i0 = i0_lo; i0 <= i0_hi; ++i0) {
const int iF = i0 + 1;
const size_t nx = (size_t)ex1 + ord;
const size_t ny = (size_t)ex2 + ord;
const size_t nz = (size_t)ex3 + ord;
const size_t fh_size = nx * ny * nz;
const size_t p = idx_ex(i0, j0, k0, ex);
double *fh = (double*)malloc(fh_size * sizeof(double));
if (!fh) return;
// 三个方向各一份同型的 7 点组合(实际上是对称的 6th-order dissipation/filter 核)
const double Dx_term =
( (fh[idx_fh_F(iF - 3, jF, kF, ex)] + fh[idx_fh_F(iF + 3, jF, kF, ex)]) -
SIX * (fh[idx_fh_F(iF - 2, jF, kF, ex)] + fh[idx_fh_F(iF + 2, jF, kF, ex)]) +
FIT * (fh[idx_fh_F(iF - 1, jF, kF, ex)] + fh[idx_fh_F(iF + 1, jF, kF, ex)]) -
TWT * fh[idx_fh_F(iF , jF, kF, ex)] ) / dX;
symmetry_bd(ord, ex, f, fh, SoA);
const double Dy_term =
( (fh[idx_fh_F(iF, jF - 3, kF, ex)] + fh[idx_fh_F(iF, jF + 3, kF, ex)]) -
SIX * (fh[idx_fh_F(iF, jF - 2, kF, ex)] + fh[idx_fh_F(iF, jF + 2, kF, ex)]) +
FIT * (fh[idx_fh_F(iF, jF - 1, kF, ex)] + fh[idx_fh_F(iF, jF + 1, kF, ex)]) -
TWT * fh[idx_fh_F(iF, jF , kF, ex)] ) / dY;
const int i0_lo = (iminF + 2 > 0) ? iminF + 2 : 0;
const int j0_lo = (jminF + 2 > 0) ? jminF + 2 : 0;
const int k0_lo = (kminF + 2 > 0) ? kminF + 2 : 0;
const int i0_hi = imaxF - 4;
const int j0_hi = jmaxF - 4;
const int k0_hi = kmaxF - 4;
const double Dz_term =
( (fh[idx_fh_F(iF, jF, kF - 3, ex)] + fh[idx_fh_F(iF, jF, kF + 3, ex)]) -
SIX * (fh[idx_fh_F(iF, jF, kF - 2, ex)] + fh[idx_fh_F(iF, jF, kF + 2, ex)]) +
FIT * (fh[idx_fh_F(iF, jF, kF - 1, ex)] + fh[idx_fh_F(iF, jF, kF + 1, ex)]) -
TWT * fh[idx_fh_F(iF, jF, kF , ex)] ) / dZ;
if (!(i0_lo > i0_hi || j0_lo > j0_hi || k0_lo > k0_hi)) {
for (int k0 = k0_lo; k0 <= k0_hi; ++k0) {
const int kF = k0 + 1;
for (int j0 = j0_lo; j0 <= j0_hi; ++j0) {
const int jF = j0 + 1;
for (int i0 = i0_lo; i0 <= i0_hi; ++i0) {
const int iF = i0 + 1;
const size_t p = idx_ex(i0, j0, k0, ex);
const double Dx = (
(fh[idx_fh_F(iF - 3, jF, kF, ex)] + fh[idx_fh_F(iF + 3, jF, kF, ex)]) -
SIX * (fh[idx_fh_F(iF - 2, jF, kF, ex)] + fh[idx_fh_F(iF + 2, jF, kF, ex)]) +
FIT * (fh[idx_fh_F(iF - 1, jF, kF, ex)] + fh[idx_fh_F(iF + 1, jF, kF, ex)]) -
TWT * fh[idx_fh_F(iF, jF, kF, ex)]
) / dX;
const double Dy = (
(fh[idx_fh_F(iF, jF - 3, kF, ex)] + fh[idx_fh_F(iF, jF + 3, kF, ex)]) -
SIX * (fh[idx_fh_F(iF, jF - 2, kF, ex)] + fh[idx_fh_F(iF, jF + 2, kF, ex)]) +
FIT * (fh[idx_fh_F(iF, jF - 1, kF, ex)] + fh[idx_fh_F(iF, jF + 1, kF, ex)]) -
TWT * fh[idx_fh_F(iF, jF, kF, ex)]
) / dY;
const double Dz = (
(fh[idx_fh_F(iF, jF, kF - 3, ex)] + fh[idx_fh_F(iF, jF, kF + 3, ex)]) -
SIX * (fh[idx_fh_F(iF, jF, kF - 2, ex)] + fh[idx_fh_F(iF, jF, kF + 2, ex)]) +
FIT * (fh[idx_fh_F(iF, jF, kF - 1, ex)] + fh[idx_fh_F(iF, jF, kF + 1, ex)]) -
TWT * fh[idx_fh_F(iF, jF, kF, ex)]
) / dZ;
f_rhs[p] += (eps / cof) * (Dx + Dy + Dz); /* sign=+ */
}
}
// Fortran:
// f_rhs(i,j,k) = f_rhs(i,j,k) + eps/cof*(Dx_term + Dy_term + Dz_term)
f_rhs[p] += (eps / cof) * (Dx_term + Dy_term + Dz_term);
}
}
free(fh);
return;
}
#elif (ghost_width == 4)
/* ---- r=4, cof=256, sign=-, 9pt stencil ---------------------------- */
{
const int ord = 4;
const int r = 4;
const double cof = 256.0;
const double F8 = 8.0, F28 = 28.0, F56 = 56.0, F70 = 70.0;
const int NO_SYMM = 0, EQ_SYMM = 1;
int iminF = 1, jminF = 1, kminF = 1;
if (Symmetry > NO_SYMM && fabs(Z[0]) < dZ) kminF = -3;
if (Symmetry > EQ_SYMM && fabs(X[0]) < dX) iminF = -3;
if (Symmetry > EQ_SYMM && fabs(Y[0]) < dY) jminF = -3;
const size_t nx = (size_t)ex1 + ord;
const size_t ny = (size_t)ex2 + ord;
const size_t nz = (size_t)ex3 + ord;
const size_t fh_size = nx * ny * nz;
double *fh = (double*)malloc(fh_size * sizeof(double));
if (!fh) return;
symmetry_bd(ord, ex, f, fh, SoA);
/* i±4 valid: i-4>=iminF → i0>=iminF+3, i+4<=imaxF → i0<=ex1-5 */
const int i0_lo = (iminF + 3 > 0) ? iminF + 3 : 0;
const int j0_lo = (jminF + 3 > 0) ? jminF + 3 : 0;
const int k0_lo = (kminF + 3 > 0) ? kminF + 3 : 0;
const int i0_hi = imaxF - 5;
const int j0_hi = jmaxF - 5;
const int k0_hi = kmaxF - 5;
if (!(i0_lo > i0_hi || j0_lo > j0_hi || k0_lo > k0_hi)) {
for (int k0 = k0_lo; k0 <= k0_hi; ++k0) {
const int kF = k0 + 1;
for (int j0 = j0_lo; j0 <= j0_hi; ++j0) {
const int jF = j0 + 1;
for (int i0 = i0_lo; i0 <= i0_hi; ++i0) {
const int iF = i0 + 1;
const size_t p = idx_ex(i0, j0, k0, ex);
/* Stencil: [1,-8,28,-56,70,-56,28,-8,1] */
const double Dx = (
(fh[idx_fh_F_ord4(iF - 4, jF, kF, ex)] + fh[idx_fh_F_ord4(iF + 4, jF, kF, ex)]) -
F8 * (fh[idx_fh_F_ord4(iF - 3, jF, kF, ex)] + fh[idx_fh_F_ord4(iF + 3, jF, kF, ex)]) +
F28* (fh[idx_fh_F_ord4(iF - 2, jF, kF, ex)] + fh[idx_fh_F_ord4(iF + 2, jF, kF, ex)]) -
F56* (fh[idx_fh_F_ord4(iF - 1, jF, kF, ex)] + fh[idx_fh_F_ord4(iF + 1, jF, kF, ex)]) +
F70* fh[idx_fh_F_ord4(iF, jF, kF, ex)]
) / dX;
const double Dy = (
(fh[idx_fh_F_ord4(iF, jF - 4, kF, ex)] + fh[idx_fh_F_ord4(iF, jF + 4, kF, ex)]) -
F8 * (fh[idx_fh_F_ord4(iF, jF - 3, kF, ex)] + fh[idx_fh_F_ord4(iF, jF + 3, kF, ex)]) +
F28* (fh[idx_fh_F_ord4(iF, jF - 2, kF, ex)] + fh[idx_fh_F_ord4(iF, jF + 2, kF, ex)]) -
F56* (fh[idx_fh_F_ord4(iF, jF - 1, kF, ex)] + fh[idx_fh_F_ord4(iF, jF + 1, kF, ex)]) +
F70* fh[idx_fh_F_ord4(iF, jF, kF, ex)]
) / dY;
const double Dz = (
(fh[idx_fh_F_ord4(iF, jF, kF - 4, ex)] + fh[idx_fh_F_ord4(iF, jF, kF + 4, ex)]) -
F8 * (fh[idx_fh_F_ord4(iF, jF, kF - 3, ex)] + fh[idx_fh_F_ord4(iF, jF, kF + 3, ex)]) +
F28* (fh[idx_fh_F_ord4(iF, jF, kF - 2, ex)] + fh[idx_fh_F_ord4(iF, jF, kF + 2, ex)]) -
F56* (fh[idx_fh_F_ord4(iF, jF, kF - 1, ex)] + fh[idx_fh_F_ord4(iF, jF, kF + 1, ex)]) +
F70* fh[idx_fh_F_ord4(iF, jF, kF, ex)]
) / dZ;
f_rhs[p] -= (eps / cof) * (Dx + Dy + Dz); /* sign=- */
}
}
}
}
free(fh);
return;
}
#elif (ghost_width == 5)
/* ---- r=5, cof=1024, sign=+, 11pt stencil ------------------------- */
{
const int ord = 5;
const int r = 5;
const double cof = 1024.0;
const double F10 = 10.0, F45 = 45.0, F120 = 120.0;
const double F210 = 210.0, F252 = 252.0;
const int NO_SYMM = 0, EQ_SYMM = 1;
int iminF = 1, jminF = 1, kminF = 1;
if (Symmetry > NO_SYMM && fabs(Z[0]) < dZ) kminF = -4;
if (Symmetry > EQ_SYMM && fabs(X[0]) < dX) iminF = -4;
if (Symmetry > EQ_SYMM && fabs(Y[0]) < dY) jminF = -4;
const size_t nx = (size_t)ex1 + ord;
const size_t ny = (size_t)ex2 + ord;
const size_t nz = (size_t)ex3 + ord;
const size_t fh_size = nx * ny * nz;
double *fh = (double*)malloc(fh_size * sizeof(double));
if (!fh) return;
symmetry_bd(ord, ex, f, fh, SoA);
/* i±5 valid: i0>=iminF+4, i0<=ex1-6 */
const int i0_lo = (iminF + 4 > 0) ? iminF + 4 : 0;
const int j0_lo = (jminF + 4 > 0) ? jminF + 4 : 0;
const int k0_lo = (kminF + 4 > 0) ? kminF + 4 : 0;
const int i0_hi = imaxF - 6;
const int j0_hi = jmaxF - 6;
const int k0_hi = kmaxF - 6;
if (!(i0_lo > i0_hi || j0_lo > j0_hi || k0_lo > k0_hi)) {
for (int k0 = k0_lo; k0 <= k0_hi; ++k0) {
const int kF = k0 + 1;
for (int j0 = j0_lo; j0 <= j0_hi; ++j0) {
const int jF = j0 + 1;
for (int i0 = i0_lo; i0 <= i0_hi; ++i0) {
const int iF = i0 + 1;
const size_t p = idx_ex(i0, j0, k0, ex);
/* Stencil: [1,-10,45,-120,210,-252,210,-120,45,-10,1] */
const double Dx = (
(fh[idx_fh_F_ord5(iF - 5, jF, kF, ex)] + fh[idx_fh_F_ord5(iF + 5, jF, kF, ex)]) -
F10 * (fh[idx_fh_F_ord5(iF - 4, jF, kF, ex)] + fh[idx_fh_F_ord5(iF + 4, jF, kF, ex)]) +
F45 * (fh[idx_fh_F_ord5(iF - 3, jF, kF, ex)] + fh[idx_fh_F_ord5(iF + 3, jF, kF, ex)]) -
F120* (fh[idx_fh_F_ord5(iF - 2, jF, kF, ex)] + fh[idx_fh_F_ord5(iF + 2, jF, kF, ex)]) +
F210* (fh[idx_fh_F_ord5(iF - 1, jF, kF, ex)] + fh[idx_fh_F_ord5(iF + 1, jF, kF, ex)]) -
F252* fh[idx_fh_F_ord5(iF, jF, kF, ex)]
) / dX;
const double Dy = (
(fh[idx_fh_F_ord5(iF, jF - 5, kF, ex)] + fh[idx_fh_F_ord5(iF, jF + 5, kF, ex)]) -
F10 * (fh[idx_fh_F_ord5(iF, jF - 4, kF, ex)] + fh[idx_fh_F_ord5(iF, jF + 4, kF, ex)]) +
F45 * (fh[idx_fh_F_ord5(iF, jF - 3, kF, ex)] + fh[idx_fh_F_ord5(iF, jF + 3, kF, ex)]) -
F120* (fh[idx_fh_F_ord5(iF, jF - 2, kF, ex)] + fh[idx_fh_F_ord5(iF, jF + 2, kF, ex)]) +
F210* (fh[idx_fh_F_ord5(iF, jF - 1, kF, ex)] + fh[idx_fh_F_ord5(iF, jF + 1, kF, ex)]) -
F252* fh[idx_fh_F_ord5(iF, jF, kF, ex)]
) / dY;
const double Dz = (
(fh[idx_fh_F_ord5(iF, jF, kF - 5, ex)] + fh[idx_fh_F_ord5(iF, jF, kF + 5, ex)]) -
F10 * (fh[idx_fh_F_ord5(iF, jF, kF - 4, ex)] + fh[idx_fh_F_ord5(iF, jF, kF + 4, ex)]) +
F45 * (fh[idx_fh_F_ord5(iF, jF, kF - 3, ex)] + fh[idx_fh_F_ord5(iF, jF, kF + 3, ex)]) -
F120* (fh[idx_fh_F_ord5(iF, jF, kF - 2, ex)] + fh[idx_fh_F_ord5(iF, jF, kF + 2, ex)]) +
F210* (fh[idx_fh_F_ord5(iF, jF, kF - 1, ex)] + fh[idx_fh_F_ord5(iF, jF, kF + 1, ex)]) -
F252* fh[idx_fh_F_ord5(iF, jF, kF, ex)]
) / dZ;
f_rhs[p] += (eps / cof) * (Dx + Dy + Dz); /* sign=+ */
}
}
}
}
free(fh);
return;
}
#else
#error "kodiss_c.C: unsupported ghost_width (must be 2, 3, 4, or 5)"
#endif
}
free(fh);
}

View File

@@ -1,136 +0,0 @@
#include "macrodef.h"
#include "share_func.h"
/*
* kodis_sh — Kreiss-Oliger dissipation on shell patches.
* Same stencil coefficients as Cartesian kodis. Uses symmetry_stbd.
*/
extern "C" void kodis_sh_(const int ex[3],
const double *X, const double *Y, const double *Z,
const double *f, double *f_rhs,
const double SoAi[2],
int Symmetry, double eps, int sst)
{
(void)sst;
const double ZEO=0.0;
const int ex1=ex[0], ex2=ex[1], ex3=ex[2];
const double dX=X[1]-X[0], dY=Y[1]-Y[0], dZ=Z[1]-Z[0];
const int imaxF=ex1, jmaxF=ex2, kmaxF=ex3;
const double SoA[2]={SoAi[0],SoAi[1]};
#if (ghost_width == 2)
{
const int ord=2, r=2;
const double cof=16.0, F4=4.0, F6=6.0;
const int NO_SYMM=0, OCTANT=2;
int iminF=1,jminF=1,kminF=1;
if(Symmetry==OCTANT&&fabs(X[0])<dX)iminF=-1;
if(Symmetry==OCTANT&&fabs(Y[0])<dY)jminF=-1;
if((sst==2||sst==4)&&fabs(Y[0])<dY)jminF=-1;
const size_t nx=(size_t)ex1+2*ord,ny=(size_t)ex2+2*ord,nz=(size_t)ex3,fh_size=nx*ny*nz;
double *fh=(double*)malloc(fh_size*sizeof(double));if(!fh)return;
symmetry_stbd(ord,ex,f,fh,SoA);
const int i0_lo=(iminF+1>0)?iminF+1:0,j0_lo=(jminF+1>0)?jminF+1:0,k0_lo=2;
const int i0_hi=imaxF-3,j0_hi=jmaxF-3,k0_hi=kmaxF-3;
if(!(i0_lo>i0_hi||j0_lo>j0_hi||k0_lo>k0_hi)){
for(int k0=k0_lo;k0<=k0_hi;++k0){const int kF=k0+1;
for(int j0=j0_lo;j0<=j0_hi;++j0){const int jF=j0+1;
for(int i0=i0_lo;i0<=i0_hi;++i0){const int iF=i0+1;const size_t p=idx_ex(i0,j0,k0,ex);
const double Dx=((fh[idx_fh_stbd(iF-2,jF,kF,ord,ex)]+fh[idx_fh_stbd(iF+2,jF,kF,ord,ex)])-F4*(fh[idx_fh_stbd(iF-1,jF,kF,ord,ex)]+fh[idx_fh_stbd(iF+1,jF,kF,ord,ex)])+F6*fh[idx_fh_stbd(iF,jF,kF,ord,ex)])/dX;
const double Dy=((fh[idx_fh_stbd(iF,jF-2,kF,ord,ex)]+fh[idx_fh_stbd(iF,jF+2,kF,ord,ex)])-F4*(fh[idx_fh_stbd(iF,jF-1,kF,ord,ex)]+fh[idx_fh_stbd(iF,jF+1,kF,ord,ex)])+F6*fh[idx_fh_stbd(iF,jF,kF,ord,ex)])/dY;
const double Dz=((fh[idx_fh_stbd(iF,jF,kF-2,ord,ex)]+fh[idx_fh_stbd(iF,jF,kF+2,ord,ex)])-F4*(fh[idx_fh_stbd(iF,jF,kF-1,ord,ex)]+fh[idx_fh_stbd(iF,jF,kF+1,ord,ex)])+F6*fh[idx_fh_stbd(iF,jF,kF,ord,ex)])/dZ;
f_rhs[p]-=(eps/cof)*(Dx+Dy+Dz);
}}}
}
free(fh);return;
}
#elif (ghost_width == 3)
{
const int ord=3, r=3;
const double cof=64.0,SIX=6.0,FIT=15.0,TWT=20.0;
const int NO_SYMM=0,OCTANT=2;
int iminF=1,jminF=1,kminF=1;
if(Symmetry==OCTANT&&fabs(X[0])<dX)iminF=-2;
if(Symmetry==OCTANT&&fabs(Y[0])<dY)jminF=-2;
if((sst==2||sst==4)&&fabs(Y[0])<dY)jminF=-2;
const size_t nx=(size_t)ex1+2*ord,ny=(size_t)ex2+2*ord,nz=(size_t)ex3,fh_size=nx*ny*nz;
double *fh=(double*)malloc(fh_size*sizeof(double));if(!fh)return;
symmetry_stbd(ord,ex,f,fh,SoA);
const int i0_lo=(iminF+2>0)?iminF+2:0,j0_lo=(jminF+2>0)?jminF+2:0,k0_lo=3;
const int i0_hi=imaxF-4,j0_hi=jmaxF-4,k0_hi=kmaxF-4;
if(!(i0_lo>i0_hi||j0_lo>j0_hi||k0_lo>k0_hi)){
for(int k0=k0_lo;k0<=k0_hi;++k0){const int kF=k0+1;
for(int j0=j0_lo;j0<=j0_hi;++j0){const int jF=j0+1;
for(int i0=i0_lo;i0<=i0_hi;++i0){const int iF=i0+1;const size_t p=idx_ex(i0,j0,k0,ex);
const double Dx=((fh[idx_fh_stbd(iF-3,jF,kF,ord,ex)]+fh[idx_fh_stbd(iF+3,jF,kF,ord,ex)])-SIX*(fh[idx_fh_stbd(iF-2,jF,kF,ord,ex)]+fh[idx_fh_stbd(iF+2,jF,kF,ord,ex)])+FIT*(fh[idx_fh_stbd(iF-1,jF,kF,ord,ex)]+fh[idx_fh_stbd(iF+1,jF,kF,ord,ex)])-TWT*fh[idx_fh_stbd(iF,jF,kF,ord,ex)])/dX;
const double Dy=((fh[idx_fh_stbd(iF,jF-3,kF,ord,ex)]+fh[idx_fh_stbd(iF,jF+3,kF,ord,ex)])-SIX*(fh[idx_fh_stbd(iF,jF-2,kF,ord,ex)]+fh[idx_fh_stbd(iF,jF+2,kF,ord,ex)])+FIT*(fh[idx_fh_stbd(iF,jF-1,kF,ord,ex)]+fh[idx_fh_stbd(iF,jF+1,kF,ord,ex)])-TWT*fh[idx_fh_stbd(iF,jF,kF,ord,ex)])/dY;
const double Dz=((fh[idx_fh_stbd(iF,jF,kF-3,ord,ex)]+fh[idx_fh_stbd(iF,jF,kF+3,ord,ex)])-SIX*(fh[idx_fh_stbd(iF,jF,kF-2,ord,ex)]+fh[idx_fh_stbd(iF,jF,kF+2,ord,ex)])+FIT*(fh[idx_fh_stbd(iF,jF,kF-1,ord,ex)]+fh[idx_fh_stbd(iF,jF,kF+1,ord,ex)])-TWT*fh[idx_fh_stbd(iF,jF,kF,ord,ex)])/dZ;
f_rhs[p]+=(eps/cof)*(Dx+Dy+Dz);
}}}
}
free(fh);return;
}
#elif (ghost_width == 4)
{
const int ord=4, r=4;
const double cof=256.0,F8=8.0,F28=28.0,F56=56.0,F70=70.0;
const int NO_SYMM=0,OCTANT=2;
int iminF=1,jminF=1,kminF=1;
if(Symmetry==OCTANT&&fabs(X[0])<dX)iminF=-3;
if(Symmetry==OCTANT&&fabs(Y[0])<dY)jminF=-3;
if((sst==2||sst==4)&&fabs(Y[0])<dY)jminF=-3;
const size_t nx=(size_t)ex1+2*ord,ny=(size_t)ex2+2*ord,nz=(size_t)ex3,fh_size=nx*ny*nz;
double *fh=(double*)malloc(fh_size*sizeof(double));if(!fh)return;
symmetry_stbd(ord,ex,f,fh,SoA);
const int i0_lo=(iminF+3>0)?iminF+3:0,j0_lo=(jminF+3>0)?jminF+3:0,k0_lo=4;
const int i0_hi=imaxF-5,j0_hi=jmaxF-5,k0_hi=kmaxF-5;
if(!(i0_lo>i0_hi||j0_lo>j0_hi||k0_lo>k0_hi)){
for(int k0=k0_lo;k0<=k0_hi;++k0){const int kF=k0+1;
for(int j0=j0_lo;j0<=j0_hi;++j0){const int jF=j0+1;
for(int i0=i0_lo;i0<=i0_hi;++i0){const int iF=i0+1;const size_t p=idx_ex(i0,j0,k0,ex);
const double Dx=((fh[idx_fh_stbd(iF-4,jF,kF,ord,ex)]+fh[idx_fh_stbd(iF+4,jF,kF,ord,ex)])-F8*(fh[idx_fh_stbd(iF-3,jF,kF,ord,ex)]+fh[idx_fh_stbd(iF+3,jF,kF,ord,ex)])+F28*(fh[idx_fh_stbd(iF-2,jF,kF,ord,ex)]+fh[idx_fh_stbd(iF+2,jF,kF,ord,ex)])-F56*(fh[idx_fh_stbd(iF-1,jF,kF,ord,ex)]+fh[idx_fh_stbd(iF+1,jF,kF,ord,ex)])+F70*fh[idx_fh_stbd(iF,jF,kF,ord,ex)])/dX;
const double Dy=((fh[idx_fh_stbd(iF,jF-4,kF,ord,ex)]+fh[idx_fh_stbd(iF,jF+4,kF,ord,ex)])-F8*(fh[idx_fh_stbd(iF,jF-3,kF,ord,ex)]+fh[idx_fh_stbd(iF,jF+3,kF,ord,ex)])+F28*(fh[idx_fh_stbd(iF,jF-2,kF,ord,ex)]+fh[idx_fh_stbd(iF,jF+2,kF,ord,ex)])-F56*(fh[idx_fh_stbd(iF,jF-1,kF,ord,ex)]+fh[idx_fh_stbd(iF,jF+1,kF,ord,ex)])+F70*fh[idx_fh_stbd(iF,jF,kF,ord,ex)])/dY;
const double Dz=((fh[idx_fh_stbd(iF,jF,kF-4,ord,ex)]+fh[idx_fh_stbd(iF,jF,kF+4,ord,ex)])-F8*(fh[idx_fh_stbd(iF,jF,kF-3,ord,ex)]+fh[idx_fh_stbd(iF,jF,kF+3,ord,ex)])+F28*(fh[idx_fh_stbd(iF,jF,kF-2,ord,ex)]+fh[idx_fh_stbd(iF,jF,kF+2,ord,ex)])-F56*(fh[idx_fh_stbd(iF,jF,kF-1,ord,ex)]+fh[idx_fh_stbd(iF,jF,kF+1,ord,ex)])+F70*fh[idx_fh_stbd(iF,jF,kF,ord,ex)])/dZ;
f_rhs[p]-=(eps/cof)*(Dx+Dy+Dz);
}}}
}
free(fh);return;
}
#elif (ghost_width == 5)
{
const int ord=5, r=5;
const double cof=1024.0,F10=10.0,F45k=45.0,F120=120.0,F210=210.0,F252=252.0;
const int NO_SYMM=0,OCTANT=2;
int iminF=1,jminF=1,kminF=1;
if(Symmetry==OCTANT&&fabs(X[0])<dX)iminF=-4;
if(Symmetry==OCTANT&&fabs(Y[0])<dY)jminF=-4;
if((sst==2||sst==4)&&fabs(Y[0])<dY)jminF=-4;
const size_t nx=(size_t)ex1+2*ord,ny=(size_t)ex2+2*ord,nz=(size_t)ex3,fh_size=nx*ny*nz;
double *fh=(double*)malloc(fh_size*sizeof(double));if(!fh)return;
symmetry_stbd(ord,ex,f,fh,SoA);
const int i0_lo=(iminF+4>0)?iminF+4:0,j0_lo=(jminF+4>0)?jminF+4:0,k0_lo=5;
const int i0_hi=imaxF-6,j0_hi=jmaxF-6,k0_hi=kmaxF-6;
if(!(i0_lo>i0_hi||j0_lo>j0_hi||k0_lo>k0_hi)){
for(int k0=k0_lo;k0<=k0_hi;++k0){const int kF=k0+1;
for(int j0=j0_lo;j0<=j0_hi;++j0){const int jF=j0+1;
for(int i0=i0_lo;i0<=i0_hi;++i0){const int iF=i0+1;const size_t p=idx_ex(i0,j0,k0,ex);
const double Dx=((fh[idx_fh_stbd(iF-5,jF,kF,ord,ex)]+fh[idx_fh_stbd(iF+5,jF,kF,ord,ex)])-F10*(fh[idx_fh_stbd(iF-4,jF,kF,ord,ex)]+fh[idx_fh_stbd(iF+4,jF,kF,ord,ex)])+F45k*(fh[idx_fh_stbd(iF-3,jF,kF,ord,ex)]+fh[idx_fh_stbd(iF+3,jF,kF,ord,ex)])-F120*(fh[idx_fh_stbd(iF-2,jF,kF,ord,ex)]+fh[idx_fh_stbd(iF+2,jF,kF,ord,ex)])+F210*(fh[idx_fh_stbd(iF-1,jF,kF,ord,ex)]+fh[idx_fh_stbd(iF+1,jF,kF,ord,ex)])-F252*fh[idx_fh_stbd(iF,jF,kF,ord,ex)])/dX;
const double Dy=((fh[idx_fh_stbd(iF,jF-5,kF,ord,ex)]+fh[idx_fh_stbd(iF,jF+5,kF,ord,ex)])-F10*(fh[idx_fh_stbd(iF,jF-4,kF,ord,ex)]+fh[idx_fh_stbd(iF,jF+4,kF,ord,ex)])+F45k*(fh[idx_fh_stbd(iF,jF-3,kF,ord,ex)]+fh[idx_fh_stbd(iF,jF+3,kF,ord,ex)])-F120*(fh[idx_fh_stbd(iF,jF-2,kF,ord,ex)]+fh[idx_fh_stbd(iF,jF+2,kF,ord,ex)])+F210*(fh[idx_fh_stbd(iF,jF-1,kF,ord,ex)]+fh[idx_fh_stbd(iF,jF+1,kF,ord,ex)])-F252*fh[idx_fh_stbd(iF,jF,kF,ord,ex)])/dY;
const double Dz=((fh[idx_fh_stbd(iF,jF,kF-5,ord,ex)]+fh[idx_fh_stbd(iF,jF,kF+5,ord,ex)])-F10*(fh[idx_fh_stbd(iF,jF,kF-4,ord,ex)]+fh[idx_fh_stbd(iF,jF,kF+4,ord,ex)])+F45k*(fh[idx_fh_stbd(iF,jF,kF-3,ord,ex)]+fh[idx_fh_stbd(iF,jF,kF+3,ord,ex)])-F120*(fh[idx_fh_stbd(iF,jF,kF-2,ord,ex)]+fh[idx_fh_stbd(iF,jF,kF+2,ord,ex)])+F210*(fh[idx_fh_stbd(iF,jF,kF-1,ord,ex)]+fh[idx_fh_stbd(iF,jF,kF+1,ord,ex)])-F252*fh[idx_fh_stbd(iF,jF,kF,ord,ex)])/dZ;
f_rhs[p]+=(eps/cof)*(Dx+Dy+Dz);
}}}
}
free(fh);return;
}
#else
#error "kodiss_sh_c.C: unsupported ghost_width"
#endif
}

View File

@@ -1,13 +1,14 @@
#include "macrodef.h"
#include "tool.h"
/*
* C 版 lopsided — upwind (lopsided) advection derivatives.
* 你需要提供 symmetry_bd 的 C 版本(或 Fortran 绑到 C 的接口)。
* Fortran: call symmetry_bd(3,ex,f,fh,SoA)
*
* Adds advection terms to f_rhs for all three spatial directions.
* Uses sign-biased (one-sided) stencils with centered fallbacks.
*
* For lopsided, symmetry_bd ord = ghost_width (same as kodiss).
* 约定:
* nghost = 3
* ex[3] = {ex1,ex2,ex3}
* f = 原始网格 (ex1*ex2*ex3)
* fh = 扩展网格 ((ex1+3)*(ex2+3)*(ex3+3)),对应 Fortran 的 (-2:ex1, ...)
* SoA[3] = 输入参数
*/
void lopsided(const int ex[3],
const double *X, const double *Y, const double *Z,
@@ -15,577 +16,240 @@ void lopsided(const int ex[3],
const double *Sfx, const double *Sfy, const double *Sfz,
int Symmetry, const double SoA[3])
{
const double ZEO = 0.0, ONE = 1.0;
const double TWO = 2.0, F6 = 6.0, EIT = 8.0;
const double F3 = 3.0, F4 = 4.0, F5 = 5.0, F10 = 10.0, F12 = 12.0, F18 = 18.0;
const double F9 = 9.0, F45 = 45.0, F60 = 60.0;
const double F2 = 2.0, F15 = 15.0, F24 = 24.0, F30 = 30.0, F35 = 35.0;
const double F50 = 50.0, F77 = 77.0, F80 = 80.0, F100 = 100.0, F150 = 150.0;
const double F32 = 32.0, F168 = 168.0, F672 = 672.0, F840 = 840.0;
const double F140=140.0, F378=378.0, F420=420.0, F1050=1050.0;
const double ZEO = 0.0, ONE = 1.0, F3 = 3.0;
const double TWO = 2.0, F6 = 6.0, F18 = 18.0;
const double F12 = 12.0, F10 = 10.0, EIT = 8.0;
const int NO_SYMM = 0, EQ_SYMM = 1, OCTANT = 2;
(void)OCTANT; // 这里和 Fortran 一样只是定义了不用也没关系
const int NO_SYMM = 0, EQ_SYMM = 1;
const int ex1 = ex[0], ex2 = ex[1], ex3 = ex[2];
// 对应 Fortran: dX = X(2)-X(1) Fortran 1-based
// C: X[1]-X[0]
const double dX = X[1] - X[0];
const double dY = Y[1] - Y[0];
const double dZ = Z[1] - Z[0];
#if (ghost_width == 2)
/* ---- 2nd-order lopsided --------------------------------------------- */
{
const int ord = 2;
int iminF = 1, jminF = 1, kminF = 1;
if (Symmetry > NO_SYMM && fabs(Z[0]) < dZ) kminF = -1;
if (Symmetry > EQ_SYMM && fabs(X[0]) < dX) iminF = -1;
if (Symmetry > EQ_SYMM && fabs(Y[0]) < dY) jminF = -1;
const double d12dx = ONE / F12 / dX;
const double d12dy = ONE / F12 / dY;
const double d12dz = ONE / F12 / dZ;
const size_t nx = (size_t)ex1 + ord;
const size_t ny = (size_t)ex2 + ord;
const size_t nz = (size_t)ex3 + ord;
const size_t fh_size = nx * ny * nz;
// Fortran 里算了 d2dx/d2dy/d2dz 但本 subroutine 里没用到(保持一致也算出来)
const double d2dx = ONE / TWO / dX;
const double d2dy = ONE / TWO / dY;
const double d2dz = ONE / TWO / dZ;
(void)d2dx; (void)d2dy; (void)d2dz;
double *fh = (double*)malloc(fh_size * sizeof(double));
if (!fh) return;
symmetry_bd(ord, ex, f, fh, SoA);
// Fortran:
// imax = ex(1); jmax = ex(2); kmax = ex(3)
const int imaxF = ex1;
const int jmaxF = ex2;
const int kmaxF = ex3;
const double d2dx = ONE / TWO / dX;
const double d2dy = ONE / TWO / dY;
const double d2dz = ONE / TWO / dZ;
// Fortran:
// imin=jmin=kmin=1; 若满足对称条件则设为 -2
int iminF = 1, jminF = 1, kminF = 1;
if (Symmetry > NO_SYMM && fabs(Z[0]) < dZ) kminF = -2;
if (Symmetry > EQ_SYMM && fabs(X[0]) < dX) iminF = -2;
if (Symmetry > EQ_SYMM && fabs(Y[0]) < dY) jminF = -2;
const int imaxF = ex1, jmaxF = ex2, kmaxF = ex3;
// 分配 fh大小 (ex1+3)*(ex2+3)*(ex3+3)
const size_t nx = (size_t)ex1 + 3;
const size_t ny = (size_t)ex2 + 3;
const size_t nz = (size_t)ex3 + 3;
const size_t fh_size = nx * ny * nz;
for (int k0 = 0; k0 <= ex3 - 2; ++k0) {
const int kF = k0 + 1;
for (int j0 = 0; j0 <= ex2 - 2; ++j0) {
const int jF = j0 + 1;
for (int i0 = 0; i0 <= ex1 - 2; ++i0) {
const int iF = i0 + 1;
const size_t p = idx_ex(i0, j0, k0, ex);
double *fh = (double*)malloc(fh_size * sizeof(double));
if (!fh) return; // 内存不足:直接返回(你也可以改成 abort/报错)
/* x-direction */
const double sfx = Sfx[p];
if (sfx > ZEO) {
if (i0 <= ex1 - 3) // i+2 <= imax
f_rhs[p] += sfx * d2dx * (
-F3*fh[idx_fh_F_ord2(iF, jF, kF, ex)] +
F4*fh[idx_fh_F_ord2(iF+1, jF, kF, ex)] -
fh[idx_fh_F_ord2(iF+2, jF, kF, ex)]);
else if (i0 <= ex1 - 2) // i+1 <= imax
f_rhs[p] += sfx * d2dx * (
-fh[idx_fh_F_ord2(iF, jF, kF, ex)] +
fh[idx_fh_F_ord2(iF+1, jF, kF, ex)]);
} else if (sfx < ZEO) {
if ((i0 - 1) >= iminF) // i-2 >= imin
f_rhs[p] -= sfx * d2dx * (
-F3*fh[idx_fh_F_ord2(iF, jF, kF, ex)] +
F4*fh[idx_fh_F_ord2(iF-1, jF, kF, ex)] -
fh[idx_fh_F_ord2(iF-2, jF, kF, ex)]);
else if (i0 >= iminF) // i-1 >= imin
f_rhs[p] -= sfx * d2dx * (
-fh[idx_fh_F_ord2(iF, jF, kF, ex)] +
fh[idx_fh_F_ord2(iF-1, jF, kF, ex)]);
// Fortran: call symmetry_bd(3,ex,f,fh,SoA)
symmetry_bd(3, ex, f, fh, SoA);
/*
* Fortran 主循环:
* do k=1,ex(3)-1
* do j=1,ex(2)-1
* do i=1,ex(1)-1
*
* 转成 C 0-based
* k0 = 0..ex3-2, j0 = 0..ex2-2, i0 = 0..ex1-2
*
* 并且 Fortran 里的 i/j/k 在 fh 访问时,仍然是 Fortran 索引值:
* iF=i0+1, jF=j0+1, kF=k0+1
*/
for (int k0 = 0; k0 <= ex3 - 2; ++k0) {
const int kF = k0 + 1;
for (int j0 = 0; j0 <= ex2 - 2; ++j0) {
const int jF = j0 + 1;
for (int i0 = 0; i0 <= ex1 - 2; ++i0) {
const int iF = i0 + 1;
const size_t p = idx_ex(i0, j0, k0, ex);
// ---------------- x direction ----------------
const double sfx = Sfx[p];
if (sfx > ZEO) {
// Fortran: if(i+3 <= imax)
// iF+3 <= ex1 <=> i0+4 <= ex1 <=> i0 <= ex1-4
if (i0 <= ex1 - 4) {
f_rhs[p] += sfx * d12dx *
(-F3 * fh[idx_fh_F(iF - 1, jF, kF, ex)]
-F10 * fh[idx_fh_F(iF , jF, kF, ex)]
+F18 * fh[idx_fh_F(iF + 1, jF, kF, ex)]
-F6 * fh[idx_fh_F(iF + 2, jF, kF, ex)]
+ fh[idx_fh_F(iF + 3, jF, kF, ex)]);
}
/* y-direction */
const double sfy = Sfy[p];
if (sfy > ZEO) {
if (j0 <= ex2-3)
f_rhs[p] += sfy * d2dy * (
-F3*fh[idx_fh_F_ord2(iF, jF, kF, ex)] +
F4*fh[idx_fh_F_ord2(iF, jF+1, kF, ex)] -
fh[idx_fh_F_ord2(iF, jF+2, kF, ex)]);
else if (j0 <= ex2-2)
f_rhs[p] += sfy * d2dy * (
-fh[idx_fh_F_ord2(iF, jF, kF, ex)] +
fh[idx_fh_F_ord2(iF, jF+1, kF, ex)]);
} else if (sfy < ZEO) {
if ((j0-1) >= jminF)
f_rhs[p] -= sfy * d2dy * (
-F3*fh[idx_fh_F_ord2(iF, jF, kF, ex)] +
F4*fh[idx_fh_F_ord2(iF, jF-1, kF, ex)] -
fh[idx_fh_F_ord2(iF, jF-2, kF, ex)]);
else if (j0 >= jminF)
f_rhs[p] -= sfy * d2dy * (
-fh[idx_fh_F_ord2(iF, jF, kF, ex)] +
fh[idx_fh_F_ord2(iF, jF-1, kF, ex)]);
// elseif(i+2 <= imax) <=> i0 <= ex1-3
else if (i0 <= ex1 - 3) {
f_rhs[p] += sfx * d12dx *
( fh[idx_fh_F(iF - 2, jF, kF, ex)]
-EIT * fh[idx_fh_F(iF - 1, jF, kF, ex)]
+EIT * fh[idx_fh_F(iF + 1, jF, kF, ex)]
- fh[idx_fh_F(iF + 2, jF, kF, ex)]);
}
// elseif(i+1 <= imax) <=> i0 <= ex1-2循环里总成立
else if (i0 <= ex1 - 2) {
f_rhs[p] -= sfx * d12dx *
(-F3 * fh[idx_fh_F(iF + 1, jF, kF, ex)]
-F10 * fh[idx_fh_F(iF , jF, kF, ex)]
+F18 * fh[idx_fh_F(iF - 1, jF, kF, ex)]
-F6 * fh[idx_fh_F(iF - 2, jF, kF, ex)]
+ fh[idx_fh_F(iF - 3, jF, kF, ex)]);
}
} else if (sfx < ZEO) {
// Fortran: if(i-3 >= imin)
// (iF-3) >= iminF <=> (i0-2) >= iminF
if ((i0 - 2) >= iminF) {
f_rhs[p] -= sfx * d12dx *
(-F3 * fh[idx_fh_F(iF + 1, jF, kF, ex)]
-F10 * fh[idx_fh_F(iF , jF, kF, ex)]
+F18 * fh[idx_fh_F(iF - 1, jF, kF, ex)]
-F6 * fh[idx_fh_F(iF - 2, jF, kF, ex)]
+ fh[idx_fh_F(iF - 3, jF, kF, ex)]);
}
// elseif(i-2 >= imin) <=> (i0-1) >= iminF
else if ((i0 - 1) >= iminF) {
f_rhs[p] += sfx * d12dx *
( fh[idx_fh_F(iF - 2, jF, kF, ex)]
-EIT * fh[idx_fh_F(iF - 1, jF, kF, ex)]
+EIT * fh[idx_fh_F(iF + 1, jF, kF, ex)]
- fh[idx_fh_F(iF + 2, jF, kF, ex)]);
}
// elseif(i-1 >= imin) <=> i0 >= iminF
else if (i0 >= iminF) {
f_rhs[p] += sfx * d12dx *
(-F3 * fh[idx_fh_F(iF - 1, jF, kF, ex)]
-F10 * fh[idx_fh_F(iF , jF, kF, ex)]
+F18 * fh[idx_fh_F(iF + 1, jF, kF, ex)]
-F6 * fh[idx_fh_F(iF + 2, jF, kF, ex)]
+ fh[idx_fh_F(iF + 3, jF, kF, ex)]);
}
}
/* z-direction */
const double sfz = Sfz[p];
if (sfz > ZEO) {
if (k0 <= ex3-3)
f_rhs[p] += sfz * d2dz * (
-F3*fh[idx_fh_F_ord2(iF, jF, kF, ex)] +
F4*fh[idx_fh_F_ord2(iF, jF, kF+1, ex)] -
fh[idx_fh_F_ord2(iF, jF, kF+2, ex)]);
else if (k0 <= ex3-2)
f_rhs[p] += sfz * d2dz * (
-fh[idx_fh_F_ord2(iF, jF, kF, ex)] +
fh[idx_fh_F_ord2(iF, jF, kF+1, ex)]);
} else if (sfz < ZEO) {
if ((k0-1) >= kminF)
f_rhs[p] -= sfz * d2dz * (
-F3*fh[idx_fh_F_ord2(iF, jF, kF, ex)] +
F4*fh[idx_fh_F_ord2(iF, jF, kF-1, ex)] -
fh[idx_fh_F_ord2(iF, jF, kF-2, ex)]);
else if (k0 >= kminF)
f_rhs[p] -= sfz * d2dz * (
-fh[idx_fh_F_ord2(iF, jF, kF, ex)] +
fh[idx_fh_F_ord2(iF, jF, kF-1, ex)]);
// ---------------- y direction ----------------
const double sfy = Sfy[p];
if (sfy > ZEO) {
// jF+3 <= ex2 <=> j0+4 <= ex2 <=> j0 <= ex2-4
if (j0 <= ex2 - 4) {
f_rhs[p] += sfy * d12dy *
(-F3 * fh[idx_fh_F(iF, jF - 1, kF, ex)]
-F10 * fh[idx_fh_F(iF, jF , kF, ex)]
+F18 * fh[idx_fh_F(iF, jF + 1, kF, ex)]
-F6 * fh[idx_fh_F(iF, jF + 2, kF, ex)]
+ fh[idx_fh_F(iF, jF + 3, kF, ex)]);
} else if (j0 <= ex2 - 3) {
f_rhs[p] += sfy * d12dy *
( fh[idx_fh_F(iF, jF - 2, kF, ex)]
-EIT * fh[idx_fh_F(iF, jF - 1, kF, ex)]
+EIT * fh[idx_fh_F(iF, jF + 1, kF, ex)]
- fh[idx_fh_F(iF, jF + 2, kF, ex)]);
} else if (j0 <= ex2 - 2) {
f_rhs[p] -= sfy * d12dy *
(-F3 * fh[idx_fh_F(iF, jF + 1, kF, ex)]
-F10 * fh[idx_fh_F(iF, jF , kF, ex)]
+F18 * fh[idx_fh_F(iF, jF - 1, kF, ex)]
-F6 * fh[idx_fh_F(iF, jF - 2, kF, ex)]
+ fh[idx_fh_F(iF, jF - 3, kF, ex)]);
}
} else if (sfy < ZEO) {
if ((j0 - 2) >= jminF) {
f_rhs[p] -= sfy * d12dy *
(-F3 * fh[idx_fh_F(iF, jF + 1, kF, ex)]
-F10 * fh[idx_fh_F(iF, jF , kF, ex)]
+F18 * fh[idx_fh_F(iF, jF - 1, kF, ex)]
-F6 * fh[idx_fh_F(iF, jF - 2, kF, ex)]
+ fh[idx_fh_F(iF, jF - 3, kF, ex)]);
} else if ((j0 - 1) >= jminF) {
f_rhs[p] += sfy * d12dy *
( fh[idx_fh_F(iF, jF - 2, kF, ex)]
-EIT * fh[idx_fh_F(iF, jF - 1, kF, ex)]
+EIT * fh[idx_fh_F(iF, jF + 1, kF, ex)]
- fh[idx_fh_F(iF, jF + 2, kF, ex)]);
} else if (j0 >= jminF) {
f_rhs[p] += sfy * d12dy *
(-F3 * fh[idx_fh_F(iF, jF - 1, kF, ex)]
-F10 * fh[idx_fh_F(iF, jF , kF, ex)]
+F18 * fh[idx_fh_F(iF, jF + 1, kF, ex)]
-F6 * fh[idx_fh_F(iF, jF + 2, kF, ex)]
+ fh[idx_fh_F(iF, jF + 3, kF, ex)]);
}
}
// ---------------- z direction ----------------
const double sfz = Sfz[p];
if (sfz > ZEO) {
if (k0 <= ex3 - 4) {
f_rhs[p] += sfz * d12dz *
(-F3 * fh[idx_fh_F(iF, jF, kF - 1, ex)]
-F10 * fh[idx_fh_F(iF, jF, kF , ex)]
+F18 * fh[idx_fh_F(iF, jF, kF + 1, ex)]
-F6 * fh[idx_fh_F(iF, jF, kF + 2, ex)]
+ fh[idx_fh_F(iF, jF, kF + 3, ex)]);
} else if (k0 <= ex3 - 3) {
f_rhs[p] += sfz * d12dz *
( fh[idx_fh_F(iF, jF, kF - 2, ex)]
-EIT * fh[idx_fh_F(iF, jF, kF - 1, ex)]
+EIT * fh[idx_fh_F(iF, jF, kF + 1, ex)]
- fh[idx_fh_F(iF, jF, kF + 2, ex)]);
} else if (k0 <= ex3 - 2) {
f_rhs[p] -= sfz * d12dz *
(-F3 * fh[idx_fh_F(iF, jF, kF + 1, ex)]
-F10 * fh[idx_fh_F(iF, jF, kF , ex)]
+F18 * fh[idx_fh_F(iF, jF, kF - 1, ex)]
-F6 * fh[idx_fh_F(iF, jF, kF - 2, ex)]
+ fh[idx_fh_F(iF, jF, kF - 3, ex)]);
}
} else if (sfz < ZEO) {
if ((k0 - 2) >= kminF) {
f_rhs[p] -= sfz * d12dz *
(-F3 * fh[idx_fh_F(iF, jF, kF + 1, ex)]
-F10 * fh[idx_fh_F(iF, jF, kF , ex)]
+F18 * fh[idx_fh_F(iF, jF, kF - 1, ex)]
-F6 * fh[idx_fh_F(iF, jF, kF - 2, ex)]
+ fh[idx_fh_F(iF, jF, kF - 3, ex)]);
} else if ((k0 - 1) >= kminF) {
f_rhs[p] += sfz * d12dz *
( fh[idx_fh_F(iF, jF, kF - 2, ex)]
-EIT * fh[idx_fh_F(iF, jF, kF - 1, ex)]
+EIT * fh[idx_fh_F(iF, jF, kF + 1, ex)]
- fh[idx_fh_F(iF, jF, kF + 2, ex)]);
} else if (k0 >= kminF) {
f_rhs[p] += sfz * d12dz *
(-F3 * fh[idx_fh_F(iF, jF, kF - 1, ex)]
-F10 * fh[idx_fh_F(iF, jF, kF , ex)]
+F18 * fh[idx_fh_F(iF, jF, kF + 1, ex)]
-F6 * fh[idx_fh_F(iF, jF, kF + 2, ex)]
+ fh[idx_fh_F(iF, jF, kF + 3, ex)]);
}
}
}
}
free(fh);
return;
}
#elif (ghost_width == 3)
/* ---- 4th-order lopsided (original code) ---------------------------- */
{
const int ord = 3;
int iminF = 1, jminF = 1, kminF = 1;
if (Symmetry > NO_SYMM && fabs(Z[0]) < dZ) kminF = -2;
if (Symmetry > EQ_SYMM && fabs(X[0]) < dX) iminF = -2;
if (Symmetry > EQ_SYMM && fabs(Y[0]) < dY) jminF = -2;
const size_t nx = (size_t)ex1 + ord;
const size_t ny = (size_t)ex2 + ord;
const size_t nz = (size_t)ex3 + ord;
const size_t fh_size = nx * ny * nz;
double *fh = (double*)malloc(fh_size * sizeof(double));
if (!fh) return;
symmetry_bd(ord, ex, f, fh, SoA);
const double d12dx = ONE / F12 / dX;
const double d12dy = ONE / F12 / dY;
const double d12dz = ONE / F12 / dZ;
const int imaxF = ex1, jmaxF = ex2, kmaxF = ex3;
for (int k0 = 0; k0 <= ex3 - 2; ++k0) {
const int kF = k0 + 1;
for (int j0 = 0; j0 <= ex2 - 2; ++j0) {
const int jF = j0 + 1;
for (int i0 = 0; i0 <= ex1 - 2; ++i0) {
const int iF = i0 + 1;
const size_t p = idx_ex(i0, j0, k0, ex);
const double sfx = Sfx[p];
if (sfx > ZEO) {
if (i0 <= ex1 - 4) // i+3 <= imax
f_rhs[p] += sfx * d12dx * (
-F3 *fh[idx_fh_F(iF-1, jF, kF, ex)]
-F10*fh[idx_fh_F(iF, jF, kF, ex)]
+F18*fh[idx_fh_F(iF+1, jF, kF, ex)]
-F6 *fh[idx_fh_F(iF+2, jF, kF, ex)]
+ fh[idx_fh_F(iF+3, jF, kF, ex)]);
else if (i0 <= ex1 - 3) // i+2 <= imax
f_rhs[p] += sfx * d12dx * (
fh[idx_fh_F(iF-2, jF, kF, ex)]
-EIT*fh[idx_fh_F(iF-1, jF, kF, ex)]
+EIT*fh[idx_fh_F(iF+1, jF, kF, ex)]
- fh[idx_fh_F(iF+2, jF, kF, ex)]);
else if (i0 <= ex1 - 2) // i+1 <= imax → mirrored
f_rhs[p] -= sfx * d12dx * (
-F3 *fh[idx_fh_F(iF+1, jF, kF, ex)]
-F10*fh[idx_fh_F(iF, jF, kF, ex)]
+F18*fh[idx_fh_F(iF-1, jF, kF, ex)]
-F6 *fh[idx_fh_F(iF-2, jF, kF, ex)]
+ fh[idx_fh_F(iF-3, jF, kF, ex)]);
} else if (sfx < ZEO) {
if ((i0 - 2) >= iminF) // i-3 >= imin
f_rhs[p] -= sfx * d12dx * (
-F3 *fh[idx_fh_F(iF+1, jF, kF, ex)]
-F10*fh[idx_fh_F(iF, jF, kF, ex)]
+F18*fh[idx_fh_F(iF-1, jF, kF, ex)]
-F6 *fh[idx_fh_F(iF-2, jF, kF, ex)]
+ fh[idx_fh_F(iF-3, jF, kF, ex)]);
else if ((i0 - 1) >= iminF) // i-2 >= imin
f_rhs[p] += sfx * d12dx * (
fh[idx_fh_F(iF-2, jF, kF, ex)]
-EIT*fh[idx_fh_F(iF-1, jF, kF, ex)]
+EIT*fh[idx_fh_F(iF+1, jF, kF, ex)]
- fh[idx_fh_F(iF+2, jF, kF, ex)]);
else if (i0 >= iminF) // i-1 >= imin → mirrored
f_rhs[p] += sfx * d12dx * (
-F3 *fh[idx_fh_F(iF-1, jF, kF, ex)]
-F10*fh[idx_fh_F(iF, jF, kF, ex)]
+F18*fh[idx_fh_F(iF+1, jF, kF, ex)]
-F6 *fh[idx_fh_F(iF+2, jF, kF, ex)]
+ fh[idx_fh_F(iF+3, jF, kF, ex)]);
}
const double sfy = Sfy[p];
if (sfy > ZEO) {
if (j0 <= ex2-4)
f_rhs[p] += sfy * d12dy * (
-F3*fh[idx_fh_F(iF,jF-1,kF,ex)]-F10*fh[idx_fh_F(iF,jF,kF,ex)]
+F18*fh[idx_fh_F(iF,jF+1,kF,ex)]-F6*fh[idx_fh_F(iF,jF+2,kF,ex)]
+fh[idx_fh_F(iF,jF+3,kF,ex)]);
else if (j0 <= ex2-3)
f_rhs[p] += sfy * d12dy * (fh[idx_fh_F(iF,jF-2,kF,ex)]-EIT*fh[idx_fh_F(iF,jF-1,kF,ex)]+EIT*fh[idx_fh_F(iF,jF+1,kF,ex)]-fh[idx_fh_F(iF,jF+2,kF,ex)]);
else if (j0 <= ex2-2)
f_rhs[p] -= sfy * d12dy * (
-F3*fh[idx_fh_F(iF,jF+1,kF,ex)]-F10*fh[idx_fh_F(iF,jF,kF,ex)]
+F18*fh[idx_fh_F(iF,jF-1,kF,ex)]-F6*fh[idx_fh_F(iF,jF-2,kF,ex)]
+fh[idx_fh_F(iF,jF-3,kF,ex)]);
} else if (sfy < ZEO) {
if ((j0-2) >= jminF)
f_rhs[p] -= sfy * d12dy * (
-F3*fh[idx_fh_F(iF,jF+1,kF,ex)]-F10*fh[idx_fh_F(iF,jF,kF,ex)]
+F18*fh[idx_fh_F(iF,jF-1,kF,ex)]-F6*fh[idx_fh_F(iF,jF-2,kF,ex)]
+fh[idx_fh_F(iF,jF-3,kF,ex)]);
else if ((j0-1) >= jminF)
f_rhs[p] += sfy * d12dy * (fh[idx_fh_F(iF,jF-2,kF,ex)]-EIT*fh[idx_fh_F(iF,jF-1,kF,ex)]+EIT*fh[idx_fh_F(iF,jF+1,kF,ex)]-fh[idx_fh_F(iF,jF+2,kF,ex)]);
else if (j0 >= jminF)
f_rhs[p] += sfy * d12dy * (
-F3*fh[idx_fh_F(iF,jF-1,kF,ex)]-F10*fh[idx_fh_F(iF,jF,kF,ex)]
+F18*fh[idx_fh_F(iF,jF+1,kF,ex)]-F6*fh[idx_fh_F(iF,jF+2,kF,ex)]
+fh[idx_fh_F(iF,jF+3,kF,ex)]);
}
const double sfz = Sfz[p];
if (sfz > ZEO) {
if (k0 <= ex3-4)
f_rhs[p] += sfz * d12dz * (
-F3*fh[idx_fh_F(iF,jF,kF-1,ex)]-F10*fh[idx_fh_F(iF,jF,kF,ex)]
+F18*fh[idx_fh_F(iF,jF,kF+1,ex)]-F6*fh[idx_fh_F(iF,jF,kF+2,ex)]
+fh[idx_fh_F(iF,jF,kF+3,ex)]);
else if (k0 <= ex3-3)
f_rhs[p] += sfz * d12dz * (fh[idx_fh_F(iF,jF,kF-2,ex)]-EIT*fh[idx_fh_F(iF,jF,kF-1,ex)]+EIT*fh[idx_fh_F(iF,jF,kF+1,ex)]-fh[idx_fh_F(iF,jF,kF+2,ex)]);
else if (k0 <= ex3-2)
f_rhs[p] -= sfz * d12dz * (
-F3*fh[idx_fh_F(iF,jF,kF+1,ex)]-F10*fh[idx_fh_F(iF,jF,kF,ex)]
+F18*fh[idx_fh_F(iF,jF,kF-1,ex)]-F6*fh[idx_fh_F(iF,jF,kF-2,ex)]
+fh[idx_fh_F(iF,jF,kF-3,ex)]);
} else if (sfz < ZEO) {
if ((k0-2) >= kminF)
f_rhs[p] -= sfz * d12dz * (
-F3*fh[idx_fh_F(iF,jF,kF+1,ex)]-F10*fh[idx_fh_F(iF,jF,kF,ex)]
+F18*fh[idx_fh_F(iF,jF,kF-1,ex)]-F6*fh[idx_fh_F(iF,jF,kF-2,ex)]
+fh[idx_fh_F(iF,jF,kF-3,ex)]);
else if ((k0-1) >= kminF)
f_rhs[p] += sfz * d12dz * (fh[idx_fh_F(iF,jF,kF-2,ex)]-EIT*fh[idx_fh_F(iF,jF,kF-1,ex)]+EIT*fh[idx_fh_F(iF,jF,kF+1,ex)]-fh[idx_fh_F(iF,jF,kF+2,ex)]);
else if (k0 >= kminF)
f_rhs[p] += sfz * d12dz * (
-F3*fh[idx_fh_F(iF,jF,kF-1,ex)]-F10*fh[idx_fh_F(iF,jF,kF,ex)]
+F18*fh[idx_fh_F(iF,jF,kF+1,ex)]-F6*fh[idx_fh_F(iF,jF,kF+2,ex)]
+fh[idx_fh_F(iF,jF,kF+3,ex)]);
}
}
}
}
free(fh);
return;
}
#elif (ghost_width == 4)
/* ---- 6th-order lopsided --------------------------------------------- */
{
const int ord = 4;
int iminF = 1, jminF = 1, kminF = 1;
if (Symmetry > NO_SYMM && fabs(Z[0]) < dZ) kminF = -3;
if (Symmetry > EQ_SYMM && fabs(X[0]) < dX) iminF = -3;
if (Symmetry > EQ_SYMM && fabs(Y[0]) < dY) jminF = -3;
const size_t nx = (size_t)ex1 + ord;
const size_t ny = (size_t)ex2 + ord;
const size_t nz = (size_t)ex3 + ord;
const size_t fh_size = nx * ny * nz;
double *fh = (double*)malloc(fh_size * sizeof(double));
if (!fh) return;
symmetry_bd(ord, ex, f, fh, SoA);
const double d60dx = ONE / F60 / dX;
const double d60dy = ONE / F60 / dY;
const double d60dz = ONE / F60 / dZ;
const double d12dx = ONE / F12 / dX;
const double d12dy = ONE / F12 / dY;
const double d12dz = ONE / F12 / dZ;
const double d2dx = ONE / TWO / dX;
const double d2dy = ONE / TWO / dY;
const double d2dz = ONE / TWO / dZ;
const int imaxF = ex1, jmaxF = ex2, kmaxF = ex3;
for (int k0 = 0; k0 <= ex3 - 2; ++k0) {
const int kF = k0 + 1;
for (int j0 = 0; j0 <= ex2 - 2; ++j0) {
const int jF = j0 + 1;
for (int i0 = 0; i0 <= ex1 - 2; ++i0) {
const int iF = i0 + 1;
const size_t p = idx_ex(i0, j0, k0, ex);
/* ---- x-direction ---- */
const double sfx = Sfx[p];
if (sfx > ZEO) {
/* Primary biased: 2*f(i-2)-24*f(i-1)-35*f(i)+80*f(i+1)-30*f(i+2)+8*f(i+3)-f(i+4) */
if (i0 <= ex1-5 && (i0-1)>=iminF) // i+4<=imax && i-2>=imin
f_rhs[p] += sfx * d60dx * (
+F2*fh[idx_fh_F_ord4(iF-2,jF,kF,ex)]-F24*fh[idx_fh_F_ord4(iF-1,jF,kF,ex)]
-F35*fh[idx_fh_F_ord4(iF,jF,kF,ex)]+F80*fh[idx_fh_F_ord4(iF+1,jF,kF,ex)]
-F30*fh[idx_fh_F_ord4(iF+2,jF,kF,ex)]+EIT*fh[idx_fh_F_ord4(iF+3,jF,kF,ex)]
-fh[idx_fh_F_ord4(iF+4,jF,kF,ex)]);
/* Boundary-adapted: -10*f(i-1)-77*f(i)+150*f(i+1)-100*f(i+2)+50*f(i+3)-15*f(i+4)+2*f(i+5) */
else if (i0 <= ex1-6 && i0 >= iminF) // i+5<=imax && i-1>=imin
f_rhs[p] += sfx * d60dx * (
-F10*fh[idx_fh_F_ord4(iF-1,jF,kF,ex)]-F77*fh[idx_fh_F_ord4(iF,jF,kF,ex)]
+F150*fh[idx_fh_F_ord4(iF+1,jF,kF,ex)]-F100*fh[idx_fh_F_ord4(iF+2,jF,kF,ex)]
+F50*fh[idx_fh_F_ord4(iF+3,jF,kF,ex)]-F15*fh[idx_fh_F_ord4(iF+4,jF,kF,ex)]
+F2*fh[idx_fh_F_ord4(iF+5,jF,kF,ex)]);
/* Centered fallbacks */
else if (i0 <= ex1-4 && (i0-2)>=iminF) // 6th: i+3<=imax && i-3>=imin
f_rhs[p] += sfx * d60dx * (
-fh[idx_fh_F_ord4(iF-3,jF,kF,ex)]+F9*fh[idx_fh_F_ord4(iF-2,jF,kF,ex)]
-F45*fh[idx_fh_F_ord4(iF-1,jF,kF,ex)]+F45*fh[idx_fh_F_ord4(iF+1,jF,kF,ex)]
-F9*fh[idx_fh_F_ord4(iF+2,jF,kF,ex)]+fh[idx_fh_F_ord4(iF+3,jF,kF,ex)]);
else if (i0 <= ex1-3 && (i0-1)>=iminF) // 4th
f_rhs[p] += sfx * d12dx * (
fh[idx_fh_F_ord4(iF-2,jF,kF,ex)]-EIT*fh[idx_fh_F_ord4(iF-1,jF,kF,ex)]
+EIT*fh[idx_fh_F_ord4(iF+1,jF,kF,ex)]-fh[idx_fh_F_ord4(iF+2,jF,kF,ex)]);
else if (i0 <= ex1-2 && i0>=iminF) // 2nd
f_rhs[p] += sfx * d2dx * (
-fh[idx_fh_F_ord4(iF-1,jF,kF,ex)]+fh[idx_fh_F_ord4(iF+1,jF,kF,ex)]);
} else if (sfx < ZEO) {
if ((i0-3)>=iminF && i0<=ex1-3) // i-4>=imin && i+2<=imax
f_rhs[p] -= sfx * d60dx * (
+F2*fh[idx_fh_F_ord4(iF+2,jF,kF,ex)]-F24*fh[idx_fh_F_ord4(iF+1,jF,kF,ex)]
-F35*fh[idx_fh_F_ord4(iF,jF,kF,ex)]+F80*fh[idx_fh_F_ord4(iF-1,jF,kF,ex)]
-F30*fh[idx_fh_F_ord4(iF-2,jF,kF,ex)]+EIT*fh[idx_fh_F_ord4(iF-3,jF,kF,ex)]
-fh[idx_fh_F_ord4(iF-4,jF,kF,ex)]);
else if ((i0-4)>=iminF && i0<=ex1-2) // i-5>=imin && i+1<=imax
f_rhs[p] -= sfx * d60dx * (
-F10*fh[idx_fh_F_ord4(iF+1,jF,kF,ex)]-F77*fh[idx_fh_F_ord4(iF,jF,kF,ex)]
+F150*fh[idx_fh_F_ord4(iF-1,jF,kF,ex)]-F100*fh[idx_fh_F_ord4(iF-2,jF,kF,ex)]
+F50*fh[idx_fh_F_ord4(iF-3,jF,kF,ex)]-F15*fh[idx_fh_F_ord4(iF-4,jF,kF,ex)]
+F2*fh[idx_fh_F_ord4(iF-5,jF,kF,ex)]);
else if ((i0-2)>=iminF && i0<=ex1-4) // 6th centered
f_rhs[p] += sfx * d60dx * (
-fh[idx_fh_F_ord4(iF-3,jF,kF,ex)]+F9*fh[idx_fh_F_ord4(iF-2,jF,kF,ex)]
-F45*fh[idx_fh_F_ord4(iF-1,jF,kF,ex)]+F45*fh[idx_fh_F_ord4(iF+1,jF,kF,ex)]
-F9*fh[idx_fh_F_ord4(iF+2,jF,kF,ex)]+fh[idx_fh_F_ord4(iF+3,jF,kF,ex)]);
else if ((i0-1)>=iminF && i0<=ex1-3) // 4th
f_rhs[p] += sfx * d12dx * (
fh[idx_fh_F_ord4(iF-2,jF,kF,ex)]-EIT*fh[idx_fh_F_ord4(iF-1,jF,kF,ex)]
+EIT*fh[idx_fh_F_ord4(iF+1,jF,kF,ex)]-fh[idx_fh_F_ord4(iF+2,jF,kF,ex)]);
else if (i0>=iminF && i0<=ex1-2) // 2nd
f_rhs[p] += sfx * d2dx * (
-fh[idx_fh_F_ord4(iF-1,jF,kF,ex)]+fh[idx_fh_F_ord4(iF+1,jF,kF,ex)]);
}
/* ---- y-direction ---- */
const double sfy = Sfy[p];
if (sfy > ZEO) {
if (j0<=ex2-5 && (j0-1)>=jminF)
f_rhs[p] += sfy * d60dy*(F2*fh[idx_fh_F_ord4(iF,jF-2,kF,ex)]-F24*fh[idx_fh_F_ord4(iF,jF-1,kF,ex)]-F35*fh[idx_fh_F_ord4(iF,jF,kF,ex)]+F80*fh[idx_fh_F_ord4(iF,jF+1,kF,ex)]-F30*fh[idx_fh_F_ord4(iF,jF+2,kF,ex)]+EIT*fh[idx_fh_F_ord4(iF,jF+3,kF,ex)]-fh[idx_fh_F_ord4(iF,jF+4,kF,ex)]);
else if (j0<=ex2-6 && j0>=jminF)
f_rhs[p] += sfy * d60dy*(-F10*fh[idx_fh_F_ord4(iF,jF-1,kF,ex)]-F77*fh[idx_fh_F_ord4(iF,jF,kF,ex)]+F150*fh[idx_fh_F_ord4(iF,jF+1,kF,ex)]-F100*fh[idx_fh_F_ord4(iF,jF+2,kF,ex)]+F50*fh[idx_fh_F_ord4(iF,jF+3,kF,ex)]-F15*fh[idx_fh_F_ord4(iF,jF+4,kF,ex)]+F2*fh[idx_fh_F_ord4(iF,jF+5,kF,ex)]);
else if (j0<=ex2-4 && (j0-2)>=jminF)
f_rhs[p] += sfy * d60dy*(-fh[idx_fh_F_ord4(iF,jF-3,kF,ex)]+F9*fh[idx_fh_F_ord4(iF,jF-2,kF,ex)]-F45*fh[idx_fh_F_ord4(iF,jF-1,kF,ex)]+F45*fh[idx_fh_F_ord4(iF,jF+1,kF,ex)]-F9*fh[idx_fh_F_ord4(iF,jF+2,kF,ex)]+fh[idx_fh_F_ord4(iF,jF+3,kF,ex)]);
else if (j0<=ex2-3 && (j0-1)>=jminF)
f_rhs[p] += sfy * d12dy*(fh[idx_fh_F_ord4(iF,jF-2,kF,ex)]-EIT*fh[idx_fh_F_ord4(iF,jF-1,kF,ex)]+EIT*fh[idx_fh_F_ord4(iF,jF+1,kF,ex)]-fh[idx_fh_F_ord4(iF,jF+2,kF,ex)]);
else if (j0<=ex2-2 && j0>=jminF)
f_rhs[p] += sfy * d2dy*(-fh[idx_fh_F_ord4(iF,jF-1,kF,ex)]+fh[idx_fh_F_ord4(iF,jF+1,kF,ex)]);
} else if (sfy < ZEO) {
if ((j0-3)>=jminF && j0<=ex2-3)
f_rhs[p] -= sfy * d60dy*(F2*fh[idx_fh_F_ord4(iF,jF+2,kF,ex)]-F24*fh[idx_fh_F_ord4(iF,jF+1,kF,ex)]-F35*fh[idx_fh_F_ord4(iF,jF,kF,ex)]+F80*fh[idx_fh_F_ord4(iF,jF-1,kF,ex)]-F30*fh[idx_fh_F_ord4(iF,jF-2,kF,ex)]+EIT*fh[idx_fh_F_ord4(iF,jF-3,kF,ex)]-fh[idx_fh_F_ord4(iF,jF-4,kF,ex)]);
else if ((j0-4)>=jminF && j0<=ex2-2)
f_rhs[p] -= sfy * d60dy*(-F10*fh[idx_fh_F_ord4(iF,jF+1,kF,ex)]-F77*fh[idx_fh_F_ord4(iF,jF,kF,ex)]+F150*fh[idx_fh_F_ord4(iF,jF-1,kF,ex)]-F100*fh[idx_fh_F_ord4(iF,jF-2,kF,ex)]+F50*fh[idx_fh_F_ord4(iF,jF-3,kF,ex)]-F15*fh[idx_fh_F_ord4(iF,jF-4,kF,ex)]+F2*fh[idx_fh_F_ord4(iF,jF-5,kF,ex)]);
else if ((j0-2)>=jminF && j0<=ex2-4)
f_rhs[p] += sfy * d60dy*(-fh[idx_fh_F_ord4(iF,jF-3,kF,ex)]+F9*fh[idx_fh_F_ord4(iF,jF-2,kF,ex)]-F45*fh[idx_fh_F_ord4(iF,jF-1,kF,ex)]+F45*fh[idx_fh_F_ord4(iF,jF+1,kF,ex)]-F9*fh[idx_fh_F_ord4(iF,jF+2,kF,ex)]+fh[idx_fh_F_ord4(iF,jF+3,kF,ex)]);
else if ((j0-1)>=jminF && j0<=ex2-3)
f_rhs[p] += sfy * d12dy*(fh[idx_fh_F_ord4(iF,jF-2,kF,ex)]-EIT*fh[idx_fh_F_ord4(iF,jF-1,kF,ex)]+EIT*fh[idx_fh_F_ord4(iF,jF+1,kF,ex)]-fh[idx_fh_F_ord4(iF,jF+2,kF,ex)]);
else if (j0>=jminF && j0<=ex2-2)
f_rhs[p] += sfy * d2dy*(-fh[idx_fh_F_ord4(iF,jF-1,kF,ex)]+fh[idx_fh_F_ord4(iF,jF+1,kF,ex)]);
}
/* ---- z-direction ---- */
const double sfz = Sfz[p];
if (sfz > ZEO) {
if (k0<=ex3-5 && (k0-1)>=kminF)
f_rhs[p] += sfz * d60dz*(F2*fh[idx_fh_F_ord4(iF,jF,kF-2,ex)]-F24*fh[idx_fh_F_ord4(iF,jF,kF-1,ex)]-F35*fh[idx_fh_F_ord4(iF,jF,kF,ex)]+F80*fh[idx_fh_F_ord4(iF,jF,kF+1,ex)]-F30*fh[idx_fh_F_ord4(iF,jF,kF+2,ex)]+EIT*fh[idx_fh_F_ord4(iF,jF,kF+3,ex)]-fh[idx_fh_F_ord4(iF,jF,kF+4,ex)]);
else if (k0<=ex3-6 && k0>=kminF)
f_rhs[p] += sfz * d60dz*(-F10*fh[idx_fh_F_ord4(iF,jF,kF-1,ex)]-F77*fh[idx_fh_F_ord4(iF,jF,kF,ex)]+F150*fh[idx_fh_F_ord4(iF,jF,kF+1,ex)]-F100*fh[idx_fh_F_ord4(iF,jF,kF+2,ex)]+F50*fh[idx_fh_F_ord4(iF,jF,kF+3,ex)]-F15*fh[idx_fh_F_ord4(iF,jF,kF+4,ex)]+F2*fh[idx_fh_F_ord4(iF,jF,kF+5,ex)]);
else if (k0<=ex3-4 && (k0-2)>=kminF)
f_rhs[p] += sfz * d60dz*(-fh[idx_fh_F_ord4(iF,jF,kF-3,ex)]+F9*fh[idx_fh_F_ord4(iF,jF,kF-2,ex)]-F45*fh[idx_fh_F_ord4(iF,jF,kF-1,ex)]+F45*fh[idx_fh_F_ord4(iF,jF,kF+1,ex)]-F9*fh[idx_fh_F_ord4(iF,jF,kF+2,ex)]+fh[idx_fh_F_ord4(iF,jF,kF+3,ex)]);
else if (k0<=ex3-3 && (k0-1)>=kminF)
f_rhs[p] += sfz * d12dz*(fh[idx_fh_F_ord4(iF,jF,kF-2,ex)]-EIT*fh[idx_fh_F_ord4(iF,jF,kF-1,ex)]+EIT*fh[idx_fh_F_ord4(iF,jF,kF+1,ex)]-fh[idx_fh_F_ord4(iF,jF,kF+2,ex)]);
else if (k0<=ex3-2 && k0>=kminF)
f_rhs[p] += sfz * d2dz*(-fh[idx_fh_F_ord4(iF,jF,kF-1,ex)]+fh[idx_fh_F_ord4(iF,jF,kF+1,ex)]);
} else if (sfz < ZEO) {
if ((k0-3)>=kminF && k0<=ex3-3)
f_rhs[p] -= sfz * d60dz*(F2*fh[idx_fh_F_ord4(iF,jF,kF+2,ex)]-F24*fh[idx_fh_F_ord4(iF,jF,kF+1,ex)]-F35*fh[idx_fh_F_ord4(iF,jF,kF,ex)]+F80*fh[idx_fh_F_ord4(iF,jF,kF-1,ex)]-F30*fh[idx_fh_F_ord4(iF,jF,kF-2,ex)]+EIT*fh[idx_fh_F_ord4(iF,jF,kF-3,ex)]-fh[idx_fh_F_ord4(iF,jF,kF-4,ex)]);
else if ((k0-4)>=kminF && k0<=ex3-2)
f_rhs[p] -= sfz * d60dz*(-F10*fh[idx_fh_F_ord4(iF,jF,kF+1,ex)]-F77*fh[idx_fh_F_ord4(iF,jF,kF,ex)]+F150*fh[idx_fh_F_ord4(iF,jF,kF-1,ex)]-F100*fh[idx_fh_F_ord4(iF,jF,kF-2,ex)]+F50*fh[idx_fh_F_ord4(iF,jF,kF-3,ex)]-F15*fh[idx_fh_F_ord4(iF,jF,kF-4,ex)]+F2*fh[idx_fh_F_ord4(iF,jF,kF-5,ex)]);
else if ((k0-2)>=kminF && k0<=ex3-4)
f_rhs[p] += sfz * d60dz*(-fh[idx_fh_F_ord4(iF,jF,kF-3,ex)]+F9*fh[idx_fh_F_ord4(iF,jF,kF-2,ex)]-F45*fh[idx_fh_F_ord4(iF,jF,kF-1,ex)]+F45*fh[idx_fh_F_ord4(iF,jF,kF+1,ex)]-F9*fh[idx_fh_F_ord4(iF,jF,kF+2,ex)]+fh[idx_fh_F_ord4(iF,jF,kF+3,ex)]);
else if ((k0-1)>=kminF && k0<=ex3-3)
f_rhs[p] += sfz * d12dz*(fh[idx_fh_F_ord4(iF,jF,kF-2,ex)]-EIT*fh[idx_fh_F_ord4(iF,jF,kF-1,ex)]+EIT*fh[idx_fh_F_ord4(iF,jF,kF+1,ex)]-fh[idx_fh_F_ord4(iF,jF,kF+2,ex)]);
else if (k0>=kminF && k0<=ex3-2)
f_rhs[p] += sfz * d2dz*(-fh[idx_fh_F_ord4(iF,jF,kF-1,ex)]+fh[idx_fh_F_ord4(iF,jF,kF+1,ex)]);
}
}
}
}
free(fh);
return;
}
#elif (ghost_width == 5)
/* ---- 8th-order lopsided --------------------------------------------- */
{
const int ord = 5;
int iminF = 1, jminF = 1, kminF = 1;
if (Symmetry > NO_SYMM && fabs(Z[0]) < dZ) kminF = -4;
if (Symmetry > EQ_SYMM && fabs(X[0]) < dX) iminF = -4;
if (Symmetry > EQ_SYMM && fabs(Y[0]) < dY) jminF = -4;
const size_t nx = (size_t)ex1 + ord;
const size_t ny = (size_t)ex2 + ord;
const size_t nz = (size_t)ex3 + ord;
const size_t fh_size = nx * ny * nz;
double *fh = (double*)malloc(fh_size * sizeof(double));
if (!fh) return;
symmetry_bd(ord, ex, f, fh, SoA);
const double d840dx = ONE / F840 / dX;
const double d840dy = ONE / F840 / dY;
const double d840dz = ONE / F840 / dZ;
const double d60dx = ONE / F60 / dX;
const double d60dy = ONE / F60 / dY;
const double d60dz = ONE / F60 / dZ;
const double d12dx = ONE / F12 / dX;
const double d12dy = ONE / F12 / dY;
const double d12dz = ONE / F12 / dZ;
const double d2dx = ONE / TWO / dX;
const double d2dy = ONE / TWO / dY;
const double d2dz = ONE / TWO / dZ;
const int imaxF = ex1, jmaxF = ex2, kmaxF = ex3;
for (int k0 = 0; k0 <= ex3 - 2; ++k0) {
const int kF = k0 + 1;
for (int j0 = 0; j0 <= ex2 - 2; ++j0) {
const int jF = j0 + 1;
for (int i0 = 0; i0 <= ex1 - 2; ++i0) {
const int iF = i0 + 1;
const size_t p = idx_ex(i0, j0, k0, ex);
const double sfx = Sfx[p];
if (sfx > ZEO) {
/* 8th biased: -5*f(i-3)+60*f(i-2)-420*f(i-1)-378*f(i)+1050*f(i+1)-420*f(i+2)+140*f(i+3)-30*f(i+4)+3*f(i+5) */
if (i0 <= ex1-6 && (i0-2)>=iminF) // i+5<=imax && i-3>=imin
f_rhs[p] += sfx * d840dx * (
-F5*fh[idx_fh_F_ord5(iF-3,jF,kF,ex)]+F60*fh[idx_fh_F_ord5(iF-2,jF,kF,ex)]
-F420*fh[idx_fh_F_ord5(iF-1,jF,kF,ex)]-F378*fh[idx_fh_F_ord5(iF,jF,kF,ex)]
+F1050*fh[idx_fh_F_ord5(iF+1,jF,kF,ex)]-F420*fh[idx_fh_F_ord5(iF+2,jF,kF,ex)]
+F140*fh[idx_fh_F_ord5(iF+3,jF,kF,ex)]-F30*fh[idx_fh_F_ord5(iF+4,jF,kF,ex)]
+F3*fh[idx_fh_F_ord5(iF+5,jF,kF,ex)]);
/* 8th centered: +3*f(i-4)-32*f(i-3)+168*f(i-2)-672*f(i-1)+672*f(i+1)-168*f(i+2)+32*f(i+3)-3*f(i+4) */
else if (i0 <= ex1-5 && (i0-3)>=iminF)
f_rhs[p] += sfx * d840dx * (
+F3*fh[idx_fh_F_ord5(iF-4,jF,kF,ex)]-F32*fh[idx_fh_F_ord5(iF-3,jF,kF,ex)]
+F168*fh[idx_fh_F_ord5(iF-2,jF,kF,ex)]-F672*fh[idx_fh_F_ord5(iF-1,jF,kF,ex)]
+F672*fh[idx_fh_F_ord5(iF+1,jF,kF,ex)]-F168*fh[idx_fh_F_ord5(iF+2,jF,kF,ex)]
+F32*fh[idx_fh_F_ord5(iF+3,jF,kF,ex)]-F3*fh[idx_fh_F_ord5(iF+4,jF,kF,ex)]);
else if (i0 <= ex1-4 && (i0-2)>=iminF) // 6th centered
f_rhs[p] += sfx * d60dx * (
-fh[idx_fh_F_ord5(iF-3,jF,kF,ex)]+F9*fh[idx_fh_F_ord5(iF-2,jF,kF,ex)]
-F45*fh[idx_fh_F_ord5(iF-1,jF,kF,ex)]+F45*fh[idx_fh_F_ord5(iF+1,jF,kF,ex)]
-F9*fh[idx_fh_F_ord5(iF+2,jF,kF,ex)]+fh[idx_fh_F_ord5(iF+3,jF,kF,ex)]);
else if (i0 <= ex1-3 && (i0-1)>=iminF) // 4th centered
f_rhs[p] += sfx * d12dx * (
fh[idx_fh_F_ord5(iF-2,jF,kF,ex)]-EIT*fh[idx_fh_F_ord5(iF-1,jF,kF,ex)]
+EIT*fh[idx_fh_F_ord5(iF+1,jF,kF,ex)]-fh[idx_fh_F_ord5(iF+2,jF,kF,ex)]);
else if (i0 <= ex1-2 && i0>=iminF) // 2nd centered
f_rhs[p] += sfx * d2dx * (
-fh[idx_fh_F_ord5(iF-1,jF,kF,ex)]+fh[idx_fh_F_ord5(iF+1,jF,kF,ex)]);
} else if (sfx < ZEO) {
if ((i0-4)>=iminF && i0<=ex1-4)
f_rhs[p] -= sfx * d840dx * (
-F5*fh[idx_fh_F_ord5(iF+3,jF,kF,ex)]+F60*fh[idx_fh_F_ord5(iF+2,jF,kF,ex)]
-F420*fh[idx_fh_F_ord5(iF+1,jF,kF,ex)]-F378*fh[idx_fh_F_ord5(iF,jF,kF,ex)]
+F1050*fh[idx_fh_F_ord5(iF-1,jF,kF,ex)]-F420*fh[idx_fh_F_ord5(iF-2,jF,kF,ex)]
+F140*fh[idx_fh_F_ord5(iF-3,jF,kF,ex)]-F30*fh[idx_fh_F_ord5(iF-4,jF,kF,ex)]
+F3*fh[idx_fh_F_ord5(iF-5,jF,kF,ex)]);
else if ((i0-3)>=iminF && i0<=ex1-5) // 8th centered
f_rhs[p] += sfx * d840dx * (
+F3*fh[idx_fh_F_ord5(iF-4,jF,kF,ex)]-F32*fh[idx_fh_F_ord5(iF-3,jF,kF,ex)]
+F168*fh[idx_fh_F_ord5(iF-2,jF,kF,ex)]-F672*fh[idx_fh_F_ord5(iF-1,jF,kF,ex)]
+F672*fh[idx_fh_F_ord5(iF+1,jF,kF,ex)]-F168*fh[idx_fh_F_ord5(iF+2,jF,kF,ex)]
+F32*fh[idx_fh_F_ord5(iF+3,jF,kF,ex)]-F3*fh[idx_fh_F_ord5(iF+4,jF,kF,ex)]);
else if ((i0-2)>=iminF && i0<=ex1-4) // 6th centered
f_rhs[p] += sfx * d60dx * (
-fh[idx_fh_F_ord5(iF-3,jF,kF,ex)]+F9*fh[idx_fh_F_ord5(iF-2,jF,kF,ex)]
-F45*fh[idx_fh_F_ord5(iF-1,jF,kF,ex)]+F45*fh[idx_fh_F_ord5(iF+1,jF,kF,ex)]
-F9*fh[idx_fh_F_ord5(iF+2,jF,kF,ex)]+fh[idx_fh_F_ord5(iF+3,jF,kF,ex)]);
else if ((i0-1)>=iminF && i0<=ex1-3) // 4th centered
f_rhs[p] += sfx * d12dx * (
fh[idx_fh_F_ord5(iF-2,jF,kF,ex)]-EIT*fh[idx_fh_F_ord5(iF-1,jF,kF,ex)]
+EIT*fh[idx_fh_F_ord5(iF+1,jF,kF,ex)]-fh[idx_fh_F_ord5(iF+2,jF,kF,ex)]);
else if (i0>=iminF && i0<=ex1-2) // 2nd centered
f_rhs[p] += sfx * d2dx * (
-fh[idx_fh_F_ord5(iF-1,jF,kF,ex)]+fh[idx_fh_F_ord5(iF+1,jF,kF,ex)]);
}
const double sfy = Sfy[p];
if (sfy > ZEO) {
if (j0<=ex2-6 && (j0-2)>=jminF)
f_rhs[p] += sfy * d840dy*(-F5*fh[idx_fh_F_ord5(iF,jF-3,kF,ex)]+F60*fh[idx_fh_F_ord5(iF,jF-2,kF,ex)]-F420*fh[idx_fh_F_ord5(iF,jF-1,kF,ex)]-F378*fh[idx_fh_F_ord5(iF,jF,kF,ex)]+F1050*fh[idx_fh_F_ord5(iF,jF+1,kF,ex)]-F420*fh[idx_fh_F_ord5(iF,jF+2,kF,ex)]+F140*fh[idx_fh_F_ord5(iF,jF+3,kF,ex)]-F30*fh[idx_fh_F_ord5(iF,jF+4,kF,ex)]+F3*fh[idx_fh_F_ord5(iF,jF+5,kF,ex)]);
else if (j0<=ex2-5 && (j0-3)>=jminF)
f_rhs[p] += sfy * d840dy*(+F3*fh[idx_fh_F_ord5(iF,jF-4,kF,ex)]-F32*fh[idx_fh_F_ord5(iF,jF-3,kF,ex)]+F168*fh[idx_fh_F_ord5(iF,jF-2,kF,ex)]-F672*fh[idx_fh_F_ord5(iF,jF-1,kF,ex)]+F672*fh[idx_fh_F_ord5(iF,jF+1,kF,ex)]-F168*fh[idx_fh_F_ord5(iF,jF+2,kF,ex)]+F32*fh[idx_fh_F_ord5(iF,jF+3,kF,ex)]-F3*fh[idx_fh_F_ord5(iF,jF+4,kF,ex)]);
else if (j0<=ex2-4 && (j0-2)>=jminF)
f_rhs[p] += sfy * d60dy*(-fh[idx_fh_F_ord5(iF,jF-3,kF,ex)]+F9*fh[idx_fh_F_ord5(iF,jF-2,kF,ex)]-F45*fh[idx_fh_F_ord5(iF,jF-1,kF,ex)]+F45*fh[idx_fh_F_ord5(iF,jF+1,kF,ex)]-F9*fh[idx_fh_F_ord5(iF,jF+2,kF,ex)]+fh[idx_fh_F_ord5(iF,jF+3,kF,ex)]);
else if (j0<=ex2-3 && (j0-1)>=jminF)
f_rhs[p] += sfy * d12dy*(fh[idx_fh_F_ord5(iF,jF-2,kF,ex)]-EIT*fh[idx_fh_F_ord5(iF,jF-1,kF,ex)]+EIT*fh[idx_fh_F_ord5(iF,jF+1,kF,ex)]-fh[idx_fh_F_ord5(iF,jF+2,kF,ex)]);
else if (j0<=ex2-2 && j0>=jminF)
f_rhs[p] += sfy * d2dy*(-fh[idx_fh_F_ord5(iF,jF-1,kF,ex)]+fh[idx_fh_F_ord5(iF,jF+1,kF,ex)]);
} else if (sfy < ZEO) {
if ((j0-4)>=jminF && j0<=ex2-4)
f_rhs[p] -= sfy * d840dy*(-F5*fh[idx_fh_F_ord5(iF,jF+3,kF,ex)]+F60*fh[idx_fh_F_ord5(iF,jF+2,kF,ex)]-F420*fh[idx_fh_F_ord5(iF,jF+1,kF,ex)]-F378*fh[idx_fh_F_ord5(iF,jF,kF,ex)]+F1050*fh[idx_fh_F_ord5(iF,jF-1,kF,ex)]-F420*fh[idx_fh_F_ord5(iF,jF-2,kF,ex)]+F140*fh[idx_fh_F_ord5(iF,jF-3,kF,ex)]-F30*fh[idx_fh_F_ord5(iF,jF-4,kF,ex)]+F3*fh[idx_fh_F_ord5(iF,jF-5,kF,ex)]);
else if ((j0-3)>=jminF && j0<=ex2-5)
f_rhs[p] += sfy * d840dy*(+F3*fh[idx_fh_F_ord5(iF,jF-4,kF,ex)]-F32*fh[idx_fh_F_ord5(iF,jF-3,kF,ex)]+F168*fh[idx_fh_F_ord5(iF,jF-2,kF,ex)]-F672*fh[idx_fh_F_ord5(iF,jF-1,kF,ex)]+F672*fh[idx_fh_F_ord5(iF,jF+1,kF,ex)]-F168*fh[idx_fh_F_ord5(iF,jF+2,kF,ex)]+F32*fh[idx_fh_F_ord5(iF,jF+3,kF,ex)]-F3*fh[idx_fh_F_ord5(iF,jF+4,kF,ex)]);
else if ((j0-2)>=jminF && j0<=ex2-4)
f_rhs[p] += sfy * d60dy*(-fh[idx_fh_F_ord5(iF,jF-3,kF,ex)]+F9*fh[idx_fh_F_ord5(iF,jF-2,kF,ex)]-F45*fh[idx_fh_F_ord5(iF,jF-1,kF,ex)]+F45*fh[idx_fh_F_ord5(iF,jF+1,kF,ex)]-F9*fh[idx_fh_F_ord5(iF,jF+2,kF,ex)]+fh[idx_fh_F_ord5(iF,jF+3,kF,ex)]);
else if ((j0-1)>=jminF && j0<=ex2-3)
f_rhs[p] += sfy * d12dy*(fh[idx_fh_F_ord5(iF,jF-2,kF,ex)]-EIT*fh[idx_fh_F_ord5(iF,jF-1,kF,ex)]+EIT*fh[idx_fh_F_ord5(iF,jF+1,kF,ex)]-fh[idx_fh_F_ord5(iF,jF+2,kF,ex)]);
else if (j0>=jminF && j0<=ex2-2)
f_rhs[p] += sfy * d2dy*(-fh[idx_fh_F_ord5(iF,jF-1,kF,ex)]+fh[idx_fh_F_ord5(iF,jF+1,kF,ex)]);
}
const double sfz = Sfz[p];
if (sfz > ZEO) {
if (k0<=ex3-6 && (k0-2)>=kminF)
f_rhs[p] += sfz * d840dz*(-F5*fh[idx_fh_F_ord5(iF,jF,kF-3,ex)]+F60*fh[idx_fh_F_ord5(iF,jF,kF-2,ex)]-F420*fh[idx_fh_F_ord5(iF,jF,kF-1,ex)]-F378*fh[idx_fh_F_ord5(iF,jF,kF,ex)]+F1050*fh[idx_fh_F_ord5(iF,jF,kF+1,ex)]-F420*fh[idx_fh_F_ord5(iF,jF,kF+2,ex)]+F140*fh[idx_fh_F_ord5(iF,jF,kF+3,ex)]-F30*fh[idx_fh_F_ord5(iF,jF,kF+4,ex)]+F3*fh[idx_fh_F_ord5(iF,jF,kF+5,ex)]);
else if (k0<=ex3-5 && (k0-3)>=kminF)
f_rhs[p] += sfz * d840dz*(+F3*fh[idx_fh_F_ord5(iF,jF,kF-4,ex)]-F32*fh[idx_fh_F_ord5(iF,jF,kF-3,ex)]+F168*fh[idx_fh_F_ord5(iF,jF,kF-2,ex)]-F672*fh[idx_fh_F_ord5(iF,jF,kF-1,ex)]+F672*fh[idx_fh_F_ord5(iF,jF,kF+1,ex)]-F168*fh[idx_fh_F_ord5(iF,jF,kF+2,ex)]+F32*fh[idx_fh_F_ord5(iF,jF,kF+3,ex)]-F3*fh[idx_fh_F_ord5(iF,jF,kF+4,ex)]);
else if (k0<=ex3-4 && (k0-2)>=kminF)
f_rhs[p] += sfz * d60dz*(-fh[idx_fh_F_ord5(iF,jF,kF-3,ex)]+F9*fh[idx_fh_F_ord5(iF,jF,kF-2,ex)]-F45*fh[idx_fh_F_ord5(iF,jF,kF-1,ex)]+F45*fh[idx_fh_F_ord5(iF,jF,kF+1,ex)]-F9*fh[idx_fh_F_ord5(iF,jF,kF+2,ex)]+fh[idx_fh_F_ord5(iF,jF,kF+3,ex)]);
else if (k0<=ex3-3 && (k0-1)>=kminF)
f_rhs[p] += sfz * d12dz*(fh[idx_fh_F_ord5(iF,jF,kF-2,ex)]-EIT*fh[idx_fh_F_ord5(iF,jF,kF-1,ex)]+EIT*fh[idx_fh_F_ord5(iF,jF,kF+1,ex)]-fh[idx_fh_F_ord5(iF,jF,kF+2,ex)]);
else if (k0<=ex3-2 && k0>=kminF)
f_rhs[p] += sfz * d2dz*(-fh[idx_fh_F_ord5(iF,jF,kF-1,ex)]+fh[idx_fh_F_ord5(iF,jF,kF+1,ex)]);
} else if (sfz < ZEO) {
if ((k0-4)>=kminF && k0<=ex3-4)
f_rhs[p] -= sfz * d840dz*(-F5*fh[idx_fh_F_ord5(iF,jF,kF+3,ex)]+F60*fh[idx_fh_F_ord5(iF,jF,kF+2,ex)]-F420*fh[idx_fh_F_ord5(iF,jF,kF+1,ex)]-F378*fh[idx_fh_F_ord5(iF,jF,kF,ex)]+F1050*fh[idx_fh_F_ord5(iF,jF,kF-1,ex)]-F420*fh[idx_fh_F_ord5(iF,jF,kF-2,ex)]+F140*fh[idx_fh_F_ord5(iF,jF,kF-3,ex)]-F30*fh[idx_fh_F_ord5(iF,jF,kF-4,ex)]+F3*fh[idx_fh_F_ord5(iF,jF,kF-5,ex)]);
else if ((k0-3)>=kminF && k0<=ex3-5)
f_rhs[p] += sfz * d840dz*(+F3*fh[idx_fh_F_ord5(iF,jF,kF-4,ex)]-F32*fh[idx_fh_F_ord5(iF,jF,kF-3,ex)]+F168*fh[idx_fh_F_ord5(iF,jF,kF-2,ex)]-F672*fh[idx_fh_F_ord5(iF,jF,kF-1,ex)]+F672*fh[idx_fh_F_ord5(iF,jF,kF+1,ex)]-F168*fh[idx_fh_F_ord5(iF,jF,kF+2,ex)]+F32*fh[idx_fh_F_ord5(iF,jF,kF+3,ex)]-F3*fh[idx_fh_F_ord5(iF,jF,kF+4,ex)]);
else if ((k0-2)>=kminF && k0<=ex3-4)
f_rhs[p] += sfz * d60dz*(-fh[idx_fh_F_ord5(iF,jF,kF-3,ex)]+F9*fh[idx_fh_F_ord5(iF,jF,kF-2,ex)]-F45*fh[idx_fh_F_ord5(iF,jF,kF-1,ex)]+F45*fh[idx_fh_F_ord5(iF,jF,kF+1,ex)]-F9*fh[idx_fh_F_ord5(iF,jF,kF+2,ex)]+fh[idx_fh_F_ord5(iF,jF,kF+3,ex)]);
else if ((k0-1)>=kminF && k0<=ex3-3)
f_rhs[p] += sfz * d12dz*(fh[idx_fh_F_ord5(iF,jF,kF-2,ex)]-EIT*fh[idx_fh_F_ord5(iF,jF,kF-1,ex)]+EIT*fh[idx_fh_F_ord5(iF,jF,kF+1,ex)]-fh[idx_fh_F_ord5(iF,jF,kF+2,ex)]);
else if (k0>=kminF && k0<=ex3-2)
f_rhs[p] += sfz * d2dz*(-fh[idx_fh_F_ord5(iF,jF,kF-1,ex)]+fh[idx_fh_F_ord5(iF,jF,kF+1,ex)]);
}
}
}
}
free(fh);
return;
}
#else
#error "lopsided_c.C: unsupported ghost_width (must be 2, 3, 4, or 5)"
#endif
free(fh);
}

View File

@@ -1,17 +1,8 @@
#include "macrodef.h"
#include "tool.h"
/*
* C 版 lopsided_kodis — combined upwind advection + KO dissipation.
* Uses one shared symmetry_bd buffer (ord = ghost_width for both components)
* where a stable merged stencil is available. The 8th-order path delegates to
* the separate lopsided + kodis kernels, matching the original Fortran flow.
*
* FD order selection via ghost_width:
* 2 → 2nd-order advection + r=2 KO (cof=16, sign=-)
* 3 → 4th-order advection + r=3 KO (cof=64, sign=+)
* 4 → 6th-order advection + r=4 KO (cof=256, sign=-)
* 5 → 8th-order advection + r=5 KO (cof=1024, sign=+)
* Combined advection (lopsided) + KO dissipation (kodis).
* Uses one shared symmetry_bd buffer per call.
*/
void lopsided_kodis(const int ex[3],
const double *X, const double *Y, const double *Z,
@@ -19,286 +10,239 @@ void lopsided_kodis(const int ex[3],
const double *Sfx, const double *Sfy, const double *Sfz,
int Symmetry, const double SoA[3], double eps)
{
const double ZEO = 0.0, ONE = 1.0;
const double TWO = 2.0, F6 = 6.0, EIT = 8.0;
const double F3 = 3.0, F4 = 4.0, F5 = 5.0, F10 = 10.0, F12 = 12.0, F18 = 18.0;
const double F9 = 9.0, F45 = 45.0, F60 = 60.0;
const double F2 = 2.0, F15 = 15.0, F24 = 24.0, F30 = 30.0, F35 = 35.0;
const double F50 = 50.0, F77 = 77.0, F80 = 80.0, F100 = 100.0, F150 = 150.0;
const double F32 = 32.0, F168 = 168.0, F672 = 672.0, F840 = 840.0;
const double F140=140.0, F378=378.0, F420=420.0, F1050=1050.0;
const double ZEO = 0.0, ONE = 1.0, F3 = 3.0;
const double F6 = 6.0, F18 = 18.0;
const double F12 = 12.0, F10 = 10.0, EIT = 8.0;
const double SIX = 6.0, FIT = 15.0, TWT = 20.0;
const double cof = 64.0; // 2^6
const int NO_SYMM = 0, EQ_SYMM = 1;
const int ex1 = ex[0], ex2 = ex[1], ex3 = ex[2];
const double dX = X[1] - X[0];
const double dY = Y[1] - Y[0];
const double dZ = Z[1] - Z[0];
const int imaxF = ex1, jmaxF = ex2, kmaxF = ex3;
const double d12dx = ONE / F12 / dX;
const double d12dy = ONE / F12 / dY;
const double d12dz = ONE / F12 / dZ;
#if (ghost_width == 2)
{
const int ord = 2;
int iminF = 1, jminF = 1, kminF = 1;
if (Symmetry > NO_SYMM && fabs(Z[0]) < dZ) kminF = -1;
if (Symmetry > EQ_SYMM && fabs(X[0]) < dX) iminF = -1;
if (Symmetry > EQ_SYMM && fabs(Y[0]) < dY) jminF = -1;
const int imaxF = ex1;
const int jmaxF = ex2;
const int kmaxF = ex3;
const size_t nx = (size_t)ex1 + ord;
const size_t ny = (size_t)ex2 + ord;
const size_t nz = (size_t)ex3 + ord;
double *fh = (double*)malloc(nx*ny*nz*sizeof(double));
if (!fh) return;
symmetry_bd(ord, ex, f, fh, SoA);
int iminF = 1, jminF = 1, kminF = 1;
if (Symmetry > NO_SYMM && fabs(Z[0]) < dZ) kminF = -2;
if (Symmetry > EQ_SYMM && fabs(X[0]) < dX) iminF = -2;
if (Symmetry > EQ_SYMM && fabs(Y[0]) < dY) jminF = -2;
const double d2dx = ONE/TWO/dX, d2dy = ONE/TWO/dY, d2dz = ONE/TWO/dZ;
// fh for Fortran-style domain (-2:ex1,-2:ex2,-2:ex3)
const size_t nx = (size_t)ex1 + 3;
const size_t ny = (size_t)ex2 + 3;
const size_t nz = (size_t)ex3 + 3;
const size_t fh_size = nx * ny * nz;
/* ---- advection (2nd-order) ---- */
for (int k0 = 0; k0 <= ex3-2; ++k0) {
const int kF = k0+1;
for (int j0 = 0; j0 <= ex2-2; ++j0) {
const int jF = j0+1;
for (int i0 = 0; i0 <= ex1-2; ++i0) {
const int iF = i0+1;
const size_t p = idx_ex(i0,j0,k0,ex);
double *fh = (double*)malloc(fh_size * sizeof(double));
if (!fh) return;
const double sfx = Sfx[p];
if (sfx > ZEO) {
if (i0<=ex1-3) f_rhs[p] += sfx*d2dx*(-F3*fh[idx_fh_F_ord2(iF,jF,kF,ex)]+F4*fh[idx_fh_F_ord2(iF+1,jF,kF,ex)]-fh[idx_fh_F_ord2(iF+2,jF,kF,ex)]);
else if (i0<=ex1-2) f_rhs[p] += sfx*d2dx*(-fh[idx_fh_F_ord2(iF,jF,kF,ex)]+fh[idx_fh_F_ord2(iF+1,jF,kF,ex)]);
} else if (sfx < ZEO) {
if ((i0-1)>=iminF) f_rhs[p] -= sfx*d2dx*(-F3*fh[idx_fh_F_ord2(iF,jF,kF,ex)]+F4*fh[idx_fh_F_ord2(iF-1,jF,kF,ex)]-fh[idx_fh_F_ord2(iF-2,jF,kF,ex)]);
else if (i0>=iminF) f_rhs[p] -= sfx*d2dx*(-fh[idx_fh_F_ord2(iF,jF,kF,ex)]+fh[idx_fh_F_ord2(iF-1,jF,kF,ex)]);
symmetry_bd(3, ex, f, fh, SoA);
// Advection (same stencil logic as lopsided_c.C)
for (int k0 = 0; k0 <= ex3 - 2; ++k0) {
const int kF = k0 + 1;
for (int j0 = 0; j0 <= ex2 - 2; ++j0) {
const int jF = j0 + 1;
for (int i0 = 0; i0 <= ex1 - 2; ++i0) {
const int iF = i0 + 1;
const size_t p = idx_ex(i0, j0, k0, ex);
const double sfx = Sfx[p];
if (sfx > ZEO) {
if (i0 <= ex1 - 4) {
f_rhs[p] += sfx * d12dx *
(-F3 * fh[idx_fh_F(iF - 1, jF, kF, ex)]
-F10 * fh[idx_fh_F(iF , jF, kF, ex)]
+F18 * fh[idx_fh_F(iF + 1, jF, kF, ex)]
-F6 * fh[idx_fh_F(iF + 2, jF, kF, ex)]
+ fh[idx_fh_F(iF + 3, jF, kF, ex)]);
} else if (i0 <= ex1 - 3) {
f_rhs[p] += sfx * d12dx *
( fh[idx_fh_F(iF - 2, jF, kF, ex)]
-EIT * fh[idx_fh_F(iF - 1, jF, kF, ex)]
+EIT * fh[idx_fh_F(iF + 1, jF, kF, ex)]
- fh[idx_fh_F(iF + 2, jF, kF, ex)]);
} else if (i0 <= ex1 - 2) {
f_rhs[p] -= sfx * d12dx *
(-F3 * fh[idx_fh_F(iF + 1, jF, kF, ex)]
-F10 * fh[idx_fh_F(iF , jF, kF, ex)]
+F18 * fh[idx_fh_F(iF - 1, jF, kF, ex)]
-F6 * fh[idx_fh_F(iF - 2, jF, kF, ex)]
+ fh[idx_fh_F(iF - 3, jF, kF, ex)]);
}
const double sfy = Sfy[p];
if (sfy > ZEO) {
if (j0<=ex2-3) f_rhs[p] += sfy*d2dy*(-F3*fh[idx_fh_F_ord2(iF,jF,kF,ex)]+F4*fh[idx_fh_F_ord2(iF,jF+1,kF,ex)]-fh[idx_fh_F_ord2(iF,jF+2,kF,ex)]);
else if (j0<=ex2-2) f_rhs[p] += sfy*d2dy*(-fh[idx_fh_F_ord2(iF,jF,kF,ex)]+fh[idx_fh_F_ord2(iF,jF+1,kF,ex)]);
} else if (sfy < ZEO) {
if ((j0-1)>=jminF) f_rhs[p] -= sfy*d2dy*(-F3*fh[idx_fh_F_ord2(iF,jF,kF,ex)]+F4*fh[idx_fh_F_ord2(iF,jF-1,kF,ex)]-fh[idx_fh_F_ord2(iF,jF-2,kF,ex)]);
else if (j0>=jminF) f_rhs[p] -= sfy*d2dy*(-fh[idx_fh_F_ord2(iF,jF,kF,ex)]+fh[idx_fh_F_ord2(iF,jF-1,kF,ex)]);
} else if (sfx < ZEO) {
if ((i0 - 2) >= iminF) {
f_rhs[p] -= sfx * d12dx *
(-F3 * fh[idx_fh_F(iF + 1, jF, kF, ex)]
-F10 * fh[idx_fh_F(iF , jF, kF, ex)]
+F18 * fh[idx_fh_F(iF - 1, jF, kF, ex)]
-F6 * fh[idx_fh_F(iF - 2, jF, kF, ex)]
+ fh[idx_fh_F(iF - 3, jF, kF, ex)]);
} else if ((i0 - 1) >= iminF) {
f_rhs[p] += sfx * d12dx *
( fh[idx_fh_F(iF - 2, jF, kF, ex)]
-EIT * fh[idx_fh_F(iF - 1, jF, kF, ex)]
+EIT * fh[idx_fh_F(iF + 1, jF, kF, ex)]
- fh[idx_fh_F(iF + 2, jF, kF, ex)]);
} else if (i0 >= iminF) {
f_rhs[p] += sfx * d12dx *
(-F3 * fh[idx_fh_F(iF - 1, jF, kF, ex)]
-F10 * fh[idx_fh_F(iF , jF, kF, ex)]
+F18 * fh[idx_fh_F(iF + 1, jF, kF, ex)]
-F6 * fh[idx_fh_F(iF + 2, jF, kF, ex)]
+ fh[idx_fh_F(iF + 3, jF, kF, ex)]);
}
const double sfz = Sfz[p];
if (sfz > ZEO) {
if (k0<=ex3-3) f_rhs[p] += sfz*d2dz*(-F3*fh[idx_fh_F_ord2(iF,jF,kF,ex)]+F4*fh[idx_fh_F_ord2(iF,jF,kF+1,ex)]-fh[idx_fh_F_ord2(iF,jF,kF+2,ex)]);
else if (k0<=ex3-2) f_rhs[p] += sfz*d2dz*(-fh[idx_fh_F_ord2(iF,jF,kF,ex)]+fh[idx_fh_F_ord2(iF,jF,kF+1,ex)]);
} else if (sfz < ZEO) {
if ((k0-1)>=kminF) f_rhs[p] -= sfz*d2dz*(-F3*fh[idx_fh_F_ord2(iF,jF,kF,ex)]+F4*fh[idx_fh_F_ord2(iF,jF,kF-1,ex)]-fh[idx_fh_F_ord2(iF,jF,kF-2,ex)]);
else if (k0>=kminF) f_rhs[p] -= sfz*d2dz*(-fh[idx_fh_F_ord2(iF,jF,kF,ex)]+fh[idx_fh_F_ord2(iF,jF,kF-1,ex)]);
}
const double sfy = Sfy[p];
if (sfy > ZEO) {
if (j0 <= ex2 - 4) {
f_rhs[p] += sfy * d12dy *
(-F3 * fh[idx_fh_F(iF, jF - 1, kF, ex)]
-F10 * fh[idx_fh_F(iF, jF , kF, ex)]
+F18 * fh[idx_fh_F(iF, jF + 1, kF, ex)]
-F6 * fh[idx_fh_F(iF, jF + 2, kF, ex)]
+ fh[idx_fh_F(iF, jF + 3, kF, ex)]);
} else if (j0 <= ex2 - 3) {
f_rhs[p] += sfy * d12dy *
( fh[idx_fh_F(iF, jF - 2, kF, ex)]
-EIT * fh[idx_fh_F(iF, jF - 1, kF, ex)]
+EIT * fh[idx_fh_F(iF, jF + 1, kF, ex)]
- fh[idx_fh_F(iF, jF + 2, kF, ex)]);
} else if (j0 <= ex2 - 2) {
f_rhs[p] -= sfy * d12dy *
(-F3 * fh[idx_fh_F(iF, jF + 1, kF, ex)]
-F10 * fh[idx_fh_F(iF, jF , kF, ex)]
+F18 * fh[idx_fh_F(iF, jF - 1, kF, ex)]
-F6 * fh[idx_fh_F(iF, jF - 2, kF, ex)]
+ fh[idx_fh_F(iF, jF - 3, kF, ex)]);
}
} else if (sfy < ZEO) {
if ((j0 - 2) >= jminF) {
f_rhs[p] -= sfy * d12dy *
(-F3 * fh[idx_fh_F(iF, jF + 1, kF, ex)]
-F10 * fh[idx_fh_F(iF, jF , kF, ex)]
+F18 * fh[idx_fh_F(iF, jF - 1, kF, ex)]
-F6 * fh[idx_fh_F(iF, jF - 2, kF, ex)]
+ fh[idx_fh_F(iF, jF - 3, kF, ex)]);
} else if ((j0 - 1) >= jminF) {
f_rhs[p] += sfy * d12dy *
( fh[idx_fh_F(iF, jF - 2, kF, ex)]
-EIT * fh[idx_fh_F(iF, jF - 1, kF, ex)]
+EIT * fh[idx_fh_F(iF, jF + 1, kF, ex)]
- fh[idx_fh_F(iF, jF + 2, kF, ex)]);
} else if (j0 >= jminF) {
f_rhs[p] += sfy * d12dy *
(-F3 * fh[idx_fh_F(iF, jF - 1, kF, ex)]
-F10 * fh[idx_fh_F(iF, jF , kF, ex)]
+F18 * fh[idx_fh_F(iF, jF + 1, kF, ex)]
-F6 * fh[idx_fh_F(iF, jF + 2, kF, ex)]
+ fh[idx_fh_F(iF, jF + 3, kF, ex)]);
}
}
const double sfz = Sfz[p];
if (sfz > ZEO) {
if (k0 <= ex3 - 4) {
f_rhs[p] += sfz * d12dz *
(-F3 * fh[idx_fh_F(iF, jF, kF - 1, ex)]
-F10 * fh[idx_fh_F(iF, jF, kF , ex)]
+F18 * fh[idx_fh_F(iF, jF, kF + 1, ex)]
-F6 * fh[idx_fh_F(iF, jF, kF + 2, ex)]
+ fh[idx_fh_F(iF, jF, kF + 3, ex)]);
} else if (k0 <= ex3 - 3) {
f_rhs[p] += sfz * d12dz *
( fh[idx_fh_F(iF, jF, kF - 2, ex)]
-EIT * fh[idx_fh_F(iF, jF, kF - 1, ex)]
+EIT * fh[idx_fh_F(iF, jF, kF + 1, ex)]
- fh[idx_fh_F(iF, jF, kF + 2, ex)]);
} else if (k0 <= ex3 - 2) {
f_rhs[p] -= sfz * d12dz *
(-F3 * fh[idx_fh_F(iF, jF, kF + 1, ex)]
-F10 * fh[idx_fh_F(iF, jF, kF , ex)]
+F18 * fh[idx_fh_F(iF, jF, kF - 1, ex)]
-F6 * fh[idx_fh_F(iF, jF, kF - 2, ex)]
+ fh[idx_fh_F(iF, jF, kF - 3, ex)]);
}
} else if (sfz < ZEO) {
if ((k0 - 2) >= kminF) {
f_rhs[p] -= sfz * d12dz *
(-F3 * fh[idx_fh_F(iF, jF, kF + 1, ex)]
-F10 * fh[idx_fh_F(iF, jF, kF , ex)]
+F18 * fh[idx_fh_F(iF, jF, kF - 1, ex)]
-F6 * fh[idx_fh_F(iF, jF, kF - 2, ex)]
+ fh[idx_fh_F(iF, jF, kF - 3, ex)]);
} else if ((k0 - 1) >= kminF) {
f_rhs[p] += sfz * d12dz *
( fh[idx_fh_F(iF, jF, kF - 2, ex)]
-EIT * fh[idx_fh_F(iF, jF, kF - 1, ex)]
+EIT * fh[idx_fh_F(iF, jF, kF + 1, ex)]
- fh[idx_fh_F(iF, jF, kF + 2, ex)]);
} else if (k0 >= kminF) {
f_rhs[p] += sfz * d12dz *
(-F3 * fh[idx_fh_F(iF, jF, kF - 1, ex)]
-F10 * fh[idx_fh_F(iF, jF, kF , ex)]
+F18 * fh[idx_fh_F(iF, jF, kF + 1, ex)]
-F6 * fh[idx_fh_F(iF, jF, kF + 2, ex)]
+ fh[idx_fh_F(iF, jF, kF + 3, ex)]);
}
}
}
}
/* ---- KO dissipation (r=2, cof=16, sign=-) ---- */
if (eps > ZEO) {
const double cof = 16.0;
const double F4k = 4.0, F6k = 6.0;
const int i0_lo = (iminF+1>0)?iminF+1:0, j0_lo=(jminF+1>0)?jminF+1:0, k0_lo=(kminF+1>0)?kminF+1:0;
const int i0_hi=imaxF-3, j0_hi=jmaxF-3, k0_hi=kmaxF-3;
if (!(i0_lo>i0_hi||j0_lo>j0_hi||k0_lo>k0_hi)) {
for (int k0=k0_lo;k0<=k0_hi;++k0) { const int kF=k0+1;
for (int j0=j0_lo;j0<=j0_hi;++j0) { const int jF=j0+1;
for (int i0=i0_lo;i0<=i0_hi;++i0) { const int iF=i0+1;
const size_t p=idx_ex(i0,j0,k0,ex);
const double Dx=((fh[idx_fh_F_ord2(iF-2,jF,kF,ex)]+fh[idx_fh_F_ord2(iF+2,jF,kF,ex)])-F4k*(fh[idx_fh_F_ord2(iF-1,jF,kF,ex)]+fh[idx_fh_F_ord2(iF+1,jF,kF,ex)])+F6k*fh[idx_fh_F_ord2(iF,jF,kF,ex)])/dX;
const double Dy=((fh[idx_fh_F_ord2(iF,jF-2,kF,ex)]+fh[idx_fh_F_ord2(iF,jF+2,kF,ex)])-F4k*(fh[idx_fh_F_ord2(iF,jF-1,kF,ex)]+fh[idx_fh_F_ord2(iF,jF+1,kF,ex)])+F6k*fh[idx_fh_F_ord2(iF,jF,kF,ex)])/dY;
const double Dz=((fh[idx_fh_F_ord2(iF,jF,kF-2,ex)]+fh[idx_fh_F_ord2(iF,jF,kF+2,ex)])-F4k*(fh[idx_fh_F_ord2(iF,jF,kF-1,ex)]+fh[idx_fh_F_ord2(iF,jF,kF+1,ex)])+F6k*fh[idx_fh_F_ord2(iF,jF,kF,ex)])/dZ;
f_rhs[p] -= (eps/cof)*(Dx+Dy+Dz);
}}}
}
}
free(fh);
return;
}
#elif (ghost_width == 3)
/* ---- 4th-order advection + r=3 KO (original code) ----------------- */
{
const int ord = 3;
int iminF = 1, jminF = 1, kminF = 1;
if (Symmetry > NO_SYMM && fabs(Z[0]) < dZ) kminF = -2;
if (Symmetry > EQ_SYMM && fabs(X[0]) < dX) iminF = -2;
if (Symmetry > EQ_SYMM && fabs(Y[0]) < dY) jminF = -2;
const size_t nx = (size_t)ex1 + ord;
const size_t ny = (size_t)ex2 + ord;
const size_t nz = (size_t)ex3 + ord;
double *fh = (double*)malloc(nx*ny*nz*sizeof(double));
if (!fh) return;
symmetry_bd(ord, ex, f, fh, SoA);
// KO dissipation (same domain restriction as kodiss_c.C)
if (eps > ZEO) {
const int i0_lo = (iminF + 2 > 0) ? iminF + 2 : 0;
const int j0_lo = (jminF + 2 > 0) ? jminF + 2 : 0;
const int k0_lo = (kminF + 2 > 0) ? kminF + 2 : 0;
const int i0_hi = imaxF - 4; // inclusive
const int j0_hi = jmaxF - 4;
const int k0_hi = kmaxF - 4;
const double d12dx = ONE/F12/dX, d12dy = ONE/F12/dY, d12dz = ONE/F12/dZ;
if (!(i0_lo > i0_hi || j0_lo > j0_hi || k0_lo > k0_hi)) {
for (int k0 = k0_lo; k0 <= k0_hi; ++k0) {
const int kF = k0 + 1;
for (int j0 = j0_lo; j0 <= j0_hi; ++j0) {
const int jF = j0 + 1;
for (int i0 = i0_lo; i0 <= i0_hi; ++i0) {
const int iF = i0 + 1;
const size_t p = idx_ex(i0, j0, k0, ex);
/* ---- advection ---- */
for (int k0 = 0; k0 <= ex3-2; ++k0) {
const int kF = k0+1;
for (int j0 = 0; j0 <= ex2-2; ++j0) {
const int jF = j0+1;
for (int i0 = 0; i0 <= ex1-2; ++i0) {
const int iF = i0+1;
const size_t p = idx_ex(i0,j0,k0,ex);
const double Dx_term =
((fh[idx_fh_F(iF - 3, jF, kF, ex)] + fh[idx_fh_F(iF + 3, jF, kF, ex)]) -
SIX * (fh[idx_fh_F(iF - 2, jF, kF, ex)] + fh[idx_fh_F(iF + 2, jF, kF, ex)]) +
FIT * (fh[idx_fh_F(iF - 1, jF, kF, ex)] + fh[idx_fh_F(iF + 1, jF, kF, ex)]) -
TWT * fh[idx_fh_F(iF, jF, kF, ex)]) / dX;
const double sfx = Sfx[p];
if (sfx > ZEO) {
if (i0 <= ex1-4)
f_rhs[p] += sfx*d12dx*(-F3*fh[idx_fh_F(iF-1,jF,kF,ex)]-F10*fh[idx_fh_F(iF,jF,kF,ex)]+F18*fh[idx_fh_F(iF+1,jF,kF,ex)]-F6*fh[idx_fh_F(iF+2,jF,kF,ex)]+fh[idx_fh_F(iF+3,jF,kF,ex)]);
else if (i0 <= ex1-3)
f_rhs[p] += sfx*d12dx*(fh[idx_fh_F(iF-2,jF,kF,ex)]-EIT*fh[idx_fh_F(iF-1,jF,kF,ex)]+EIT*fh[idx_fh_F(iF+1,jF,kF,ex)]-fh[idx_fh_F(iF+2,jF,kF,ex)]);
else if (i0 <= ex1-2)
f_rhs[p] -= sfx*d12dx*(-F3*fh[idx_fh_F(iF+1,jF,kF,ex)]-F10*fh[idx_fh_F(iF,jF,kF,ex)]+F18*fh[idx_fh_F(iF-1,jF,kF,ex)]-F6*fh[idx_fh_F(iF-2,jF,kF,ex)]+fh[idx_fh_F(iF-3,jF,kF,ex)]);
} else if (sfx < ZEO) {
if ((i0-2) >= iminF)
f_rhs[p] -= sfx*d12dx*(-F3*fh[idx_fh_F(iF+1,jF,kF,ex)]-F10*fh[idx_fh_F(iF,jF,kF,ex)]+F18*fh[idx_fh_F(iF-1,jF,kF,ex)]-F6*fh[idx_fh_F(iF-2,jF,kF,ex)]+fh[idx_fh_F(iF-3,jF,kF,ex)]);
else if ((i0-1) >= iminF)
f_rhs[p] += sfx*d12dx*(fh[idx_fh_F(iF-2,jF,kF,ex)]-EIT*fh[idx_fh_F(iF-1,jF,kF,ex)]+EIT*fh[idx_fh_F(iF+1,jF,kF,ex)]-fh[idx_fh_F(iF+2,jF,kF,ex)]);
else if (i0 >= iminF)
f_rhs[p] += sfx*d12dx*(-F3*fh[idx_fh_F(iF-1,jF,kF,ex)]-F10*fh[idx_fh_F(iF,jF,kF,ex)]+F18*fh[idx_fh_F(iF+1,jF,kF,ex)]-F6*fh[idx_fh_F(iF+2,jF,kF,ex)]+fh[idx_fh_F(iF+3,jF,kF,ex)]);
}
const double sfy = Sfy[p];
if (sfy > ZEO) {
if (j0<=ex2-4) f_rhs[p] += sfy*d12dy*(-F3*fh[idx_fh_F(iF,jF-1,kF,ex)]-F10*fh[idx_fh_F(iF,jF,kF,ex)]+F18*fh[idx_fh_F(iF,jF+1,kF,ex)]-F6*fh[idx_fh_F(iF,jF+2,kF,ex)]+fh[idx_fh_F(iF,jF+3,kF,ex)]);
else if (j0<=ex2-3) f_rhs[p] += sfy*d12dy*(fh[idx_fh_F(iF,jF-2,kF,ex)]-EIT*fh[idx_fh_F(iF,jF-1,kF,ex)]+EIT*fh[idx_fh_F(iF,jF+1,kF,ex)]-fh[idx_fh_F(iF,jF+2,kF,ex)]);
else if (j0<=ex2-2) f_rhs[p] -= sfy*d12dy*(-F3*fh[idx_fh_F(iF,jF+1,kF,ex)]-F10*fh[idx_fh_F(iF,jF,kF,ex)]+F18*fh[idx_fh_F(iF,jF-1,kF,ex)]-F6*fh[idx_fh_F(iF,jF-2,kF,ex)]+fh[idx_fh_F(iF,jF-3,kF,ex)]);
} else if (sfy < ZEO) {
if ((j0-2)>=jminF) f_rhs[p] -= sfy*d12dy*(-F3*fh[idx_fh_F(iF,jF+1,kF,ex)]-F10*fh[idx_fh_F(iF,jF,kF,ex)]+F18*fh[idx_fh_F(iF,jF-1,kF,ex)]-F6*fh[idx_fh_F(iF,jF-2,kF,ex)]+fh[idx_fh_F(iF,jF-3,kF,ex)]);
else if ((j0-1)>=jminF) f_rhs[p] += sfy*d12dy*(fh[idx_fh_F(iF,jF-2,kF,ex)]-EIT*fh[idx_fh_F(iF,jF-1,kF,ex)]+EIT*fh[idx_fh_F(iF,jF+1,kF,ex)]-fh[idx_fh_F(iF,jF+2,kF,ex)]);
else if (j0>=jminF) f_rhs[p] += sfy*d12dy*(-F3*fh[idx_fh_F(iF,jF-1,kF,ex)]-F10*fh[idx_fh_F(iF,jF,kF,ex)]+F18*fh[idx_fh_F(iF,jF+1,kF,ex)]-F6*fh[idx_fh_F(iF,jF+2,kF,ex)]+fh[idx_fh_F(iF,jF+3,kF,ex)]);
}
const double sfz = Sfz[p];
if (sfz > ZEO) {
if (k0<=ex3-4) f_rhs[p] += sfz*d12dz*(-F3*fh[idx_fh_F(iF,jF,kF-1,ex)]-F10*fh[idx_fh_F(iF,jF,kF,ex)]+F18*fh[idx_fh_F(iF,jF,kF+1,ex)]-F6*fh[idx_fh_F(iF,jF,kF+2,ex)]+fh[idx_fh_F(iF,jF,kF+3,ex)]);
else if (k0<=ex3-3) f_rhs[p] += sfz*d12dz*(fh[idx_fh_F(iF,jF,kF-2,ex)]-EIT*fh[idx_fh_F(iF,jF,kF-1,ex)]+EIT*fh[idx_fh_F(iF,jF,kF+1,ex)]-fh[idx_fh_F(iF,jF,kF+2,ex)]);
else if (k0<=ex3-2) f_rhs[p] -= sfz*d12dz*(-F3*fh[idx_fh_F(iF,jF,kF+1,ex)]-F10*fh[idx_fh_F(iF,jF,kF,ex)]+F18*fh[idx_fh_F(iF,jF,kF-1,ex)]-F6*fh[idx_fh_F(iF,jF,kF-2,ex)]+fh[idx_fh_F(iF,jF,kF-3,ex)]);
} else if (sfz < ZEO) {
if ((k0-2)>=kminF) f_rhs[p] -= sfz*d12dz*(-F3*fh[idx_fh_F(iF,jF,kF+1,ex)]-F10*fh[idx_fh_F(iF,jF,kF,ex)]+F18*fh[idx_fh_F(iF,jF,kF-1,ex)]-F6*fh[idx_fh_F(iF,jF,kF-2,ex)]+fh[idx_fh_F(iF,jF,kF-3,ex)]);
else if ((k0-1)>=kminF) f_rhs[p] += sfz*d12dz*(fh[idx_fh_F(iF,jF,kF-2,ex)]-EIT*fh[idx_fh_F(iF,jF,kF-1,ex)]+EIT*fh[idx_fh_F(iF,jF,kF+1,ex)]-fh[idx_fh_F(iF,jF,kF+2,ex)]);
else if (k0>=kminF) f_rhs[p] += sfz*d12dz*(-F3*fh[idx_fh_F(iF,jF,kF-1,ex)]-F10*fh[idx_fh_F(iF,jF,kF,ex)]+F18*fh[idx_fh_F(iF,jF,kF+1,ex)]-F6*fh[idx_fh_F(iF,jF,kF+2,ex)]+fh[idx_fh_F(iF,jF,kF+3,ex)]);
const double Dy_term =
((fh[idx_fh_F(iF, jF - 3, kF, ex)] + fh[idx_fh_F(iF, jF + 3, kF, ex)]) -
SIX * (fh[idx_fh_F(iF, jF - 2, kF, ex)] + fh[idx_fh_F(iF, jF + 2, kF, ex)]) +
FIT * (fh[idx_fh_F(iF, jF - 1, kF, ex)] + fh[idx_fh_F(iF, jF + 1, kF, ex)]) -
TWT * fh[idx_fh_F(iF, jF, kF, ex)]) / dY;
const double Dz_term =
((fh[idx_fh_F(iF, jF, kF - 3, ex)] + fh[idx_fh_F(iF, jF, kF + 3, ex)]) -
SIX * (fh[idx_fh_F(iF, jF, kF - 2, ex)] + fh[idx_fh_F(iF, jF, kF + 2, ex)]) +
FIT * (fh[idx_fh_F(iF, jF, kF - 1, ex)] + fh[idx_fh_F(iF, jF, kF + 1, ex)]) -
TWT * fh[idx_fh_F(iF, jF, kF, ex)]) / dZ;
f_rhs[p] += (eps / cof) * (Dx_term + Dy_term + Dz_term);
}
}
}
}
/* ---- KO dissipation (r=3, cof=64, sign=+) ---- */
if (eps > ZEO) {
const double cof = 64.0;
const double SIX = 6.0, FIT = 15.0, TWT = 20.0;
const int i0_lo=(iminF+2>0)?iminF+2:0, j0_lo=(jminF+2>0)?jminF+2:0, k0_lo=(kminF+2>0)?kminF+2:0;
const int i0_hi=imaxF-4, j0_hi=jmaxF-4, k0_hi=kmaxF-4;
if (!(i0_lo>i0_hi||j0_lo>j0_hi||k0_lo>k0_hi)) {
for (int k0=k0_lo;k0<=k0_hi;++k0) { const int kF=k0+1;
for (int j0=j0_lo;j0<=j0_hi;++j0) { const int jF=j0+1;
for (int i0=i0_lo;i0<=i0_hi;++i0) { const int iF=i0+1;
const size_t p=idx_ex(i0,j0,k0,ex);
const double Dx=((fh[idx_fh_F(iF-3,jF,kF,ex)]+fh[idx_fh_F(iF+3,jF,kF,ex)])-SIX*(fh[idx_fh_F(iF-2,jF,kF,ex)]+fh[idx_fh_F(iF+2,jF,kF,ex)])+FIT*(fh[idx_fh_F(iF-1,jF,kF,ex)]+fh[idx_fh_F(iF+1,jF,kF,ex)])-TWT*fh[idx_fh_F(iF,jF,kF,ex)])/dX;
const double Dy=((fh[idx_fh_F(iF,jF-3,kF,ex)]+fh[idx_fh_F(iF,jF+3,kF,ex)])-SIX*(fh[idx_fh_F(iF,jF-2,kF,ex)]+fh[idx_fh_F(iF,jF+2,kF,ex)])+FIT*(fh[idx_fh_F(iF,jF-1,kF,ex)]+fh[idx_fh_F(iF,jF+1,kF,ex)])-TWT*fh[idx_fh_F(iF,jF,kF,ex)])/dY;
const double Dz=((fh[idx_fh_F(iF,jF,kF-3,ex)]+fh[idx_fh_F(iF,jF,kF+3,ex)])-SIX*(fh[idx_fh_F(iF,jF,kF-2,ex)]+fh[idx_fh_F(iF,jF,kF+2,ex)])+FIT*(fh[idx_fh_F(iF,jF,kF-1,ex)]+fh[idx_fh_F(iF,jF,kF+1,ex)])-TWT*fh[idx_fh_F(iF,jF,kF,ex)])/dZ;
f_rhs[p] += (eps/cof)*(Dx+Dy+Dz);
}}}
}
}
free(fh);
return;
}
#elif (ghost_width == 4)
{
const int ord = 4;
int iminF = 1, jminF = 1, kminF = 1;
if (Symmetry > NO_SYMM && fabs(Z[0]) < dZ) kminF = -3;
if (Symmetry > EQ_SYMM && fabs(X[0]) < dX) iminF = -3;
if (Symmetry > EQ_SYMM && fabs(Y[0]) < dY) jminF = -3;
const size_t nx = (size_t)ex1 + ord;
const size_t ny = (size_t)ex2 + ord;
const size_t nz = (size_t)ex3 + ord;
double *fh = (double*)malloc(nx*ny*nz*sizeof(double));
if (!fh) return;
symmetry_bd(ord, ex, f, fh, SoA);
const double d60dx=ONE/F60/dX, d60dy=ONE/F60/dY, d60dz=ONE/F60/dZ;
const double d12dx=ONE/F12/dX, d12dy=ONE/F12/dY, d12dz=ONE/F12/dZ;
const double d2dx=ONE/TWO/dX, d2dy=ONE/TWO/dY, d2dz=ONE/TWO/dZ;
/* ---- advection (6th-order lopsided) ---- */
for (int k0=0;k0<=ex3-2;++k0) { const int kF=k0+1;
for (int j0=0;j0<=ex2-2;++j0) { const int jF=j0+1;
for (int i0=0;i0<=ex1-2;++i0) { const int iF=i0+1;
const size_t p=idx_ex(i0,j0,k0,ex);
/* x */
const double sfx=Sfx[p];
if (sfx>ZEO) {
if (i0<=ex1-5&&(i0-1)>=iminF) f_rhs[p]+=sfx*d60dx*(+F2*fh[idx_fh_F_ord4(iF-2,jF,kF,ex)]-F24*fh[idx_fh_F_ord4(iF-1,jF,kF,ex)]-F35*fh[idx_fh_F_ord4(iF,jF,kF,ex)]+F80*fh[idx_fh_F_ord4(iF+1,jF,kF,ex)]-F30*fh[idx_fh_F_ord4(iF+2,jF,kF,ex)]+EIT*fh[idx_fh_F_ord4(iF+3,jF,kF,ex)]-fh[idx_fh_F_ord4(iF+4,jF,kF,ex)]);
else if (i0<=ex1-6&&i0>=iminF) f_rhs[p]+=sfx*d60dx*(-F10*fh[idx_fh_F_ord4(iF-1,jF,kF,ex)]-F77*fh[idx_fh_F_ord4(iF,jF,kF,ex)]+F150*fh[idx_fh_F_ord4(iF+1,jF,kF,ex)]-F100*fh[idx_fh_F_ord4(iF+2,jF,kF,ex)]+F50*fh[idx_fh_F_ord4(iF+3,jF,kF,ex)]-F15*fh[idx_fh_F_ord4(iF+4,jF,kF,ex)]+F2*fh[idx_fh_F_ord4(iF+5,jF,kF,ex)]);
else if (i0<=ex1-4&&(i0-2)>=iminF) f_rhs[p]+=sfx*d60dx*(-fh[idx_fh_F_ord4(iF-3,jF,kF,ex)]+F9*fh[idx_fh_F_ord4(iF-2,jF,kF,ex)]-F45*fh[idx_fh_F_ord4(iF-1,jF,kF,ex)]+F45*fh[idx_fh_F_ord4(iF+1,jF,kF,ex)]-F9*fh[idx_fh_F_ord4(iF+2,jF,kF,ex)]+fh[idx_fh_F_ord4(iF+3,jF,kF,ex)]);
else if (i0<=ex1-3&&(i0-1)>=iminF) f_rhs[p]+=sfx*d12dx*(fh[idx_fh_F_ord4(iF-2,jF,kF,ex)]-EIT*fh[idx_fh_F_ord4(iF-1,jF,kF,ex)]+EIT*fh[idx_fh_F_ord4(iF+1,jF,kF,ex)]-fh[idx_fh_F_ord4(iF+2,jF,kF,ex)]);
else if (i0<=ex1-2&&i0>=iminF) f_rhs[p]+=sfx*d2dx*(-fh[idx_fh_F_ord4(iF-1,jF,kF,ex)]+fh[idx_fh_F_ord4(iF+1,jF,kF,ex)]);
} else if (sfx<ZEO) {
if ((i0-3)>=iminF&&i0<=ex1-3) f_rhs[p]-=sfx*d60dx*(+F2*fh[idx_fh_F_ord4(iF+2,jF,kF,ex)]-F24*fh[idx_fh_F_ord4(iF+1,jF,kF,ex)]-F35*fh[idx_fh_F_ord4(iF,jF,kF,ex)]+F80*fh[idx_fh_F_ord4(iF-1,jF,kF,ex)]-F30*fh[idx_fh_F_ord4(iF-2,jF,kF,ex)]+EIT*fh[idx_fh_F_ord4(iF-3,jF,kF,ex)]-fh[idx_fh_F_ord4(iF-4,jF,kF,ex)]);
else if ((i0-4)>=iminF&&i0<=ex1-2) f_rhs[p]-=sfx*d60dx*(-F10*fh[idx_fh_F_ord4(iF+1,jF,kF,ex)]-F77*fh[idx_fh_F_ord4(iF,jF,kF,ex)]+F150*fh[idx_fh_F_ord4(iF-1,jF,kF,ex)]-F100*fh[idx_fh_F_ord4(iF-2,jF,kF,ex)]+F50*fh[idx_fh_F_ord4(iF-3,jF,kF,ex)]-F15*fh[idx_fh_F_ord4(iF-4,jF,kF,ex)]+F2*fh[idx_fh_F_ord4(iF-5,jF,kF,ex)]);
else if ((i0-2)>=iminF&&i0<=ex1-4) f_rhs[p]+=sfx*d60dx*(-fh[idx_fh_F_ord4(iF-3,jF,kF,ex)]+F9*fh[idx_fh_F_ord4(iF-2,jF,kF,ex)]-F45*fh[idx_fh_F_ord4(iF-1,jF,kF,ex)]+F45*fh[idx_fh_F_ord4(iF+1,jF,kF,ex)]-F9*fh[idx_fh_F_ord4(iF+2,jF,kF,ex)]+fh[idx_fh_F_ord4(iF+3,jF,kF,ex)]);
else if ((i0-1)>=iminF&&i0<=ex1-3) f_rhs[p]+=sfx*d12dx*(fh[idx_fh_F_ord4(iF-2,jF,kF,ex)]-EIT*fh[idx_fh_F_ord4(iF-1,jF,kF,ex)]+EIT*fh[idx_fh_F_ord4(iF+1,jF,kF,ex)]-fh[idx_fh_F_ord4(iF+2,jF,kF,ex)]);
else if (i0>=iminF&&i0<=ex1-2) f_rhs[p]+=sfx*d2dx*(-fh[idx_fh_F_ord4(iF-1,jF,kF,ex)]+fh[idx_fh_F_ord4(iF+1,jF,kF,ex)]);
}
/* y */
const double sfy=Sfy[p];
if (sfy>ZEO) {
if (j0<=ex2-5&&(j0-1)>=jminF) f_rhs[p]+=sfy*d60dy*(F2*fh[idx_fh_F_ord4(iF,jF-2,kF,ex)]-F24*fh[idx_fh_F_ord4(iF,jF-1,kF,ex)]-F35*fh[idx_fh_F_ord4(iF,jF,kF,ex)]+F80*fh[idx_fh_F_ord4(iF,jF+1,kF,ex)]-F30*fh[idx_fh_F_ord4(iF,jF+2,kF,ex)]+EIT*fh[idx_fh_F_ord4(iF,jF+3,kF,ex)]-fh[idx_fh_F_ord4(iF,jF+4,kF,ex)]);
else if (j0<=ex2-6&&j0>=jminF) f_rhs[p]+=sfy*d60dy*(-F10*fh[idx_fh_F_ord4(iF,jF-1,kF,ex)]-F77*fh[idx_fh_F_ord4(iF,jF,kF,ex)]+F150*fh[idx_fh_F_ord4(iF,jF+1,kF,ex)]-F100*fh[idx_fh_F_ord4(iF,jF+2,kF,ex)]+F50*fh[idx_fh_F_ord4(iF,jF+3,kF,ex)]-F15*fh[idx_fh_F_ord4(iF,jF+4,kF,ex)]+F2*fh[idx_fh_F_ord4(iF,jF+5,kF,ex)]);
else if (j0<=ex2-4&&(j0-2)>=jminF) f_rhs[p]+=sfy*d60dy*(-fh[idx_fh_F_ord4(iF,jF-3,kF,ex)]+F9*fh[idx_fh_F_ord4(iF,jF-2,kF,ex)]-F45*fh[idx_fh_F_ord4(iF,jF-1,kF,ex)]+F45*fh[idx_fh_F_ord4(iF,jF+1,kF,ex)]-F9*fh[idx_fh_F_ord4(iF,jF+2,kF,ex)]+fh[idx_fh_F_ord4(iF,jF+3,kF,ex)]);
else if (j0<=ex2-3&&(j0-1)>=jminF) f_rhs[p]+=sfy*d12dy*(fh[idx_fh_F_ord4(iF,jF-2,kF,ex)]-EIT*fh[idx_fh_F_ord4(iF,jF-1,kF,ex)]+EIT*fh[idx_fh_F_ord4(iF,jF+1,kF,ex)]-fh[idx_fh_F_ord4(iF,jF+2,kF,ex)]);
else if (j0<=ex2-2&&j0>=jminF) f_rhs[p]+=sfy*d2dy*(-fh[idx_fh_F_ord4(iF,jF-1,kF,ex)]+fh[idx_fh_F_ord4(iF,jF+1,kF,ex)]);
} else if (sfy<ZEO) {
if ((j0-3)>=jminF&&j0<=ex2-3) f_rhs[p]-=sfy*d60dy*(F2*fh[idx_fh_F_ord4(iF,jF+2,kF,ex)]-F24*fh[idx_fh_F_ord4(iF,jF+1,kF,ex)]-F35*fh[idx_fh_F_ord4(iF,jF,kF,ex)]+F80*fh[idx_fh_F_ord4(iF,jF-1,kF,ex)]-F30*fh[idx_fh_F_ord4(iF,jF-2,kF,ex)]+EIT*fh[idx_fh_F_ord4(iF,jF-3,kF,ex)]-fh[idx_fh_F_ord4(iF,jF-4,kF,ex)]);
else if ((j0-4)>=jminF&&j0<=ex2-2) f_rhs[p]-=sfy*d60dy*(-F10*fh[idx_fh_F_ord4(iF,jF+1,kF,ex)]-F77*fh[idx_fh_F_ord4(iF,jF,kF,ex)]+F150*fh[idx_fh_F_ord4(iF,jF-1,kF,ex)]-F100*fh[idx_fh_F_ord4(iF,jF-2,kF,ex)]+F50*fh[idx_fh_F_ord4(iF,jF-3,kF,ex)]-F15*fh[idx_fh_F_ord4(iF,jF-4,kF,ex)]+F2*fh[idx_fh_F_ord4(iF,jF-5,kF,ex)]);
else if ((j0-2)>=jminF&&j0<=ex2-4) f_rhs[p]+=sfy*d60dy*(-fh[idx_fh_F_ord4(iF,jF-3,kF,ex)]+F9*fh[idx_fh_F_ord4(iF,jF-2,kF,ex)]-F45*fh[idx_fh_F_ord4(iF,jF-1,kF,ex)]+F45*fh[idx_fh_F_ord4(iF,jF+1,kF,ex)]-F9*fh[idx_fh_F_ord4(iF,jF+2,kF,ex)]+fh[idx_fh_F_ord4(iF,jF+3,kF,ex)]);
else if ((j0-1)>=jminF&&j0<=ex2-3) f_rhs[p]+=sfy*d12dy*(fh[idx_fh_F_ord4(iF,jF-2,kF,ex)]-EIT*fh[idx_fh_F_ord4(iF,jF-1,kF,ex)]+EIT*fh[idx_fh_F_ord4(iF,jF+1,kF,ex)]-fh[idx_fh_F_ord4(iF,jF+2,kF,ex)]);
else if (j0>=jminF&&j0<=ex2-2) f_rhs[p]+=sfy*d2dy*(-fh[idx_fh_F_ord4(iF,jF-1,kF,ex)]+fh[idx_fh_F_ord4(iF,jF+1,kF,ex)]);
}
/* z */
const double sfz=Sfz[p];
if (sfz>ZEO) {
if (k0<=ex3-5&&(k0-1)>=kminF) f_rhs[p]+=sfz*d60dz*(F2*fh[idx_fh_F_ord4(iF,jF,kF-2,ex)]-F24*fh[idx_fh_F_ord4(iF,jF,kF-1,ex)]-F35*fh[idx_fh_F_ord4(iF,jF,kF,ex)]+F80*fh[idx_fh_F_ord4(iF,jF,kF+1,ex)]-F30*fh[idx_fh_F_ord4(iF,jF,kF+2,ex)]+EIT*fh[idx_fh_F_ord4(iF,jF,kF+3,ex)]-fh[idx_fh_F_ord4(iF,jF,kF+4,ex)]);
else if (k0<=ex3-6&&k0>=kminF) f_rhs[p]+=sfz*d60dz*(-F10*fh[idx_fh_F_ord4(iF,jF,kF-1,ex)]-F77*fh[idx_fh_F_ord4(iF,jF,kF,ex)]+F150*fh[idx_fh_F_ord4(iF,jF,kF+1,ex)]-F100*fh[idx_fh_F_ord4(iF,jF,kF+2,ex)]+F50*fh[idx_fh_F_ord4(iF,jF,kF+3,ex)]-F15*fh[idx_fh_F_ord4(iF,jF,kF+4,ex)]+F2*fh[idx_fh_F_ord4(iF,jF,kF+5,ex)]);
else if (k0<=ex3-4&&(k0-2)>=kminF) f_rhs[p]+=sfz*d60dz*(-fh[idx_fh_F_ord4(iF,jF,kF-3,ex)]+F9*fh[idx_fh_F_ord4(iF,jF,kF-2,ex)]-F45*fh[idx_fh_F_ord4(iF,jF,kF-1,ex)]+F45*fh[idx_fh_F_ord4(iF,jF,kF+1,ex)]-F9*fh[idx_fh_F_ord4(iF,jF,kF+2,ex)]+fh[idx_fh_F_ord4(iF,jF,kF+3,ex)]);
else if (k0<=ex3-3&&(k0-1)>=kminF) f_rhs[p]+=sfz*d12dz*(fh[idx_fh_F_ord4(iF,jF,kF-2,ex)]-EIT*fh[idx_fh_F_ord4(iF,jF,kF-1,ex)]+EIT*fh[idx_fh_F_ord4(iF,jF,kF+1,ex)]-fh[idx_fh_F_ord4(iF,jF,kF+2,ex)]);
else if (k0<=ex3-2&&k0>=kminF) f_rhs[p]+=sfz*d2dz*(-fh[idx_fh_F_ord4(iF,jF,kF-1,ex)]+fh[idx_fh_F_ord4(iF,jF,kF+1,ex)]);
} else if (sfz<ZEO) {
if ((k0-3)>=kminF&&k0<=ex3-3) f_rhs[p]-=sfz*d60dz*(F2*fh[idx_fh_F_ord4(iF,jF,kF+2,ex)]-F24*fh[idx_fh_F_ord4(iF,jF,kF+1,ex)]-F35*fh[idx_fh_F_ord4(iF,jF,kF,ex)]+F80*fh[idx_fh_F_ord4(iF,jF,kF-1,ex)]-F30*fh[idx_fh_F_ord4(iF,jF,kF-2,ex)]+EIT*fh[idx_fh_F_ord4(iF,jF,kF-3,ex)]-fh[idx_fh_F_ord4(iF,jF,kF-4,ex)]);
else if ((k0-4)>=kminF&&k0<=ex3-2) f_rhs[p]-=sfz*d60dz*(-F10*fh[idx_fh_F_ord4(iF,jF,kF+1,ex)]-F77*fh[idx_fh_F_ord4(iF,jF,kF,ex)]+F150*fh[idx_fh_F_ord4(iF,jF,kF-1,ex)]-F100*fh[idx_fh_F_ord4(iF,jF,kF-2,ex)]+F50*fh[idx_fh_F_ord4(iF,jF,kF-3,ex)]-F15*fh[idx_fh_F_ord4(iF,jF,kF-4,ex)]+F2*fh[idx_fh_F_ord4(iF,jF,kF-5,ex)]);
else if ((k0-2)>=kminF&&k0<=ex3-4) f_rhs[p]+=sfz*d60dz*(-fh[idx_fh_F_ord4(iF,jF,kF-3,ex)]+F9*fh[idx_fh_F_ord4(iF,jF,kF-2,ex)]-F45*fh[idx_fh_F_ord4(iF,jF,kF-1,ex)]+F45*fh[idx_fh_F_ord4(iF,jF,kF+1,ex)]-F9*fh[idx_fh_F_ord4(iF,jF,kF+2,ex)]+fh[idx_fh_F_ord4(iF,jF,kF+3,ex)]);
else if ((k0-1)>=kminF&&k0<=ex3-3) f_rhs[p]+=sfz*d12dz*(fh[idx_fh_F_ord4(iF,jF,kF-2,ex)]-EIT*fh[idx_fh_F_ord4(iF,jF,kF-1,ex)]+EIT*fh[idx_fh_F_ord4(iF,jF,kF+1,ex)]-fh[idx_fh_F_ord4(iF,jF,kF+2,ex)]);
else if (k0>=kminF&&k0<=ex3-2) f_rhs[p]+=sfz*d2dz*(-fh[idx_fh_F_ord4(iF,jF,kF-1,ex)]+fh[idx_fh_F_ord4(iF,jF,kF+1,ex)]);
}
}}}
/* ---- KO dissipation (r=4, cof=256, sign=-) ---- */
if (eps > ZEO) {
const double cof = 256.0;
const double F8k = 8.0, F28 = 28.0, F56 = 56.0, F70 = 70.0;
const int i0_lo=(iminF+3>0)?iminF+3:0, j0_lo=(jminF+3>0)?jminF+3:0, k0_lo=(kminF+3>0)?kminF+3:0;
const int i0_hi=imaxF-5, j0_hi=jmaxF-5, k0_hi=kmaxF-5;
if (!(i0_lo>i0_hi||j0_lo>j0_hi||k0_lo>k0_hi)) {
for (int k0=k0_lo;k0<=k0_hi;++k0) { const int kF=k0+1;
for (int j0=j0_lo;j0<=j0_hi;++j0) { const int jF=j0+1;
for (int i0=i0_lo;i0<=i0_hi;++i0) { const int iF=i0+1;
const size_t p=idx_ex(i0,j0,k0,ex);
const double Dx=((fh[idx_fh_F_ord4(iF-4,jF,kF,ex)]+fh[idx_fh_F_ord4(iF+4,jF,kF,ex)])-F8k*(fh[idx_fh_F_ord4(iF-3,jF,kF,ex)]+fh[idx_fh_F_ord4(iF+3,jF,kF,ex)])+F28*(fh[idx_fh_F_ord4(iF-2,jF,kF,ex)]+fh[idx_fh_F_ord4(iF+2,jF,kF,ex)])-F56*(fh[idx_fh_F_ord4(iF-1,jF,kF,ex)]+fh[idx_fh_F_ord4(iF+1,jF,kF,ex)])+F70*fh[idx_fh_F_ord4(iF,jF,kF,ex)])/dX;
const double Dy=((fh[idx_fh_F_ord4(iF,jF-4,kF,ex)]+fh[idx_fh_F_ord4(iF,jF+4,kF,ex)])-F8k*(fh[idx_fh_F_ord4(iF,jF-3,kF,ex)]+fh[idx_fh_F_ord4(iF,jF+3,kF,ex)])+F28*(fh[idx_fh_F_ord4(iF,jF-2,kF,ex)]+fh[idx_fh_F_ord4(iF,jF+2,kF,ex)])-F56*(fh[idx_fh_F_ord4(iF,jF-1,kF,ex)]+fh[idx_fh_F_ord4(iF,jF+1,kF,ex)])+F70*fh[idx_fh_F_ord4(iF,jF,kF,ex)])/dY;
const double Dz=((fh[idx_fh_F_ord4(iF,jF,kF-4,ex)]+fh[idx_fh_F_ord4(iF,jF,kF+4,ex)])-F8k*(fh[idx_fh_F_ord4(iF,jF,kF-3,ex)]+fh[idx_fh_F_ord4(iF,jF,kF+3,ex)])+F28*(fh[idx_fh_F_ord4(iF,jF,kF-2,ex)]+fh[idx_fh_F_ord4(iF,jF,kF+2,ex)])-F56*(fh[idx_fh_F_ord4(iF,jF,kF-1,ex)]+fh[idx_fh_F_ord4(iF,jF,kF+1,ex)])+F70*fh[idx_fh_F_ord4(iF,jF,kF,ex)])/dZ;
f_rhs[p] -= (eps/cof)*(Dx+Dy+Dz);
}}}
}
}
free(fh);
return;
}
#elif (ghost_width == 5)
{
lopsided(ex, X, Y, Z, f, f_rhs, Sfx, Sfy, Sfz, Symmetry, SoA);
if (eps > ZEO) kodis(ex, X, Y, Z, f, f_rhs, SoA, Symmetry, eps);
return;
}
#else
#error "lopsided_kodis_c.C: unsupported ghost_width (must be 2, 3, 4, or 5)"
#endif
free(fh);
}

View File

@@ -32,24 +32,6 @@ $(error USE_CXX_ESCALAR_KERNEL=1 requires USE_CXX_KERNELS=1 because bssn_escalar
endif
endif
ifeq ($(USE_CXX_EM_KERNEL),1)
ifeq ($(ABE_TYPE),3)
EFFECTIVE_USE_CXX_EM_KERNEL = 1
else
EFFECTIVE_USE_CXX_EM_KERNEL = 0
endif
else
EFFECTIVE_USE_CXX_EM_KERNEL = 0
endif
ifeq ($(EFFECTIVE_USE_CXX_EM_KERNEL),1)
ifeq ($(USE_CXX_KERNELS),0)
$(error USE_CXX_EM_KERNEL=1 requires USE_CXX_KERNELS=1 because bssn_em_rhs_c.C reuses the C BSSN kernel)
endif
endif
EM_KERNEL_FLAG = -DBSSN_USE_EM_C_KERNEL=$(EFFECTIVE_USE_CXX_EM_KERNEL)
## polint(ordn=6) kernel selector:
## 1 (default): barycentric fast path
## 0 : fallback to Neville path
@@ -58,30 +40,12 @@ POLINT6_FLAG = -DPOLINT6_USE_BARYCENTRIC=$(POLINT6_USE_BARY)
TRANSFER_CACHE_FLAG = -DBSSN_USE_TRANSFER_CACHE=$(EFFECTIVE_USE_TRANSFER_CACHE)
ESCALAR_KERNEL_FLAG = -DBSSN_USE_ESCALAR_C_KERNEL=$(EFFECTIVE_USE_CXX_ESCALAR_KERNEL)
## ABE build flags selected by PGO_MODE (set in makefile.inc, default: opt)
## make -> opt (PGO-guided, maximum performance)
## make PGO_MODE=instrument -> instrument (Phase 1: collect fresh profile data)
PROFDATA = /home/$(shell whoami)/AMSS-NCKU/pgo_profile/default.profdata
ifeq ($(PGO_MODE),instrument)
## Phase 1: instrumentation — omit -ipo/-fp-model fast=2 for faster build and numerical stability
CXXAPPFLAGS = -O3 -xHost -fma -fprofile-instr-generate -ipo \
-Dfortran3 -Dnewc -I${MKLROOT}/include $(INTERP_LB_FLAGS) \
$(TRANSFER_CACHE_FLAG) $(ESCALAR_KERNEL_FLAG) $(EM_KERNEL_FLAG)
f90appflags = -O3 -xHost -fma -fprofile-instr-generate -ipo \
-align array64byte -fpp -I${MKLROOT}/include $(POLINT6_FLAG)
else
## opt (default): maximum performance with PGO profile data -fprofile-instr-use=$(PROFDATA) \
## PGO has been turned off, now tested and found to be negative optimization
## INTERP_LB_FLAGS has been turned off too, now tested and found to be negative optimization
CXXAPPFLAGS = -O3 -xHost -fp-model fast=2 -fma -ipo \
-Dfortran3 -Dnewc -I${MKLROOT}/include $(INTERP_LB_FLAGS) \
$(TRANSFER_CACHE_FLAG) $(ESCALAR_KERNEL_FLAG) $(EM_KERNEL_FLAG)
f90appflags = -O3 -xHost -fp-model fast=2 -fma -ipo \
-align array64byte -fpp -I${MKLROOT}/include $(POLINT6_FLAG)
endif
## AMD AOCC build flags optimized for EPYC Zen 4 (-march=znver4)
CXXAPPFLAGS = -O3 -march=znver4 -ffast-math -flto \
-Dfortran3 -Dnewc -I$(AOCL_ROOT)/include $(INTERP_LB_FLAGS) \
$(TRANSFER_CACHE_FLAG) $(ESCALAR_KERNEL_FLAG)
f90appflags = -O3 -march=znver4 -ffast-math -flto \
-cpp -I$(AOCL_ROOT)/include $(POLINT6_FLAG)
.SUFFIXES: .o .f90 .C .for .cu
@@ -91,10 +55,6 @@ endif
.C.o:
${CXX} $(CXXAPPFLAGS) -c $< $(filein) -o $@
# ShellPatch.C uses OpenMP for setupintintstuff search loops
ShellPatch.o: ShellPatch.C
${CXX} $(CXXAPPFLAGS) $(OMP_FLAG) -c $< $(filein) -o $@
.for.o:
$(f77) -c $< -o $@
@@ -120,42 +80,18 @@ lopsided_c.o: lopsided_c.C
lopsided_kodis_c.o: lopsided_kodis_c.C
${CXX} $(CXXAPPFLAGS) -c $< $(filein) -o $@
# C rewrite of shell-patch derivative kernels
fderivs_sh_c.o: fderivs_sh_c.C
${CXX} $(CXXAPPFLAGS) -c $< $(filein) -o $@
fdderivs_sh_c.o: fdderivs_sh_c.C
${CXX} $(CXXAPPFLAGS) -c $< $(filein) -o $@
fderivs_shc_c.o: fderivs_shc_c.C
${CXX} $(CXXAPPFLAGS) -c $< $(filein) -o $@
fdderivs_shc_c.o: fdderivs_shc_c.C
${CXX} $(CXXAPPFLAGS) -c $< $(filein) -o $@
kodiss_sh_c.o: kodiss_sh_c.C
${CXX} $(CXXAPPFLAGS) -c $< $(filein) -o $@
bssn_em_rhs_c.o: bssn_em_rhs_c.C
${CXX} $(CXXAPPFLAGS) -c $< $(filein) -o $@
z4c_rhs_c.o: z4c_rhs_c.C
${CXX} $(CXXAPPFLAGS) -c $< $(filein) -o $@
#interp_lb_profile.o: interp_lb_profile.C interp_lb_profile.h
# ${CXX} $(CXXAPPFLAGS) -c $< $(filein) -o $@
## TwoPunctureABE uses fixed optimal flags with its own PGO profile, independent of CXXAPPFLAGS
TP_PROFDATA = /home/$(shell whoami)/AMSS-NCKU/pgo_profile/TwoPunctureABE.profdata
TP_OPTFLAGS = -O3 -xHost -fp-model fast=2 -fma -ipo \
-fprofile-instr-use=$(TP_PROFDATA) \
-Dfortran3 -Dnewc -I${MKLROOT}/include
## TwoPunctureABE uses fixed optimal flags (AMD AOCC, no PGO)
TP_OPTFLAGS = -O3 -march=znver4 -ffast-math -flto \
-Dfortran3 -Dnewc -I$(AOCL_ROOT)/include
TwoPunctures.o: TwoPunctures.C
${CXX} $(TP_OPTFLAGS) -qopenmp -c $< -o $@
${CXX} $(TP_OPTFLAGS) -fopenmp -c $< -o $@
TwoPunctureABE.o: TwoPunctureABE.C
${CXX} $(TP_OPTFLAGS) -qopenmp -c $< -o $@
${CXX} $(TP_OPTFLAGS) -fopenmp -c $< -o $@
# Input files
@@ -169,16 +105,6 @@ CFILES = bssn_rhs_c.o fderivs_c.o fdderivs_c.o kodiss_c.o lopsided_c.o lopsided_
ifeq ($(EFFECTIVE_USE_CXX_ESCALAR_KERNEL),1)
CFILES += bssn_escalar_rhs_c.o
endif
ifeq ($(EFFECTIVE_USE_CXX_EM_KERNEL),1)
CFILES += bssn_em_rhs_c.o
endif
endif
ifeq ($(USE_CXX_Z4C_KERNELS),1)
CFILES += z4c_rhs_c.o
Z4C_F90_OBJ =
else
Z4C_F90_OBJ = Z4c_rhs.o
endif
## RK4 kernel switch (independent from USE_CXX_KERNELS)
@@ -189,17 +115,6 @@ else
RK4_F90_OBJ = rungekutta4_rout.o
endif
## Shell-patch derivative kernel switch (independent from USE_CXX_KERNELS)
## 1 : use C++ rewrite of shell derivative functions (experimental)
## 0 : use original Fortran diff_new_sh.o and kodiss_sh.o (default)
USE_CXX_SHELL_KERNELS ?= 0
ifeq ($(USE_CXX_SHELL_KERNELS),1)
CFILES += fderivs_sh_c.o fdderivs_sh_c.o fderivs_shc_c.o fdderivs_shc_c.o kodiss_sh_c.o
SH_F90_OBJ =
else
SH_F90_OBJ = diff_new_sh.o kodiss_sh.o point_diff_new_sh.o
endif
C++FILES = ABE.o Ansorg.o Block.o misc.o monitor.o Parallel.o MPatch.o var.o\
cgh.o bssn_class.o surface_integral.o ShellPatch.o\
bssnEScalar_class.o perf.o Z4c_class.o NullShellPatch.o\
@@ -217,11 +132,11 @@ C++FILES_GPU = ABE.o Ansorg.o Block.o misc.o monitor.o Parallel.o MPatch.o var.o
F90FILES_BASE = enforce_algebra.o fmisc.o initial_puncture.o prolongrestrict.o\
prolongrestrict_cell.o prolongrestrict_vertex.o\
$(RK4_F90_OBJ) diff_new.o kodiss.o\
lopsidediff.o sommerfeld_rout.o getnp4.o $(SH_F90_OBJ)\
$(RK4_F90_OBJ) diff_new.o kodiss.o kodiss_sh.o\
lopsidediff.o sommerfeld_rout.o getnp4.o diff_new_sh.o\
shellfunctions.o bssn_rhs_ss.o Set_Rho_ADM.o\
getnp4EScalar.o bssnEScalar_rhs.o bssn_constraint.o ricci_gamma.o\
fadmquantites_bssn.o $(Z4C_F90_OBJ) Z4c_rhs_ss.o\
fadmquantites_bssn.o Z4c_rhs.o Z4c_rhs_ss.o point_diff_new_sh.o\
cpbc.o getnp4old.o NullEvol.o initial_null.o initial_maxwell.o\
getnpem2.o empart.o NullNews.o fourdcurvature.o\
bssn2adm.o adm_constraint.o adm_ricci_gamma.o\
@@ -287,7 +202,7 @@ ABEGPU: $(C++FILES_GPU) $(CFILES) $(F90FILES) $(F77FILES) $(AHFDOBJS) $(CUDAFILE
$(CLINKER) $(CXXAPPFLAGS) -o $@ $(C++FILES_GPU) $(CFILES) $(F90FILES) $(F77FILES) $(AHFDOBJS) $(CUDAFILES) $(LDLIBS)
TwoPunctureABE: $(TwoPunctureFILES)
$(CLINKER) $(TP_OPTFLAGS) -qopenmp -o $@ $(TwoPunctureFILES) $(LDLIBS)
$(CLINKER) $(TP_OPTFLAGS) -fopenmp -o $@ $(TwoPunctureFILES) $(LDLIBS)
clean:
rm *.o ABE ABEGPU TwoPunctureABE make.log -f

View File

@@ -1,33 +1,17 @@
## GCC version (commented out)
## filein = -I/usr/include -I/usr/lib/x86_64-linux-gnu/mpich/include -I/usr/lib/x86_64-linux-gnu/openmpi/lib/ -I/usr/lib/gcc/x86_64-linux-gnu/11/ -I/usr/include/c++/11/
## filein = -I/usr/include/ -I/usr/include/openmpi-x86_64/ -I/usr/lib/x86_64-linux-gnu/openmpi/include/ -I/usr/lib/x86_64-linux-gnu/openmpi/lib/ -I/usr/lib/gcc/x86_64-linux-gnu/11/ -I/usr/include/c++/11/
## LDLIBS = -L/usr/lib/x86_64-linux-gnu -L/usr/lib64 -L/usr/lib/gcc/x86_64-linux-gnu/11 -lgfortran -lmpi -lgfortran
## AMD AOCC version with AOCL (Optimized for AMD EPYC Zen 4)
## Intel oneAPI version with oneMKL (Optimized for performance)
filein = -I/usr/include/ -I${MKLROOT}/include
## AOCL root path for includes and libraries
AOCL_ROOT ?= /home/gh0s7/AOCC/aocl/5.2.0/aocc
## Using sequential MKL (OpenMP disabled for better single-threaded performance)
## Added -lifcore for Intel Fortran runtime and -limf for Intel math library
LDLIBS = -L${MKLROOT}/lib -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lifcore -limf -lpthread -lm -ldl -liomp5
## AOCC-built OpenMPI prefix
OMPI_PREFIX ?= /home/gh0s7/AOCC/aocc-openmpi
## Memory allocator switch
## 1 (default) : link Intel oneTBB allocator (libtbbmalloc)
## 0 : use system default allocator (ptmalloc)
USE_TBBMALLOC ?= 1
TBBMALLOC_SO ?= /home/intel/oneapi/2025.3/lib/libtbbmalloc.so
ifneq ($(wildcard $(TBBMALLOC_SO)),)
TBBMALLOC_LIBS = -Wl,--no-as-needed $(TBBMALLOC_SO) -Wl,--as-needed
else
TBBMALLOC_LIBS = -Wl,--no-as-needed -ltbbmalloc -Wl,--as-needed
endif
ifeq ($(USE_TBBMALLOC),1)
LDLIBS := $(TBBMALLOC_LIBS) $(LDLIBS)
endif
filein = -I/usr/include/ -I$(AOCL_ROOT)/include
## PGO build mode switch (ABE only; TwoPunctureABE always uses opt flags)
## opt : (default) maximum performance with PGO profile-guided optimization
## instrument : PGO Phase 1 instrumentation to collect fresh profile data
PGO_MODE ?= opt
## Using AOCL BLIS + libFLAME for BLAS/LAPACK
## AOCC Fortran runtime: -lflang (includes FortranRuntime)
## AOCC OpenMP runtime: -lomp (LLVM OpenMP)
LDLIBS = -L$(AOCL_ROOT)/lib -lblis -lflame -lamdlibm -lflang -lpgmath -lpthread -lm -ldl -lomp
## Interp_Points load balance profiling mode
## off : (default) no load balance instrumentation
@@ -48,23 +32,12 @@ endif
## 0 : fall back to original Fortran kernels
USE_CXX_KERNELS ?= 1
## Z4C Cartesian RHS kernel switch
## 1 (default) : use C++ rewrite of Z4c_rhs (main Cartesian path faster)
## 0 : use original Fortran Z4c_rhs.o
USE_CXX_Z4C_KERNELS ?= 1
## BSSN-EScalar RHS switch
## 1 (default) : use BSSN-EScalar C wrapper on the normal patch path
## 0 : keep the original Fortran BSSN-EScalar RHS for precision-safe runs
## Note: this requires USE_CXX_KERNELS=1 because the wrapper reuses the C BSSN kernel.
USE_CXX_ESCALAR_KERNEL ?= 1
## BSSN-EM RHS switch
## 1 : use BSSN-EM C kernel (bssn_em_rhs_c.C) on the normal patch path
## 0 : keep the original Fortran empart.f90 RHS for the EM fields (default)
## Note: experimental, requires USE_CXX_KERNELS=1
USE_CXX_EM_KERNEL ?= 0
## Cached transfer switch
## auto (default): enable for BSSN vacuum, keep other paths on the safe uncached path
## 1 : force cached Sync/Restrict/OutBd transfer on evolution hot paths
@@ -76,11 +49,11 @@ USE_TRANSFER_CACHE ?= auto
## 0 : use original Fortran rungekutta4_rout.o
USE_CXX_RK4 ?= 1
f90 = ifx
f77 = ifx
CXX = icpx
CC = icx
CLINKER = mpiicpx
f90 = flang
f77 = flang
CXX = clang++
CC = clang
CLINKER = $(OMPI_PREFIX)/bin/mpicxx
Cu = nvcc
CUDA_LIB_PATH = -L/usr/lib/cuda/lib64 -I/usr/include -I/usr/lib/cuda/include

View File

@@ -46,45 +46,6 @@ static inline size_t idx_fh_F(int iF, int jF, int kF, const int ex[3]) {
return (size_t)ii + (size_t)jj * (size_t)nx + (size_t)kk * (size_t)nx * (size_t)ny;
}
/*
* fh 对应 Fortran: fh(0:ex1, 0:ex2, 0:ex3)
* ord=1 => shift=0
* iF/jF/kF 为 Fortran 索引 (0..ex)
*/
static inline size_t idx_fh_F_ord1(int iF, int jF, int kF, const int ex[3]) {
const int nx = ex[0] + 1; // ex1 + ord
const int ny = ex[1] + 1;
return (size_t)iF + (size_t)jF * (size_t)nx + (size_t)kF * (size_t)nx * (size_t)ny;
}
/*
* fh 对应 Fortran: fh(-3:ex1, -3:ex2, -3:ex3)
* ord=4 => shift=3
*/
static inline size_t idx_fh_F_ord4(int iF, int jF, int kF, const int ex[3]) {
const int shift = 3;
const int nx = ex[0] + 4; // ex1 + ord
const int ny = ex[1] + 4;
const int ii = iF + shift; // 0..ex1+3
const int jj = jF + shift; // 0..ex2+3
const int kk = kF + shift; // 0..ex3+3
return (size_t)ii + (size_t)jj * (size_t)nx + (size_t)kk * (size_t)nx * (size_t)ny;
}
/*
* fh 对应 Fortran: fh(-4:ex1, -4:ex2, -4:ex3)
* ord=5 => shift=4
*/
static inline size_t idx_fh_F_ord5(int iF, int jF, int kF, const int ex[3]) {
const int shift = 4;
const int nx = ex[0] + 5; // ex1 + ord
const int ny = ex[1] + 5;
const int ii = iF + shift; // 0..ex1+4
const int jj = jF + shift; // 0..ex2+4
const int kk = kF + shift; // 0..ex3+4
return (size_t)ii + (size_t)jj * (size_t)nx + (size_t)kk * (size_t)nx * (size_t)ny;
}
/*
* func: (1..extc1, 1..extc2, 1..extc3) 1-based in Fortran
* funcc: (-ord+1..extc1, -ord+1..extc2, -ord+1..extc3) in Fortran
@@ -270,10 +231,7 @@ static inline void symmetry_bd(int ord,
{
if (ord <= 0) return;
if (ord == 1) {
symmetry_bd_impl(1, 0, extc, func, funcc, SoA);
return;
}
/* Fast paths used by current C kernels: ord=2 (derivs), ord=3 (lopsided/KO). */
if (ord == 2) {
symmetry_bd_impl(2, 1, extc, func, funcc, SoA);
return;
@@ -282,91 +240,7 @@ static inline void symmetry_bd(int ord,
symmetry_bd_impl(3, 2, extc, func, funcc, SoA);
return;
}
if (ord == 4) {
symmetry_bd_impl(4, 3, extc, func, funcc, SoA);
return;
}
symmetry_bd_impl(ord, ord - 1, extc, func, funcc, SoA);
}
/*
* symmetry_stbd — shell-patch (staggered boundary) ghost fill.
*
* Fortran: funcc(-ord+1:extc1+ord, -ord+1:extc2+ord, extc3)
* Only 2 SoA values (x/y). No z symmetry fill.
* Ghost on BOTH positive and negative sides of x and y.
* Reflection uses i+2 (skips boundary) instead of i+1.
* nx = extc1 + 2*ord, ny = extc2 + 2*ord
*/
static inline void symmetry_stbd(int ord,
const int extc[3],
const double *func,
double *funcc,
const double SoA[2])
{
const int extc1 = extc[0], extc2 = extc[1], extc3 = extc[2];
const int nx = extc1 + 2 * ord;
const int ny = extc2 + 2 * ord;
const int sh = ord - 1;
const size_t snx = (size_t)nx;
const size_t splane = snx * (size_t)ny;
/* 1) Copy interior: funcc(1:extc1, 1:extc2, 1:extc3) = func */
for (int k0 = 0; k0 < extc3; ++k0) {
const double *src = func + (size_t)k0 * (size_t)extc2 * (size_t)extc1;
const size_t kbase = (size_t)k0 * splane;
for (int j0 = 0; j0 < extc2; ++j0) {
double *dst = funcc + kbase + (size_t)(sh + j0 + 1) * snx + (size_t)(sh + 1);
const double *s = src + (size_t)j0 * (size_t)extc1;
for (int i0 = 0; i0 < extc1; ++i0) dst[i0] = s[i0];
}
}
/* 2) x-direction ghost fill */
const double s1 = SoA[0];
for (int k0 = 0; k0 < extc3; ++k0) {
const size_t kbase = (size_t)k0 * splane;
for (int j0 = 0; j0 < extc2; ++j0) {
const size_t off = kbase + (size_t)(sh + j0 + 1) * snx;
/* left side: funcc(-i) = funcc(i+2) * s1 */
for (int i = 0; i < ord; ++i) {
funcc[off + (size_t)(sh - i)] = funcc[off + (size_t)(sh + i + 2)] * s1;
/* right side: funcc(extc1+1+i) = funcc(extc1-1-i) * s1 */
funcc[off + (size_t)(sh + extc1 + 1 + i)] = funcc[off + (size_t)(sh + extc1 - 1 - i)] * s1;
}
}
}
/* 3) y-direction ghost fill */
const double s2 = SoA[1];
for (int i = 0; i < nx; ++i) {
for (int k0 = 0; k0 < extc3; ++k0) {
const size_t kbase = (size_t)k0 * splane;
/* bottom: funcc(:,-i,:) = funcc(:,i+2,:) * s2 */
for (int jj = 0; jj < ord; ++jj) {
funcc[kbase + (size_t)(sh - jj) * snx + (size_t)i] =
funcc[kbase + (size_t)(sh + jj + 2) * snx + (size_t)i] * s2;
/* top: funcc(:,extc2+1+jj,:) = funcc(:,extc2-1-jj,:) * s2 */
funcc[kbase + (size_t)(sh + extc2 + 1 + jj) * snx + (size_t)i] =
funcc[kbase + (size_t)(sh + extc2 - 1 - jj) * snx + (size_t)i] * s2;
}
}
}
}
/*
* Indexing for shell fh buffer: Fortran fh(-ord+1:extc1+ord, -ord+1:extc2+ord, extc3)
* C 0-based: ii = iF + ord - 1
* nx = extc1 + 2*ord, ny = extc2 + 2*ord
*/
static inline size_t idx_fh_stbd(int iF, int jF, int kF, int ord, const int extc[3]) {
const int sh = ord - 1;
const int nx = extc[0] + 2 * ord;
const int ny = extc[1] + 2 * ord;
const int ii = iF + sh;
const int jj = jF + sh;
const int kk = kF - 1; // Fortran 1-based kF → C 0-based
return (size_t)ii + (size_t)jj * (size_t)nx + (size_t)kk * (size_t)nx * (size_t)ny;
}
#endif

View File

@@ -1,901 +0,0 @@
#include "macrodef.h"
#include "bssn_rhs.h"
#include "fmisc.h"
#include "ricci_gamma.h"
#include "share_func.h"
#include "tool.h"
#include <vector>
#ifdef fortran1
#define f_constraint_bssn constraint_bssn
#define f_z4c_rhs_point z4c_rhs_point
#endif
#ifdef fortran2
#define f_constraint_bssn CONSTRAINT_BSSN
#define f_z4c_rhs_point Z4C_RHS_POINT
#endif
#ifdef fortran3
#define f_constraint_bssn constraint_bssn_
#define f_z4c_rhs_point z4c_rhs_point_
#endif
extern "C" void f_constraint_bssn(int *, double *, double *, double *,
double *, double *,
double *, double *, double *, double *, double *, double *,
double *, double *, double *, double *, double *, double *,
double *, double *, double *,
double *, double *, double *, double *, double *, double *, double *, double *, double *, double *,
double *, double *, double *, double *, double *, double *,
double *, double *, double *, double *, double *, double *,
double *, double *, double *, double *, double *, double *,
double *, double *, double *, double *, double *, double *, double *, double *,
double *, double *, double *,
int &);
extern "C" void f_z4c_rhs_point(
double &A11,
double &A12,
double &A13,
double &A22,
double &A23,
double &A33,
double &alpha,
double &B1,
double &B2,
double &B3,
double &beta1,
double &beta2,
double &beta3,
double &chi,
double &chiDivFloor,
double &da1,
double &dA111,
double &dA112,
double &dA113,
double &dA122,
double &dA123,
double &dA133,
double &da2,
double &dA211,
double &dA212,
double &dA213,
double &dA222,
double &dA223,
double &dA233,
double &da3,
double &dA311,
double &dA312,
double &dA313,
double &dA322,
double &dA323,
double &dA333,
double &db11,
double &dB11,
double &db12,
double &dB12,
double &db13,
double &dB13,
double &db21,
double &dB21,
double &db22,
double &dB22,
double &db23,
double &dB23,
double &db31,
double &dB31,
double &db32,
double &dB32,
double &db33,
double &dB33,
double &dchi1,
double &dchi2,
double &dchi3,
double &dda11,
double &dda12,
double &dda13,
double &dda22,
double &dda23,
double &dda33,
double &ddb111,
double &ddb112,
double &ddb113,
double &ddb121,
double &ddb122,
double &ddb123,
double &ddb131,
double &ddb132,
double &ddb133,
double &ddb221,
double &ddb222,
double &ddb223,
double &ddb231,
double &ddb232,
double &ddb233,
double &ddb331,
double &ddb332,
double &ddb333,
double &ddchi11,
double &ddchi12,
double &ddchi13,
double &ddchi22,
double &ddchi23,
double &ddchi33,
double &deldelg1111,
double &deldelg1112,
double &deldelg1113,
double &deldelg1122,
double &deldelg1123,
double &deldelg1133,
double &deldelg1211,
double &deldelg1212,
double &deldelg1213,
double &deldelg1222,
double &deldelg1223,
double &deldelg1233,
double &deldelg1311,
double &deldelg1312,
double &deldelg1313,
double &deldelg1322,
double &deldelg1323,
double &deldelg1333,
double &deldelg2211,
double &deldelg2212,
double &deldelg2213,
double &deldelg2222,
double &deldelg2223,
double &deldelg2233,
double &deldelg2311,
double &deldelg2312,
double &deldelg2313,
double &deldelg2322,
double &deldelg2323,
double &deldelg2333,
double &deldelg3311,
double &deldelg3312,
double &deldelg3313,
double &deldelg3322,
double &deldelg3323,
double &deldelg3333,
double &delG11,
double &delg111,
double &delg112,
double &delg113,
double &delG12,
double &delg122,
double &delg123,
double &delG13,
double &delg133,
double &delG21,
double &delg211,
double &delg212,
double &delg213,
double &delG22,
double &delg222,
double &delg223,
double &delG23,
double &delg233,
double &delG31,
double &delg311,
double &delg312,
double &delg313,
double &delG32,
double &delg322,
double &delg323,
double &delG33,
double &delg333,
double &dKhat1,
double &dKhat2,
double &dKhat3,
double &dTheta1,
double &dTheta2,
double &dTheta3,
double &G1,
double &g11,
double &g12,
double &g13,
double &G2,
double &g22,
double &g23,
double &G3,
double &g33,
double &kappa1,
double &kappa2,
double &Khat,
double &rA11,
double &rA12,
double &rA13,
double &rA22,
double &rA23,
double &rA33,
double &rchi,
double &rG1,
double &rg11,
double &rg12,
double &rg13,
double &rG2,
double &rg22,
double &rg23,
double &rG3,
double &rg33,
double &rKhat,
double &rTheta,
double &Theta);
static inline void z4c_contract_gamma(
const double gxx, const double gxy, const double gxz,
const double gyy, const double gyz, const double gzz,
const double gxxx, const double gxyx, const double gxzx,
const double gyyx, const double gyzx, const double gzzx,
const double gxxy, const double gxyy, const double gxzy,
const double gyyy, const double gyzy, const double gzzy,
const double gxxz, const double gxyz, const double gxzz,
const double gyyz, const double gyzz, const double gzzz,
double &Gamxa, double &Gamya, double &Gamza)
{
double det = gxx * gyy * gzz + gxy * gyz * gxz + gxz * gxy * gyz -
gxz * gyy * gxz - gxy * gxy * gzz - gxx * gyz * gyz;
const double gupxx = (gyy * gzz - gyz * gyz) / det;
const double gupxy = -(gxy * gzz - gyz * gxz) / det;
const double gupxz = (gxy * gyz - gyy * gxz) / det;
const double gupyy = (gxx * gzz - gxz * gxz) / det;
const double gupyz = -(gxx * gyz - gxy * gxz) / det;
const double gupzz = (gxx * gyy - gxy * gxy) / det;
const double Gamxxx = 0.5 * (gupxx * gxxx + gupxy * (2.0 * gxyx - gxxy) + gupxz * (2.0 * gxzx - gxxz));
const double Gamyxx = 0.5 * (gupxy * gxxx + gupyy * (2.0 * gxyx - gxxy) + gupyz * (2.0 * gxzx - gxxz));
const double Gamzxx = 0.5 * (gupxz * gxxx + gupyz * (2.0 * gxyx - gxxy) + gupzz * (2.0 * gxzx - gxxz));
const double Gamxyy = 0.5 * (gupxx * (2.0 * gxyy - gyyx) + gupxy * gyyy + gupxz * (2.0 * gyzy - gyyz));
const double Gamyyy = 0.5 * (gupxy * (2.0 * gxyy - gyyx) + gupyy * gyyy + gupyz * (2.0 * gyzy - gyyz));
const double Gamzyy = 0.5 * (gupxz * (2.0 * gxyy - gyyx) + gupyz * gyyy + gupzz * (2.0 * gyzy - gyyz));
const double Gamxzz = 0.5 * (gupxx * (2.0 * gxzz - gzzx) + gupxy * (2.0 * gyzz - gzzy) + gupxz * gzzz);
const double Gamyzz = 0.5 * (gupxy * (2.0 * gxzz - gzzx) + gupyy * (2.0 * gyzz - gzzy) + gupyz * gzzz);
const double Gamzzz = 0.5 * (gupxz * (2.0 * gxzz - gzzx) + gupyz * (2.0 * gyzz - gzzy) + gupzz * gzzz);
const double Gamxxy = 0.5 * (gupxx * gxxy + gupxy * gyyx + gupxz * (gxzy + gyzx - gxyz));
const double Gamyxy = 0.5 * (gupxy * gxxy + gupyy * gyyx + gupyz * (gxzy + gyzx - gxyz));
const double Gamzxy = 0.5 * (gupxz * gxxy + gupyz * gyyx + gupzz * (gxzy + gyzx - gxyz));
const double Gamxxz = 0.5 * (gupxx * gxxz + gupxy * (gxyz + gyzx - gxzy) + gupxz * gzzx);
const double Gamyxz = 0.5 * (gupxy * gxxz + gupyy * (gxyz + gyzx - gxzy) + gupyz * gzzx);
const double Gamzxz = 0.5 * (gupxz * gxxz + gupyz * (gxyz + gyzx - gxzy) + gupzz * gzzx);
const double Gamxyz = 0.5 * (gupxx * (gxyz + gxzy - gyzx) + gupxy * gyyz + gupxz * gzzy);
const double Gamyyz = 0.5 * (gupxy * (gxyz + gxzy - gyzx) + gupyy * gyyz + gupyz * gzzy);
const double Gamzyz = 0.5 * (gupxz * (gxyz + gxzy - gyzx) + gupyz * gyyz + gupzz * gzzy);
Gamxa = gupxx * Gamxxx + gupyy * Gamxyy + gupzz * Gamxzz +
2.0 * (gupxy * Gamxxy + gupxz * Gamxxz + gupyz * Gamxyz);
Gamya = gupxx * Gamyxx + gupyy * Gamyyy + gupzz * Gamyzz +
2.0 * (gupxy * Gamyxy + gupxz * Gamyxz + gupyz * Gamyyz);
Gamza = gupxx * Gamzxx + gupyy * Gamzyy + gupzz * Gamzzz +
2.0 * (gupxy * Gamzxy + gupxz * Gamzxz + gupyz * Gamzyz);
}
static int compute_rhs_z4c_cartesian(
int *ex, double &T, double *X, double *Y, double *Z,
double *chi_state, double *chi_constraints, double *trK,
double *dxx, double *gxy, double *gxz, double *dyy, double *gyz, double *dzz,
double *Axx, double *Axy, double *Axz, double *Ayy, double *Ayz, double *Azz,
double *Gamx, double *Gamy, double *Gamz,
double *Lap, double *betax, double *betay, double *betaz,
double *dtSfx, double *dtSfy, double *dtSfz,
double *TZ,
double *chi_rhs, double *trK_rhs,
double *gxx_rhs, double *gxy_rhs, double *gxz_rhs, double *gyy_rhs, double *gyz_rhs, double *gzz_rhs,
double *Axx_rhs, double *Axy_rhs, double *Axz_rhs, double *Ayy_rhs, double *Ayz_rhs, double *Azz_rhs,
double *Gamx_rhs, double *Gamy_rhs, double *Gamz_rhs,
double *Lap_rhs, double *betax_rhs, double *betay_rhs, double *betaz_rhs,
double *dtSfx_rhs, double *dtSfy_rhs, double *dtSfz_rhs,
double *TZ_rhs,
double *rho, double *Sx, double *Sy, double *Sz,
double *Sxx, double *Sxy, double *Sxz, double *Syy, double *Syz, double *Szz,
double *Gamxxx, double *Gamxxy, double *Gamxxz, double *Gamxyy, double *Gamxyz, double *Gamxzz,
double *Gamyxx, double *Gamyxy, double *Gamyxz, double *Gamyyy, double *Gamyyz, double *Gamyzz,
double *Gamzxx, double *Gamzxy, double *Gamzxz, double *Gamzyy, double *Gamzyz, double *Gamzzz,
double *Rxx, double *Rxy, double *Rxz, double *Ryy, double *Ryz, double *Rzz,
double *Hcon, double *Mxcon, double *Mycon, double *Mzcon, double *Gmxcon, double *Gmycon, double *Gmzcon,
int &Symmetry, int &Lev, double &eps, int &co)
{
(void)T;
const int nx = ex[0];
const int ny = ex[1];
const int nz = ex[2];
const int all = nx * ny * nz;
double alpn1[all], chin1[all], gxx[all], gyy[all], gzz[all];
double chix[all], chiy[all], chiz[all], chixx[all], chixy[all], chixz[all], chiyy[all], chiyz[all], chizz[all];
double gxxx[all], gxyx[all], gxzx[all], gyyx[all], gyzx[all], gzzx[all];
double gxxy[all], gxyy[all], gxzy[all], gyyy[all], gyzy[all], gzzy[all];
double gxxz[all], gxyz[all], gxzz[all], gyyz[all], gyzz[all], gzzz[all];
double gxxxx[all], gxxxy[all], gxxxz[all], gxxyy[all], gxxyz[all], gxxzz[all];
double gxyxx[all], gxyxy[all], gxyxz[all], gxyyy[all], gxyyz[all], gxyzz[all];
double gxzxx[all], gxzxy[all], gxzxz[all], gxzyy[all], gxzyz[all], gxzzz[all];
double gyyxx[all], gyyxy[all], gyyxz[all], gyyyy[all], gyyyz[all], gyyzz[all];
double gyzxx[all], gyzxy[all], gyzxz[all], gyzyy[all], gyzyz[all], gyzzz[all];
double gzzxx[all], gzzxy[all], gzzxz[all], gzzyy[all], gzzyz[all], gzzzz[all];
double Lapx[all], Lapy[all], Lapz[all], Lapxx[all], Lapxy[all], Lapxz[all], Lapyy[all], Lapyz[all], Lapzz[all];
double betaxx[all], betaxy[all], betaxz[all], betayx[all], betayy[all], betayz[all], betazx[all], betazy[all], betazz[all];
double dBxx[all], dBxy[all], dBxz[all], dByx[all], dByy[all], dByz[all], dBzx[all], dBzy[all], dBzz[all];
double sfxxx[all], sfxxy[all], sfxxz[all], sfxyy[all], sfxyz[all], sfxzz[all];
double sfyxx[all], sfyxy[all], sfyxz[all], sfyyy[all], sfyyz[all], sfyzz[all];
double sfzxx[all], sfzxy[all], sfzxz[all], sfzyy[all], sfzyz[all], sfzzz[all];
double Gamxx[all], Gamxy[all], Gamxz[all], Gamyx[all], Gamyy[all], Gamyz[all], Gamzx[all], Gamzy[all], Gamzz[all];
double Kx[all], Ky[all], Kz[all], TZx[all], TZy[all], TZz[all];
double Axxx[all], Axxy[all], Axxz[all], Axyx[all], Axyy[all], Axyz[all];
double Axzx[all], Axzy[all], Axzz[all], Ayyx[all], Ayyy[all], Ayyz[all];
double Ayzx[all], Ayzy[all], Ayzz[all], Azzx[all], Azzy[all], Azzz[all];
#if (GAUGE == 2 || GAUGE == 3 || GAUGE == 4 || GAUGE == 5)
double reta[all];
#endif
const double SSS[3] = {1.0, 1.0, 1.0};
const double AAS[3] = {-1.0, -1.0, 1.0};
const double ASA[3] = {-1.0, 1.0, -1.0};
const double SAA[3] = {1.0, -1.0, -1.0};
const double ASS[3] = {-1.0, 1.0, 1.0};
const double SAS[3] = {1.0, -1.0, 1.0};
const double SSA[3] = {1.0, 1.0, -1.0};
const double ONE = 1.0;
const double TWO = 2.0;
const double ZEO = 0.0;
double chiDivfloor = 1.0e-5;
double kappa1 = 2.0e-2;
double kappa2 = 0.0;
double FF = 0.75;
double eta = 2.0;
for (int idx = 0; idx < all; ++idx)
{
alpn1[idx] = Lap[idx] + ONE;
chin1[idx] = chi_state[idx] + ONE;
gxx[idx] = dxx[idx] + ONE;
gyy[idx] = dyy[idx] + ONE;
gzz[idx] = dzz[idx] + ONE;
}
fderivs(ex, betax, betaxx, betaxy, betaxz, X, Y, Z, -1.0, 1.0, 1.0, Symmetry, Lev);
fderivs(ex, betay, betayx, betayy, betayz, X, Y, Z, 1.0, -1.0, 1.0, Symmetry, Lev);
fderivs(ex, betaz, betazx, betazy, betazz, X, Y, Z, 1.0, 1.0, -1.0, Symmetry, Lev);
fderivs(ex, dtSfx, dBxx, dBxy, dBxz, X, Y, Z, -1.0, 1.0, 1.0, Symmetry, Lev);
fderivs(ex, dtSfy, dByx, dByy, dByz, X, Y, Z, 1.0, -1.0, 1.0, Symmetry, Lev);
fderivs(ex, dtSfz, dBzx, dBzy, dBzz, X, Y, Z, 1.0, 1.0, -1.0, Symmetry, Lev);
fderivs(ex, chi_state, chix, chiy, chiz, X, Y, Z, 1.0, 1.0, 1.0, Symmetry, Lev);
fderivs(ex, dxx, gxxx, gxxy, gxxz, X, Y, Z, 1.0, 1.0, 1.0, Symmetry, Lev);
fderivs(ex, gxy, gxyx, gxyy, gxyz, X, Y, Z, -1.0, -1.0, 1.0, Symmetry, Lev);
fderivs(ex, gxz, gxzx, gxzy, gxzz, X, Y, Z, -1.0, 1.0, -1.0, Symmetry, Lev);
fderivs(ex, dyy, gyyx, gyyy, gyyz, X, Y, Z, 1.0, 1.0, 1.0, Symmetry, Lev);
fderivs(ex, gyz, gyzx, gyzy, gyzz, X, Y, Z, 1.0, -1.0, -1.0, Symmetry, Lev);
fderivs(ex, dzz, gzzx, gzzy, gzzz, X, Y, Z, 1.0, 1.0, 1.0, Symmetry, Lev);
fdderivs(ex, dxx, gxxxx, gxxxy, gxxxz, gxxyy, gxxyz, gxxzz, X, Y, Z, 1.0, 1.0, 1.0, Symmetry, Lev);
fdderivs(ex, dyy, gyyxx, gyyxy, gyyxz, gyyyy, gyyyz, gyyzz, X, Y, Z, 1.0, 1.0, 1.0, Symmetry, Lev);
fdderivs(ex, dzz, gzzxx, gzzxy, gzzxz, gzzyy, gzzyz, gzzzz, X, Y, Z, 1.0, 1.0, 1.0, Symmetry, Lev);
fdderivs(ex, gxy, gxyxx, gxyxy, gxyxz, gxyyy, gxyyz, gxyzz, X, Y, Z, -1.0, -1.0, 1.0, Symmetry, Lev);
fdderivs(ex, gxz, gxzxx, gxzxy, gxzxz, gxzyy, gxzyz, gxzzz, X, Y, Z, -1.0, 1.0, -1.0, Symmetry, Lev);
fdderivs(ex, gyz, gyzxx, gyzxy, gyzxz, gyzyy, gyzyz, gyzzz, X, Y, Z, 1.0, -1.0, -1.0, Symmetry, Lev);
fderivs(ex, Gamx, Gamxx, Gamxy, Gamxz, X, Y, Z, -1.0, 1.0, 1.0, Symmetry, Lev);
fderivs(ex, Gamy, Gamyx, Gamyy, Gamyz, X, Y, Z, 1.0, -1.0, 1.0, Symmetry, Lev);
fderivs(ex, Gamz, Gamzx, Gamzy, Gamzz, X, Y, Z, 1.0, 1.0, -1.0, Symmetry, Lev);
fderivs(ex, Lap, Lapx, Lapy, Lapz, X, Y, Z, 1.0, 1.0, 1.0, Symmetry, Lev);
fderivs(ex, trK, Kx, Ky, Kz, X, Y, Z, 1.0, 1.0, 1.0, Symmetry, Lev);
fderivs(ex, TZ, TZx, TZy, TZz, X, Y, Z, 1.0, 1.0, 1.0, Symmetry, Lev);
fdderivs(ex, betax, sfxxx, sfxxy, sfxxz, sfxyy, sfxyz, sfxzz, X, Y, Z, -1.0, 1.0, 1.0, Symmetry, Lev);
fdderivs(ex, betay, sfyxx, sfyxy, sfyxz, sfyyy, sfyyz, sfyzz, X, Y, Z, 1.0, -1.0, 1.0, Symmetry, Lev);
fdderivs(ex, betaz, sfzxx, sfzxy, sfzxz, sfzyy, sfzyz, sfzzz, X, Y, Z, 1.0, 1.0, -1.0, Symmetry, Lev);
fdderivs(ex, chi_state, chixx, chixy, chixz, chiyy, chiyz, chizz, X, Y, Z, 1.0, 1.0, 1.0, Symmetry, Lev);
fdderivs(ex, Lap, Lapxx, Lapxy, Lapxz, Lapyy, Lapyz, Lapzz, X, Y, Z, 1.0, 1.0, 1.0, Symmetry, Lev);
fderivs(ex, Axx, Axxx, Axxy, Axxz, X, Y, Z, 1.0, 1.0, 1.0, Symmetry, Lev);
fderivs(ex, Axy, Axyx, Axyy, Axyz, X, Y, Z, -1.0, -1.0, 1.0, Symmetry, Lev);
fderivs(ex, Axz, Axzx, Axzy, Axzz, X, Y, Z, -1.0, 1.0, -1.0, Symmetry, Lev);
fderivs(ex, Ayy, Ayyx, Ayyy, Ayyz, X, Y, Z, 1.0, 1.0, 1.0, Symmetry, Lev);
fderivs(ex, Ayz, Ayzx, Ayzy, Ayzz, X, Y, Z, 1.0, -1.0, -1.0, Symmetry, Lev);
fderivs(ex, Azz, Azzx, Azzy, Azzz, X, Y, Z, 1.0, 1.0, 1.0, Symmetry, Lev);
for (int idx = 0; idx < all; ++idx)
{
double point_kappa1 = 0.0;
f_z4c_rhs_point(
Axx[idx], Axy[idx], Axz[idx], Ayy[idx], Ayz[idx], Azz[idx],
alpn1[idx], dtSfx[idx], dtSfy[idx], dtSfz[idx],
betax[idx], betay[idx], betaz[idx],
chin1[idx], chiDivfloor,
Lapx[idx],
Axxx[idx], Axyx[idx], Axzx[idx], Ayyx[idx], Ayzx[idx], Azzx[idx],
Lapy[idx],
Axxy[idx], Axyy[idx], Axzy[idx], Ayyy[idx], Ayzy[idx], Azzy[idx],
Lapz[idx],
Axxz[idx], Axyz[idx], Axzz[idx], Ayyz[idx], Ayzz[idx], Azzz[idx],
betaxx[idx], dBxx[idx], betayx[idx], dByx[idx], betazx[idx], dBzx[idx],
betaxy[idx], dBxy[idx], betayy[idx], dByy[idx], betazy[idx], dBzy[idx],
betaxz[idx], dBxz[idx], betayz[idx], dByz[idx], betazz[idx], dBzz[idx],
chix[idx], chiy[idx], chiz[idx],
Lapxx[idx], Lapxy[idx], Lapxz[idx], Lapyy[idx], Lapyz[idx], Lapzz[idx],
sfxxx[idx], sfyxx[idx], sfzxx[idx],
sfxxy[idx], sfyxy[idx], sfzxy[idx],
sfxxz[idx], sfyxz[idx], sfzxz[idx],
sfxyy[idx], sfyyy[idx], sfzyy[idx],
sfxyz[idx], sfyyz[idx], sfzyz[idx],
sfxzz[idx], sfyzz[idx], sfzzz[idx],
chixx[idx], chixy[idx], chixz[idx], chiyy[idx], chiyz[idx], chizz[idx],
gxxxx[idx], gxyxx[idx], gxzxx[idx], gyyxx[idx], gyzxx[idx], gzzxx[idx],
gxxxy[idx], gxyxy[idx], gxzxy[idx], gyyxy[idx], gyzxy[idx], gzzxy[idx],
gxxxz[idx], gxyxz[idx], gxzxz[idx], gyyxz[idx], gyzxz[idx], gzzxz[idx],
gxxyy[idx], gxyyy[idx], gxzyy[idx], gyyyy[idx], gyzyy[idx], gzzyy[idx],
gxxyz[idx], gxyyz[idx], gxzyz[idx], gyyyz[idx], gyzyz[idx], gzzyz[idx],
gxxzz[idx], gxyzz[idx], gxzzz[idx], gyyzz[idx], gyzzz[idx], gzzzz[idx],
Gamxx[idx], gxxx[idx], gxyx[idx], gxzx[idx],
Gamyx[idx], gyyx[idx], gyzx[idx],
Gamzx[idx], gzzx[idx],
Gamxy[idx], gxxy[idx], gxyy[idx], gxzy[idx],
Gamyy[idx], gyyy[idx], gyzy[idx],
Gamzy[idx], gzzy[idx],
Gamxz[idx], gxxz[idx], gxyz[idx], gxzz[idx],
Gamyz[idx], gyyz[idx], gyzz[idx],
Gamzz[idx], gzzz[idx],
Kx[idx], Ky[idx], Kz[idx],
TZx[idx], TZy[idx], TZz[idx],
Gamx[idx], gxx[idx], gxy[idx], gxz[idx],
Gamy[idx], gyy[idx], gyz[idx],
Gamz[idx], gzz[idx],
point_kappa1, kappa2,
trK[idx],
Axx_rhs[idx], Axy_rhs[idx], Axz_rhs[idx], Ayy_rhs[idx], Ayz_rhs[idx], Azz_rhs[idx],
chi_rhs[idx],
Gamx_rhs[idx], gxx_rhs[idx], gxy_rhs[idx], gxz_rhs[idx],
Gamy_rhs[idx], gyy_rhs[idx], gyz_rhs[idx],
Gamz_rhs[idx], gzz_rhs[idx], trK_rhs[idx], TZ_rhs[idx], TZ[idx]);
}
for (int idx = 0; idx < all; ++idx)
Lap_rhs[idx] = -TWO * alpn1[idx] * trK[idx];
#if (GAUGE == 0)
for (int idx = 0; idx < all; ++idx)
{
betax_rhs[idx] = FF * dtSfx[idx];
betay_rhs[idx] = FF * dtSfy[idx];
betaz_rhs[idx] = FF * dtSfz[idx];
dtSfx_rhs[idx] = Gamx_rhs[idx] - eta * dtSfx[idx];
dtSfy_rhs[idx] = Gamy_rhs[idx] - eta * dtSfy[idx];
dtSfz_rhs[idx] = Gamz_rhs[idx] - eta * dtSfz[idx];
}
#elif (GAUGE == 1)
for (int idx = 0; idx < all; ++idx)
{
betax_rhs[idx] = Gamx[idx] - eta * betax[idx];
betay_rhs[idx] = Gamy[idx] - eta * betay[idx];
betaz_rhs[idx] = Gamz[idx] - eta * betaz[idx];
dtSfx_rhs[idx] = ZEO;
dtSfy_rhs[idx] = ZEO;
dtSfz_rhs[idx] = ZEO;
}
#elif (GAUGE == 2)
/* Variable-eta gamma-driver, chi-sqrt denominator */
for (int idx = 0; idx < all; ++idx)
{
const double chin1i = chin1[idx];
const double det = gxx[idx] * gyy[idx] * gzz[idx]
+ gxy[idx] * gyz[idx] * gxz[idx] * 2.0
- gxz[idx] * gyy[idx] * gxz[idx]
- gxy[idx] * gxy[idx] * gzz[idx]
- gxx[idx] * gyz[idx] * gyz[idx];
const double idet = ONE / det;
const double upxx = (gyy[idx] * gzz[idx] - gyz[idx] * gyz[idx]) * idet;
const double upxy = -(gxy[idx] * gzz[idx] - gyz[idx] * gxz[idx]) * idet;
const double upxz = (gxy[idx] * gyz[idx] - gyy[idx] * gxz[idx]) * idet;
const double upyy = (gxx[idx] * gzz[idx] - gxz[idx] * gxz[idx]) * idet;
const double upyz = -(gxx[idx] * gyz[idx] - gxy[idx] * gxz[idx]) * idet;
const double upzz = (gxx[idx] * gyy[idx] - gxy[idx] * gxy[idx]) * idet;
const double grdchi2 =
upxx * chix[idx] * chix[idx] + upyy * chiy[idx] * chiy[idx] + upzz * chiz[idx] * chiz[idx]
+ TWO * (upxy * chix[idx] * chiy[idx] + upxz * chix[idx] * chiz[idx] + upyz * chiy[idx] * chiz[idx]);
const double sqchi = sqrt(chin1i);
reta[idx] = 1.31 / TWO * sqrt(grdchi2 / chin1i) / ((ONE - sqchi) * (ONE - sqchi));
betax_rhs[idx] = FF * dtSfx[idx];
betay_rhs[idx] = FF * dtSfy[idx];
betaz_rhs[idx] = FF * dtSfz[idx];
dtSfx_rhs[idx] = Gamx_rhs[idx] - reta[idx] * dtSfx[idx];
dtSfy_rhs[idx] = Gamy_rhs[idx] - reta[idx] * dtSfy[idx];
dtSfz_rhs[idx] = Gamz_rhs[idx] - reta[idx] * dtSfz[idx];
}
#elif (GAUGE == 3)
/* Variable-eta gamma-driver, chi-linear denominator */
for (int idx = 0; idx < all; ++idx)
{
const double chin1i = chin1[idx];
const double det = gxx[idx] * gyy[idx] * gzz[idx]
+ gxy[idx] * gyz[idx] * gxz[idx] * 2.0
- gxz[idx] * gyy[idx] * gxz[idx]
- gxy[idx] * gxy[idx] * gzz[idx]
- gxx[idx] * gyz[idx] * gyz[idx];
const double idet = ONE / det;
const double upxx = (gyy[idx] * gzz[idx] - gyz[idx] * gyz[idx]) * idet;
const double upxy = -(gxy[idx] * gzz[idx] - gyz[idx] * gxz[idx]) * idet;
const double upxz = (gxy[idx] * gyz[idx] - gyy[idx] * gxz[idx]) * idet;
const double upyy = (gxx[idx] * gzz[idx] - gxz[idx] * gxz[idx]) * idet;
const double upyz = -(gxx[idx] * gyz[idx] - gxy[idx] * gxz[idx]) * idet;
const double upzz = (gxx[idx] * gyy[idx] - gxy[idx] * gxy[idx]) * idet;
const double grdchi2 =
upxx * chix[idx] * chix[idx] + upyy * chiy[idx] * chiy[idx] + upzz * chiz[idx] * chiz[idx]
+ TWO * (upxy * chix[idx] * chiy[idx] + upxz * chix[idx] * chiz[idx] + upyz * chiy[idx] * chiz[idx]);
reta[idx] = 1.31 / TWO * sqrt(grdchi2 / chin1i) / ((ONE - chin1i) * (ONE - chin1i));
betax_rhs[idx] = FF * dtSfx[idx];
betay_rhs[idx] = FF * dtSfy[idx];
betaz_rhs[idx] = FF * dtSfz[idx];
dtSfx_rhs[idx] = Gamx_rhs[idx] - reta[idx] * dtSfx[idx];
dtSfy_rhs[idx] = Gamy_rhs[idx] - reta[idx] * dtSfy[idx];
dtSfz_rhs[idx] = Gamz_rhs[idx] - reta[idx] * dtSfz[idx];
}
#elif (GAUGE == 4)
/* Variable-eta gamma-driver, first-order, chi-sqrt denominator */
for (int idx = 0; idx < all; ++idx)
{
const double chin1i = chin1[idx];
const double det = gxx[idx] * gyy[idx] * gzz[idx]
+ gxy[idx] * gyz[idx] * gxz[idx] * 2.0
- gxz[idx] * gyy[idx] * gxz[idx]
- gxy[idx] * gxy[idx] * gzz[idx]
- gxx[idx] * gyz[idx] * gyz[idx];
const double idet = ONE / det;
const double upxx = (gyy[idx] * gzz[idx] - gyz[idx] * gyz[idx]) * idet;
const double upxy = -(gxy[idx] * gzz[idx] - gyz[idx] * gxz[idx]) * idet;
const double upxz = (gxy[idx] * gyz[idx] - gyy[idx] * gxz[idx]) * idet;
const double upyy = (gxx[idx] * gzz[idx] - gxz[idx] * gxz[idx]) * idet;
const double upyz = -(gxx[idx] * gyz[idx] - gxy[idx] * gxz[idx]) * idet;
const double upzz = (gxx[idx] * gyy[idx] - gxy[idx] * gxy[idx]) * idet;
const double grdchi2 =
upxx * chix[idx] * chix[idx] + upyy * chiy[idx] * chiy[idx] + upzz * chiz[idx] * chiz[idx]
+ TWO * (upxy * chix[idx] * chiy[idx] + upxz * chix[idx] * chiz[idx] + upyz * chiy[idx] * chiz[idx]);
const double sqchi = sqrt(chin1i);
reta[idx] = 1.31 / TWO * sqrt(grdchi2 / chin1i) / ((ONE - sqchi) * (ONE - sqchi));
betax_rhs[idx] = Gamx_rhs[idx] - reta[idx] * betax[idx];
betay_rhs[idx] = Gamy_rhs[idx] - reta[idx] * betay[idx];
betaz_rhs[idx] = Gamz_rhs[idx] - reta[idx] * betaz[idx];
dtSfx_rhs[idx] = ZEO;
dtSfy_rhs[idx] = ZEO;
dtSfz_rhs[idx] = ZEO;
}
#elif (GAUGE == 5)
/* Variable-eta gamma-driver, first-order, chi-linear denominator */
for (int idx = 0; idx < all; ++idx)
{
const double chin1i = chin1[idx];
const double det = gxx[idx] * gyy[idx] * gzz[idx]
+ gxy[idx] * gyz[idx] * gxz[idx] * 2.0
- gxz[idx] * gyy[idx] * gxz[idx]
- gxy[idx] * gxy[idx] * gzz[idx]
- gxx[idx] * gyz[idx] * gyz[idx];
const double idet = ONE / det;
const double upxx = (gyy[idx] * gzz[idx] - gyz[idx] * gyz[idx]) * idet;
const double upxy = -(gxy[idx] * gzz[idx] - gyz[idx] * gxz[idx]) * idet;
const double upxz = (gxy[idx] * gyz[idx] - gyy[idx] * gxz[idx]) * idet;
const double upyy = (gxx[idx] * gzz[idx] - gxz[idx] * gxz[idx]) * idet;
const double upyz = -(gxx[idx] * gyz[idx] - gxy[idx] * gxz[idx]) * idet;
const double upzz = (gxx[idx] * gyy[idx] - gxy[idx] * gxy[idx]) * idet;
const double grdchi2 =
upxx * chix[idx] * chix[idx] + upyy * chiy[idx] * chiy[idx] + upzz * chiz[idx] * chiz[idx]
+ TWO * (upxy * chix[idx] * chiy[idx] + upxz * chix[idx] * chiz[idx] + upyz * chiy[idx] * chiz[idx]);
reta[idx] = 1.31 / TWO * sqrt(grdchi2 / chin1i) / ((ONE - chin1i) * (ONE - chin1i));
betax_rhs[idx] = Gamx_rhs[idx] - reta[idx] * betax[idx];
betay_rhs[idx] = Gamy_rhs[idx] - reta[idx] * betay[idx];
betaz_rhs[idx] = Gamz_rhs[idx] - reta[idx] * betaz[idx];
dtSfx_rhs[idx] = ZEO;
dtSfy_rhs[idx] = ZEO;
dtSfz_rhs[idx] = ZEO;
}
#elif (GAUGE == 6 || GAUGE == 7)
{
/* Jason's position-dependent damping: rational (6) or exponential (7) */
int BHN = 0;
double Porg[9] = {0.0};
double Mass[3] = {0.0};
#ifdef fortran1
extern "C" { void getpbh(int &, double *, double *); }
#elif defined(fortran2)
extern "C" { void GETPBH(int &, double *, double *); }
#else
extern "C" { void getpbh_(int &, double *, double *); }
#endif
{
#ifdef fortran1
getpbh(BHN, Porg, Mass);
#elif defined(fortran2)
GETPBH(BHN, Porg, Mass);
#else
getpbh_(BHN, Porg, Mass);
#endif
}
if (BHN == 2)
{
const double M = Mass[0] + Mass[1];
const double A = 2.0 / M;
const double w1 = 12.0, w2 = 12.0;
const double C1 = 1.0 / Mass[0] - A;
const double C2 = 1.0 / Mass[1] - A;
const double BH_sep2 = (Porg[3] - Porg[0]) * (Porg[3] - Porg[0])
+ (Porg[4] - Porg[1]) * (Porg[4] - Porg[1])
+ (Porg[5] - Porg[2]) * (Porg[5] - Porg[2]);
const double inv_BH_sep2 = 1.0 / BH_sep2;
for (int k0 = 0; k0 < nz; ++k0) {
for (int j0 = 0; j0 < ny; ++j0) {
for (int i0 = 0; i0 < nx; ++i0) {
const size_t idx = idx_ex(i0, j0, k0, ex);
const double xp = X[i0], yp = Y[j0], zp = Z[k0];
const double r1 = ((Porg[0]-xp)*(Porg[0]-xp) + (Porg[1]-yp)*(Porg[1]-yp) + (Porg[2]-zp)*(Porg[2]-zp)) * inv_BH_sep2;
const double r2 = ((Porg[3]-xp)*(Porg[3]-xp) + (Porg[4]-yp)*(Porg[4]-yp) + (Porg[5]-zp)*(Porg[5]-zp)) * inv_BH_sep2;
#if (GAUGE == 6)
const double reta_val = A + C1 / (1.0 + w1 * r1) + C2 / (1.0 + w2 * r2);
#else
const double reta_val = A + C1 * exp(-w1 * r1) + C2 * exp(-w2 * r2);
#endif
betax_rhs[idx] = FF * dtSfx[idx];
betay_rhs[idx] = FF * dtSfy[idx];
betaz_rhs[idx] = FF * dtSfz[idx];
dtSfx_rhs[idx] = Gamx_rhs[idx] - reta_val * dtSfx[idx];
dtSfy_rhs[idx] = Gamy_rhs[idx] - reta_val * dtSfy[idx];
dtSfz_rhs[idx] = Gamz_rhs[idx] - reta_val * dtSfz[idx];
}}}
}
else
{
fprintf(stderr, "z4c_rhs_c: GAUGE %d requires BHN=2, got BHN=%d\n", (int)GAUGE, BHN);
return 1;
}
}
#else
#error "z4c_rhs_c.C: unsupported GAUGE value"
#endif
lopsided(ex, X, Y, Z, gxx, gxx_rhs, betax, betay, betaz, Symmetry, SSS);
lopsided(ex, X, Y, Z, gxy, gxy_rhs, betax, betay, betaz, Symmetry, AAS);
lopsided(ex, X, Y, Z, gxz, gxz_rhs, betax, betay, betaz, Symmetry, ASA);
lopsided(ex, X, Y, Z, gyy, gyy_rhs, betax, betay, betaz, Symmetry, SSS);
lopsided(ex, X, Y, Z, gyz, gyz_rhs, betax, betay, betaz, Symmetry, SAA);
lopsided(ex, X, Y, Z, gzz, gzz_rhs, betax, betay, betaz, Symmetry, SSS);
lopsided(ex, X, Y, Z, Axx, Axx_rhs, betax, betay, betaz, Symmetry, SSS);
lopsided(ex, X, Y, Z, Axy, Axy_rhs, betax, betay, betaz, Symmetry, AAS);
lopsided(ex, X, Y, Z, Axz, Axz_rhs, betax, betay, betaz, Symmetry, ASA);
lopsided(ex, X, Y, Z, Ayy, Ayy_rhs, betax, betay, betaz, Symmetry, SSS);
lopsided(ex, X, Y, Z, Ayz, Ayz_rhs, betax, betay, betaz, Symmetry, SAA);
lopsided(ex, X, Y, Z, Azz, Azz_rhs, betax, betay, betaz, Symmetry, SSS);
lopsided(ex, X, Y, Z, chi_state, chi_rhs, betax, betay, betaz, Symmetry, SSS);
lopsided(ex, X, Y, Z, trK, trK_rhs, betax, betay, betaz, Symmetry, SSS);
lopsided(ex, X, Y, Z, Gamx, Gamx_rhs, betax, betay, betaz, Symmetry, ASS);
lopsided(ex, X, Y, Z, Gamy, Gamy_rhs, betax, betay, betaz, Symmetry, SAS);
lopsided(ex, X, Y, Z, Gamz, Gamz_rhs, betax, betay, betaz, Symmetry, SSA);
lopsided(ex, X, Y, Z, Lap, Lap_rhs, betax, betay, betaz, Symmetry, SSS);
lopsided(ex, X, Y, Z, betax, betax_rhs, betax, betay, betaz, Symmetry, ASS);
lopsided(ex, X, Y, Z, betay, betay_rhs, betax, betay, betaz, Symmetry, SAS);
lopsided(ex, X, Y, Z, betaz, betaz_rhs, betax, betay, betaz, Symmetry, SSA);
#if (GAUGE == 0 || GAUGE == 2 || GAUGE == 3 || GAUGE == 6 || GAUGE == 7)
lopsided(ex, X, Y, Z, dtSfx, dtSfx_rhs, betax, betay, betaz, Symmetry, ASS);
lopsided(ex, X, Y, Z, dtSfy, dtSfy_rhs, betax, betay, betaz, Symmetry, SAS);
lopsided(ex, X, Y, Z, dtSfz, dtSfz_rhs, betax, betay, betaz, Symmetry, SSA);
#endif
lopsided(ex, X, Y, Z, TZ, TZ_rhs, betax, betay, betaz, Symmetry, SSS);
for (int idx = 0; idx < all; ++idx)
{
double Gamxa = 0.0, Gamya = 0.0, Gamza = 0.0;
z4c_contract_gamma(
gxx[idx], gxy[idx], gxz[idx], gyy[idx], gyz[idx], gzz[idx],
gxxx[idx], gxyx[idx], gxzx[idx], gyyx[idx], gyzx[idx], gzzx[idx],
gxxy[idx], gxyy[idx], gxzy[idx], gyyy[idx], gyzy[idx], gzzy[idx],
gxxz[idx], gxyz[idx], gxzz[idx], gyyz[idx], gyzz[idx], gzzz[idx],
Gamxa, Gamya, Gamza);
TZ_rhs[idx] -= alpn1[idx] * (TWO + kappa2) * kappa1 * TZ[idx];
trK_rhs[idx] += alpn1[idx] * kappa1 * (ONE - kappa2) * TZ[idx];
Gamx_rhs[idx] -= TWO * alpn1[idx] * kappa1 * (Gamx[idx] - Gamxa);
Gamy_rhs[idx] -= TWO * alpn1[idx] * kappa1 * (Gamy[idx] - Gamya);
Gamz_rhs[idx] -= TWO * alpn1[idx] * kappa1 * (Gamz[idx] - Gamza);
}
if (eps > 0.0)
{
kodis(ex, X, Y, Z, chi_state, chi_rhs, SSS, Symmetry, eps);
kodis(ex, X, Y, Z, trK, trK_rhs, SSS, Symmetry, eps);
kodis(ex, X, Y, Z, gxx, gxx_rhs, SSS, Symmetry, eps);
kodis(ex, X, Y, Z, gxy, gxy_rhs, AAS, Symmetry, eps);
kodis(ex, X, Y, Z, gxz, gxz_rhs, ASA, Symmetry, eps);
kodis(ex, X, Y, Z, gyy, gyy_rhs, SSS, Symmetry, eps);
kodis(ex, X, Y, Z, gyz, gyz_rhs, SAA, Symmetry, eps);
kodis(ex, X, Y, Z, gzz, gzz_rhs, SSS, Symmetry, eps);
kodis(ex, X, Y, Z, Axx, Axx_rhs, SSS, Symmetry, eps);
kodis(ex, X, Y, Z, Axy, Axy_rhs, AAS, Symmetry, eps);
kodis(ex, X, Y, Z, Axz, Axz_rhs, ASA, Symmetry, eps);
kodis(ex, X, Y, Z, Ayy, Ayy_rhs, SSS, Symmetry, eps);
kodis(ex, X, Y, Z, Ayz, Ayz_rhs, SAA, Symmetry, eps);
kodis(ex, X, Y, Z, Azz, Azz_rhs, SSS, Symmetry, eps);
kodis(ex, X, Y, Z, Gamx, Gamx_rhs, ASS, Symmetry, eps);
kodis(ex, X, Y, Z, Gamy, Gamy_rhs, SAS, Symmetry, eps);
kodis(ex, X, Y, Z, Gamz, Gamz_rhs, SSA, Symmetry, eps);
kodis(ex, X, Y, Z, Lap, Lap_rhs, SSS, Symmetry, eps);
kodis(ex, X, Y, Z, betax, betax_rhs, ASS, Symmetry, eps);
kodis(ex, X, Y, Z, betay, betay_rhs, SAS, Symmetry, eps);
kodis(ex, X, Y, Z, betaz, betaz_rhs, SSA, Symmetry, eps);
#if (GAUGE == 0 || GAUGE == 2 || GAUGE == 3 || GAUGE == 6 || GAUGE == 7)
kodis(ex, X, Y, Z, dtSfx, dtSfx_rhs, ASS, Symmetry, eps);
kodis(ex, X, Y, Z, dtSfy, dtSfy_rhs, SAS, Symmetry, eps);
kodis(ex, X, Y, Z, dtSfz, dtSfz_rhs, SSA, Symmetry, eps);
#endif
kodis(ex, X, Y, Z, TZ, TZ_rhs, SSS, Symmetry, eps);
}
if (co == 0)
{
#if (ABV == 0)
f_ricci_gamma(ex, X, Y, Z,
chi_constraints,
dxx, gxy, gxz, dyy, gyz, dzz,
Gamx, Gamy, Gamz,
Gamxxx, Gamxxy, Gamxxz, Gamxyy, Gamxyz, Gamxzz,
Gamyxx, Gamyxy, Gamyxz, Gamyyy, Gamyyz, Gamyzz,
Gamzxx, Gamzxy, Gamzxz, Gamzyy, Gamzyz, Gamzzz,
Rxx, Rxy, Rxz, Ryy, Ryz, Rzz,
Symmetry);
#endif
f_constraint_bssn(ex, X, Y, Z,
chi_constraints, trK,
dxx, gxy, gxz, dyy, gyz, dzz,
Axx, Axy, Axz, Ayy, Ayz, Azz,
Gamx, Gamy, Gamz,
Lap, betax, betay, betaz, rho, Sx, Sy, Sz,
Gamxxx, Gamxxy, Gamxxz, Gamxyy, Gamxyz, Gamxzz,
Gamyxx, Gamyxy, Gamyxz, Gamyyy, Gamyyz, Gamyzz,
Gamzxx, Gamzxy, Gamzxz, Gamzyy, Gamzyz, Gamzzz,
Rxx, Rxy, Rxz, Ryy, Ryz, Rzz,
Hcon, Mxcon, Mycon, Mzcon, Gmxcon, Gmycon, Gmzcon,
Symmetry);
}
return 0;
}
extern "C" int f_compute_rhs_Z4c(int *ex, double &T,
double *X, double *Y, double *Z,
double *chi, double *trK,
double *dxx, double *gxy, double *gxz, double *dyy, double *gyz, double *dzz,
double *Axx, double *Axy, double *Axz, double *Ayy, double *Ayz, double *Azz,
double *Gamx, double *Gamy, double *Gamz,
double *Lap, double *betax, double *betay, double *betaz,
double *dtSfx, double *dtSfy, double *dtSfz,
double *TZ,
double *chi_rhs, double *trK_rhs,
double *gxx_rhs, double *gxy_rhs, double *gxz_rhs, double *gyy_rhs, double *gyz_rhs, double *gzz_rhs,
double *Axx_rhs, double *Axy_rhs, double *Axz_rhs, double *Ayy_rhs, double *Ayz_rhs, double *Azz_rhs,
double *Gamx_rhs, double *Gamy_rhs, double *Gamz_rhs,
double *Lap_rhs, double *betax_rhs, double *betay_rhs, double *betaz_rhs,
double *dtSfx_rhs, double *dtSfy_rhs, double *dtSfz_rhs,
double *TZ_rhs,
double *rho, double *Sx, double *Sy, double *Sz,
double *Sxx, double *Sxy, double *Sxz, double *Syy, double *Syz, double *Szz,
double *Gamxxx, double *Gamxxy, double *Gamxxz, double *Gamxyy, double *Gamxyz, double *Gamxzz,
double *Gamyxx, double *Gamyxy, double *Gamyxz, double *Gamyyy, double *Gamyyz, double *Gamyzz,
double *Gamzxx, double *Gamzxy, double *Gamzxz, double *Gamzyy, double *Gamzyz, double *Gamzzz,
double *Rxx, double *Rxy, double *Rxz, double *Ryy, double *Ryz, double *Rzz,
double *Hcon, double *Mxcon, double *Mycon, double *Mzcon, double *Gmxcon, double *Gmycon, double *Gmzcon,
int &Symmetry, int &Lev, double &eps, int &co)
{
return compute_rhs_z4c_cartesian(
ex, T, X, Y, Z,
chi, chi, trK,
dxx, gxy, gxz, dyy, gyz, dzz,
Axx, Axy, Axz, Ayy, Ayz, Azz,
Gamx, Gamy, Gamz,
Lap, betax, betay, betaz,
dtSfx, dtSfy, dtSfz,
TZ,
chi_rhs, trK_rhs,
gxx_rhs, gxy_rhs, gxz_rhs, gyy_rhs, gyz_rhs, gzz_rhs,
Axx_rhs, Axy_rhs, Axz_rhs, Ayy_rhs, Ayz_rhs, Azz_rhs,
Gamx_rhs, Gamy_rhs, Gamz_rhs,
Lap_rhs, betax_rhs, betay_rhs, betaz_rhs,
dtSfx_rhs, dtSfy_rhs, dtSfz_rhs,
TZ_rhs,
rho, Sx, Sy, Sz,
Sxx, Sxy, Sxz, Syy, Syz, Szz,
Gamxxx, Gamxxy, Gamxxz, Gamxyy, Gamxyz, Gamxzz,
Gamyxx, Gamyxy, Gamyxz, Gamyyy, Gamyyz, Gamyzz,
Gamzxx, Gamzxy, Gamzxz, Gamzyy, Gamzyz, Gamzzz,
Rxx, Rxy, Rxz, Ryy, Ryz, Rzz,
Hcon, Mxcon, Mycon, Mzcon, Gmxcon, Gmycon, Gmzcon,
Symmetry, Lev, eps, co);
}
extern "C" int f_compute_rhs_Z4cnot(int *ex, double &T,
double *X, double *Y, double *Z,
double *chi, double *trK,
double *dxx, double *gxy, double *gxz, double *dyy, double *gyz, double *dzz,
double *Axx, double *Axy, double *Axz, double *Ayy, double *Ayz, double *Azz,
double *Gamx, double *Gamy, double *Gamz,
double *Lap, double *betax, double *betay, double *betaz,
double *dtSfx, double *dtSfy, double *dtSfz,
double *TZ,
double *chi_rhs, double *trK_rhs,
double *gxx_rhs, double *gxy_rhs, double *gxz_rhs, double *gyy_rhs, double *gyz_rhs, double *gzz_rhs,
double *Axx_rhs, double *Axy_rhs, double *Axz_rhs, double *Ayy_rhs, double *Ayz_rhs, double *Azz_rhs,
double *Gamx_rhs, double *Gamy_rhs, double *Gamz_rhs,
double *Lap_rhs, double *betax_rhs, double *betay_rhs, double *betaz_rhs,
double *dtSfx_rhs, double *dtSfy_rhs, double *dtSfz_rhs,
double *TZ_rhs,
double *rho, double *Sx, double *Sy, double *Sz,
double *Sxx, double *Sxy, double *Sxz, double *Syy, double *Syz, double *Szz,
double *Gamxxx, double *Gamxxy, double *Gamxxz, double *Gamxyy, double *Gamxyz, double *Gamxzz,
double *Gamyxx, double *Gamyxy, double *Gamyxz, double *Gamyyy, double *Gamyyz, double *Gamyzz,
double *Gamzxx, double *Gamzxy, double *Gamzxz, double *Gamzyy, double *Gamzyz, double *Gamzzz,
double *Rxx, double *Rxy, double *Rxz, double *Ryy, double *Ryz, double *Rzz,
double *Hcon, double *Mxcon, double *Mycon, double *Mzcon, double *Gmxcon, double *Gmycon, double *Gmzcon,
int &Symmetry, int &Lev, double &eps, int &co, double &chitiny)
{
const int all = ex[0] * ex[1] * ex[2];
std::vector<double> chi_clamped(chi, chi + all);
f_lowerboundset(ex, chi_clamped.data(), chitiny);
const int ret = compute_rhs_z4c_cartesian(
ex, T, X, Y, Z,
chi_clamped.data(), chi, trK,
dxx, gxy, gxz, dyy, gyz, dzz,
Axx, Axy, Axz, Ayy, Ayz, Azz,
Gamx, Gamy, Gamz,
Lap, betax, betay, betaz,
dtSfx, dtSfy, dtSfz,
TZ,
chi_rhs, trK_rhs,
gxx_rhs, gxy_rhs, gxz_rhs, gyy_rhs, gyz_rhs, gzz_rhs,
Axx_rhs, Axy_rhs, Axz_rhs, Ayy_rhs, Ayz_rhs, Azz_rhs,
Gamx_rhs, Gamy_rhs, Gamz_rhs,
Lap_rhs, betax_rhs, betay_rhs, betaz_rhs,
dtSfx_rhs, dtSfy_rhs, dtSfz_rhs,
TZ_rhs,
rho, Sx, Sy, Sz,
Sxx, Sxy, Sxz, Syy, Syz, Szz,
Gamxxx, Gamxxy, Gamxxz, Gamxyy, Gamxyz, Gamxzz,
Gamyxx, Gamyxy, Gamyxz, Gamyyy, Gamyyz, Gamyzz,
Gamzxx, Gamzxy, Gamzxz, Gamzyy, Gamzyz, Gamzzz,
Rxx, Rxy, Rxz, Ryy, Ryz, Rzz,
Hcon, Mxcon, Mycon, Mzcon, Gmxcon, Gmycon, Gmzcon,
Symmetry, Lev, eps, co);
if (ret != 0 || co != 0)
return ret;
#if (ABV == 0)
f_ricci_gamma(ex, X, Y, Z,
chi,
dxx, gxy, gxz, dyy, gyz, dzz,
Gamx, Gamy, Gamz,
Gamxxx, Gamxxy, Gamxxz, Gamxyy, Gamxyz, Gamxzz,
Gamyxx, Gamyxy, Gamyxz, Gamyyy, Gamyyz, Gamyzz,
Gamzxx, Gamzxy, Gamzxz, Gamzyy, Gamzyz, Gamzzz,
Rxx, Rxy, Rxz, Ryy, Ryz, Rzz,
Symmetry);
#endif
f_constraint_bssn(ex, X, Y, Z,
chi, trK,
dxx, gxy, gxz, dyy, gyz, dzz,
Axx, Axy, Axz, Ayy, Ayz, Azz,
Gamx, Gamy, Gamz,
Lap, betax, betay, betaz, rho, Sx, Sy, Sz,
Gamxxx, Gamxxy, Gamxxz, Gamxyy, Gamxyz, Gamxzz,
Gamyxx, Gamyxy, Gamyxz, Gamyyy, Gamyyz, Gamyzz,
Gamzxx, Gamzxy, Gamzxz, Gamzyy, Gamzyz, Gamzzz,
Rxx, Rxy, Rxz, Ryy, Ryz, Rzz,
Hcon, Mxcon, Mycon, Mzcon, Gmxcon, Gmycon, Gmzcon,
Symmetry);
return ret;
}