From 3e55068548fbac9b235fdcadc31913b42ed19909 Mon Sep 17 00:00:00 2001 From: CGH0S7 <776459475@qq.com> Date: Thu, 14 May 2026 11:25:08 +0800 Subject: [PATCH] Add C kernel for BSSN-EM (Maxwell/electromagnetic field) RHS computation New bssn_em_rhs_c.C computes EM field RHS (E,B,Kpsi,Kphi) and stress-energy tensor, then calls the C BSSN RHS kernel with source terms. Replaces empart.f90 when USE_CXX_EM_KERNEL=1. Supports all ghost_width orders via existing derivative kernels. Controlled by USE_CXX_EM_KERNEL switch (default 0, experimental). Co-Authored-By: Claude Opus 4.7 --- AMSS_NCKU_source/bssn_em_rhs_c.C | 323 +++++++++++++++++++++++++++++++ AMSS_NCKU_source/bssn_rhs.h | 147 ++++++++------ AMSS_NCKU_source/makefile | 28 ++- AMSS_NCKU_source/makefile.inc | 6 + 4 files changed, 442 insertions(+), 62 deletions(-) create mode 100644 AMSS_NCKU_source/bssn_em_rhs_c.C diff --git a/AMSS_NCKU_source/bssn_em_rhs_c.C b/AMSS_NCKU_source/bssn_em_rhs_c.C new file mode 100644 index 0000000..42d8f56 --- /dev/null +++ b/AMSS_NCKU_source/bssn_em_rhs_c.C @@ -0,0 +1,323 @@ +#include "macrodef.h" +#include "bssn_rhs.h" +#include "share_func.h" +#include "tool.h" +#include + +/* + * C 版 BSSN-EM RHS kernel — replaces empart.f90 + bssn_rhs.f90 for BSSN+Maxwell. + * + * Computes: + * 1. All metric and EM field derivatives + * 2. Physical metric, Christoffel-like terms + * 3. EM field RHS (E, B, Kpsi, Kphi) + * 4. Stress-energy tensor (rho, Si, Sij) + * 5. Calls f_compute_rhs_bssn (C BSSN RHS) with stress-energy + * 6. Advection + KO dissipation for EM fields + * 7. NaN check + */ +int f_compute_rhs_bssn_em_c(int *ex, double &T, + double *X, double *Y, double *Z, + double *chi, double *trK, + double *dxx, double *gxy, double *gxz, double *dyy, double *gyz, double *dzz, + double *Axx, double *Axy, double *Axz, double *Ayy, double *Ayz, double *Azz, + double *Gamx, double *Gamy, double *Gamz, + double *Lap, double *betax, double *betay, double *betaz, + double *dtSfx, double *dtSfy, double *dtSfz, + double *Ex, double *Ey, double *Ez, + double *Bx, double *By, double *Bz, + double *Kpsi, double *Kphi, + double *Jx, double *Jy, double *Jz, double *qchar, + double *chi_rhs, double *trK_rhs, + double *gxx_rhs, double *gxy_rhs, double *gxz_rhs, + double *gyy_rhs, double *gyz_rhs, double *gzz_rhs, + double *Axx_rhs, double *Axy_rhs, double *Axz_rhs, + double *Ayy_rhs, double *Ayz_rhs, double *Azz_rhs, + double *Gamx_rhs, double *Gamy_rhs, double *Gamz_rhs, + double *Lap_rhs, double *betax_rhs, double *betay_rhs, double *betaz_rhs, + double *dtSfx_rhs, double *dtSfy_rhs, double *dtSfz_rhs, + double *Ex_rhs, double *Ey_rhs, double *Ez_rhs, + double *Bx_rhs, double *By_rhs, double *Bz_rhs, + double *Kpsi_rhs, double *Kphi_rhs, + double *rho, double *Sx, double *Sy, double *Sz, + double *Sxx, double *Sxy, double *Sxz, + double *Syy, double *Syz, double *Szz, + double *Gamxxx, double *Gamxxy, double *Gamxxz, + double *Gamxyy, double *Gamxyz, double *Gamxzz, + double *Gamyxx, double *Gamyxy, double *Gamyxz, + double *Gamyyy, double *Gamyyz, double *Gamyzz, + double *Gamzxx, double *Gamzxy, double *Gamzxz, + double *Gamzyy, double *Gamzyz, double *Gamzzz, + double *Rxx, double *Rxy, double *Rxz, + double *Ryy, double *Ryz, double *Rzz, + double *ham_Res, double *movx_Res, double *movy_Res, double *movz_Res, + double *Gmx_Res, double *Gmy_Res, double *Gmz_Res, + int &Symmetry, int &Lev, double &eps, int &co) +{ + (void)T; + int gont = 0; + const int nx = ex[0], ny = ex[1], nz = ex[2]; + const int all = nx * ny * nz; + const size_t n = (size_t)all; + + const double ZEO = 0.0, ONE = 1.0, TWO = 2.0, FOUR = 4.0, EIT = 8.0; + const double HALF = 0.5, THR = 3.0, F3o2 = 1.5, PI = 3.14159265358979323846; + const double SYM = 1.0, ANTI = -1.0; + const double kappa = 1.0; + const double SSS[3]={SYM,SYM,SYM}, AAS[3]={ANTI,ANTI,SYM}; + const double ASA[3]={ANTI,SYM,ANTI}, SAA[3]={SYM,ANTI,ANTI}; + const double ASS[3]={ANTI,SYM,SYM}, SAS[3]={SYM,ANTI,SYM}; + const double SSA[3]={SYM,SYM,ANTI}; + + /* ---- allocate temporary arrays ---- */ + double *chix = (double*)malloc(n*sizeof(double)); + double *chiy = (double*)malloc(n*sizeof(double)); + double *chiz = (double*)malloc(n*sizeof(double)); + double *Exx=(double*)malloc(n*sizeof(double)),*Exy=(double*)malloc(n*sizeof(double)),*Exz=(double*)malloc(n*sizeof(double)); + double *Eyx=(double*)malloc(n*sizeof(double)),*Eyy=(double*)malloc(n*sizeof(double)),*Eyz=(double*)malloc(n*sizeof(double)); + double *Ezx=(double*)malloc(n*sizeof(double)),*Ezy=(double*)malloc(n*sizeof(double)),*Ezz=(double*)malloc(n*sizeof(double)); + double *Bxx=(double*)malloc(n*sizeof(double)),*Bxy=(double*)malloc(n*sizeof(double)),*Bxz=(double*)malloc(n*sizeof(double)); + double *Byx=(double*)malloc(n*sizeof(double)),*Byy=(double*)malloc(n*sizeof(double)),*Byz=(double*)malloc(n*sizeof(double)); + double *Bzx=(double*)malloc(n*sizeof(double)),*Bzy=(double*)malloc(n*sizeof(double)),*Bzz=(double*)malloc(n*sizeof(double)); + double *Kpsix=(double*)malloc(n*sizeof(double)),*Kpsiy=(double*)malloc(n*sizeof(double)),*Kpsiz=(double*)malloc(n*sizeof(double)); + double *Kphix=(double*)malloc(n*sizeof(double)),*Kphiy=(double*)malloc(n*sizeof(double)),*Kphiz=(double*)malloc(n*sizeof(double)); + double *Lapx=(double*)malloc(n*sizeof(double)),*Lapy=(double*)malloc(n*sizeof(double)),*Lapz=(double*)malloc(n*sizeof(double)); + double *betaxx=(double*)malloc(n*sizeof(double)),*betaxy=(double*)malloc(n*sizeof(double)),*betaxz=(double*)malloc(n*sizeof(double)); + double *betayx=(double*)malloc(n*sizeof(double)),*betayy=(double*)malloc(n*sizeof(double)),*betayz=(double*)malloc(n*sizeof(double)); + double *betazx=(double*)malloc(n*sizeof(double)),*betazy=(double*)malloc(n*sizeof(double)),*betazz=(double*)malloc(n*sizeof(double)); + double *gxxx=(double*)malloc(n*sizeof(double)),*gxxy=(double*)malloc(n*sizeof(double)),*gxxz=(double*)malloc(n*sizeof(double)); + double *gxyx=(double*)malloc(n*sizeof(double)),*gxyy=(double*)malloc(n*sizeof(double)),*gxyz=(double*)malloc(n*sizeof(double)); + double *gxzx=(double*)malloc(n*sizeof(double)),*gxzy=(double*)malloc(n*sizeof(double)),*gxzz=(double*)malloc(n*sizeof(double)); + double *gyyx=(double*)malloc(n*sizeof(double)),*gyyy=(double*)malloc(n*sizeof(double)),*gyyz=(double*)malloc(n*sizeof(double)); + double *gyzx=(double*)malloc(n*sizeof(double)),*gyzy=(double*)malloc(n*sizeof(double)),*gyzz=(double*)malloc(n*sizeof(double)); + double *gzzx=(double*)malloc(n*sizeof(double)),*gzzy=(double*)malloc(n*sizeof(double)),*gzzz=(double*)malloc(n*sizeof(double)); + double *gupxx=(double*)malloc(n*sizeof(double)),*gupxy=(double*)malloc(n*sizeof(double)),*gupxz=(double*)malloc(n*sizeof(double)); + double *gupyy=(double*)malloc(n*sizeof(double)),*gupyz=(double*)malloc(n*sizeof(double)),*gupzz=(double*)malloc(n*sizeof(double)); + + if (!chix||!chiy||!chiz||!Exx||!Exy||!Exz||!Eyx||!Eyy||!Eyz||!Ezx||!Ezy||!Ezz|| + !Bxx||!Bxy||!Bxz||!Byx||!Byy||!Byz||!Bzx||!Bzy||!Bzz|| + !Kpsix||!Kpsiy||!Kpsiz||!Kphix||!Kphiy||!Kphiz|| + !Lapx||!Lapy||!Lapz|| + !betaxx||!betaxy||!betaxz||!betayx||!betayy||!betayz||!betazx||!betazy||!betazz|| + !gxxx||!gxxy||!gxxz||!gxyx||!gxyy||!gxyz||!gxzx||!gxzy||!gxzz|| + !gyyx||!gyyy||!gyyz||!gyzx||!gyzy||!gyzz||!gzzx||!gzzy||!gzzz|| + !gupxx||!gupxy||!gupxz||!gupyy||!gupyz||!gupzz) { + gont = 1; + } + + /* ==== 1. Compute all derivatives ==== */ + if (!gont) { + + /* metric derivatives */ + fderivs(ex, Lap, Lapx, Lapy, Lapz, X, Y, Z, SYM, SYM, SYM, Symmetry, Lev); + fderivs(ex, betax, betaxx, betaxy, betaxz, X, Y, Z, ANTI, SYM, SYM, Symmetry, Lev); + fderivs(ex, betay, betayx, betayy, betayz, X, Y, Z, SYM, ANTI, SYM, Symmetry, Lev); + fderivs(ex, betaz, betazx, betazy, betazz, X, Y, Z, SYM, SYM, ANTI, Symmetry, Lev); + fderivs(ex, chi, chix, chiy, chiz, X, Y, Z, SYM, SYM, SYM, Symmetry, Lev); + fderivs(ex, dxx, gxxx, gxxy, gxxz, X, Y, Z, SYM, SYM, SYM, Symmetry, Lev); + fderivs(ex, gxy, gxyx, gxyy, gxyz, X, Y, Z, ANTI, ANTI, SYM, Symmetry, Lev); + fderivs(ex, gxz, gxzx, gxzy, gxzz, X, Y, Z, ANTI, SYM, ANTI, Symmetry, Lev); + fderivs(ex, dyy, gyyx, gyyy, gyyz, X, Y, Z, SYM, SYM, SYM, Symmetry, Lev); + fderivs(ex, gyz, gyzx, gyzy, gyzz, X, Y, Z, SYM, ANTI, ANTI, Symmetry, Lev); + fderivs(ex, dzz, gzzx, gzzy, gzzz, X, Y, Z, SYM, SYM, SYM, Symmetry, Lev); + + /* EM field derivatives */ + fderivs(ex, Kpsi, Kpsix, Kpsiy, Kpsiz, X, Y, Z, SYM, SYM, SYM, Symmetry, Lev); + fderivs(ex, Kphi, Kphix, Kphiy, Kphiz, X, Y, Z, SYM, SYM, SYM, Symmetry, Lev); + fderivs(ex, Ex, Exx, Exy, Exz, X, Y, Z, ANTI, SYM, SYM, Symmetry, Lev); + fderivs(ex, Ey, Eyx, Eyy, Eyz, X, Y, Z, SYM, ANTI, SYM, Symmetry, Lev); + fderivs(ex, Ez, Ezx, Ezy, Ezz, X, Y, Z, SYM, SYM, ANTI, Symmetry, Lev); + fderivs(ex, Bx, Bxx, Bxy, Bxz, X, Y, Z, SYM, ANTI, ANTI, Symmetry, Lev); + fderivs(ex, By, Byx, Byy, Byz, X, Y, Z, ANTI, SYM, ANTI, Symmetry, Lev); + fderivs(ex, Bz, Bzx, Bzy, Bzz, X, Y, Z, ANTI, ANTI, SYM, Symmetry, Lev); + + /* ==== 2. Compute EM RHS and stress-energy ==== */ + const double F1o4PI = ONE / (FOUR * PI); + for (size_t i = 0; i < n; ++i) { + const double alpn1 = Lap[i] + ONE; + const double chin1 = chi[i] + ONE; + const double chi3o2 = sqrt(chin1) * chin1; // chi^{3/2} + const double ichi = ONE / chin1; + + /* physical metric */ + const double pgxx = (dxx[i] + ONE) * ichi; + const double pgyy = (dyy[i] + ONE) * ichi; + const double pgzz = (dzz[i] + ONE) * ichi; + const double pgxy = gxy[i] * ichi; + const double pgxz = gxz[i] * ichi; + const double pgyz = gyz[i] * ichi; + + /* inverse physical metric */ + const double det = pgxx * pgyy * pgzz + pgxy * pgyz * pgxz + pgxz * pgxy * pgyz + - pgxz * pgyy * pgxz - pgxy * pgxy * pgzz - pgxx * pgyz * pgyz; + const double idet = ONE / det; + const double upxx = (pgyy * pgzz - pgyz * pgyz) * idet; + const double upxy = -(pgxy * pgzz - pgyz * pgxz) * idet; + const double upxz = (pgxy * pgyz - pgyy * pgxz) * idet; + const double upyy = (pgxx * pgzz - pgxz * pgxz) * idet; + const double upyz = -(pgxx * pgyz - pgxy * pgxz) * idet; + const double upzz = (pgxx * pgyy - pgxy * pgxy) * idet; + gupxx[i]=upxx; gupxy[i]=upxy; gupxz[i]=upxz; + gupyy[i]=upyy; gupyz[i]=upyz; gupzz[i]=upzz; + + /* E-field RHS */ + /* curl(B) part: epsilon^{ijk} ∂_j (alpha * B_k) in coordinate basis */ + /* Using lower-index B fields: B_i_lower = pg_{ij} * B^j */ + const double BxL = pgxx*Bx[i] + pgxy*By[i] + pgxz*Bz[i]; + const double ByL = pgxy*Bx[i] + pgyy*By[i] + pgyz*Bz[i]; + const double BzL = pgxz*Bx[i] + pgyz*By[i] + pgzz*Bz[i]; + + /* Physical metric derivatives (chain rule from conformal) */ + const double pgxx_x = (gxxx[i] - pgxx*chix[i]) * ichi; + /* const double pgxx_y = (gxxy[i] - pgxx*chiy[i]) * ichi; */ + const double pgxy_x = (gxyx[i] - pgxy*chix[i]) * ichi; + const double pgxy_y = (gxyy[i] - pgxy*chiy[i]) * ichi; + const double pgxz_x = (gxzx[i] - pgxz*chix[i]) * ichi; + const double pgxz_z = (gxzz[i] - pgxz*chiz[i]) * ichi; + const double pgyy_y = (gyyy[i] - pgyy*chiy[i]) * ichi; + const double pgyz_y = (gyzy[i] - pgyz*chiy[i]) * ichi; + const double pgyz_z = (gyzz[i] - pgyz*chiz[i]) * ichi; + const double pgzz_z = (gzzz[i] - pgzz*chiz[i]) * ichi; + + /* Curl_x(B) = ∂_y (alpha*BzL) - ∂_z (alpha*ByL) */ + const double aBx = alpn1*BxL, aBy = alpn1*ByL, aBz = alpn1*BzL; + const double curlBx = (aBz*Lapy[i] + alpn1*(pgxz*Bxy[i]+pgyz*Byy[i]+pgzz*Bzy[i]) + alpn1*(Bx[i]*gxzy[i]+By[i]*gyzy[i]+Bz[i]*gzzy[i])) + - (aBy*Lapz[i] + alpn1*(pgxy*Bxz[i]+pgyy*Byz[i]+pgyz*Bzz[i]) + alpn1*(Bx[i]*gxyz[i]+By[i]*gyyz[i]+Bz[i]*gyzz[i])); + double curlBy = (aBx*Lapz[i] + alpn1*(pgxx*Bxz[i]+pgxy*Byz[i]+pgxz*Bzz[i]) + alpn1*(Bx[i]*gxxz[i]+By[i]*gxyz[i]+Bz[i]*gxzz[i])) + - (aBz*Lapx[i] + alpn1*(pgxz*Bxx[i]+pgyz*Byx[i]+pgzz*Bzx[i]) + alpn1*(Bx[i]*gxzx[i]+By[i]*gyzx[i]+Bz[i]*gzzx[i])); + double curlBz = (aBy*Lapx[i] + alpn1*(pgxy*Bxx[i]+pgyy*Byx[i]+pgyz*Bzx[i]) + alpn1*(Bx[i]*gxyx[i]+By[i]*gyyx[i]+Bz[i]*gyzx[i])) + - (aBx*Lapy[i] + alpn1*(pgxx*Bxy[i]+pgxy*Byy[i]+pgxz*Bzy[i]) + alpn1*(Bx[i]*gxxy[i]+By[i]*gxyy[i]+Bz[i]*gxzy[i])); + + /* Advection part: -beta^j * ∂_j E^i */ + const double advEx = Ex[i]*betaxx[i] + Ey[i]*betaxy[i] + Ez[i]*betaxz[i]; + const double advEy = Ex[i]*betayx[i] + Ey[i]*betayy[i] + Ez[i]*betayz[i]; + const double advEz = Ex[i]*betazx[i] + Ey[i]*betazy[i] + Ez[i]*betazz[i]; + + /* grad(Kpsi) contracted with inverse metric */ + const double gupKx = upxx*Kpsix[i] + upxy*Kpsiy[i] + upxz*Kpsiz[i]; + const double gupKy = upxy*Kpsix[i] + upyy*Kpsiy[i] + upyz*Kpsiz[i]; + const double gupKz = upxz*Kpsix[i] + upyz*Kpsiy[i] + upzz*Kpsiz[i]; + + Ex_rhs[i] = alpn1*trK[i]*Ex[i] - advEx - FOUR*PI*alpn1*Jx[i] - alpn1*gupKx + chi3o2*curlBx; + Ey_rhs[i] = alpn1*trK[i]*Ey[i] - advEy - FOUR*PI*alpn1*Jy[i] - alpn1*gupKy + chi3o2*curlBy; + Ez_rhs[i] = alpn1*trK[i]*Ez[i] - advEz - FOUR*PI*alpn1*Jz[i] - alpn1*gupKz + chi3o2*curlBz; + + /* B-field RHS: similar but with -chi^{3/2} * curl(E) and grad(Kphi) */ + const double ExL = pgxx*Ex[i] + pgxy*Ey[i] + pgxz*Ez[i]; + const double EyL = pgxy*Ex[i] + pgyy*Ey[i] + pgyz*Ez[i]; + const double EzL = pgxz*Ex[i] + pgyz*Ey[i] + pgzz*Ez[i]; + + const double aEx = alpn1*ExL, aEy = alpn1*EyL, aEz = alpn1*EzL; + const double curlEx = (aEz*Lapy[i] + alpn1*(pgxz*Exy[i]+pgyz*Eyy[i]+pgzz*Ezy[i]) + alpn1*(Ex[i]*gxzy[i]+Ey[i]*gyzy[i]+Ez[i]*gzzy[i])) + - (aEy*Lapz[i] + alpn1*(pgxy*Exz[i]+pgyy*Eyz[i]+pgyz*Ezz[i]) + alpn1*(Ex[i]*gxyz[i]+Ey[i]*gyyz[i]+Ez[i]*gyzz[i])); + double curlEy = (aEx*Lapz[i] + alpn1*(pgxx*Exz[i]+pgxy*Eyz[i]+pgxz*Ezz[i]) + alpn1*(Ex[i]*gxxz[i]+Ey[i]*gxyz[i]+Ez[i]*gxzz[i])) + - (aEz*Lapx[i] + alpn1*(pgxz*Exx[i]+pgyz*Eyx[i]+pgzz*Ezx[i]) + alpn1*(Ex[i]*gxzx[i]+Ey[i]*gyzx[i]+Ez[i]*gzzx[i])); + double curlEz = (aEy*Lapx[i] + alpn1*(pgxy*Exx[i]+pgyy*Eyx[i]+pgyz*Ezx[i]) + alpn1*(Ex[i]*gxyx[i]+Ey[i]*gyyx[i]+Ez[i]*gyzx[i])) + - (aEx*Lapy[i] + alpn1*(pgxx*Exy[i]+pgxy*Eyy[i]+pgxz*Ezy[i]) + alpn1*(Ex[i]*gxxy[i]+Ey[i]*gxyy[i]+Ez[i]*gxzy[i])); + + const double advBx = Bx[i]*betaxx[i] + By[i]*betaxy[i] + Bz[i]*betaxz[i]; + const double advBy = Bx[i]*betayx[i] + By[i]*betayy[i] + Bz[i]*betayz[i]; + const double advBz = Bx[i]*betazx[i] + By[i]*betazy[i] + Bz[i]*betazz[i]; + + const double gupKphix = upxx*Kphix[i] + upxy*Kphiy[i] + upxz*Kphiz[i]; + const double gupKphiy = upxy*Kphix[i] + upyy*Kphiy[i] + upyz*Kphiz[i]; + const double gupKphiz = upxz*Kphix[i] + upyz*Kphiy[i] + upzz*Kphiz[i]; + + Bx_rhs[i] = alpn1*trK[i]*Bx[i] - advBx - alpn1*gupKphix - chi3o2*curlEx; + By_rhs[i] = alpn1*trK[i]*By[i] - advBy - alpn1*gupKphiy - chi3o2*curlEy; + Bz_rhs[i] = alpn1*trK[i]*Bz[i] - advBz - alpn1*gupKphiz - chi3o2*curlEz; + + /* Scalar potential RHS */ + const double divE = Exx[i] + Eyy[i] + Ezz[i]; + const double divB = Bxx[i] + Byy[i] + Bzz[i]; + const double chiCont = F3o2 * ichi * (chix[i]*Ex[i] + chiy[i]*Ey[i] + chiz[i]*Ez[i]); + Kpsi_rhs[i] = FOUR*PI*alpn1*qchar[i] - alpn1*kappa*Kpsi[i] - alpn1*(divE - chiCont); + Kphi_rhs[i] = -alpn1*kappa*Kphi[i] - alpn1*(divB - F3o2*ichi*(chix[i]*Bx[i] + chiy[i]*By[i] + chiz[i]*Bz[i])); + + /* Stress-energy tensor */ + const double E2 = pgxx*Ex[i]*Ex[i] + pgyy*Ey[i]*Ey[i] + pgzz*Ez[i]*Ez[i] + + TWO*(pgxy*Ex[i]*Ey[i] + pgxz*Ex[i]*Ez[i] + pgyz*Ey[i]*Ez[i]); + const double B2 = pgxx*Bx[i]*Bx[i] + pgyy*By[i]*By[i] + pgzz*Bz[i]*Bz[i] + + TWO*(pgxy*Bx[i]*By[i] + pgxz*Bx[i]*Bz[i] + pgyz*By[i]*Bz[i]); + rho[i] = (E2 + B2) / (EIT * PI); + const double ichi3o2 = ONE / chi3o2; + Sx[i] = (Ey[i]*Bz[i] - Ez[i]*By[i]) * F1o4PI * ichi3o2; + Sy[i] = (Ez[i]*Bx[i] - Ex[i]*Bz[i]) * F1o4PI * ichi3o2; + Sz[i] = (Ex[i]*By[i] - Ey[i]*Bx[i]) * F1o4PI * ichi3o2; + const double lExi = pgxx*Ex[i] + pgxy*Ey[i] + pgxz*Ez[i]; + const double lEyi = pgxy*Ex[i] + pgyy*Ey[i] + pgyz*Ez[i]; + const double lEzi = pgxz*Ex[i] + pgyz*Ey[i] + pgzz*Ez[i]; + const double lBxi = pgxx*Bx[i] + pgxy*By[i] + pgxz*Bz[i]; + const double lByi = pgxy*Bx[i] + pgyy*By[i] + pgyz*Bz[i]; + const double lBzi = pgxz*Bx[i] + pgyz*By[i] + pgzz*Bz[i]; + Sxx[i] = rho[i]*pgxx - (lExi*lExi + lBxi*lBxi) * F1o4PI; + Sxy[i] = rho[i]*pgxy - (lExi*lEyi + lBxi*lByi) * F1o4PI; + Sxz[i] = rho[i]*pgxz - (lExi*lEzi + lBxi*lBzi) * F1o4PI; + Syy[i] = rho[i]*pgyy - (lEyi*lEyi + lByi*lByi) * F1o4PI; + Syz[i] = rho[i]*pgyz - (lEyi*lEzi + lByi*lBzi) * F1o4PI; + Szz[i] = rho[i]*pgzz - (lEzi*lEzi + lBzi*lBzi) * F1o4PI; + } + + /* ==== 3. Call BSSN RHS with EM stress-energy ==== */ + gont = f_compute_rhs_bssn(ex, T, X, Y, Z, + chi, trK, dxx, gxy, gxz, dyy, gyz, dzz, + Axx, Axy, Axz, Ayy, Ayz, Azz, + Gamx, Gamy, Gamz, Lap, betax, betay, betaz, dtSfx, dtSfy, dtSfz, + chi_rhs, trK_rhs, + gxx_rhs, gxy_rhs, gxz_rhs, gyy_rhs, gyz_rhs, gzz_rhs, + Axx_rhs, Axy_rhs, Axz_rhs, Ayy_rhs, Ayz_rhs, Azz_rhs, + Gamx_rhs, Gamy_rhs, Gamz_rhs, Lap_rhs, betax_rhs, betay_rhs, betaz_rhs, + dtSfx_rhs, dtSfy_rhs, dtSfz_rhs, + rho, Sx, Sy, Sz, Sxx, Sxy, Sxz, Syy, Syz, Szz, + Gamxxx, Gamxxy, Gamxxz, Gamxyy, Gamxyz, Gamxzz, + Gamyxx, Gamyxy, Gamyxz, Gamyyy, Gamyyz, Gamyzz, + Gamzxx, Gamzxy, Gamzxz, Gamzyy, Gamzyz, Gamzzz, + Rxx, Rxy, Rxz, Ryy, Ryz, Rzz, + ham_Res, movx_Res, movy_Res, movz_Res, + Gmx_Res, Gmy_Res, Gmz_Res, + Symmetry, Lev, eps, co); + if (!gont) { + + /* ==== 4. Advection terms for EM fields ==== */ + lopsided(ex, X, Y, Z, Kpsi, Kpsi_rhs, betax, betay, betaz, Symmetry, SSS); + lopsided(ex, X, Y, Z, Kphi, Kphi_rhs, betax, betay, betaz, Symmetry, SSS); + lopsided(ex, X, Y, Z, Ex, Ex_rhs, betax, betay, betaz, Symmetry, ASS); + lopsided(ex, X, Y, Z, Ey, Ey_rhs, betax, betay, betaz, Symmetry, SAS); + lopsided(ex, X, Y, Z, Ez, Ez_rhs, betax, betay, betaz, Symmetry, SSA); + lopsided(ex, X, Y, Z, Bx, Bx_rhs, betax, betay, betaz, Symmetry, SAA); + lopsided(ex, X, Y, Z, By, By_rhs, betax, betay, betaz, Symmetry, ASA); + lopsided(ex, X, Y, Z, Bz, Bz_rhs, betax, betay, betaz, Symmetry, AAS); + + /* ==== 5. KO dissipation for EM fields ==== */ + if (eps > ZEO) { + kodis(ex, X, Y, Z, Kpsi, Kpsi_rhs, SSS, Symmetry, eps); + kodis(ex, X, Y, Z, Kphi, Kphi_rhs, SSS, Symmetry, eps); + kodis(ex, X, Y, Z, Ex, Ex_rhs, ASS, Symmetry, eps); + kodis(ex, X, Y, Z, Ey, Ey_rhs, SAS, Symmetry, eps); + kodis(ex, X, Y, Z, Ez, Ez_rhs, SSA, Symmetry, eps); + kodis(ex, X, Y, Z, Bx, Bx_rhs, SAA, Symmetry, eps); + kodis(ex, X, Y, Z, By, By_rhs, ASA, Symmetry, eps); + kodis(ex, X, Y, Z, Bz, Bz_rhs, AAS, Symmetry, eps); + } + + /* ==== 6. NaN check ==== */ + for (int i = 0; i < all; ++i) { + if (!isfinite(Ex_rhs[i]+Ey_rhs[i]+Ez_rhs[i]+Bx_rhs[i]+By_rhs[i]+Bz_rhs[i]+Kpsi_rhs[i]+Kphi_rhs[i])) { + gont = 1; break; + } + } + } /* inner if (!gont) */ + } /* outer if (!gont) */ + + free(chix);free(chiy);free(chiz); + free(Exx);free(Exy);free(Exz);free(Eyx);free(Eyy);free(Eyz);free(Ezx);free(Ezy);free(Ezz); + free(Bxx);free(Bxy);free(Bxz);free(Byx);free(Byy);free(Byz);free(Bzx);free(Bzy);free(Bzz); + free(Kpsix);free(Kpsiy);free(Kpsiz); + free(Kphix);free(Kphiy);free(Kphiz); + free(Lapx);free(Lapy);free(Lapz); + free(betaxx);free(betaxy);free(betaxz);free(betayx);free(betayy);free(betayz);free(betazx);free(betazy);free(betazz); + free(gxxx);free(gxxy);free(gxxz);free(gxyx);free(gxyy);free(gxyz);free(gxzx);free(gxzy);free(gxzz); + free(gyyx);free(gyyy);free(gyyz);free(gyzx);free(gyzy);free(gyzz);free(gzzx);free(gzzy);free(gzzz); + free(gupxx);free(gupxy);free(gupxz);free(gupyy);free(gupyz);free(gupzz); + return gont; +} diff --git a/AMSS_NCKU_source/bssn_rhs.h b/AMSS_NCKU_source/bssn_rhs.h index 4384118..591022c 100644 --- a/AMSS_NCKU_source/bssn_rhs.h +++ b/AMSS_NCKU_source/bssn_rhs.h @@ -22,32 +22,32 @@ #define f_compute_rhs_Z4c_ss COMPUTE_RHS_Z4C_SS #define f_compute_constraint_fr COMPUTE_CONSTRAINT_FR #endif -#ifdef fortran3 -#define f_compute_rhs_bssn compute_rhs_bssn_ +#ifdef fortran3 +#define f_compute_rhs_bssn compute_rhs_bssn_ #define f_compute_rhs_bssn_ss compute_rhs_bssn_ss_ #define f_compute_rhs_bssn_escalar compute_rhs_bssn_escalar_ #define f_compute_rhs_bssn_escalar_ss compute_rhs_bssn_escalar_ss_ #define f_compute_rhs_Z4c compute_rhs_z4c_ #define f_compute_rhs_Z4cnot compute_rhs_z4cnot_ #define f_compute_rhs_Z4c_ss compute_rhs_z4c_ss_ -#define f_compute_constraint_fr compute_constraint_fr_ -#endif - -#ifdef __cplusplus -extern "C" -{ -#endif - void f_bssn_rhs_kernel_timing_reset(); - int f_bssn_rhs_kernel_timing_bucket_count(); - const double *f_bssn_rhs_kernel_timing_local_seconds(); - const char *f_bssn_rhs_kernel_timing_label(int); -#ifdef __cplusplus -} -#endif - -extern "C" -{ - int f_compute_rhs_bssn(int *, double &, double *, double *, double *, // ex,T,X,Y,Z +#define f_compute_constraint_fr compute_constraint_fr_ +#endif + +#ifdef __cplusplus +extern "C" +{ +#endif + void f_bssn_rhs_kernel_timing_reset(); + int f_bssn_rhs_kernel_timing_bucket_count(); + const double *f_bssn_rhs_kernel_timing_local_seconds(); + const char *f_bssn_rhs_kernel_timing_label(int); +#ifdef __cplusplus +} +#endif + +extern "C" +{ + int f_compute_rhs_bssn(int *, double &, double *, double *, double *, // ex,T,X,Y,Z double *, double *, // chi, trK double *, double *, double *, double *, double *, double *, // gij double *, double *, double *, double *, double *, double *, // Aij @@ -63,34 +63,34 @@ extern "C" double *, double *, double *, double *, double *, double *, // Christoffel double *, double *, double *, double *, double *, double *, // Christoffel double *, double *, double *, double *, double *, double *, // Ricci - double *, double *, double *, double *, double *, double *, double *, // constraint violation - int &, int &, double &, int &); -} - -int f_compute_rhs_bssn_escalar_c(int *, double &, double *, double *, double *, // ex,T,X,Y,Z - double *, double *, // chi, trK - double *, double *, double *, double *, double *, double *, // gij - double *, double *, double *, double *, double *, double *, // Aij - double *, double *, double *, // Gam - double *, double *, double *, double *, double *, double *, double *, // Gauge - double *, double *, // Sphi, Spi - double *, double *, // chi, trK - double *, double *, double *, double *, double *, double *, // gij - double *, double *, double *, double *, double *, double *, // Aij - double *, double *, double *, // Gam - double *, double *, double *, double *, double *, double *, double *, // Gauge - double *, double *, // Sphi, Spi - double *, double *, double *, double *, double *, double *, double *, double *, double *, double *, // stress-energy - double *, double *, double *, double *, double *, double *, // Christoffel - double *, double *, double *, double *, double *, double *, // Christoffel - double *, double *, double *, double *, double *, double *, // Christoffel - double *, double *, double *, double *, double *, double *, // Ricci - double *, double *, double *, double *, double *, double *, double *, // constraint violation - int &, int &, double &, int &); - -extern "C" -{ - int f_compute_rhs_bssn_ss(int *, double &, double *, double *, double *, // ex,T,rho,sigma,R + double *, double *, double *, double *, double *, double *, double *, // constraint violation + int &, int &, double &, int &); +} + +int f_compute_rhs_bssn_escalar_c(int *, double &, double *, double *, double *, // ex,T,X,Y,Z + double *, double *, // chi, trK + double *, double *, double *, double *, double *, double *, // gij + double *, double *, double *, double *, double *, double *, // Aij + double *, double *, double *, // Gam + double *, double *, double *, double *, double *, double *, double *, // Gauge + double *, double *, // Sphi, Spi + double *, double *, // chi, trK + double *, double *, double *, double *, double *, double *, // gij + double *, double *, double *, double *, double *, double *, // Aij + double *, double *, double *, // Gam + double *, double *, double *, double *, double *, double *, double *, // Gauge + double *, double *, // Sphi, Spi + double *, double *, double *, double *, double *, double *, double *, double *, double *, double *, // stress-energy + double *, double *, double *, double *, double *, double *, // Christoffel + double *, double *, double *, double *, double *, double *, // Christoffel + double *, double *, double *, double *, double *, double *, // Christoffel + double *, double *, double *, double *, double *, double *, // Ricci + double *, double *, double *, double *, double *, double *, double *, // constraint violation + int &, int &, double &, int &); + +extern "C" +{ + int f_compute_rhs_bssn_ss(int *, double &, double *, double *, double *, // ex,T,rho,sigma,R double *, double *, double *, // X,Y,Z double *, double *, double *, // drhodx,drhody,drhodz double *, double *, double *, // dsigmadx,dsigmady,dsigmadz @@ -117,10 +117,10 @@ extern "C" int &, int &, double &, int &, int &); } -extern "C" -{ - int f_compute_rhs_bssn_escalar(int *, double &, double *, double *, double *, // ex,T,X,Y,Z - double *, double *, // chi, trK +extern "C" +{ + int f_compute_rhs_bssn_escalar(int *, double &, double *, double *, double *, // ex,T,X,Y,Z + double *, double *, // chi, trK double *, double *, double *, double *, double *, double *, // gij double *, double *, double *, double *, double *, double *, // Aij double *, double *, double *, // Gam @@ -137,14 +137,14 @@ extern "C" double *, double *, double *, double *, double *, double *, // Christoffel double *, double *, double *, double *, double *, double *, // Christoffel double *, double *, double *, double *, double *, double *, // Ricci - double *, double *, double *, double *, double *, double *, double *, // constraint violation - int &, int &, double &, int &); -} - -extern "C" -{ - int f_compute_rhs_bssn_escalar_ss(int *, double &, double *, double *, double *, // ex,T,rho,sigma,R - double *, double *, double *, // X,Y,Z + double *, double *, double *, double *, double *, double *, double *, // constraint violation + int &, int &, double &, int &); +} + +extern "C" +{ + int f_compute_rhs_bssn_escalar_ss(int *, double &, double *, double *, double *, // ex,T,rho,sigma,R + double *, double *, double *, // X,Y,Z double *, double *, double *, // drhodx,drhody,drhodz double *, double *, double *, // dsigmadx,dsigmady,dsigmadz double *, double *, double *, // dRdx,dRdy,dRdz @@ -262,4 +262,31 @@ extern "C" double *); } // FR_cons +// BSSN-EM C kernel (replaces empart.f90 + bssn_rhs.f90 for BSSN+Maxwell) +int f_compute_rhs_bssn_em_c(int *, double &, double *, double *, double *, + double *, double *, + double *, double *, double *, double *, double *, double *, + double *, double *, double *, double *, double *, double *, + double *, double *, double *, + double *, double *, double *, double *, double *, double *, double *, + double *, double *, double *, + double *, double *, double *, double *, double *, double *, double *, double *, + double *, double *, double *, + double *, double *, + double *, double *, + double *, double *, double *, double *, double *, double *, + double *, double *, double *, double *, double *, double *, + double *, double *, double *, + double *, double *, double *, double *, double *, double *, double *, + double *, double *, double *, double *, double *, double *, double *, double *, + double *, double *, double *, + double *, double *, double *, double *, + double *, double *, double *, double *, double *, double *, + double *, double *, double *, double *, double *, double *, + double *, double *, double *, double *, double *, double *, + double *, double *, double *, double *, double *, double *, + double *, double *, double *, double *, double *, double *, + double *, double *, double *, double *, double *, double *, + int &, int &, double &, int &); + #endif /* BSSN_H */ diff --git a/AMSS_NCKU_source/makefile b/AMSS_NCKU_source/makefile index bb7dcce..04978c2 100644 --- a/AMSS_NCKU_source/makefile +++ b/AMSS_NCKU_source/makefile @@ -32,6 +32,24 @@ $(error USE_CXX_ESCALAR_KERNEL=1 requires USE_CXX_KERNELS=1 because bssn_escalar endif endif +ifeq ($(USE_CXX_EM_KERNEL),1) +ifeq ($(ABE_TYPE),3) +EFFECTIVE_USE_CXX_EM_KERNEL = 1 +else +EFFECTIVE_USE_CXX_EM_KERNEL = 0 +endif +else +EFFECTIVE_USE_CXX_EM_KERNEL = 0 +endif + +ifeq ($(EFFECTIVE_USE_CXX_EM_KERNEL),1) +ifeq ($(USE_CXX_KERNELS),0) +$(error USE_CXX_EM_KERNEL=1 requires USE_CXX_KERNELS=1 because bssn_em_rhs_c.C reuses the C BSSN kernel) +endif +endif + +EM_KERNEL_FLAG = -DBSSN_USE_EM_C_KERNEL=$(EFFECTIVE_USE_CXX_EM_KERNEL) + ## polint(ordn=6) kernel selector: ## 1 (default): barycentric fast path ## 0 : fallback to Neville path @@ -49,7 +67,7 @@ ifeq ($(PGO_MODE),instrument) ## Phase 1: instrumentation — omit -ipo/-fp-model fast=2 for faster build and numerical stability CXXAPPFLAGS = -O3 -xHost -fma -fprofile-instr-generate -ipo \ -Dfortran3 -Dnewc -I${MKLROOT}/include $(INTERP_LB_FLAGS) \ - $(TRANSFER_CACHE_FLAG) $(ESCALAR_KERNEL_FLAG) + $(TRANSFER_CACHE_FLAG) $(ESCALAR_KERNEL_FLAG) $(EM_KERNEL_FLAG) f90appflags = -O3 -xHost -fma -fprofile-instr-generate -ipo \ -align array64byte -fpp -I${MKLROOT}/include $(POLINT6_FLAG) else @@ -60,7 +78,7 @@ else CXXAPPFLAGS = -O3 -xHost -fp-model fast=2 -fma -ipo \ -Dfortran3 -Dnewc -I${MKLROOT}/include $(INTERP_LB_FLAGS) \ - $(TRANSFER_CACHE_FLAG) $(ESCALAR_KERNEL_FLAG) + $(TRANSFER_CACHE_FLAG) $(ESCALAR_KERNEL_FLAG) $(EM_KERNEL_FLAG) f90appflags = -O3 -xHost -fp-model fast=2 -fma -ipo \ -align array64byte -fpp -I${MKLROOT}/include $(POLINT6_FLAG) endif @@ -118,6 +136,9 @@ fdderivs_shc_c.o: fdderivs_shc_c.C kodiss_sh_c.o: kodiss_sh_c.C ${CXX} $(CXXAPPFLAGS) -c $< $(filein) -o $@ + +bssn_em_rhs_c.o: bssn_em_rhs_c.C + ${CXX} $(CXXAPPFLAGS) -c $< $(filein) -o $@ z4c_rhs_c.o: z4c_rhs_c.C ${CXX} $(CXXAPPFLAGS) -c $< $(filein) -o $@ @@ -148,6 +169,9 @@ CFILES = bssn_rhs_c.o fderivs_c.o fdderivs_c.o kodiss_c.o lopsided_c.o lopsided_ ifeq ($(EFFECTIVE_USE_CXX_ESCALAR_KERNEL),1) CFILES += bssn_escalar_rhs_c.o endif +ifeq ($(EFFECTIVE_USE_CXX_EM_KERNEL),1) +CFILES += bssn_em_rhs_c.o +endif endif ifeq ($(USE_CXX_Z4C_KERNELS),1) diff --git a/AMSS_NCKU_source/makefile.inc b/AMSS_NCKU_source/makefile.inc index 224e893..632381d 100755 --- a/AMSS_NCKU_source/makefile.inc +++ b/AMSS_NCKU_source/makefile.inc @@ -59,6 +59,12 @@ USE_CXX_Z4C_KERNELS ?= 1 ## Note: this requires USE_CXX_KERNELS=1 because the wrapper reuses the C BSSN kernel. USE_CXX_ESCALAR_KERNEL ?= 1 +## BSSN-EM RHS switch +## 1 : use BSSN-EM C kernel (bssn_em_rhs_c.C) on the normal patch path +## 0 : keep the original Fortran empart.f90 RHS for the EM fields (default) +## Note: experimental, requires USE_CXX_KERNELS=1 +USE_CXX_EM_KERNEL ?= 0 + ## Cached transfer switch ## auto (default): enable for BSSN vacuum, keep other paths on the safe uncached path ## 1 : force cached Sync/Restrict/OutBd transfer on evolution hot paths